Diffstat (limited to 'llvm/lib/Target')
-rw-r--r--  llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp | 169
-rw-r--r--  llvm/lib/Target/AMDGPU/SIISelLowering.cpp  |  87
-rw-r--r--  llvm/lib/Target/AMDGPU/SIISelLowering.h    |   5
-rw-r--r--  llvm/lib/Target/AMDGPU/SIInstrInfo.cpp     |   9
4 files changed, 107 insertions(+), 163 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
index fb151b4ffdc..cc16d931dcf 100644
--- a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
@@ -302,6 +302,52 @@ static bool foldVGPRCopyIntoRegSequence(MachineInstr &MI,
return true;
}
+static bool phiHasVGPROperands(const MachineInstr &PHI,
+ const MachineRegisterInfo &MRI,
+ const SIRegisterInfo *TRI,
+ const SIInstrInfo *TII) {
+ for (unsigned i = 1; i < PHI.getNumOperands(); i += 2) {
+ unsigned Reg = PHI.getOperand(i).getReg();
+ if (TRI->hasVGPRs(MRI.getRegClass(Reg)))
+ return true;
+ }
+ return false;
+}
+
+static bool phiHasBreakDef(const MachineInstr &PHI,
+ const MachineRegisterInfo &MRI,
+ SmallSet<unsigned, 8> &Visited) {
+ for (unsigned i = 1; i < PHI.getNumOperands(); i += 2) {
+ unsigned Reg = PHI.getOperand(i).getReg();
+ if (Visited.count(Reg))
+ continue;
+
+ Visited.insert(Reg);
+
+ MachineInstr *DefInstr = MRI.getVRegDef(Reg);
+ switch (DefInstr->getOpcode()) {
+ default:
+ break;
+ case AMDGPU::SI_IF_BREAK:
+ return true;
+ case AMDGPU::PHI:
+ if (phiHasBreakDef(*DefInstr, MRI, Visited))
+ return true;
+ }
+ }
+ return false;
+}
+
+static bool hasTerminatorThatModifiesExec(const MachineBasicBlock &MBB,
+ const TargetRegisterInfo &TRI) {
+ for (MachineBasicBlock::const_iterator I = MBB.getFirstTerminator(),
+ E = MBB.end(); I != E; ++I) {
+ if (I->modifiesRegister(AMDGPU::EXEC, &TRI))
+ return true;
+ }
+ return false;
+}
+
static bool isSafeToFoldImmIntoCopy(const MachineInstr *Copy,
const MachineInstr *MoveImm,
const SIInstrInfo *TII,
@@ -363,6 +409,12 @@ bool searchPredecessors(const MachineBasicBlock *MBB,
return false;
}
+static bool predsHasDivergentTerminator(MachineBasicBlock *MBB,
+ const TargetRegisterInfo *TRI) {
+ return searchPredecessors(MBB, nullptr, [TRI](MachineBasicBlock *MBB) {
+ return hasTerminatorThatModifiesExec(*MBB, *TRI); });
+}
+
// Checks if there is a potential path from instruction From to instruction To.
// If CutOff is specified and sits on that path, we ignore the higher portion
// of the path and report it as not reachable.
@@ -569,77 +621,62 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
break;
}
case AMDGPU::PHI: {
- unsigned hasVGPRUses = 0;
- SetVector<const MachineInstr *> worklist;
- worklist.insert(&MI);
- while (!worklist.empty()) {
- const MachineInstr *Instr = worklist.pop_back_val();
- unsigned Reg = Instr->getOperand(0).getReg();
- for (const auto &Use : MRI.use_operands(Reg)) {
- const MachineInstr *UseMI = Use.getParent();
- if (UseMI->isCopy() || UseMI->isRegSequence()) {
- if (UseMI->isCopy() &&
- TRI->isPhysicalRegister(UseMI->getOperand(0).getReg()) &&
- !TRI->isSGPRReg(MRI, UseMI->getOperand(0).getReg())) {
- hasVGPRUses++;
- }
- worklist.insert(UseMI);
- continue;
- }
-
- if (UseMI->isPHI()) {
- const TargetRegisterClass *UseRC = MRI.getRegClass(Use.getReg());
- if (!TRI->isSGPRReg(MRI, Use.getReg()) &&
- UseRC != &AMDGPU::VReg_1RegClass)
- hasVGPRUses++;
- continue;
- }
-
- unsigned OpNo = UseMI->getOperandNo(&Use);
- const MCInstrDesc &Desc = TII->get(UseMI->getOpcode());
- if (!Desc.isPseudo() && Desc.OpInfo &&
- OpNo < Desc.getNumOperands() &&
- Desc.OpInfo[OpNo].RegClass != -1) {
- const TargetRegisterClass *OpRC =
- TRI->getRegClass(Desc.OpInfo[OpNo].RegClass);
- if (!TRI->isSGPRClass(OpRC) && OpRC != &AMDGPU::VS_32RegClass &&
- OpRC != &AMDGPU::VS_64RegClass) {
- hasVGPRUses++;
- }
- }
- }
- }
- bool hasVGPRInput = false;
- for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
- unsigned InputReg = MI.getOperand(i).getReg();
- MachineInstr *Def = MRI.getVRegDef(InputReg);
- if (TRI->isVGPR(MRI, InputReg)) {
- if (Def->isCopy()) {
- unsigned SrcReg = Def->getOperand(1).getReg();
- const TargetRegisterClass *RC =
- TRI->isVirtualRegister(SrcReg) ? MRI.getRegClass(SrcReg)
- : TRI->getPhysRegClass(SrcReg);
- if (TRI->isSGPRClass(RC))
- continue;
- }
- hasVGPRInput = true;
- break;
- } else if (Def->isCopy() &&
- TRI->isVGPR(MRI, Def->getOperand(1).getReg())) {
- hasVGPRInput = true;
+ unsigned Reg = MI.getOperand(0).getReg();
+ if (!TRI->isSGPRClass(MRI.getRegClass(Reg)))
+ break;
+
+ // We don't need to fix the PHI if the common dominator of the
+ // two incoming blocks terminates with a uniform branch.
+ bool HasVGPROperand = phiHasVGPROperands(MI, MRI, TRI, TII);
+ if (MI.getNumExplicitOperands() == 5 && !HasVGPROperand) {
+ MachineBasicBlock *MBB0 = MI.getOperand(2).getMBB();
+ MachineBasicBlock *MBB1 = MI.getOperand(4).getMBB();
+
+ if (!predsHasDivergentTerminator(MBB0, TRI) &&
+ !predsHasDivergentTerminator(MBB1, TRI)) {
+ LLVM_DEBUG(dbgs()
+ << "Not fixing PHI for uniform branch: " << MI << '\n');
break;
}
}
- unsigned PHIRes = MI.getOperand(0).getReg();
- const TargetRegisterClass *RC0 = MRI.getRegClass(PHIRes);
- if ((!TRI->isVGPR(MRI, PHIRes) && RC0 != &AMDGPU::VReg_1RegClass) &&
- (hasVGPRInput || hasVGPRUses > 1)) {
+ // If a PHI node defines an SGPR and any of its operands are VGPRs,
+ // then we need to move it to the VALU.
+ //
+ // Also, if a PHI node defines an SGPR and has all SGPR operands
+ // we must move it to the VALU, because the SGPR operands will
+ // all end up being assigned the same register, which means
+ // there is a potential for a conflict if different threads take
+ // different control flow paths.
+ //
+ // For Example:
+ //
+ // sgpr0 = def;
+ // ...
+ // sgpr1 = def;
+ // ...
+ // sgpr2 = PHI sgpr0, sgpr1
+ // use sgpr2;
+ //
+ // Will Become:
+ //
+ // sgpr2 = def;
+ // ...
+ // sgpr2 = def;
+ // ...
+ // use sgpr2
+ //
+ // The one exception to this rule is when one of the operands
+ // is defined by a SI_BREAK, SI_IF_BREAK, or SI_ELSE_BREAK
+ // instruction. In this case, we know the program will
+ // never enter the second block (the loop) without entering
+ // the first block (where the condition is computed), so there
+ // is no chance for values to be over-written.
+
+ SmallSet<unsigned, 8> Visited;
+ if (HasVGPROperand || !phiHasBreakDef(MI, MRI, Visited)) {
LLVM_DEBUG(dbgs() << "Fixing PHI: " << MI);
- TII->moveToVALU(MI);
- } else {
- LLVM_DEBUG(dbgs() << "Legalizing PHI: " << MI);
- TII->legalizeOperands(MI, MDT);
+ TII->moveToVALU(MI, MDT);
}
break;
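
Roughly, the new AMDGPU::PHI handling above reduces to the following decision. This is a minimal standalone sketch in plain C++, not the MachineInstr/SIInstrInfo API used in the patch; PhiSummary and shouldMoveToVALU are hypothetical names, and the patch's check that the PHI has exactly two incoming values is folded into AllPredsUniform for brevity.

#include <cstdio>

// Hypothetical summary of the facts the pass computes for one PHI.
struct PhiSummary {
  bool DefinesSGPR;     // result register is in an SGPR class
  bool HasVGPROperand;  // phiHasVGPROperands(...) in the patch
  bool AllPredsUniform; // no predecessor terminator modifies EXEC
  bool HasBreakDef;     // phiHasBreakDef(...) in the patch
};

// Mirrors the control flow of the AMDGPU::PHI case: only SGPR-defining
// PHIs are considered; a PHI fed only by uniform branches is left alone;
// otherwise it is moved to the VALU unless its operands trace back to a
// SI_IF_BREAK-style definition.
static bool shouldMoveToVALU(const PhiSummary &P) {
  if (!P.DefinesSGPR)
    return false;
  if (!P.HasVGPROperand && P.AllPredsUniform)
    return false;
  return P.HasVGPROperand || !P.HasBreakDef;
}

int main() {
  PhiSummary UniformPhi{true, false, true, false};
  PhiSummary DivergentPhi{true, true, false, false};
  std::printf("uniform PHI moved: %d\n", shouldMoveToVALU(UniformPhi));     // prints 0
  std::printf("divergent PHI moved: %d\n", shouldMoveToVALU(DivergentPhi)); // prints 1
  return 0;
}
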
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index f5fcb7cdfe0..5a8e8b14be3 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -10194,90 +10194,3 @@ SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
return AMDGPUTargetLowering::shouldExpandAtomicRMWInIR(RMW);
}
-
-const TargetRegisterClass *
-SITargetLowering::getRegClassFor(MVT VT, bool isDivergent) const {
- const TargetRegisterClass *RC = TargetLoweringBase::getRegClassFor(VT, false);
- const SIRegisterInfo *TRI = Subtarget->getRegisterInfo();
- if (RC == &AMDGPU::VReg_1RegClass && !isDivergent)
- return &AMDGPU::SReg_64RegClass;
- if (!TRI->isSGPRClass(RC) && !isDivergent)
- return TRI->getEquivalentSGPRClass(RC);
- else if (TRI->isSGPRClass(RC) && isDivergent)
- return TRI->getEquivalentVGPRClass(RC);
-
- return RC;
-}
-
-static bool hasIfBreakUser(const Value *V, SetVector<const Value *> &Visited) {
- if (Visited.count(V))
- return false;
- Visited.insert(V);
- bool Result = false;
- for (auto U : V->users()) {
- if (const IntrinsicInst *Intrinsic = dyn_cast<IntrinsicInst>(U)) {
- if ((Intrinsic->getIntrinsicID() == Intrinsic::amdgcn_if_break) &&
- (V == U->getOperand(1)))
- Result = true;
- } else {
- Result = hasIfBreakUser(U, Visited);
- }
- if (Result)
- break;
- }
- return Result;
-}
-
-bool SITargetLowering::requiresUniformRegister(MachineFunction &MF,
- const Value *V) const {
- if (const IntrinsicInst *Intrinsic = dyn_cast<IntrinsicInst>(V)) {
- switch (Intrinsic->getIntrinsicID()) {
- default:
- return false;
- case Intrinsic::amdgcn_if_break:
- return true;
- }
- }
- if (const ExtractValueInst *ExtValue = dyn_cast<ExtractValueInst>(V)) {
- if (const IntrinsicInst *Intrinsic =
- dyn_cast<IntrinsicInst>(ExtValue->getOperand(0))) {
- switch (Intrinsic->getIntrinsicID()) {
- default:
- return false;
- case Intrinsic::amdgcn_if:
- case Intrinsic::amdgcn_else: {
- ArrayRef<unsigned> Indices = ExtValue->getIndices();
- if (Indices.size() == 1 && Indices[0] == 1) {
- return true;
- }
- }
- }
- }
- }
- if (const CallInst *CI = dyn_cast<CallInst>(V)) {
- if (isa<InlineAsm>(CI->getCalledValue())) {
- const SIRegisterInfo *SIRI = Subtarget->getRegisterInfo();
- ImmutableCallSite CS(CI);
- TargetLowering::AsmOperandInfoVector TargetConstraints = ParseConstraints(
- MF.getDataLayout(), Subtarget->getRegisterInfo(), CS);
- for (auto &TC : TargetConstraints) {
- if (TC.Type == InlineAsm::isOutput) {
- ComputeConstraintToUse(TC, SDValue());
- unsigned AssignedReg;
- const TargetRegisterClass *RC;
- std::tie(AssignedReg, RC) = getRegForInlineAsmConstraint(
- SIRI, TC.ConstraintCode, TC.ConstraintVT);
- if (RC) {
- MachineRegisterInfo &MRI = MF.getRegInfo();
- if (AssignedReg != 0 && SIRI->isSGPRReg(MRI, AssignedReg))
- return true;
- else if (SIRI->isSGPRClass(RC))
- return true;
- }
- }
- }
- }
- }
- SetVector<const Value *> Visited;
- return hasIfBreakUser(V, Visited);
-}
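
For context, the removed hasIfBreakUser helper above is a standard recursive use-walk with a visited set: it follows all transitive users of a value and reports whether any of them is an amdgcn_if_break call. A minimal sketch of that pattern on a toy graph follows; the Node type and IsIfBreak flag are hypothetical stand-ins for llvm::Value and the intrinsic check, and the original's extra test that the value is the intrinsic's second operand is omitted.

#include <cstdio>
#include <set>
#include <vector>

// Hypothetical stand-in for llvm::Value: each node knows its users and
// whether it represents an amdgcn_if_break call.
struct Node {
  bool IsIfBreak;
  std::vector<const Node *> Users;
};

// Walk the users of V transitively, guarding against cycles with a
// visited set, and report whether any transitive user is an if_break.
static bool hasIfBreakUser(const Node *V, std::set<const Node *> &Visited) {
  if (!Visited.insert(V).second)
    return false;
  for (const Node *U : V->Users)
    if (U->IsIfBreak || hasIfBreakUser(U, Visited))
      return true;
  return false;
}

int main() {
  Node Break{true, {}};
  Node Mid{false, {&Break}};
  Node Root{false, {&Mid}};
  std::set<const Node *> Visited;
  std::printf("root feeds if_break: %d\n", hasIfBreakUser(&Root, Visited)); // prints 1
  return 0;
}
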
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h
index 4d7dac91cd1..fde722df72e 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -367,10 +367,7 @@ public:
bool SNaN = false,
unsigned Depth = 0) const override;
AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *) const override;
- virtual const TargetRegisterClass *
- getRegClassFor(MVT VT, bool isDivergent) const override;
- virtual bool requiresUniformRegister(MachineFunction &MF,
- const Value *V) const override;
+
unsigned getPrefLoopAlignment(MachineLoop *ML) const override;
};
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 48257b01b86..0b489b090cc 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -3917,7 +3917,7 @@ void SIInstrInfo::legalizeGenericOperand(MachineBasicBlock &InsertMBB,
return;
// Try to eliminate the copy if it is copying an immediate value.
- if (Def->isMoveImmediate() && DstRC != &AMDGPU::VReg_1RegClass)
+ if (Def->isMoveImmediate())
FoldImmediate(*Copy, *Def, OpReg, &MRI);
}
@@ -4151,10 +4151,7 @@ void SIInstrInfo::legalizeOperands(MachineInstr &MI,
if (VRC || !RI.isSGPRClass(getOpRegClass(MI, 0))) {
if (!VRC) {
assert(SRC);
- if (getOpRegClass(MI, 0) == &AMDGPU::VReg_1RegClass) {
- VRC = &AMDGPU::VReg_1RegClass;
- } else
- VRC = RI.getEquivalentVGPRClass(SRC);
+ VRC = RI.getEquivalentVGPRClass(SRC);
}
RC = VRC;
} else {
@@ -5316,7 +5313,7 @@ const TargetRegisterClass *SIInstrInfo::getDestEquivalentVGPRClass(
case AMDGPU::INSERT_SUBREG:
case AMDGPU::WQM:
case AMDGPU::WWM:
- if (RI.hasVGPRs(NewDstRC) || NewDstRC == &AMDGPU::VReg_1RegClass)
+ if (RI.hasVGPRs(NewDstRC))
return nullptr;
NewDstRC = RI.getEquivalentVGPRClass(NewDstRC);