Diffstat (limited to 'llvm/lib/Target')

 llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp | 166
 llvm/lib/Target/AMDGPU/SIISelLowering.cpp  |  91
 llvm/lib/Target/AMDGPU/SIISelLowering.h    |   5
 llvm/lib/Target/AMDGPU/SIInstrInfo.cpp     |  13
 llvm/lib/Target/AMDGPU/SIRegisterInfo.h    |   5
 llvm/lib/Target/ARM/ARMISelLowering.cpp    |   4
 llvm/lib/Target/ARM/ARMISelLowering.h      |   3

 7 files changed, 176 insertions(+), 111 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
index 94b1e636c7b..8ad7a52c92b 100644
--- a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
@@ -302,52 +302,6 @@ static bool foldVGPRCopyIntoRegSequence(MachineInstr &MI,
return true;
}
-static bool phiHasVGPROperands(const MachineInstr &PHI,
- const MachineRegisterInfo &MRI,
- const SIRegisterInfo *TRI,
- const SIInstrInfo *TII) {
- for (unsigned i = 1; i < PHI.getNumOperands(); i += 2) {
- unsigned Reg = PHI.getOperand(i).getReg();
- if (TRI->hasVGPRs(MRI.getRegClass(Reg)))
- return true;
- }
- return false;
-}
-
-static bool phiHasBreakDef(const MachineInstr &PHI,
- const MachineRegisterInfo &MRI,
- SmallSet<unsigned, 8> &Visited) {
- for (unsigned i = 1; i < PHI.getNumOperands(); i += 2) {
- unsigned Reg = PHI.getOperand(i).getReg();
- if (Visited.count(Reg))
- continue;
-
- Visited.insert(Reg);
-
- MachineInstr *DefInstr = MRI.getVRegDef(Reg);
- switch (DefInstr->getOpcode()) {
- default:
- break;
- case AMDGPU::SI_IF_BREAK:
- return true;
- case AMDGPU::PHI:
- if (phiHasBreakDef(*DefInstr, MRI, Visited))
- return true;
- }
- }
- return false;
-}
-
-static bool hasTerminatorThatModifiesExec(const MachineBasicBlock &MBB,
- const TargetRegisterInfo &TRI) {
- for (MachineBasicBlock::const_iterator I = MBB.getFirstTerminator(),
- E = MBB.end(); I != E; ++I) {
- if (I->modifiesRegister(AMDGPU::EXEC, &TRI))
- return true;
- }
- return false;
-}
-
static bool isSafeToFoldImmIntoCopy(const MachineInstr *Copy,
const MachineInstr *MoveImm,
const SIInstrInfo *TII,
@@ -409,12 +363,6 @@ bool searchPredecessors(const MachineBasicBlock *MBB,
return false;
}
-static bool predsHasDivergentTerminator(MachineBasicBlock *MBB,
- const TargetRegisterInfo *TRI) {
- return searchPredecessors(MBB, nullptr, [TRI](MachineBasicBlock *MBB) {
- return hasTerminatorThatModifiesExec(*MBB, *TRI); });
-}
-
// Checks if there is a potential path from the From instruction to the To
// instruction. If CutOff is specified and sits on that path, we ignore the
// portion of the path above it and report the destination as not reachable.
@@ -621,63 +569,73 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
break;
}
case AMDGPU::PHI: {
- unsigned Reg = MI.getOperand(0).getReg();
- if (!TRI->isSGPRClass(MRI.getRegClass(Reg)))
- break;
-
- // We don't need to fix the PHI if the common dominator of the
- // two incoming blocks terminates with a uniform branch.
- bool HasVGPROperand = phiHasVGPROperands(MI, MRI, TRI, TII);
- if (MI.getNumExplicitOperands() == 5 && !HasVGPROperand) {
- MachineBasicBlock *MBB0 = MI.getOperand(2).getMBB();
- MachineBasicBlock *MBB1 = MI.getOperand(4).getMBB();
-
- if (!predsHasDivergentTerminator(MBB0, TRI) &&
- !predsHasDivergentTerminator(MBB1, TRI)) {
- LLVM_DEBUG(dbgs()
- << "Not fixing PHI for uniform branch: " << MI << '\n');
+ unsigned hasVGPRUses = 0;
+ SetVector<const MachineInstr *> worklist;
+ worklist.insert(&MI);
+ while (!worklist.empty()) {
+ const MachineInstr *Instr = worklist.pop_back_val();
+ unsigned Reg = Instr->getOperand(0).getReg();
+ for (const auto &Use : MRI.use_operands(Reg)) {
+ const MachineInstr *UseMI = Use.getParent();
+ if (UseMI->isCopy() || UseMI->isRegSequence()) {
+ if (UseMI->isCopy() &&
+ TRI->isPhysicalRegister(UseMI->getOperand(0).getReg()) &&
+ !TRI->isSGPRReg(MRI, UseMI->getOperand(0).getReg())) {
+ hasVGPRUses++;
+ }
+ worklist.insert(UseMI);
+ continue;
+ }
+
+ if (UseMI->isPHI()) {
+ if (!TRI->isSGPRReg(MRI, Use.getReg()))
+ hasVGPRUses++;
+ continue;
+ }
+
+ unsigned OpNo = UseMI->getOperandNo(&Use);
+ const MCInstrDesc &Desc = TII->get(UseMI->getOpcode());
+ if (Desc.OpInfo && Desc.OpInfo[OpNo].RegClass != -1) {
+ const TargetRegisterClass *OpRC =
+ TRI->getRegClass(Desc.OpInfo[OpNo].RegClass);
+ if (!TRI->isSGPRClass(OpRC) && OpRC != &AMDGPU::VS_32RegClass &&
+ OpRC != &AMDGPU::VS_64RegClass) {
+ hasVGPRUses++;
+ }
+ }
+ }
+ }
+ bool hasVGPRInput = false;
+ for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
+ unsigned InputReg = MI.getOperand(i).getReg();
+ MachineInstr *Def = MRI.getVRegDef(InputReg);
+ if (TRI->isVGPR(MRI, InputReg)) {
+ if (Def->isCopy()) {
+ unsigned SrcReg = Def->getOperand(1).getReg();
+ const TargetRegisterClass *RC =
+ TRI->isVirtualRegister(SrcReg) ? MRI.getRegClass(SrcReg)
+ : TRI->getPhysRegClass(SrcReg);
+ if (TRI->isSGPRClass(RC))
+ continue;
+ }
+ hasVGPRInput = true;
+ break;
+ } else if (Def->isCopy() &&
+ TRI->isVGPR(MRI, Def->getOperand(1).getReg())) {
+ hasVGPRInput = true;
break;
}
}
+ unsigned PHIRes = MI.getOperand(0).getReg();
+ const TargetRegisterClass *RC0 = MRI.getRegClass(PHIRes);
- // If a PHI node defines an SGPR and any of its operands are VGPRs,
- // then we need to move it to the VALU.
- //
- // Also, if a PHI node defines an SGPR and has all SGPR operands
- // we must move it to the VALU, because the SGPR operands will
- // all end up being assigned the same register, which means
- // there is a potential for a conflict if different threads take
- // different control flow paths.
- //
- // For Example:
- //
- // sgpr0 = def;
- // ...
- // sgpr1 = def;
- // ...
- // sgpr2 = PHI sgpr0, sgpr1
- // use sgpr2;
- //
- // Will Become:
- //
- // sgpr2 = def;
- // ...
- // sgpr2 = def;
- // ...
- // use sgpr2
- //
- // The one exception to this rule is when one of the operands
- // is defined by a SI_BREAK, SI_IF_BREAK, or SI_ELSE_BREAK
- // instruction. In this case, there we know the program will
- // never enter the second block (the loop) without entering
- // the first block (where the condition is computed), so there
- // is no chance for values to be over-written.
-
- SmallSet<unsigned, 8> Visited;
- if (HasVGPROperand || !phiHasBreakDef(MI, MRI, Visited)) {
- LLVM_DEBUG(dbgs() << "Fixing PHI: " << MI);
- TII->moveToVALU(MI, MDT);
+ if ((!TRI->isVGPR(MRI, PHIRes) && RC0 != &AMDGPU::VReg_1RegClass) &&
+ (hasVGPRInput || hasVGPRUses > 1)) {
+ TII->moveToVALU(MI);
+ } else {
+ TII->legalizeOperands(MI, MDT);
}
+
break;
}
case AMDGPU::REG_SEQUENCE:
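
The rewritten PHI handling replaces the old break-def heuristic with a use/def scan: it walks the PHI result's users through copies and REG_SEQUENCEs, counting uses that demand a VGPR, and separately checks whether any incoming value is a genuine VGPR (a copy whose source is an SGPR does not count). The decision then reduces to a single predicate; the following standalone sketch restates it with hypothetical names and is not code from this patch:

    // Hand-written restatement of the new decision, for illustration only.
    // A PHI is moved to the VALU when its result class is neither a VGPR
    // class nor VReg_1, and it either consumes a real VGPR input or feeds
    // more than one VGPR use.
    static bool shouldMovePHIToVALU(bool ResultIsVGPR, bool ResultIsVReg1,
                                    bool HasVGPRInput, unsigned NumVGPRUses) {
      return !ResultIsVGPR && !ResultIsVReg1 &&
             (HasVGPRInput || NumVGPRUses > 1);
    }

PHIs that fail the predicate stay on the SALU and only pass through legalizeOperands().
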
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index c2cda5ef4d7..8f93c63046c 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -9637,7 +9637,8 @@ SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node,
break;
MVT VT = Src0.getValueType().getSimpleVT();
- const TargetRegisterClass *RC = getRegClassFor(VT);
+ const TargetRegisterClass *RC =
+ getRegClassFor(VT, Src0.getNode()->isDivergent());
MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
SDValue UndefReg = DAG.getRegister(MRI.createVirtualRegister(RC), VT);
@@ -10171,3 +10172,91 @@ SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
return AMDGPUTargetLowering::shouldExpandAtomicRMWInIR(RMW);
}
+
+const TargetRegisterClass *
+SITargetLowering::getRegClassFor(MVT VT, bool isDivergent) const {
+ const TargetRegisterClass *RC = TargetLoweringBase::getRegClassFor(VT, false);
+ const SIRegisterInfo *TRI = Subtarget->getRegisterInfo();
+ if (RC == &AMDGPU::VReg_1RegClass && !isDivergent)
+ return &AMDGPU::SReg_64RegClass;
+ if (!TRI->isSGPRClass(RC) && !isDivergent)
+ return TRI->getEquivalentSGPRClass(RC);
+ else if (TRI->isSGPRClass(RC) && isDivergent)
+ return TRI->getEquivalentVGPRClass(RC);
+
+ return RC;
+}
+
+static bool hasIfBreakUser(const Value *V, SetVector<const Value *> &Visited) {
+ if (Visited.count(V))
+ return false;
+ Visited.insert(V);
+ bool Result = false;
+ for (auto U : V->users()) {
+ if (const IntrinsicInst *Intrinsic = dyn_cast<IntrinsicInst>(U)) {
+ if ((Intrinsic->getIntrinsicID() == Intrinsic::amdgcn_if_break) &&
+ (V == U->getOperand(1)))
+ Result = true;
+ } else {
+ Result = hasIfBreakUser(U, Visited);
+ }
+ if (Result)
+ break;
+ }
+ return Result;
+}
+
+bool SITargetLowering::requiresUniformRegister(MachineFunction &MF,
+ const Value *V) const {
+ if (const IntrinsicInst *Intrinsic = dyn_cast<IntrinsicInst>(V)) {
+ switch (Intrinsic->getIntrinsicID()) {
+ default:
+ return false;
+ case Intrinsic::amdgcn_if_break:
+ return true;
+ }
+ }
+ if (const ExtractValueInst *ExtValue = dyn_cast<ExtractValueInst>(V)) {
+ if (const IntrinsicInst *Intrinsic =
+ dyn_cast<IntrinsicInst>(ExtValue->getOperand(0))) {
+ switch (Intrinsic->getIntrinsicID()) {
+ default:
+ return false;
+ case Intrinsic::amdgcn_if:
+ case Intrinsic::amdgcn_else: {
+ ArrayRef<unsigned> Indices = ExtValue->getIndices();
+ if (Indices.size() == 1 && Indices[0] == 1) {
+ return true;
+ }
+ }
+ }
+ }
+ }
+ if (const CallInst *CI = dyn_cast<CallInst>(V)) {
+ if (isa<InlineAsm>(CI->getCalledValue())) {
+ const SIRegisterInfo *SIRI = Subtarget->getRegisterInfo();
+ ImmutableCallSite CS(CI);
+ TargetLowering::AsmOperandInfoVector TargetConstraints = ParseConstraints(
+ MF.getDataLayout(), Subtarget->getRegisterInfo(), CS);
+ for (auto &TC : TargetConstraints) {
+ if (TC.Type == InlineAsm::isOutput) {
+ ComputeConstraintToUse(TC, SDValue());
+ unsigned AssignedReg;
+ const TargetRegisterClass *RC;
+ std::tie(AssignedReg, RC) = getRegForInlineAsmConstraint(
+ SIRI, TC.ConstraintCode,
+ getSimpleValueType(MF.getDataLayout(), CS.getType()));
+ if (RC) {
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ if (AssignedReg != 0 && SIRI->isSGPRReg(MRI, AssignedReg))
+ return true;
+ else if (SIRI->isSGPRClass(RC))
+ return true;
+ }
+ }
+ }
+ }
+ }
+ SetVector<const Value *> Visited;
+ return hasIfBreakUser(V, Visited);
+}
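
Together the two new hooks let divergence analysis drive register-class assignment: getRegClassFor() maps a uniform value to the equivalent SGPR class and a divergent one to the equivalent VGPR class, with a uniform VReg_1 widened to SReg_64 (the wave-mask width), while requiresUniformRegister() pins values to SGPRs when they feed amdgcn.if_break, when they are the mask result of amdgcn.if/amdgcn.else, or when an inline-asm constraint demands a scalar output. A hypothetical call site, assuming TLI and a selected node N are in scope (this fragment is illustrative, not from the patch):

    // The divergence bit of the node picks the register bank for its value.
    const TargetRegisterClass *RC =
        TLI->getRegClassFor(N->getSimpleValueType(0), N->isDivergent());
    // A uniform i32 lands in an SGPR class; a divergent one in VGPR_32.
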
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h
index 60a474f51e5..094a0b054e2 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -367,7 +367,10 @@ public:
bool SNaN = false,
unsigned Depth = 0) const override;
AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *) const override;
-
+ virtual const TargetRegisterClass *
+ getRegClassFor(MVT VT, bool isDivergent) const override;
+ virtual bool requiresUniformRegister(MachineFunction &MF,
+ const Value *V) const override;
unsigned getPrefLoopAlignment(MachineLoop *ML) const override;
};
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index e42ed3505cf..14f5dbe6ad4 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -2219,6 +2219,10 @@ bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
// These come before src2.
removeModOperands(UseMI);
UseMI.setDesc(get(NewOpc));
+ // It might happen that UseMI was commuted and we now have an SGPR as
+ // SRC1. If so, the inlined constant together with an SGPR operand would
+ // be illegal, so legalize the operands.
+ legalizeOperands(UseMI);
bool DeleteDef = MRI->hasOneNonDBGUse(Reg);
if (DeleteDef)
@@ -3913,7 +3917,7 @@ void SIInstrInfo::legalizeGenericOperand(MachineBasicBlock &InsertMBB,
return;
// Try to eliminate the copy if it is copying an immediate value.
- if (Def->isMoveImmediate())
+ if (Def->isMoveImmediate() && DstRC != &AMDGPU::VReg_1RegClass)
FoldImmediate(*Copy, *Def, OpReg, &MRI);
}
@@ -4147,7 +4151,10 @@ void SIInstrInfo::legalizeOperands(MachineInstr &MI,
if (VRC || !RI.isSGPRClass(getOpRegClass(MI, 0))) {
if (!VRC) {
assert(SRC);
- VRC = RI.getEquivalentVGPRClass(SRC);
+ if (getOpRegClass(MI, 0) == &AMDGPU::VReg_1RegClass) {
+ VRC = &AMDGPU::VReg_1RegClass;
+ } else
+ VRC = RI.getEquivalentVGPRClass(SRC);
}
RC = VRC;
} else {
@@ -5309,7 +5316,7 @@ const TargetRegisterClass *SIInstrInfo::getDestEquivalentVGPRClass(
case AMDGPU::INSERT_SUBREG:
case AMDGPU::WQM:
case AMDGPU::WWM:
- if (RI.hasVGPRs(NewDstRC))
+ if (RI.hasVGPRs(NewDstRC) || NewDstRC == &AMDGPU::VReg_1RegClass)
return nullptr;
NewDstRC = RI.getEquivalentVGPRClass(NewDstRC);
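
The SIInstrInfo changes keep existing folds from undoing the new class assignments: FoldImmediate() re-legalizes a possibly commuted instruction whose surviving SGPR source would clash with the freshly inlined constant, and VReg_1 is excluded from immediate folding and from VGPR-equivalence queries because its 1-bit values are lowered by a separate pass. The constraint behind the FoldImmediate() fix is the VALU constant-bus limit; a simplified, hand-written sketch that ignores subtarget and opcode variations:

    // Simplified restatement of the constraint the added legalizeOperands()
    // call restores: a VALU instruction has one constant-bus slot, and a
    // literal constant and an SGPR source each consume it.
    struct SrcOp { bool IsLiteral; bool IsSGPR; };

    static bool constantBusLegal(SrcOp Src0, SrcOp Src1) {
      int BusUses = Src0.IsLiteral + Src0.IsSGPR +
                    Src1.IsLiteral + Src1.IsSGPR;
      return BusUses <= 1;
    }
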
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
index bfdc1ef9645..e2df3ae5ea7 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
@@ -195,6 +195,11 @@ public:
unsigned Reg) const;
bool isVGPR(const MachineRegisterInfo &MRI, unsigned Reg) const;
+ virtual bool
+ isDivergentRegClass(const TargetRegisterClass *RC) const override {
+ return !isSGPRClass(RC);
+ }
+
bool isSGPRPressureSet(unsigned SetID) const {
return SGPRPressureSets.test(SetID) && !VGPRPressureSets.test(SetID);
}
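
The new isDivergentRegClass() hook gives target-independent code a way to ask whether a register class holds per-lane values; on AMDGPU, anything outside the SGPR classes qualifies. Illustrative expectations, assuming a const SIRegisterInfo *TRI is in scope (hand-written, not from the patch):

    assert(TRI->isDivergentRegClass(&AMDGPU::VGPR_32RegClass));  // per-lane
    assert(!TRI->isDivergentRegClass(&AMDGPU::SReg_32RegClass)); // uniform
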
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 677e4d5b2e8..88d318e7bb3 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -1447,7 +1447,9 @@ EVT ARMTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
/// getRegClassFor - Return the register class that should be used for the
/// specified value type.
-const TargetRegisterClass *ARMTargetLowering::getRegClassFor(MVT VT) const {
+const TargetRegisterClass *
+ARMTargetLowering::getRegClassFor(MVT VT, bool isDivergent) const {
+ (void)isDivergent;
// Map v4i64 to QQ registers but do not make the type legal. Similarly map
// v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to
// load / store 4 to 8 consecutive D registers.
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h
index 3b94cb0dcb0..8e254d75b1c 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -456,7 +456,8 @@ class VectorType;
/// getRegClassFor - Return the register class that should be used for the
/// specified value type.
- const TargetRegisterClass *getRegClassFor(MVT VT) const override;
+ const TargetRegisterClass *
+ getRegClassFor(MVT VT, bool isDivergent = false) const override;
/// Returns true if a cast between SrcAS and DestAS is a noop.
bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override {