Diffstat (limited to 'llvm/lib/Target')
 llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp | 169
 llvm/lib/Target/AMDGPU/SIISelLowering.cpp  |  87
 llvm/lib/Target/AMDGPU/SIISelLowering.h    |   5
 llvm/lib/Target/AMDGPU/SIInstrInfo.cpp     |   9
4 files changed, 107 insertions, 163 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
index fb151b4ffdc..cc16d931dcf 100644
--- a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
@@ -302,6 +302,52 @@ static bool foldVGPRCopyIntoRegSequence(MachineInstr &MI,
   return true;
 }
 
+static bool phiHasVGPROperands(const MachineInstr &PHI,
+                               const MachineRegisterInfo &MRI,
+                               const SIRegisterInfo *TRI,
+                               const SIInstrInfo *TII) {
+  for (unsigned i = 1; i < PHI.getNumOperands(); i += 2) {
+    unsigned Reg = PHI.getOperand(i).getReg();
+    if (TRI->hasVGPRs(MRI.getRegClass(Reg)))
+      return true;
+  }
+  return false;
+}
+
+static bool phiHasBreakDef(const MachineInstr &PHI,
+                           const MachineRegisterInfo &MRI,
+                           SmallSet<unsigned, 8> &Visited) {
+  for (unsigned i = 1; i < PHI.getNumOperands(); i += 2) {
+    unsigned Reg = PHI.getOperand(i).getReg();
+    if (Visited.count(Reg))
+      continue;
+
+    Visited.insert(Reg);
+
+    MachineInstr *DefInstr = MRI.getVRegDef(Reg);
+    switch (DefInstr->getOpcode()) {
+    default:
+      break;
+    case AMDGPU::SI_IF_BREAK:
+      return true;
+    case AMDGPU::PHI:
+      if (phiHasBreakDef(*DefInstr, MRI, Visited))
+        return true;
+    }
+  }
+  return false;
+}
+
+static bool hasTerminatorThatModifiesExec(const MachineBasicBlock &MBB,
+                                          const TargetRegisterInfo &TRI) {
+  for (MachineBasicBlock::const_iterator I = MBB.getFirstTerminator(),
+                                         E = MBB.end(); I != E; ++I) {
+    if (I->modifiesRegister(AMDGPU::EXEC, &TRI))
+      return true;
+  }
+  return false;
+}
+
 static bool isSafeToFoldImmIntoCopy(const MachineInstr *Copy,
                                     const MachineInstr *MoveImm,
                                     const SIInstrInfo *TII,
@@ -363,6 +409,12 @@ bool searchPredecessors(const MachineBasicBlock *MBB,
   return false;
 }
 
+static bool predsHasDivergentTerminator(MachineBasicBlock *MBB,
+                                        const TargetRegisterInfo *TRI) {
+  return searchPredecessors(MBB, nullptr, [TRI](MachineBasicBlock *MBB) {
+    return hasTerminatorThatModifiesExec(*MBB, *TRI); });
+}
+
 // Checks if there is potential path From instruction To instruction.
 // If CutOff is specified and it sits in between of that path we ignore
 // a higher portion of the path and report it is not reachable.
@@ -569,77 +621,62 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
       break;
     }
     case AMDGPU::PHI: {
-      unsigned hasVGPRUses = 0;
-      SetVector<const MachineInstr *> worklist;
-      worklist.insert(&MI);
-      while (!worklist.empty()) {
-        const MachineInstr *Instr = worklist.pop_back_val();
-        unsigned Reg = Instr->getOperand(0).getReg();
-        for (const auto &Use : MRI.use_operands(Reg)) {
-          const MachineInstr *UseMI = Use.getParent();
-          if (UseMI->isCopy() || UseMI->isRegSequence()) {
-            if (UseMI->isCopy() &&
-                TRI->isPhysicalRegister(UseMI->getOperand(0).getReg()) &&
-                !TRI->isSGPRReg(MRI, UseMI->getOperand(0).getReg())) {
-              hasVGPRUses++;
-            }
-            worklist.insert(UseMI);
-            continue;
-          }
-
-          if (UseMI->isPHI()) {
-            const TargetRegisterClass *UseRC = MRI.getRegClass(Use.getReg());
-            if (!TRI->isSGPRReg(MRI, Use.getReg()) &&
-                UseRC != &AMDGPU::VReg_1RegClass)
-              hasVGPRUses++;
-            continue;
-          }
-
-          unsigned OpNo = UseMI->getOperandNo(&Use);
-          const MCInstrDesc &Desc = TII->get(UseMI->getOpcode());
-          if (!Desc.isPseudo() && Desc.OpInfo &&
-              OpNo < Desc.getNumOperands() &&
-              Desc.OpInfo[OpNo].RegClass != -1) {
-            const TargetRegisterClass *OpRC =
-                TRI->getRegClass(Desc.OpInfo[OpNo].RegClass);
-            if (!TRI->isSGPRClass(OpRC) && OpRC != &AMDGPU::VS_32RegClass &&
-                OpRC != &AMDGPU::VS_64RegClass) {
-              hasVGPRUses++;
-            }
-          }
-        }
-      }
-      bool hasVGPRInput = false;
-      for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
-        unsigned InputReg = MI.getOperand(i).getReg();
-        MachineInstr *Def = MRI.getVRegDef(InputReg);
-        if (TRI->isVGPR(MRI, InputReg)) {
-          if (Def->isCopy()) {
-            unsigned SrcReg = Def->getOperand(1).getReg();
-            const TargetRegisterClass *RC =
-                TRI->isVirtualRegister(SrcReg) ? MRI.getRegClass(SrcReg)
-                                               : TRI->getPhysRegClass(SrcReg);
-            if (TRI->isSGPRClass(RC))
-              continue;
-          }
-          hasVGPRInput = true;
-          break;
-        } else if (Def->isCopy() &&
-                   TRI->isVGPR(MRI, Def->getOperand(1).getReg())) {
-          hasVGPRInput = true;
+      unsigned Reg = MI.getOperand(0).getReg();
+      if (!TRI->isSGPRClass(MRI.getRegClass(Reg)))
+        break;
+
+      // We don't need to fix the PHI if the common dominator of the
+      // two incoming blocks terminates with a uniform branch.
+      bool HasVGPROperand = phiHasVGPROperands(MI, MRI, TRI, TII);
+      if (MI.getNumExplicitOperands() == 5 && !HasVGPROperand) {
+        MachineBasicBlock *MBB0 = MI.getOperand(2).getMBB();
+        MachineBasicBlock *MBB1 = MI.getOperand(4).getMBB();
+
+        if (!predsHasDivergentTerminator(MBB0, TRI) &&
+            !predsHasDivergentTerminator(MBB1, TRI)) {
+          LLVM_DEBUG(dbgs()
+                     << "Not fixing PHI for uniform branch: " << MI << '\n');
           break;
         }
       }
-      unsigned PHIRes = MI.getOperand(0).getReg();
-      const TargetRegisterClass *RC0 = MRI.getRegClass(PHIRes);
-      if ((!TRI->isVGPR(MRI, PHIRes) && RC0 != &AMDGPU::VReg_1RegClass) &&
-          (hasVGPRInput || hasVGPRUses > 1)) {
+      // If a PHI node defines an SGPR and any of its operands are VGPRs,
+      // then we need to move it to the VALU.
+      //
+      // Also, if a PHI node defines an SGPR and has all SGPR operands
+      // we must move it to the VALU, because the SGPR operands will
+      // all end up being assigned the same register, which means
+      // there is a potential for a conflict if different threads take
+      // different control flow paths.
+      //
+      // For Example:
+      //
+      // sgpr0 = def;
+      // ...
+      // sgpr1 = def;
+      // ...
+      // sgpr2 = PHI sgpr0, sgpr1
+      // use sgpr2;
+      //
+      // Will Become:
+      //
+      // sgpr2 = def;
+      // ...
+      // sgpr2 = def;
+      // ...
+      // use sgpr2
+      //
+      // The one exception to this rule is when one of the operands
+      // is defined by a SI_BREAK, SI_IF_BREAK, or SI_ELSE_BREAK
+      // instruction. In this case, we know the program will
+      // never enter the second block (the loop) without entering
+      // the first block (where the condition is computed), so there
+      // is no chance for values to be over-written.
+
+      SmallSet<unsigned, 8> Visited;
+      if (HasVGPROperand || !phiHasBreakDef(MI, MRI, Visited)) {
         LLVM_DEBUG(dbgs() << "Fixing PHI: " << MI);
-        TII->moveToVALU(MI);
-      } else {
-        LLVM_DEBUG(dbgs() << "Legalizing PHI: " << MI);
-        TII->legalizeOperands(MI, MDT);
+        TII->moveToVALU(MI, MDT);
       }
       break;
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index f5fcb7cdfe0..5a8e8b14be3 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -10194,90 +10194,3 @@ SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
 
   return AMDGPUTargetLowering::shouldExpandAtomicRMWInIR(RMW);
 }
-
-const TargetRegisterClass *
-SITargetLowering::getRegClassFor(MVT VT, bool isDivergent) const {
-  const TargetRegisterClass *RC = TargetLoweringBase::getRegClassFor(VT, false);
-  const SIRegisterInfo *TRI = Subtarget->getRegisterInfo();
-  if (RC == &AMDGPU::VReg_1RegClass && !isDivergent)
-    return &AMDGPU::SReg_64RegClass;
-  if (!TRI->isSGPRClass(RC) && !isDivergent)
-    return TRI->getEquivalentSGPRClass(RC);
-  else if (TRI->isSGPRClass(RC) && isDivergent)
-    return TRI->getEquivalentVGPRClass(RC);
-
-  return RC;
-}
-
-static bool hasIfBreakUser(const Value *V, SetVector<const Value *> &Visited) {
-  if (Visited.count(V))
-    return false;
-  Visited.insert(V);
-  bool Result = false;
-  for (auto U : V->users()) {
-    if (const IntrinsicInst *Intrinsic = dyn_cast<IntrinsicInst>(U)) {
-      if ((Intrinsic->getIntrinsicID() == Intrinsic::amdgcn_if_break) &&
-          (V == U->getOperand(1)))
-        Result = true;
-    } else {
-      Result = hasIfBreakUser(U, Visited);
-    }
-    if (Result)
-      break;
-  }
-  return Result;
-}
-
-bool SITargetLowering::requiresUniformRegister(MachineFunction &MF,
-                                               const Value *V) const {
-  if (const IntrinsicInst *Intrinsic = dyn_cast<IntrinsicInst>(V)) {
-    switch (Intrinsic->getIntrinsicID()) {
-    default:
-      return false;
-    case Intrinsic::amdgcn_if_break:
-      return true;
-    }
-  }
-  if (const ExtractValueInst *ExtValue = dyn_cast<ExtractValueInst>(V)) {
-    if (const IntrinsicInst *Intrinsic =
-            dyn_cast<IntrinsicInst>(ExtValue->getOperand(0))) {
-      switch (Intrinsic->getIntrinsicID()) {
-      default:
-        return false;
-      case Intrinsic::amdgcn_if:
-      case Intrinsic::amdgcn_else: {
-        ArrayRef<unsigned> Indices = ExtValue->getIndices();
-        if (Indices.size() == 1 && Indices[0] == 1) {
-          return true;
-        }
-      }
-      }
-    }
-  }
-  if (const CallInst *CI = dyn_cast<CallInst>(V)) {
-    if (isa<InlineAsm>(CI->getCalledValue())) {
-      const SIRegisterInfo *SIRI = Subtarget->getRegisterInfo();
-      ImmutableCallSite CS(CI);
-      TargetLowering::AsmOperandInfoVector TargetConstraints = ParseConstraints(
-          MF.getDataLayout(), Subtarget->getRegisterInfo(), CS);
-      for (auto &TC : TargetConstraints) {
-        if (TC.Type == InlineAsm::isOutput) {
-          ComputeConstraintToUse(TC, SDValue());
-          unsigned AssignedReg;
-          const TargetRegisterClass *RC;
-          std::tie(AssignedReg, RC) = getRegForInlineAsmConstraint(
-              SIRI, TC.ConstraintCode, TC.ConstraintVT);
-          if (RC) {
-            MachineRegisterInfo &MRI = MF.getRegInfo();
-            if (AssignedReg != 0 && SIRI->isSGPRReg(MRI, AssignedReg))
-              return true;
-            else if (SIRI->isSGPRClass(RC))
-              return true;
-          }
-        }
-      }
-    }
-  }
-  SetVector<const Value *> Visited;
-  return hasIfBreakUser(V, Visited);
-}
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h
index 4d7dac91cd1..fde722df72e 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -367,10 +367,7 @@ public:
                        bool SNaN = false,
                        unsigned Depth = 0) const override;
   AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *) const override;
-  virtual const TargetRegisterClass *
-  getRegClassFor(MVT VT, bool isDivergent) const override;
-  virtual bool requiresUniformRegister(MachineFunction &MF,
-                                       const Value *V) const override;
+
   unsigned getPrefLoopAlignment(MachineLoop *ML) const override;
 };
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 48257b01b86..0b489b090cc 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -3917,7 +3917,7 @@ void SIInstrInfo::legalizeGenericOperand(MachineBasicBlock &InsertMBB,
     return;
 
   // Try to eliminate the copy if it is copying an immediate value.
-  if (Def->isMoveImmediate() && DstRC != &AMDGPU::VReg_1RegClass)
+  if (Def->isMoveImmediate())
     FoldImmediate(*Copy, *Def, OpReg, &MRI);
 }
 
@@ -4151,10 +4151,7 @@ void SIInstrInfo::legalizeOperands(MachineInstr &MI,
     if (VRC || !RI.isSGPRClass(getOpRegClass(MI, 0))) {
       if (!VRC) {
         assert(SRC);
-        if (getOpRegClass(MI, 0) == &AMDGPU::VReg_1RegClass) {
-          VRC = &AMDGPU::VReg_1RegClass;
-        } else
-          VRC = RI.getEquivalentVGPRClass(SRC);
+        VRC = RI.getEquivalentVGPRClass(SRC);
       }
       RC = VRC;
     } else {
@@ -5316,7 +5313,7 @@ const TargetRegisterClass *SIInstrInfo::getDestEquivalentVGPRClass(
   case AMDGPU::INSERT_SUBREG:
   case AMDGPU::WQM:
   case AMDGPU::WWM:
-    if (RI.hasVGPRs(NewDstRC) || NewDstRC == &AMDGPU::VReg_1RegClass)
+    if (RI.hasVGPRs(NewDstRC))
      return nullptr;

    NewDstRC = RI.getEquivalentVGPRClass(NewDstRC);
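
A note on the heuristic restored in SIFixSGPRCopies above: it reduces to one decision per SGPR-defining PHI. Move the PHI to the VALU if any incoming value lives in a VGPR, or if none of its incoming values is (transitively, through other PHIs) defined by SI_IF_BREAK; the uniform-branch check is a separate early-out not modeled here. The sketch below is a minimal, self-contained model of just that core decision. Opcode, Def, RegInfo, and the register numbering are invented stand-ins for illustration, not LLVM's MachineInstr/MachineRegisterInfo APIs.

// Toy model of the moveToVALU decision restored above; only the
// control flow of the heuristic mirrors the patch.
#include <cstdio>
#include <set>
#include <vector>

enum class Opcode { Other, SIIfBreak, Phi };

struct Def {
  Opcode Op;
  bool IsVGPR;                  // is the defined value in a VGPR class?
  std::vector<int> PhiIncoming; // for Phi defs: incoming vreg numbers
};

// Toy "MRI": vreg number -> its defining instruction.
using RegInfo = std::vector<Def>;

// Mirrors phiHasVGPROperands: does any incoming value live in a VGPR?
static bool phiHasVGPROperands(const Def &Phi, const RegInfo &MRI) {
  for (int Reg : Phi.PhiIncoming)
    if (MRI[Reg].IsVGPR)
      return true;
  return false;
}

// Mirrors phiHasBreakDef: walk the incoming defs, recursing through
// nested PHIs (Visited stops cycles), looking for an SI_IF_BREAK def.
static bool phiHasBreakDef(const Def &Phi, const RegInfo &MRI,
                           std::set<int> &Visited) {
  for (int Reg : Phi.PhiIncoming) {
    if (!Visited.insert(Reg).second)
      continue;
    const Def &D = MRI[Reg];
    if (D.Op == Opcode::SIIfBreak)
      return true;
    if (D.Op == Opcode::Phi && phiHasBreakDef(D, MRI, Visited))
      return true;
  }
  return false;
}

int main() {
  // vreg0 = SI_IF_BREAK ..., vreg1 = plain SGPR def,
  // vreg2 = PHI(vreg0, vreg1): a loop-header PHI fed by a break.
  RegInfo MRI = {{Opcode::SIIfBreak, false, {}},
                 {Opcode::Other, false, {}},
                 {Opcode::Phi, false, {0, 1}}};
  std::set<int> Visited;
  bool MoveToVALU = phiHasVGPROperands(MRI[2], MRI) ||
                    !phiHasBreakDef(MRI[2], MRI, Visited);
  std::printf("moveToVALU: %s\n", MoveToVALU ? "yes" : "no");
  return 0;
}

Running this prints "moveToVALU: no": the PHI is all-SGPR and one of its inputs is an SI_IF_BREAK result, which is exactly the exception the restored comment describes as safe to leave on the SALU.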

