diff options
author | Alexander Timofeev <Alexander.Timofeev@amd.com> | 2019-05-26 20:33:26 +0000 |
---|---|---|
committer | Alexander Timofeev <Alexander.Timofeev@amd.com> | 2019-05-26 20:33:26 +0000 |
commit | ba447bae7448435c9986eece0811da1423972fdd (patch) | |
tree | 688b9be43648fbc0f5c07b422b417ef8b69353b3 /llvm/lib | |
parent | c2493ce4a40be025054087fde59dd0f339baf6c0 (diff) | |
download | bcm5719-llvm-ba447bae7448435c9986eece0811da1423972fdd.tar.gz bcm5719-llvm-ba447bae7448435c9986eece0811da1423972fdd.zip |
[AMDGPU] Divergence driven ISel. Assign register class for cross block values according to the divergence.
Details: To make instruction selection really divergence driven it is necessary to assign
the correct register classes to the cross block values beforehand. For the divergent targets
same value type requires different register classes dependent on the value divergence.
Reviewers: rampitec, nhaehnle
Differential Revision: https://reviews.llvm.org/D59990
This commit was reverted because of the build failure.
The reason was mlformed patch.
Build failure fixed.
llvm-svn: 361741
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 6 | ||||
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp | 14 | ||||
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp | 33 | ||||
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h | 2 | ||||
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 4 | ||||
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 2 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp | 166 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 91 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIISelLowering.h | 5 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 13 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIRegisterInfo.h | 5 | ||||
-rw-r--r-- | llvm/lib/Target/ARM/ARMISelLowering.cpp | 4 | ||||
-rw-r--r-- | llvm/lib/Target/ARM/ARMISelLowering.h | 3 |
13 files changed, 213 insertions, 135 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 117654bc7a3..d53ee3134d5 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -13919,9 +13919,11 @@ struct LoadedSlice { assert(DAG && "Missing context"); const TargetLowering &TLI = DAG->getTargetLoweringInfo(); EVT ResVT = Use->getValueType(0); - const TargetRegisterClass *ResRC = TLI.getRegClassFor(ResVT.getSimpleVT()); + const TargetRegisterClass *ResRC = + TLI.getRegClassFor(ResVT.getSimpleVT(), Use->isDivergent()); const TargetRegisterClass *ArgRC = - TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT()); + TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT(), + Use->getOperand(0)->isDivergent()); if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT)) return false; diff --git a/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index d8ef10f58aa..8b405562904 100644 --- a/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -85,6 +85,7 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, RegInfo = &MF->getRegInfo(); const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering(); unsigned StackAlign = TFI->getStackAlignment(); + DA = DAG->getDivergenceAnalysis(); // Check whether the function can return without sret-demotion. SmallVector<ISD::OutputArg, 4> Outs; @@ -345,9 +346,9 @@ void FunctionLoweringInfo::clear() { } /// CreateReg - Allocate a single virtual register for the given type. -unsigned FunctionLoweringInfo::CreateReg(MVT VT) { +unsigned FunctionLoweringInfo::CreateReg(MVT VT, bool isDivergent) { return RegInfo->createVirtualRegister( - MF->getSubtarget().getTargetLowering()->getRegClassFor(VT)); + MF->getSubtarget().getTargetLowering()->getRegClassFor(VT, isDivergent)); } /// CreateRegs - Allocate the appropriate number of virtual registers of @@ -357,7 +358,7 @@ unsigned FunctionLoweringInfo::CreateReg(MVT VT) { /// In the case that the given value has struct or array type, this function /// will assign registers for each member or element. /// -unsigned FunctionLoweringInfo::CreateRegs(Type *Ty) { +unsigned FunctionLoweringInfo::CreateRegs(Type *Ty, bool isDivergent) { const TargetLowering *TLI = MF->getSubtarget().getTargetLowering(); SmallVector<EVT, 4> ValueVTs; @@ -370,13 +371,18 @@ unsigned FunctionLoweringInfo::CreateRegs(Type *Ty) { unsigned NumRegs = TLI->getNumRegisters(Ty->getContext(), ValueVT); for (unsigned i = 0; i != NumRegs; ++i) { - unsigned R = CreateReg(RegisterVT); + unsigned R = CreateReg(RegisterVT, isDivergent); if (!FirstReg) FirstReg = R; } } return FirstReg; } +unsigned FunctionLoweringInfo::CreateRegs(const Value *V) { + return CreateRegs(V->getType(), DA && !TLI->requiresUniformRegister(*MF, V) && + DA->isDivergent(V)); +} + /// GetLiveOutRegInfo - Gets LiveOutInfo for a register, returning NULL if the /// register is a PHI destination and the PHI's LiveOutInfo is not valid. If /// the register's LiveOutInfo is for a smaller bit width, it is extended to diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index 059e5f7c8dd..4b78d1bb6b1 100644 --- a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -105,7 +105,7 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned, // Stick to the preferred register classes for legal types. if (TLI->isTypeLegal(VT)) - UseRC = TLI->getRegClassFor(VT); + UseRC = TLI->getRegClassFor(VT, Node->isDivergent()); if (!IsClone && !IsCloned) for (SDNode *User : Node->uses()) { @@ -164,7 +164,7 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned, "Incompatible phys register def and uses!"); DstRC = UseRC; } else { - DstRC = TLI->getRegClassFor(VT); + DstRC = TLI->getRegClassFor(VT, Node->isDivergent()); } // If all uses are reading from the src physical register and copying the @@ -225,8 +225,9 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node, // type correctly. For example, a 64-bit float (X86::FR64) can't live in // the 32-bit float super-class (X86::FR32). if (i < NumResults && TLI->isTypeLegal(Node->getSimpleValueType(i))) { - const TargetRegisterClass *VTRC = - TLI->getRegClassFor(Node->getSimpleValueType(i)); + const TargetRegisterClass *VTRC = TLI->getRegClassFor( + Node->getSimpleValueType(i), + (Node->isDivergent() || (RC && TRI->isDivergentRegClass(RC)))); if (RC) VTRC = TRI->getCommonSubClass(RC, VTRC); if (VTRC) @@ -289,8 +290,8 @@ unsigned InstrEmitter::getVR(SDValue Op, // IMPLICIT_DEF can produce any type of result so its MCInstrDesc // does not include operand register class info. if (!VReg) { - const TargetRegisterClass *RC = - TLI->getRegClassFor(Op.getSimpleValueType()); + const TargetRegisterClass *RC = TLI->getRegClassFor( + Op.getSimpleValueType(), Op.getNode()->isDivergent()); VReg = MRI->createVirtualRegister(RC); } BuildMI(*MBB, InsertPos, Op.getDebugLoc(), @@ -395,11 +396,15 @@ void InstrEmitter::AddOperand(MachineInstrBuilder &MIB, } else if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(Op)) { unsigned VReg = R->getReg(); MVT OpVT = Op.getSimpleValueType(); - const TargetRegisterClass *OpRC = - TLI->isTypeLegal(OpVT) ? TLI->getRegClassFor(OpVT) : nullptr; const TargetRegisterClass *IIRC = II ? TRI->getAllocatableClass(TII->getRegClass(*II, IIOpNum, TRI, *MF)) : nullptr; + const TargetRegisterClass *OpRC = + TLI->isTypeLegal(OpVT) + ? TLI->getRegClassFor(OpVT, + Op.getNode()->isDivergent() || + (IIRC && TRI->isDivergentRegClass(IIRC))) + : nullptr; if (OpRC && IIRC && OpRC != IIRC && TargetRegisterInfo::isVirtualRegister(VReg)) { @@ -464,7 +469,7 @@ void InstrEmitter::AddOperand(MachineInstrBuilder &MIB, } unsigned InstrEmitter::ConstrainForSubReg(unsigned VReg, unsigned SubIdx, - MVT VT, const DebugLoc &DL) { + MVT VT, bool isDivergent, const DebugLoc &DL) { const TargetRegisterClass *VRC = MRI->getRegClass(VReg); const TargetRegisterClass *RC = TRI->getSubClassWithSubReg(VRC, SubIdx); @@ -479,7 +484,7 @@ unsigned InstrEmitter::ConstrainForSubReg(unsigned VReg, unsigned SubIdx, // VReg couldn't be reasonably constrained. Emit a COPY to a new virtual // register instead. - RC = TRI->getSubClassWithSubReg(TLI->getRegClassFor(VT), SubIdx); + RC = TRI->getSubClassWithSubReg(TLI->getRegClassFor(VT, isDivergent), SubIdx); assert(RC && "No legal register class for VT supports that SubIdx"); unsigned NewReg = MRI->createVirtualRegister(RC); BuildMI(*MBB, InsertPos, DL, TII->get(TargetOpcode::COPY), NewReg) @@ -514,7 +519,7 @@ void InstrEmitter::EmitSubregNode(SDNode *Node, // classes. unsigned SubIdx = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue(); const TargetRegisterClass *TRC = - TLI->getRegClassFor(Node->getSimpleValueType(0)); + TLI->getRegClassFor(Node->getSimpleValueType(0), Node->isDivergent()); unsigned Reg; MachineInstr *DefMI; @@ -548,8 +553,7 @@ void InstrEmitter::EmitSubregNode(SDNode *Node, if (TargetRegisterInfo::isVirtualRegister(Reg)) Reg = ConstrainForSubReg(Reg, SubIdx, Node->getOperand(0).getSimpleValueType(), - Node->getDebugLoc()); - + Node->isDivergent(), Node->getDebugLoc()); // Create the destreg if it is missing. if (VRBase == 0) VRBase = MRI->createVirtualRegister(TRC); @@ -584,7 +588,8 @@ void InstrEmitter::EmitSubregNode(SDNode *Node, // // There is no constraint on the %src register class. // - const TargetRegisterClass *SRC = TLI->getRegClassFor(Node->getSimpleValueType(0)); + const TargetRegisterClass *SRC = + TLI->getRegClassFor(Node->getSimpleValueType(0), Node->isDivergent()); SRC = TRI->getSubClassWithSubReg(SRC, SubIdx); assert(SRC && "No register class supports VT and SubIdx for INSERT_SUBREG"); diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h index 3188c2678f1..42f7846fe7c 100644 --- a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h +++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h @@ -83,7 +83,7 @@ class LLVM_LIBRARY_VISIBILITY InstrEmitter { /// supports SubIdx sub-registers. Emit a copy if that isn't possible. /// Return the virtual register to use. unsigned ConstrainForSubReg(unsigned VReg, unsigned SubIdx, MVT VT, - const DebugLoc &DL); + bool isDivergent, const DebugLoc &DL); /// EmitSubregNode - Generate machine code for subreg nodes. /// diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 76e5847ba11..a5274877ece 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -9844,7 +9844,7 @@ SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { if (const Constant *C = dyn_cast<Constant>(PHIOp)) { unsigned &RegOut = ConstantsOut[C]; if (RegOut == 0) { - RegOut = FuncInfo.CreateRegs(C->getType()); + RegOut = FuncInfo.CreateRegs(C); CopyValueToVirtualRegister(C, RegOut); } Reg = RegOut; @@ -9857,7 +9857,7 @@ SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { assert(isa<AllocaInst>(PHIOp) && FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(PHIOp)) && "Didn't codegen value into a register!??"); - Reg = FuncInfo.CreateRegs(PHIOp->getType()); + Reg = FuncInfo.CreateRegs(PHIOp); CopyValueToVirtualRegister(PHIOp, Reg); } } diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 6c9a1cd646e..6f55f98c51f 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -1485,7 +1485,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { !Inst->use_empty()) { unsigned &R = FuncInfo->ValueMap[Inst]; if (!R) - R = FuncInfo->CreateRegs(Inst->getType()); + R = FuncInfo->CreateRegs(Inst); } bool HadTailCall = false; diff --git a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp index 94b1e636c7b..8ad7a52c92b 100644 --- a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp +++ b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp @@ -302,52 +302,6 @@ static bool foldVGPRCopyIntoRegSequence(MachineInstr &MI, return true; } -static bool phiHasVGPROperands(const MachineInstr &PHI, - const MachineRegisterInfo &MRI, - const SIRegisterInfo *TRI, - const SIInstrInfo *TII) { - for (unsigned i = 1; i < PHI.getNumOperands(); i += 2) { - unsigned Reg = PHI.getOperand(i).getReg(); - if (TRI->hasVGPRs(MRI.getRegClass(Reg))) - return true; - } - return false; -} - -static bool phiHasBreakDef(const MachineInstr &PHI, - const MachineRegisterInfo &MRI, - SmallSet<unsigned, 8> &Visited) { - for (unsigned i = 1; i < PHI.getNumOperands(); i += 2) { - unsigned Reg = PHI.getOperand(i).getReg(); - if (Visited.count(Reg)) - continue; - - Visited.insert(Reg); - - MachineInstr *DefInstr = MRI.getVRegDef(Reg); - switch (DefInstr->getOpcode()) { - default: - break; - case AMDGPU::SI_IF_BREAK: - return true; - case AMDGPU::PHI: - if (phiHasBreakDef(*DefInstr, MRI, Visited)) - return true; - } - } - return false; -} - -static bool hasTerminatorThatModifiesExec(const MachineBasicBlock &MBB, - const TargetRegisterInfo &TRI) { - for (MachineBasicBlock::const_iterator I = MBB.getFirstTerminator(), - E = MBB.end(); I != E; ++I) { - if (I->modifiesRegister(AMDGPU::EXEC, &TRI)) - return true; - } - return false; -} - static bool isSafeToFoldImmIntoCopy(const MachineInstr *Copy, const MachineInstr *MoveImm, const SIInstrInfo *TII, @@ -409,12 +363,6 @@ bool searchPredecessors(const MachineBasicBlock *MBB, return false; } -static bool predsHasDivergentTerminator(MachineBasicBlock *MBB, - const TargetRegisterInfo *TRI) { - return searchPredecessors(MBB, nullptr, [TRI](MachineBasicBlock *MBB) { - return hasTerminatorThatModifiesExec(*MBB, *TRI); }); -} - // Checks if there is potential path From instruction To instruction. // If CutOff is specified and it sits in between of that path we ignore // a higher portion of the path and report it is not reachable. @@ -621,63 +569,73 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) { break; } case AMDGPU::PHI: { - unsigned Reg = MI.getOperand(0).getReg(); - if (!TRI->isSGPRClass(MRI.getRegClass(Reg))) - break; - - // We don't need to fix the PHI if the common dominator of the - // two incoming blocks terminates with a uniform branch. - bool HasVGPROperand = phiHasVGPROperands(MI, MRI, TRI, TII); - if (MI.getNumExplicitOperands() == 5 && !HasVGPROperand) { - MachineBasicBlock *MBB0 = MI.getOperand(2).getMBB(); - MachineBasicBlock *MBB1 = MI.getOperand(4).getMBB(); - - if (!predsHasDivergentTerminator(MBB0, TRI) && - !predsHasDivergentTerminator(MBB1, TRI)) { - LLVM_DEBUG(dbgs() - << "Not fixing PHI for uniform branch: " << MI << '\n'); + unsigned hasVGPRUses = 0; + SetVector<const MachineInstr *> worklist; + worklist.insert(&MI); + while (!worklist.empty()) { + const MachineInstr *Instr = worklist.pop_back_val(); + unsigned Reg = Instr->getOperand(0).getReg(); + for (const auto &Use : MRI.use_operands(Reg)) { + const MachineInstr *UseMI = Use.getParent(); + if (UseMI->isCopy() || UseMI->isRegSequence()) { + if (UseMI->isCopy() && + TRI->isPhysicalRegister(UseMI->getOperand(0).getReg()) && + !TRI->isSGPRReg(MRI, UseMI->getOperand(0).getReg())) { + hasVGPRUses++; + } + worklist.insert(UseMI); + continue; + } + + if (UseMI->isPHI()) { + if (!TRI->isSGPRReg(MRI, Use.getReg())) + hasVGPRUses++; + continue; + } + + unsigned OpNo = UseMI->getOperandNo(&Use); + const MCInstrDesc &Desc = TII->get(UseMI->getOpcode()); + if (Desc.OpInfo && Desc.OpInfo[OpNo].RegClass != -1) { + const TargetRegisterClass *OpRC = + TRI->getRegClass(Desc.OpInfo[OpNo].RegClass); + if (!TRI->isSGPRClass(OpRC) && OpRC != &AMDGPU::VS_32RegClass && + OpRC != &AMDGPU::VS_64RegClass) { + hasVGPRUses++; + } + } + } + } + bool hasVGPRInput = false; + for (unsigned i = 1; i < MI.getNumOperands(); i += 2) { + unsigned InputReg = MI.getOperand(i).getReg(); + MachineInstr *Def = MRI.getVRegDef(InputReg); + if (TRI->isVGPR(MRI, InputReg)) { + if (Def->isCopy()) { + unsigned SrcReg = Def->getOperand(1).getReg(); + const TargetRegisterClass *RC = + TRI->isVirtualRegister(SrcReg) ? MRI.getRegClass(SrcReg) + : TRI->getPhysRegClass(SrcReg); + if (TRI->isSGPRClass(RC)) + continue; + } + hasVGPRInput = true; + break; + } else if (Def->isCopy() && + TRI->isVGPR(MRI, Def->getOperand(1).getReg())) { + hasVGPRInput = true; break; } } + unsigned PHIRes = MI.getOperand(0).getReg(); + const TargetRegisterClass *RC0 = MRI.getRegClass(PHIRes); - // If a PHI node defines an SGPR and any of its operands are VGPRs, - // then we need to move it to the VALU. - // - // Also, if a PHI node defines an SGPR and has all SGPR operands - // we must move it to the VALU, because the SGPR operands will - // all end up being assigned the same register, which means - // there is a potential for a conflict if different threads take - // different control flow paths. - // - // For Example: - // - // sgpr0 = def; - // ... - // sgpr1 = def; - // ... - // sgpr2 = PHI sgpr0, sgpr1 - // use sgpr2; - // - // Will Become: - // - // sgpr2 = def; - // ... - // sgpr2 = def; - // ... - // use sgpr2 - // - // The one exception to this rule is when one of the operands - // is defined by a SI_BREAK, SI_IF_BREAK, or SI_ELSE_BREAK - // instruction. In this case, there we know the program will - // never enter the second block (the loop) without entering - // the first block (where the condition is computed), so there - // is no chance for values to be over-written. - - SmallSet<unsigned, 8> Visited; - if (HasVGPROperand || !phiHasBreakDef(MI, MRI, Visited)) { - LLVM_DEBUG(dbgs() << "Fixing PHI: " << MI); - TII->moveToVALU(MI, MDT); + if ((!TRI->isVGPR(MRI, PHIRes) && RC0 != &AMDGPU::VReg_1RegClass) && + (hasVGPRInput || hasVGPRUses > 1)) { + TII->moveToVALU(MI); + } else { + TII->legalizeOperands(MI, MDT); } + break; } case AMDGPU::REG_SEQUENCE: diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index c2cda5ef4d7..8f93c63046c 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -9637,7 +9637,8 @@ SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node, break; MVT VT = Src0.getValueType().getSimpleVT(); - const TargetRegisterClass *RC = getRegClassFor(VT); + const TargetRegisterClass *RC = + getRegClassFor(VT, Src0.getNode()->isDivergent()); MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo(); SDValue UndefReg = DAG.getRegister(MRI.createVirtualRegister(RC), VT); @@ -10171,3 +10172,91 @@ SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const { return AMDGPUTargetLowering::shouldExpandAtomicRMWInIR(RMW); } + +const TargetRegisterClass * +SITargetLowering::getRegClassFor(MVT VT, bool isDivergent) const { + const TargetRegisterClass *RC = TargetLoweringBase::getRegClassFor(VT, false); + const SIRegisterInfo *TRI = Subtarget->getRegisterInfo(); + if (RC == &AMDGPU::VReg_1RegClass && !isDivergent) + return &AMDGPU::SReg_64RegClass; + if (!TRI->isSGPRClass(RC) && !isDivergent) + return TRI->getEquivalentSGPRClass(RC); + else if (TRI->isSGPRClass(RC) && isDivergent) + return TRI->getEquivalentVGPRClass(RC); + + return RC; +} + +static bool hasIfBreakUser(const Value *V, SetVector<const Value *> &Visited) { + if (Visited.count(V)) + return false; + Visited.insert(V); + bool Result = false; + for (auto U : V->users()) { + if (const IntrinsicInst *Intrinsic = dyn_cast<IntrinsicInst>(U)) { + if ((Intrinsic->getIntrinsicID() == Intrinsic::amdgcn_if_break) && + (V == U->getOperand(1))) + Result = true; + } else { + Result = hasIfBreakUser(U, Visited); + } + if (Result) + break; + } + return Result; +} + +bool SITargetLowering::requiresUniformRegister(MachineFunction &MF, + const Value *V) const { + if (const IntrinsicInst *Intrinsic = dyn_cast<IntrinsicInst>(V)) { + switch (Intrinsic->getIntrinsicID()) { + default: + return false; + case Intrinsic::amdgcn_if_break: + return true; + } + } + if (const ExtractValueInst *ExtValue = dyn_cast<ExtractValueInst>(V)) { + if (const IntrinsicInst *Intrinsic = + dyn_cast<IntrinsicInst>(ExtValue->getOperand(0))) { + switch (Intrinsic->getIntrinsicID()) { + default: + return false; + case Intrinsic::amdgcn_if: + case Intrinsic::amdgcn_else: { + ArrayRef<unsigned> Indices = ExtValue->getIndices(); + if (Indices.size() == 1 && Indices[0] == 1) { + return true; + } + } + } + } + } + if (const CallInst *CI = dyn_cast<CallInst>(V)) { + if (isa<InlineAsm>(CI->getCalledValue())) { + const SIRegisterInfo *SIRI = Subtarget->getRegisterInfo(); + ImmutableCallSite CS(CI); + TargetLowering::AsmOperandInfoVector TargetConstraints = ParseConstraints( + MF.getDataLayout(), Subtarget->getRegisterInfo(), CS); + for (auto &TC : TargetConstraints) { + if (TC.Type == InlineAsm::isOutput) { + ComputeConstraintToUse(TC, SDValue()); + unsigned AssignedReg; + const TargetRegisterClass *RC; + std::tie(AssignedReg, RC) = getRegForInlineAsmConstraint( + SIRI, TC.ConstraintCode, + getSimpleValueType(MF.getDataLayout(), CS.getType())); + if (RC) { + MachineRegisterInfo &MRI = MF.getRegInfo(); + if (AssignedReg != 0 && SIRI->isSGPRReg(MRI, AssignedReg)) + return true; + else if (SIRI->isSGPRClass(RC)) + return true; + } + } + } + } + } + SetVector<const Value *> Visited; + return hasIfBreakUser(V, Visited); +} diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h index 60a474f51e5..094a0b054e2 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.h +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h @@ -367,7 +367,10 @@ public: bool SNaN = false, unsigned Depth = 0) const override; AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *) const override; - + virtual const TargetRegisterClass * + getRegClassFor(MVT VT, bool isDivergent) const override; + virtual bool requiresUniformRegister(MachineFunction &MF, + const Value *V) const override; unsigned getPrefLoopAlignment(MachineLoop *ML) const override; }; diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index e42ed3505cf..14f5dbe6ad4 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -2219,6 +2219,10 @@ bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, // These come before src2. removeModOperands(UseMI); UseMI.setDesc(get(NewOpc)); + // It might happen that UseMI was commuted + // and we now have SGPR as SRC1. If so 2 inlined + // constant and SGPR are illegal. + legalizeOperands(UseMI); bool DeleteDef = MRI->hasOneNonDBGUse(Reg); if (DeleteDef) @@ -3913,7 +3917,7 @@ void SIInstrInfo::legalizeGenericOperand(MachineBasicBlock &InsertMBB, return; // Try to eliminate the copy if it is copying an immediate value. - if (Def->isMoveImmediate()) + if (Def->isMoveImmediate() && DstRC != &AMDGPU::VReg_1RegClass) FoldImmediate(*Copy, *Def, OpReg, &MRI); } @@ -4147,7 +4151,10 @@ void SIInstrInfo::legalizeOperands(MachineInstr &MI, if (VRC || !RI.isSGPRClass(getOpRegClass(MI, 0))) { if (!VRC) { assert(SRC); - VRC = RI.getEquivalentVGPRClass(SRC); + if (getOpRegClass(MI, 0) == &AMDGPU::VReg_1RegClass) { + VRC = &AMDGPU::VReg_1RegClass; + } else + VRC = RI.getEquivalentVGPRClass(SRC); } RC = VRC; } else { @@ -5309,7 +5316,7 @@ const TargetRegisterClass *SIInstrInfo::getDestEquivalentVGPRClass( case AMDGPU::INSERT_SUBREG: case AMDGPU::WQM: case AMDGPU::WWM: - if (RI.hasVGPRs(NewDstRC)) + if (RI.hasVGPRs(NewDstRC) || NewDstRC == &AMDGPU::VReg_1RegClass) return nullptr; NewDstRC = RI.getEquivalentVGPRClass(NewDstRC); diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h index bfdc1ef9645..e2df3ae5ea7 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h @@ -195,6 +195,11 @@ public: unsigned Reg) const; bool isVGPR(const MachineRegisterInfo &MRI, unsigned Reg) const; + virtual bool + isDivergentRegClass(const TargetRegisterClass *RC) const override { + return !isSGPRClass(RC); + } + bool isSGPRPressureSet(unsigned SetID) const { return SGPRPressureSets.test(SetID) && !VGPRPressureSets.test(SetID); } diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index 677e4d5b2e8..88d318e7bb3 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -1447,7 +1447,9 @@ EVT ARMTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &, /// getRegClassFor - Return the register class that should be used for the /// specified value type. -const TargetRegisterClass *ARMTargetLowering::getRegClassFor(MVT VT) const { +const TargetRegisterClass * +ARMTargetLowering::getRegClassFor(MVT VT, bool isDivergent) const { + (void)isDivergent; // Map v4i64 to QQ registers but do not make the type legal. Similarly map // v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to // load / store 4 to 8 consecutive D registers. diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h index 3b94cb0dcb0..8e254d75b1c 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.h +++ b/llvm/lib/Target/ARM/ARMISelLowering.h @@ -456,7 +456,8 @@ class VectorType; /// getRegClassFor - Return the register class that should be used for the /// specified value type. - const TargetRegisterClass *getRegClassFor(MVT VT) const override; + const TargetRegisterClass * + getRegClassFor(MVT VT, bool isDivergent = false) const override; /// Returns true if a cast between SrcAS and DestAS is a noop. bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override { |