Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 6
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp | 14
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp | 33
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h | 2
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 4
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 2
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp | 166
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 91
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIISelLowering.h | 5
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 13
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIRegisterInfo.h | 5
-rw-r--r-- | llvm/lib/Target/ARM/ARMISelLowering.cpp | 4
-rw-r--r-- | llvm/lib/Target/ARM/ARMISelLowering.h | 3
13 files changed, 213 insertions, 135 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 117654bc7a3..d53ee3134d5 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -13919,9 +13919,11 @@ struct LoadedSlice {
     assert(DAG && "Missing context");
     const TargetLowering &TLI = DAG->getTargetLoweringInfo();
     EVT ResVT = Use->getValueType(0);
-    const TargetRegisterClass *ResRC = TLI.getRegClassFor(ResVT.getSimpleVT());
+    const TargetRegisterClass *ResRC =
+        TLI.getRegClassFor(ResVT.getSimpleVT(), Use->isDivergent());
     const TargetRegisterClass *ArgRC =
-        TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT());
+        TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT(),
+                           Use->getOperand(0)->isDivergent());
     if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT))
       return false;
 
diff --git a/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
index d8ef10f58aa..8b405562904 100644
--- a/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -85,6 +85,7 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
   RegInfo = &MF->getRegInfo();
   const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering();
   unsigned StackAlign = TFI->getStackAlignment();
+  DA = DAG->getDivergenceAnalysis();
 
   // Check whether the function can return without sret-demotion.
   SmallVector<ISD::OutputArg, 4> Outs;
@@ -345,9 +346,9 @@ void FunctionLoweringInfo::clear() {
 }
 
 /// CreateReg - Allocate a single virtual register for the given type.
-unsigned FunctionLoweringInfo::CreateReg(MVT VT) {
+unsigned FunctionLoweringInfo::CreateReg(MVT VT, bool isDivergent) {
   return RegInfo->createVirtualRegister(
-      MF->getSubtarget().getTargetLowering()->getRegClassFor(VT));
+      MF->getSubtarget().getTargetLowering()->getRegClassFor(VT, isDivergent));
 }
 
 /// CreateRegs - Allocate the appropriate number of virtual registers of
@@ -357,7 +358,7 @@ unsigned FunctionLoweringInfo::CreateReg(MVT VT) {
 /// In the case that the given value has struct or array type, this function
 /// will assign registers for each member or element.
 ///
-unsigned FunctionLoweringInfo::CreateRegs(Type *Ty) {
+unsigned FunctionLoweringInfo::CreateRegs(Type *Ty, bool isDivergent) {
   const TargetLowering *TLI = MF->getSubtarget().getTargetLowering();
 
   SmallVector<EVT, 4> ValueVTs;
@@ -370,13 +371,18 @@ unsigned FunctionLoweringInfo::CreateRegs(Type *Ty) {
 
     unsigned NumRegs = TLI->getNumRegisters(Ty->getContext(), ValueVT);
     for (unsigned i = 0; i != NumRegs; ++i) {
-      unsigned R = CreateReg(RegisterVT);
+      unsigned R = CreateReg(RegisterVT, isDivergent);
       if (!FirstReg) FirstReg = R;
     }
   }
   return FirstReg;
 }
 
+unsigned FunctionLoweringInfo::CreateRegs(const Value *V) {
+  return CreateRegs(V->getType(), DA && !TLI->requiresUniformRegister(*MF, V) &&
+                                      DA->isDivergent(V));
+}
+
 /// GetLiveOutRegInfo - Gets LiveOutInfo for a register, returning NULL if the
 /// register is a PHI destination and the PHI's LiveOutInfo is not valid. If
 /// the register's LiveOutInfo is for a smaller bit width, it is extended to
diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index 059e5f7c8dd..4b78d1bb6b1 100644
--- a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -105,7 +105,7 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned,
 
   // Stick to the preferred register classes for legal types.
   if (TLI->isTypeLegal(VT))
-    UseRC = TLI->getRegClassFor(VT);
+    UseRC = TLI->getRegClassFor(VT, Node->isDivergent());
 
   if (!IsClone && !IsCloned)
     for (SDNode *User : Node->uses()) {
@@ -164,7 +164,7 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned,
              "Incompatible phys register def and uses!");
       DstRC = UseRC;
     } else {
-      DstRC = TLI->getRegClassFor(VT);
+      DstRC = TLI->getRegClassFor(VT, Node->isDivergent());
     }
 
   // If all uses are reading from the src physical register and copying the
@@ -225,8 +225,9 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node,
     // type correctly. For example, a 64-bit float (X86::FR64) can't live in
     // the 32-bit float super-class (X86::FR32).
     if (i < NumResults && TLI->isTypeLegal(Node->getSimpleValueType(i))) {
-      const TargetRegisterClass *VTRC =
-          TLI->getRegClassFor(Node->getSimpleValueType(i));
+      const TargetRegisterClass *VTRC = TLI->getRegClassFor(
+          Node->getSimpleValueType(i),
+          (Node->isDivergent() || (RC && TRI->isDivergentRegClass(RC))));
       if (RC)
         VTRC = TRI->getCommonSubClass(RC, VTRC);
       if (VTRC)
@@ -289,8 +290,8 @@ unsigned InstrEmitter::getVR(SDValue Op,
     // IMPLICIT_DEF can produce any type of result so its MCInstrDesc
     // does not include operand register class info.
     if (!VReg) {
-      const TargetRegisterClass *RC =
-          TLI->getRegClassFor(Op.getSimpleValueType());
+      const TargetRegisterClass *RC = TLI->getRegClassFor(
+          Op.getSimpleValueType(), Op.getNode()->isDivergent());
       VReg = MRI->createVirtualRegister(RC);
     }
     BuildMI(*MBB, InsertPos, Op.getDebugLoc(),
@@ -395,11 +396,15 @@ void InstrEmitter::AddOperand(MachineInstrBuilder &MIB,
   } else if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(Op)) {
     unsigned VReg = R->getReg();
     MVT OpVT = Op.getSimpleValueType();
-    const TargetRegisterClass *OpRC =
-        TLI->isTypeLegal(OpVT) ? TLI->getRegClassFor(OpVT) : nullptr;
     const TargetRegisterClass *IIRC =
         II ? TRI->getAllocatableClass(TII->getRegClass(*II, IIOpNum, TRI, *MF))
            : nullptr;
+    const TargetRegisterClass *OpRC =
+        TLI->isTypeLegal(OpVT)
+            ? TLI->getRegClassFor(OpVT,
+                                  Op.getNode()->isDivergent() ||
+                                      (IIRC && TRI->isDivergentRegClass(IIRC)))
+            : nullptr;
 
     if (OpRC && IIRC && OpRC != IIRC &&
         TargetRegisterInfo::isVirtualRegister(VReg)) {
@@ -464,7 +469,7 @@ void InstrEmitter::AddOperand(MachineInstrBuilder &MIB,
 }
 
 unsigned InstrEmitter::ConstrainForSubReg(unsigned VReg, unsigned SubIdx,
-                                          MVT VT, const DebugLoc &DL) {
+                                          MVT VT, bool isDivergent, const DebugLoc &DL) {
   const TargetRegisterClass *VRC = MRI->getRegClass(VReg);
   const TargetRegisterClass *RC = TRI->getSubClassWithSubReg(VRC, SubIdx);
 
@@ -479,7 +484,7 @@ unsigned InstrEmitter::ConstrainForSubReg(unsigned VReg, unsigned SubIdx,
 
   // VReg couldn't be reasonably constrained. Emit a COPY to a new virtual
   // register instead.
-  RC = TRI->getSubClassWithSubReg(TLI->getRegClassFor(VT), SubIdx);
+  RC = TRI->getSubClassWithSubReg(TLI->getRegClassFor(VT, isDivergent), SubIdx);
   assert(RC && "No legal register class for VT supports that SubIdx");
   unsigned NewReg = MRI->createVirtualRegister(RC);
   BuildMI(*MBB, InsertPos, DL, TII->get(TargetOpcode::COPY), NewReg)
@@ -514,7 +519,7 @@ void InstrEmitter::EmitSubregNode(SDNode *Node,
     // classes.
     unsigned SubIdx = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
     const TargetRegisterClass *TRC =
-      TLI->getRegClassFor(Node->getSimpleValueType(0));
+        TLI->getRegClassFor(Node->getSimpleValueType(0), Node->isDivergent());
 
     unsigned Reg;
     MachineInstr *DefMI;
@@ -548,8 +553,7 @@ void InstrEmitter::EmitSubregNode(SDNode *Node,
     if (TargetRegisterInfo::isVirtualRegister(Reg))
       Reg = ConstrainForSubReg(Reg, SubIdx,
                                Node->getOperand(0).getSimpleValueType(),
-                               Node->getDebugLoc());
-
+                               Node->isDivergent(), Node->getDebugLoc());
     // Create the destreg if it is missing.
     if (VRBase == 0)
       VRBase = MRI->createVirtualRegister(TRC);
@@ -584,7 +588,8 @@ void InstrEmitter::EmitSubregNode(SDNode *Node,
     //
     // There is no constraint on the %src register class.
     //
-    const TargetRegisterClass *SRC = TLI->getRegClassFor(Node->getSimpleValueType(0));
+    const TargetRegisterClass *SRC =
+        TLI->getRegClassFor(Node->getSimpleValueType(0), Node->isDivergent());
     SRC = TRI->getSubClassWithSubReg(SRC, SubIdx);
     assert(SRC && "No register class supports VT and SubIdx for INSERT_SUBREG");
 
diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h
index 3188c2678f1..42f7846fe7c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h
+++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h
@@ -83,7 +83,7 @@ class LLVM_LIBRARY_VISIBILITY InstrEmitter {
   /// supports SubIdx sub-registers. Emit a copy if that isn't possible.
   /// Return the virtual register to use.
   unsigned ConstrainForSubReg(unsigned VReg, unsigned SubIdx, MVT VT,
-                              const DebugLoc &DL);
+                              bool isDivergent, const DebugLoc &DL);
 
   /// EmitSubregNode - Generate machine code for subreg nodes.
   ///
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 76e5847ba11..a5274877ece 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -9844,7 +9844,7 @@ SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
         if (const Constant *C = dyn_cast<Constant>(PHIOp)) {
           unsigned &RegOut = ConstantsOut[C];
           if (RegOut == 0) {
-            RegOut = FuncInfo.CreateRegs(C->getType());
+            RegOut = FuncInfo.CreateRegs(C);
             CopyValueToVirtualRegister(C, RegOut);
           }
           Reg = RegOut;
@@ -9857,7 +9857,7 @@ SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
             assert(isa<AllocaInst>(PHIOp) &&
                    FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(PHIOp)) &&
                    "Didn't codegen value into a register!??");
-            Reg = FuncInfo.CreateRegs(PHIOp->getType());
+            Reg = FuncInfo.CreateRegs(PHIOp);
             CopyValueToVirtualRegister(PHIOp, Reg);
           }
         }
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 6c9a1cd646e..6f55f98c51f 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -1485,7 +1485,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
             !Inst->use_empty()) {
           unsigned &R = FuncInfo->ValueMap[Inst];
           if (!R)
-            R = FuncInfo->CreateRegs(Inst->getType());
+            R = FuncInfo->CreateRegs(Inst);
         }
 
       bool HadTailCall = false;
diff --git a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
index 94b1e636c7b..8ad7a52c92b 100644
--- a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
@@ -302,52 +302,6 @@ static bool foldVGPRCopyIntoRegSequence(MachineInstr &MI,
   return true;
 }
 
-static bool phiHasVGPROperands(const MachineInstr &PHI,
-                               const MachineRegisterInfo &MRI,
-                               const SIRegisterInfo *TRI,
-                               const SIInstrInfo *TII) {
-  for (unsigned i = 1; i < PHI.getNumOperands(); i += 2) {
-    unsigned Reg = PHI.getOperand(i).getReg();
-    if (TRI->hasVGPRs(MRI.getRegClass(Reg)))
-      return true;
-  }
-  return false;
-}
-
-static bool phiHasBreakDef(const MachineInstr &PHI,
-                           const MachineRegisterInfo &MRI,
-                           SmallSet<unsigned, 8> &Visited) {
-  for (unsigned i = 1; i < PHI.getNumOperands(); i += 2) {
-    unsigned Reg = PHI.getOperand(i).getReg();
-    if (Visited.count(Reg))
-      continue;
-
-    Visited.insert(Reg);
-
-    MachineInstr *DefInstr = MRI.getVRegDef(Reg);
-    switch (DefInstr->getOpcode()) {
-    default:
-      break;
-    case AMDGPU::SI_IF_BREAK:
-      return true;
-    case AMDGPU::PHI:
-      if (phiHasBreakDef(*DefInstr, MRI, Visited))
-        return true;
-    }
-  }
-  return false;
-}
-
-static bool hasTerminatorThatModifiesExec(const MachineBasicBlock &MBB,
-                                          const TargetRegisterInfo &TRI) {
-  for (MachineBasicBlock::const_iterator I = MBB.getFirstTerminator(),
-       E = MBB.end(); I != E; ++I) {
-    if (I->modifiesRegister(AMDGPU::EXEC, &TRI))
-      return true;
-  }
-  return false;
-}
-
 static bool isSafeToFoldImmIntoCopy(const MachineInstr *Copy,
                                     const MachineInstr *MoveImm,
                                     const SIInstrInfo *TII,
@@ -409,12 +363,6 @@ bool searchPredecessors(const MachineBasicBlock *MBB,
   return false;
 }
 
-static bool predsHasDivergentTerminator(MachineBasicBlock *MBB,
-                                        const TargetRegisterInfo *TRI) {
-  return searchPredecessors(MBB, nullptr, [TRI](MachineBasicBlock *MBB) {
-    return hasTerminatorThatModifiesExec(*MBB, *TRI); });
-}
-
 // Checks if there is potential path From instruction To instruction.
 // If CutOff is specified and it sits in between of that path we ignore
 // a higher portion of the path and report it is not reachable.
@@ -621,63 +569,73 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
       break;
     }
     case AMDGPU::PHI: {
-      unsigned Reg = MI.getOperand(0).getReg();
-      if (!TRI->isSGPRClass(MRI.getRegClass(Reg)))
-        break;
-
-      // We don't need to fix the PHI if the common dominator of the
-      // two incoming blocks terminates with a uniform branch.
-      bool HasVGPROperand = phiHasVGPROperands(MI, MRI, TRI, TII);
-      if (MI.getNumExplicitOperands() == 5 && !HasVGPROperand) {
-        MachineBasicBlock *MBB0 = MI.getOperand(2).getMBB();
-        MachineBasicBlock *MBB1 = MI.getOperand(4).getMBB();
-
-        if (!predsHasDivergentTerminator(MBB0, TRI) &&
-            !predsHasDivergentTerminator(MBB1, TRI)) {
-          LLVM_DEBUG(dbgs()
-                     << "Not fixing PHI for uniform branch: " << MI << '\n');
+      unsigned hasVGPRUses = 0;
+      SetVector<const MachineInstr *> worklist;
+      worklist.insert(&MI);
+      while (!worklist.empty()) {
+        const MachineInstr *Instr = worklist.pop_back_val();
+        unsigned Reg = Instr->getOperand(0).getReg();
+        for (const auto &Use : MRI.use_operands(Reg)) {
+          const MachineInstr *UseMI = Use.getParent();
+          if (UseMI->isCopy() || UseMI->isRegSequence()) {
+            if (UseMI->isCopy() &&
+                TRI->isPhysicalRegister(UseMI->getOperand(0).getReg()) &&
+                !TRI->isSGPRReg(MRI, UseMI->getOperand(0).getReg())) {
+              hasVGPRUses++;
+            }
+            worklist.insert(UseMI);
+            continue;
+          }
+
+          if (UseMI->isPHI()) {
+            if (!TRI->isSGPRReg(MRI, Use.getReg()))
+              hasVGPRUses++;
+            continue;
+          }
+
+          unsigned OpNo = UseMI->getOperandNo(&Use);
+          const MCInstrDesc &Desc = TII->get(UseMI->getOpcode());
+          if (Desc.OpInfo && Desc.OpInfo[OpNo].RegClass != -1) {
+            const TargetRegisterClass *OpRC =
+                TRI->getRegClass(Desc.OpInfo[OpNo].RegClass);
+            if (!TRI->isSGPRClass(OpRC) && OpRC != &AMDGPU::VS_32RegClass &&
+                OpRC != &AMDGPU::VS_64RegClass) {
+              hasVGPRUses++;
+            }
+          }
+        }
+      }
+      bool hasVGPRInput = false;
+      for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
+        unsigned InputReg = MI.getOperand(i).getReg();
+        MachineInstr *Def = MRI.getVRegDef(InputReg);
+        if (TRI->isVGPR(MRI, InputReg)) {
+          if (Def->isCopy()) {
+            unsigned SrcReg = Def->getOperand(1).getReg();
+            const TargetRegisterClass *RC =
+                TRI->isVirtualRegister(SrcReg) ? MRI.getRegClass(SrcReg)
+                                               : TRI->getPhysRegClass(SrcReg);
+            if (TRI->isSGPRClass(RC))
+              continue;
+          }
+          hasVGPRInput = true;
+          break;
+        } else if (Def->isCopy() &&
+                   TRI->isVGPR(MRI, Def->getOperand(1).getReg())) {
+          hasVGPRInput = true;
           break;
         }
       }
+      unsigned PHIRes = MI.getOperand(0).getReg();
+      const TargetRegisterClass *RC0 = MRI.getRegClass(PHIRes);
 
-      // If a PHI node defines an SGPR and any of its operands are VGPRs,
-      // then we need to move it to the VALU.
-      //
-      // Also, if a PHI node defines an SGPR and has all SGPR operands
-      // we must move it to the VALU, because the SGPR operands will
-      // all end up being assigned the same register, which means
-      // there is a potential for a conflict if different threads take
-      // different control flow paths.
-      //
-      // For Example:
-      //
-      // sgpr0 = def;
-      // ...
-      // sgpr1 = def;
-      // ...
-      // sgpr2 = PHI sgpr0, sgpr1
-      // use sgpr2;
-      //
-      // Will Become:
-      //
-      // sgpr2 = def;
-      // ...
-      // sgpr2 = def;
-      // ...
-      // use sgpr2
-      //
-      // The one exception to this rule is when one of the operands
-      // is defined by a SI_BREAK, SI_IF_BREAK, or SI_ELSE_BREAK
-      // instruction. In this case, there we know the program will
-      // never enter the second block (the loop) without entering
-      // the first block (where the condition is computed), so there
-      // is no chance for values to be over-written.
-
-      SmallSet<unsigned, 8> Visited;
-      if (HasVGPROperand || !phiHasBreakDef(MI, MRI, Visited)) {
-        LLVM_DEBUG(dbgs() << "Fixing PHI: " << MI);
-        TII->moveToVALU(MI, MDT);
+      if ((!TRI->isVGPR(MRI, PHIRes) && RC0 != &AMDGPU::VReg_1RegClass) &&
+          (hasVGPRInput || hasVGPRUses > 1)) {
+        TII->moveToVALU(MI);
+      } else {
+        TII->legalizeOperands(MI, MDT);
       }
+
       break;
     }
     case AMDGPU::REG_SEQUENCE:
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index c2cda5ef4d7..8f93c63046c 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -9637,7 +9637,8 @@ SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node,
       break;
 
     MVT VT = Src0.getValueType().getSimpleVT();
-    const TargetRegisterClass *RC = getRegClassFor(VT);
+    const TargetRegisterClass *RC =
+        getRegClassFor(VT, Src0.getNode()->isDivergent());
 
     MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
     SDValue UndefReg = DAG.getRegister(MRI.createVirtualRegister(RC), VT);
@@ -10171,3 +10172,91 @@ SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
 
   return AMDGPUTargetLowering::shouldExpandAtomicRMWInIR(RMW);
 }
+
+const TargetRegisterClass *
+SITargetLowering::getRegClassFor(MVT VT, bool isDivergent) const {
+  const TargetRegisterClass *RC = TargetLoweringBase::getRegClassFor(VT, false);
+  const SIRegisterInfo *TRI = Subtarget->getRegisterInfo();
+  if (RC == &AMDGPU::VReg_1RegClass && !isDivergent)
+    return &AMDGPU::SReg_64RegClass;
+  if (!TRI->isSGPRClass(RC) && !isDivergent)
+    return TRI->getEquivalentSGPRClass(RC);
+  else if (TRI->isSGPRClass(RC) && isDivergent)
+    return TRI->getEquivalentVGPRClass(RC);
+
+  return RC;
+}
+
+static bool hasIfBreakUser(const Value *V, SetVector<const Value *> &Visited) {
+  if (Visited.count(V))
+    return false;
+  Visited.insert(V);
+  bool Result = false;
+  for (auto U : V->users()) {
+    if (const IntrinsicInst *Intrinsic = dyn_cast<IntrinsicInst>(U)) {
+      if ((Intrinsic->getIntrinsicID() == Intrinsic::amdgcn_if_break) &&
+          (V == U->getOperand(1)))
+        Result = true;
+    } else {
+      Result = hasIfBreakUser(U, Visited);
+    }
+    if (Result)
+      break;
+  }
+  return Result;
+}
+
+bool SITargetLowering::requiresUniformRegister(MachineFunction &MF,
+                                               const Value *V) const {
+  if (const IntrinsicInst *Intrinsic = dyn_cast<IntrinsicInst>(V)) {
+    switch (Intrinsic->getIntrinsicID()) {
+    default:
+      return false;
+    case Intrinsic::amdgcn_if_break:
+      return true;
+    }
+  }
+  if (const ExtractValueInst *ExtValue = dyn_cast<ExtractValueInst>(V)) {
+    if (const IntrinsicInst *Intrinsic =
+            dyn_cast<IntrinsicInst>(ExtValue->getOperand(0))) {
+      switch (Intrinsic->getIntrinsicID()) {
+      default:
+        return false;
+      case Intrinsic::amdgcn_if:
+      case Intrinsic::amdgcn_else: {
+        ArrayRef<unsigned> Indices = ExtValue->getIndices();
+        if (Indices.size() == 1 && Indices[0] == 1) {
+          return true;
+        }
+      }
+      }
+    }
+  }
+  if (const CallInst *CI = dyn_cast<CallInst>(V)) {
+    if (isa<InlineAsm>(CI->getCalledValue())) {
+      const SIRegisterInfo *SIRI = Subtarget->getRegisterInfo();
+      ImmutableCallSite CS(CI);
+      TargetLowering::AsmOperandInfoVector TargetConstraints = ParseConstraints(
+          MF.getDataLayout(), Subtarget->getRegisterInfo(), CS);
+      for (auto &TC : TargetConstraints) {
+        if (TC.Type == InlineAsm::isOutput) {
+          ComputeConstraintToUse(TC, SDValue());
+          unsigned AssignedReg;
+          const TargetRegisterClass *RC;
+          std::tie(AssignedReg, RC) = getRegForInlineAsmConstraint(
+              SIRI, TC.ConstraintCode,
+              getSimpleValueType(MF.getDataLayout(), CS.getType()));
+          if (RC) {
+            MachineRegisterInfo &MRI = MF.getRegInfo();
+            if (AssignedReg != 0 && SIRI->isSGPRReg(MRI, AssignedReg))
+              return true;
+            else if (SIRI->isSGPRClass(RC))
+              return true;
+          }
+        }
+      }
+    }
+  }
+  SetVector<const Value *> Visited;
+  return hasIfBreakUser(V, Visited);
+}
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h
index 60a474f51e5..094a0b054e2 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -367,7 +367,10 @@ public:
                              bool SNaN = false,
                              unsigned Depth = 0) const override;
   AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *) const override;
-
+  virtual const TargetRegisterClass *
+  getRegClassFor(MVT VT, bool isDivergent) const override;
+  virtual bool requiresUniformRegister(MachineFunction &MF,
+                                       const Value *V) const override;
   unsigned getPrefLoopAlignment(MachineLoop *ML) const override;
 };
 
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index e42ed3505cf..14f5dbe6ad4 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -2219,6 +2219,10 @@ bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
       // These come before src2.
       removeModOperands(UseMI);
       UseMI.setDesc(get(NewOpc));
+      // It might happen that UseMI was commuted
+      // and we now have SGPR as SRC1. If so 2 inlined
+      // constant and SGPR are illegal.
+      legalizeOperands(UseMI);
 
       bool DeleteDef = MRI->hasOneNonDBGUse(Reg);
       if (DeleteDef)
@@ -3913,7 +3917,7 @@ void SIInstrInfo::legalizeGenericOperand(MachineBasicBlock &InsertMBB,
     return;
 
   // Try to eliminate the copy if it is copying an immediate value.
-  if (Def->isMoveImmediate())
+  if (Def->isMoveImmediate() && DstRC != &AMDGPU::VReg_1RegClass)
     FoldImmediate(*Copy, *Def, OpReg, &MRI);
 }
 
@@ -4147,7 +4151,10 @@ void SIInstrInfo::legalizeOperands(MachineInstr &MI,
     if (VRC || !RI.isSGPRClass(getOpRegClass(MI, 0))) {
       if (!VRC) {
         assert(SRC);
-        VRC = RI.getEquivalentVGPRClass(SRC);
+        if (getOpRegClass(MI, 0) == &AMDGPU::VReg_1RegClass) {
+          VRC = &AMDGPU::VReg_1RegClass;
+        } else
+          VRC = RI.getEquivalentVGPRClass(SRC);
       }
       RC = VRC;
     } else {
@@ -5309,7 +5316,7 @@ const TargetRegisterClass *SIInstrInfo::getDestEquivalentVGPRClass(
   case AMDGPU::INSERT_SUBREG:
   case AMDGPU::WQM:
   case AMDGPU::WWM:
-    if (RI.hasVGPRs(NewDstRC))
+    if (RI.hasVGPRs(NewDstRC) || NewDstRC == &AMDGPU::VReg_1RegClass)
       return nullptr;
 
     NewDstRC = RI.getEquivalentVGPRClass(NewDstRC);
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
index bfdc1ef9645..e2df3ae5ea7 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
@@ -195,6 +195,11 @@ public:
               unsigned Reg) const;
   bool isVGPR(const MachineRegisterInfo &MRI, unsigned Reg) const;
 
+  virtual bool
+  isDivergentRegClass(const TargetRegisterClass *RC) const override {
+    return !isSGPRClass(RC);
+  }
+
   bool isSGPRPressureSet(unsigned SetID) const {
     return SGPRPressureSets.test(SetID) && !VGPRPressureSets.test(SetID);
   }
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 677e4d5b2e8..88d318e7bb3 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -1447,7 +1447,9 @@ EVT ARMTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
 
 /// getRegClassFor - Return the register class that should be used for the
 /// specified value type.
-const TargetRegisterClass *ARMTargetLowering::getRegClassFor(MVT VT) const {
+const TargetRegisterClass *
+ARMTargetLowering::getRegClassFor(MVT VT, bool isDivergent) const {
+  (void)isDivergent;
   // Map v4i64 to QQ registers but do not make the type legal. Similarly map
   // v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to
   // load / store 4 to 8 consecutive D registers.
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h
index 3b94cb0dcb0..8e254d75b1c 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -456,7 +456,8 @@ class VectorType;
 
     /// getRegClassFor - Return the register class that should be used for the
     /// specified value type.
-    const TargetRegisterClass *getRegClassFor(MVT VT) const override;
+    const TargetRegisterClass *
+    getRegClassFor(MVT VT, bool isDivergent = false) const override;
 
     /// Returns true if a cast between SrcAS and DestAS is a noop.
     bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override {