diff options
author | Alexander Timofeev <Alexander.Timofeev@amd.com> | 2018-03-05 15:12:21 +0000 |
---|---|---|
committer | Alexander Timofeev <Alexander.Timofeev@amd.com> | 2018-03-05 15:12:21 +0000 |
commit | 2e5eeceeb7a100a4fe93d2535cad29829b87d101 (patch) | |
tree | ee0d108da103650da7d421f984a4f2f36efc9e3b /llvm/lib | |
parent | 15186d493818ca82087ef766d3f488cc16062787 (diff) | |
download | bcm5719-llvm-2e5eeceeb7a100a4fe93d2535cad29829b87d101.tar.gz bcm5719-llvm-2e5eeceeb7a100a4fe93d2535cad29829b87d101.zip |
Pass Divergence Analysis data to the Selection DAG to drive
divergence-dependent instruction selection.
Differential revision: https://reviews.llvm.org/D35267
llvm-svn: 326703
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp | 10 | ||||
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 101 | ||||
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp | 2 | ||||
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 27 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 2 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 96 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h | 3 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp | 53 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 4 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SMInstructions.td | 7 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 51 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h | 3 |
12 files changed, 297 insertions, 62 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index 81347fa4bd4..c0abfdb2126 100644 --- a/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -547,3 +547,13 @@ FunctionLoweringInfo::getOrCreateSwiftErrorVRegUseAt(const Instruction *I, const } return std::make_pair(It->second, false); } + +const Value * +FunctionLoweringInfo::getValueFromVirtualReg(unsigned Vreg) { + if (VirtReg2Value.empty()) { + for (auto &P : ValueMap) { + VirtReg2Value[P.second] = P.first; + } + } + return VirtReg2Value[Vreg]; +} diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 87828a722b8..37d70c6e204 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -950,7 +950,8 @@ SelectionDAG::SelectionDAG(const TargetMachine &tm, CodeGenOpt::Level OL) void SelectionDAG::init(MachineFunction &NewMF, OptimizationRemarkEmitter &NewORE, - Pass *PassPtr, const TargetLibraryInfo *LibraryInfo) { + Pass *PassPtr, const TargetLibraryInfo *LibraryInfo, + DivergenceAnalysis * Divergence) { MF = &NewMF; SDAGISelPass = PassPtr; ORE = &NewORE; @@ -958,6 +959,7 @@ void SelectionDAG::init(MachineFunction &NewMF, TSI = getSubtarget().getSelectionDAGInfo(); LibInfo = LibraryInfo; Context = &MF->getFunction().getContext(); + DA = Divergence; } SelectionDAG::~SelectionDAG() { @@ -1713,6 +1715,7 @@ SDValue SelectionDAG::getRegister(unsigned RegNo, EVT VT) { return SDValue(E, 0); auto *N = newSDNode<RegisterSDNode>(RegNo, VT); + N->SDNodeBits.IsDivergent = TLI->isSDNodeSourceOfDivergence(N, FLI, DA); CSEMap.InsertNode(N, IP); InsertNode(N); return SDValue(N, 0); @@ -6699,6 +6702,7 @@ SDNode *SelectionDAG::UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2) { if (N->OperandList[1] != Op2) N->OperandList[1].set(Op2); + updateDivergence(N); // If this 
gets put into a CSE map, add it. if (InsertPos) CSEMap.InsertNode(N, InsertPos); return N; @@ -7340,8 +7344,9 @@ void SelectionDAG::ReplaceAllUsesWith(SDValue FromN, SDValue To) { SDUse &Use = UI.getUse(); ++UI; Use.set(To); + if (To->isDivergent() != From->isDivergent()) + updateDivergence(User); } while (UI != UE && *UI == User); - // Now that we have modified User, add it back to the CSE maps. If it // already exists there, recursively merge the results together. AddModifiedNodeToCSEMaps(User); @@ -7395,6 +7400,8 @@ void SelectionDAG::ReplaceAllUsesWith(SDNode *From, SDNode *To) { SDUse &Use = UI.getUse(); ++UI; Use.setNode(To); + if (To->isDivergent() != From->isDivergent()) + updateDivergence(User); } while (UI != UE && *UI == User); // Now that we have modified User, add it back to the CSE maps. If it @@ -7439,8 +7446,9 @@ void SelectionDAG::ReplaceAllUsesWith(SDNode *From, const SDValue *To) { const SDValue &ToOp = To[Use.getResNo()]; ++UI; Use.set(ToOp); + if (To->getNode()->isDivergent() != From->isDivergent()) + updateDivergence(User); } while (UI != UE && *UI == User); - // Now that we have modified User, add it back to the CSE maps. If it // already exists there, recursively merge the results together. AddModifiedNodeToCSEMaps(User); @@ -7498,8 +7506,9 @@ void SelectionDAG::ReplaceAllUsesOfValueWith(SDValue From, SDValue To){ ++UI; Use.set(To); + if (To->isDivergent() != From->isDivergent()) + updateDivergence(User); } while (UI != UE && *UI == User); - // We are iterating over all uses of the From node, so if a use // doesn't use the specific value, no changes are made. 
if (!UserRemovedFromCSEMaps) @@ -7532,6 +7541,70 @@ namespace { } // end anonymous namespace +void SelectionDAG::updateDivergence(SDNode * N) +{ + if (TLI->isSDNodeAlwaysUniform(N)) + return; + bool IsDivergent = TLI->isSDNodeSourceOfDivergence(N, FLI, DA); + for (auto &Op : N->ops()) { + if (Op.Val.getValueType() != MVT::Other) + IsDivergent |= Op.getNode()->isDivergent(); + } + if (N->SDNodeBits.IsDivergent != IsDivergent) { + N->SDNodeBits.IsDivergent = IsDivergent; + for (auto U : N->uses()) { + updateDivergence(U); + } + } +} + + +void SelectionDAG::CreateTopologicalOrder(std::vector<SDNode*>& Order) { + DenseMap<SDNode *, unsigned> Degree; + Order.reserve(AllNodes.size()); + for (auto & N : allnodes()) { + unsigned NOps = N.getNumOperands(); + Degree[&N] = NOps; + if (0 == NOps) + Order.push_back(&N); + } + for (std::vector<SDNode *>::iterator I = Order.begin(); + I!=Order.end();++I) { + SDNode * N = *I; + for (auto U : N->uses()) { + unsigned &UnsortedOps = Degree[U]; + if (0 == --UnsortedOps) + Order.push_back(U); + } + } +} + +void SelectionDAG::VerifyDAGDiverence() +{ + std::vector<SDNode*> TopoOrder; + CreateTopologicalOrder(TopoOrder); + const TargetLowering &TLI = getTargetLoweringInfo(); + DenseMap<const SDNode *, bool> DivergenceMap; + for (auto &N : allnodes()) { + DivergenceMap[&N] = false; + } + for (auto N : TopoOrder) { + bool IsDivergent = DivergenceMap[N]; + bool IsSDNodeDivergent = TLI.isSDNodeSourceOfDivergence(N, FLI, DA); + for (auto &Op : N->ops()) { + if (Op.Val.getValueType() != MVT::Other) + IsSDNodeDivergent |= DivergenceMap[Op.getNode()]; + } + if (!IsDivergent && IsSDNodeDivergent && !TLI.isSDNodeAlwaysUniform(N)) { + DivergenceMap[N] = true; + } + } + for (auto &N : allnodes()) { + assert(DivergenceMap[&N] == N.isDivergent() && "Divergence bit inconsistency detected\n"); + } +} + + /// ReplaceAllUsesOfValuesWith - Replace any uses of From with To, leaving /// uses of other values produced by From.getNode() alone. 
The same value /// may appear in both the From and To list. The Deleted vector is @@ -8337,6 +8410,26 @@ SDNode *SelectionDAG::isConstantFPBuildVectorOrConstantFP(SDValue N) { return nullptr; } +void SelectionDAG::createOperands(SDNode *Node, ArrayRef<SDValue> Vals) { + assert(!Node->OperandList && "Node already has operands"); + SDUse *Ops = OperandRecycler.allocate( + ArrayRecycler<SDUse>::Capacity::get(Vals.size()), OperandAllocator); + + bool IsDivergent = false; + for (unsigned I = 0; I != Vals.size(); ++I) { + Ops[I].setUser(Node); + Ops[I].setInitial(Vals[I]); + if (Ops[I].Val.getValueType() != MVT::Other) // Skip Chain. It does not carry divergence. + IsDivergent = IsDivergent || Ops[I].getNode()->isDivergent(); + } + Node->NumOperands = Vals.size(); + Node->OperandList = Ops; + IsDivergent |= TLI->isSDNodeSourceOfDivergence(Node, FLI, DA); + if (!TLI->isSDNodeAlwaysUniform(Node)) + Node->SDNodeBits.IsDivergent = IsDivergent; + checkForCycles(Node); +} + #ifndef NDEBUG static void checkForCyclesHelper(const SDNode *N, SmallPtrSetImpl<const SDNode*> &Visited, diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index a7db5da55ee..4c58fb17ac1 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -629,6 +629,8 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { if (getNodeId() != -1) OS << " [ID=" << getNodeId() << ']'; + if (!(isa<ConstantSDNode>(this) || (isa<ConstantFPSDNode>(this)))) + OS << "# D:" << isDivergent(); if (!G) return; diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index ab0a2293666..067690af636 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -29,6 +29,7 @@ #include "llvm/Analysis/CFG.h" #include 
"llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/CodeGen/FastISel.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/GCMetadata.h" @@ -329,6 +330,7 @@ void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const { AU.addPreserved<StackProtector>(); AU.addPreserved<GCModuleInfo>(); AU.addRequired<TargetLibraryInfoWrapperPass>(); + AU.addRequired<TargetTransformInfoWrapperPass>(); if (UseMBPI && OptLevel != CodeGenOpt::None) AU.addRequired<BranchProbabilityInfoWrapperPass>(); MachineFunctionPass::getAnalysisUsage(AU); @@ -414,7 +416,8 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { SplitCriticalSideEffectEdges(const_cast<Function &>(Fn), DT, LI); - CurDAG->init(*MF, *ORE, this, LibInfo); + CurDAG->init(*MF, *ORE, this, LibInfo, + getAnalysisIfAvailable<DivergenceAnalysis>()); FuncInfo->set(Fn, *MF, CurDAG); // Now get the optional analyzes if we want to. @@ -711,6 +714,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { int BlockNumber = -1; (void)BlockNumber; bool MatchFilterBB = false; (void)MatchFilterBB; + TargetTransformInfo &TTI = + getAnalysis<TargetTransformInfoWrapperPass>().getTTI(*FuncInfo->Fn); // Pre-type legalization allow creation of any node types. 
CurDAG->NewNodesMustHaveLegalTypes = false; @@ -744,6 +749,9 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { CurDAG->Combine(BeforeLegalizeTypes, AA, OptLevel); } + if (TTI.hasBranchDivergence()) + CurDAG->VerifyDAGDiverence(); + DEBUG(dbgs() << "Optimized lowered selection DAG: " << printMBBReference(*FuncInfo->MBB) << " '" << BlockName << "'\n"; @@ -761,6 +769,9 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { Changed = CurDAG->LegalizeTypes(); } + if (TTI.hasBranchDivergence()) + CurDAG->VerifyDAGDiverence(); + DEBUG(dbgs() << "Type-legalized selection DAG: " << printMBBReference(*FuncInfo->MBB) << " '" << BlockName << "'\n"; @@ -780,6 +791,9 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { CurDAG->Combine(AfterLegalizeTypes, AA, OptLevel); } + if (TTI.hasBranchDivergence()) + CurDAG->VerifyDAGDiverence(); + DEBUG(dbgs() << "Optimized type-legalized selection DAG: " << printMBBReference(*FuncInfo->MBB) << " '" << BlockName << "'\n"; @@ -823,6 +837,9 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { << printMBBReference(*FuncInfo->MBB) << " '" << BlockName << "'\n"; CurDAG->dump()); + + if (TTI.hasBranchDivergence()) + CurDAG->VerifyDAGDiverence(); } if (ViewLegalizeDAGs && MatchFilterBB) @@ -834,6 +851,9 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { CurDAG->Legalize(); } + if (TTI.hasBranchDivergence()) + CurDAG->VerifyDAGDiverence(); + DEBUG(dbgs() << "Legalized selection DAG: " << printMBBReference(*FuncInfo->MBB) << " '" << BlockName << "'\n"; @@ -849,6 +869,9 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { CurDAG->Combine(AfterLegalizeDAG, AA, OptLevel); } + if (TTI.hasBranchDivergence()) + CurDAG->VerifyDAGDiverence(); + DEBUG(dbgs() << "Optimized legalized selection DAG: " << printMBBReference(*FuncInfo->MBB) << " '" << BlockName << "'\n"; @@ -1401,6 +1424,8 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { FuncInfo->MBB = FuncInfo->MBBMap[&Fn.getEntryBlock()]; FuncInfo->InsertPt = FuncInfo->MBB->begin(); + 
CurDAG->setFunctionLoweringInfo(FuncInfo); + if (!FastIS) { LowerArguments(Fn); } else { diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index 192d4b0f1ef..90cd4ffaf0b 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -27,6 +27,7 @@ #include "llvm/ADT/APInt.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Analysis/DivergenceAnalysis.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/ISDOpcodes.h" @@ -83,6 +84,7 @@ public: void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired<AMDGPUArgumentUsageInfo>(); + AU.addRequired<DivergenceAnalysis>(); SelectionDAGISel::getAnalysisUsage(AU); } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index 78bd0da40fe..f142ef6b01e 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -25,6 +25,7 @@ #include "AMDGPURegisterInfo.h" #include "AMDGPUSubtarget.h" #include "AMDGPUTargetMachine.h" +#include "Utils/AMDGPUBaseInfo.h" #include "R600MachineFunctionInfo.h" #include "SIInstrInfo.h" #include "SIMachineFunctionInfo.h" @@ -748,6 +749,101 @@ bool AMDGPUTargetLowering::isCheapToSpeculateCtlz() const { return true; } +bool AMDGPUTargetLowering::isSDNodeAlwaysUniform(const SDNode * N) const { + switch (N->getOpcode()) { + default: + return false; + case ISD::EntryToken: + case ISD::TokenFactor: + return true; + case ISD::INTRINSIC_WO_CHAIN: + { + unsigned IntrID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); + switch (IntrID) { + default: + return false; + case Intrinsic::amdgcn_readfirstlane: + case Intrinsic::amdgcn_readlane: + return true; + } + } + break; + case ISD::LOAD: + { + const LoadSDNode * L = dyn_cast<LoadSDNode>(N); + if (L->getMemOperand()->getAddrSpace() + == 
Subtarget->getAMDGPUAS().CONSTANT_ADDRESS_32BIT) + return true; + return false; + } + break; + } +} + +bool AMDGPUTargetLowering::isSDNodeSourceOfDivergence(const SDNode * N, + FunctionLoweringInfo * FLI, DivergenceAnalysis * DA) const +{ + switch (N->getOpcode()) { + case ISD::Register: + case ISD::CopyFromReg: + { + const RegisterSDNode *R = nullptr; + if (N->getOpcode() == ISD::Register) { + R = dyn_cast<RegisterSDNode>(N); + } + else { + R = dyn_cast<RegisterSDNode>(N->getOperand(1)); + } + if (R) + { + const MachineFunction * MF = FLI->MF; + const SISubtarget &ST = MF->getSubtarget<SISubtarget>(); + const MachineRegisterInfo &MRI = MF->getRegInfo(); + const SIRegisterInfo &TRI = ST.getInstrInfo()->getRegisterInfo(); + unsigned Reg = R->getReg(); + if (TRI.isPhysicalRegister(Reg)) + return TRI.isVGPR(MRI, Reg); + + if (MRI.isLiveIn(Reg)) { + // workitem.id.x workitem.id.y workitem.id.z + if ((MRI.getLiveInPhysReg(Reg) == AMDGPU::T0_X) || + (MRI.getLiveInPhysReg(Reg) == AMDGPU::T0_Y) || + (MRI.getLiveInPhysReg(Reg) == AMDGPU::T0_Z)|| + (MRI.getLiveInPhysReg(Reg) == AMDGPU::VGPR0) || + (MRI.getLiveInPhysReg(Reg) == AMDGPU::VGPR1) || + (MRI.getLiveInPhysReg(Reg) == AMDGPU::VGPR2)) + return true; + // Formal arguments of non-entry functions + // are conservatively considered divergent + else if (!AMDGPU::isEntryFunctionCC(FLI->Fn->getCallingConv())) + return true; + } + return !DA || DA->isDivergent(FLI->getValueFromVirtualReg(Reg)); + } + } + break; + case ISD::LOAD: { + const LoadSDNode *L = dyn_cast<LoadSDNode>(N); + if (L->getMemOperand()->getAddrSpace() == + Subtarget->getAMDGPUAS().PRIVATE_ADDRESS) + return true; + } break; + case ISD::CALLSEQ_END: + return true; + break; + case ISD::INTRINSIC_WO_CHAIN: + { + + } + return AMDGPU::isIntrinsicSourceOfDivergence( + cast<ConstantSDNode>(N->getOperand(0))->getZExtValue()); + case ISD::INTRINSIC_W_CHAIN: + return AMDGPU::isIntrinsicSourceOfDivergence( + cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()); + } + 
return false; +} + //===---------------------------------------------------------------------===// // Target Properties //===---------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h index c99540ff8f9..afb85a51a6f 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h @@ -168,6 +168,9 @@ public: bool isCheapToSpeculateCttz() const override; bool isCheapToSpeculateCtlz() const override; + bool isSDNodeSourceOfDivergence(const SDNode * N, + FunctionLoweringInfo * FLI, DivergenceAnalysis * DA) const; + bool isSDNodeAlwaysUniform(const SDNode * N) const; static CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg); static CCAssignFn *CCAssignFnForReturn(CallingConv::ID CC, bool IsVarArg); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp index 00ff0308ba1..6d6ab084ee6 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp @@ -17,6 +17,7 @@ #include "AMDGPUTargetTransformInfo.h" #include "AMDGPUSubtarget.h" +#include "Utils/AMDGPUBaseInfo.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" @@ -464,55 +465,7 @@ int AMDGPUTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy, } } -static bool isIntrinsicSourceOfDivergence(const IntrinsicInst *I) { - switch (I->getIntrinsicID()) { - case Intrinsic::amdgcn_workitem_id_x: - case Intrinsic::amdgcn_workitem_id_y: - case Intrinsic::amdgcn_workitem_id_z: - case Intrinsic::amdgcn_interp_mov: - case Intrinsic::amdgcn_interp_p1: - case Intrinsic::amdgcn_interp_p2: - case Intrinsic::amdgcn_mbcnt_hi: - case Intrinsic::amdgcn_mbcnt_lo: - case Intrinsic::r600_read_tidig_x: - case Intrinsic::r600_read_tidig_y: - case Intrinsic::r600_read_tidig_z: - 
case Intrinsic::amdgcn_atomic_inc: - case Intrinsic::amdgcn_atomic_dec: - case Intrinsic::amdgcn_ds_fadd: - case Intrinsic::amdgcn_ds_fmin: - case Intrinsic::amdgcn_ds_fmax: - case Intrinsic::amdgcn_image_atomic_swap: - case Intrinsic::amdgcn_image_atomic_add: - case Intrinsic::amdgcn_image_atomic_sub: - case Intrinsic::amdgcn_image_atomic_smin: - case Intrinsic::amdgcn_image_atomic_umin: - case Intrinsic::amdgcn_image_atomic_smax: - case Intrinsic::amdgcn_image_atomic_umax: - case Intrinsic::amdgcn_image_atomic_and: - case Intrinsic::amdgcn_image_atomic_or: - case Intrinsic::amdgcn_image_atomic_xor: - case Intrinsic::amdgcn_image_atomic_inc: - case Intrinsic::amdgcn_image_atomic_dec: - case Intrinsic::amdgcn_image_atomic_cmpswap: - case Intrinsic::amdgcn_buffer_atomic_swap: - case Intrinsic::amdgcn_buffer_atomic_add: - case Intrinsic::amdgcn_buffer_atomic_sub: - case Intrinsic::amdgcn_buffer_atomic_smin: - case Intrinsic::amdgcn_buffer_atomic_umin: - case Intrinsic::amdgcn_buffer_atomic_smax: - case Intrinsic::amdgcn_buffer_atomic_umax: - case Intrinsic::amdgcn_buffer_atomic_and: - case Intrinsic::amdgcn_buffer_atomic_or: - case Intrinsic::amdgcn_buffer_atomic_xor: - case Intrinsic::amdgcn_buffer_atomic_cmpswap: - case Intrinsic::amdgcn_ps_live: - case Intrinsic::amdgcn_ds_swizzle: - return true; - default: - return false; - } -} + static bool isArgPassedInSGPR(const Argument *A) { const Function *F = A->getParent(); @@ -563,7 +516,7 @@ bool AMDGPUTTIImpl::isSourceOfDivergence(const Value *V) const { return true; if (const IntrinsicInst *Intrinsic = dyn_cast<IntrinsicInst>(V)) - return isIntrinsicSourceOfDivergence(Intrinsic); + return AMDGPU::isIntrinsicSourceOfDivergence(Intrinsic->getIntrinsicID()); // Assume all function calls are a source of divergence. 
if (isa<CallInst>(V) || isa<InvokeInst>(V)) diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index c4d7ab985be..94d1e3a3bf0 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -5372,7 +5372,7 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { unsigned NumElements = MemVT.getVectorNumElements(); if (AS == AMDGPUASI.CONSTANT_ADDRESS || AS == AMDGPUASI.CONSTANT_ADDRESS_32BIT) { - if (isMemOpUniform(Load)) + if (!Op->isDivergent()) return SDValue(); // Non-uniform loads will be selected to MUBUF instructions, so they // have the same legalization requirements as global and private @@ -5382,7 +5382,7 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { if (AS == AMDGPUASI.CONSTANT_ADDRESS || AS == AMDGPUASI.CONSTANT_ADDRESS_32BIT || AS == AMDGPUASI.GLOBAL_ADDRESS) { - if (Subtarget->getScalarizeGlobalBehavior() && isMemOpUniform(Load) && + if (Subtarget->getScalarizeGlobalBehavior() && !Op->isDivergent() && !Load->isVolatile() && isMemOpHasNoClobberedMemOperand(Load)) return SDValue(); // Non-uniform loads will be selected to MUBUF instructions, so they diff --git a/llvm/lib/Target/AMDGPU/SMInstructions.td b/llvm/lib/Target/AMDGPU/SMInstructions.td index 7ee0af0877c..6bb5abd3b20 100644 --- a/llvm/lib/Target/AMDGPU/SMInstructions.td +++ b/llvm/lib/Target/AMDGPU/SMInstructions.td @@ -223,12 +223,9 @@ def S_MEMREALTIME : SM_Time_Pseudo <"s_memrealtime", int_amdgcn_s_memrealtime> def smrd_load : PatFrag <(ops node:$ptr), (load node:$ptr), [{ auto Ld = cast<LoadSDNode>(N); return Ld->getAlignment() >= 4 && - (((Ld->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS || - Ld->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS_32BIT) && - static_cast<const SITargetLowering *>(getTargetLowering())->isMemOpUniform(N)) || + ((((Ld->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS) || (Ld->getAddressSpace() == 
AMDGPUASI.CONSTANT_ADDRESS_32BIT)) && !N->isDivergent()) || (Subtarget->getScalarizeGlobalBehavior() && Ld->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS && - !Ld->isVolatile() && - static_cast<const SITargetLowering *>(getTargetLowering())->isMemOpUniform(N) && + !Ld->isVolatile() && !N->isDivergent() && static_cast<const SITargetLowering *>(getTargetLowering())->isMemOpHasNoClobberedMemOperand(N))); }]>; diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index 08ecf4fc907..7848a29cc53 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -8,6 +8,7 @@ //===----------------------------------------------------------------------===// #include "AMDGPUBaseInfo.h" +#include "AMDGPUTargetTransformInfo.h" #include "AMDGPU.h" #include "SIDefines.h" #include "llvm/ADT/StringRef.h" @@ -938,5 +939,55 @@ AMDGPUAS getAMDGPUAS(const TargetMachine &M) { AMDGPUAS getAMDGPUAS(const Module &M) { return getAMDGPUAS(Triple(M.getTargetTriple())); } + +bool isIntrinsicSourceOfDivergence(unsigned IntrID) { + switch (IntrID) { + case Intrinsic::amdgcn_workitem_id_x: + case Intrinsic::amdgcn_workitem_id_y: + case Intrinsic::amdgcn_workitem_id_z: + case Intrinsic::amdgcn_interp_mov: + case Intrinsic::amdgcn_interp_p1: + case Intrinsic::amdgcn_interp_p2: + case Intrinsic::amdgcn_mbcnt_hi: + case Intrinsic::amdgcn_mbcnt_lo: + case Intrinsic::r600_read_tidig_x: + case Intrinsic::r600_read_tidig_y: + case Intrinsic::r600_read_tidig_z: + case Intrinsic::amdgcn_atomic_inc: + case Intrinsic::amdgcn_atomic_dec: + case Intrinsic::amdgcn_ds_fadd: + case Intrinsic::amdgcn_ds_fmin: + case Intrinsic::amdgcn_ds_fmax: + case Intrinsic::amdgcn_image_atomic_swap: + case Intrinsic::amdgcn_image_atomic_add: + case Intrinsic::amdgcn_image_atomic_sub: + case Intrinsic::amdgcn_image_atomic_smin: + case Intrinsic::amdgcn_image_atomic_umin: + case Intrinsic::amdgcn_image_atomic_smax: + case 
Intrinsic::amdgcn_image_atomic_umax: + case Intrinsic::amdgcn_image_atomic_and: + case Intrinsic::amdgcn_image_atomic_or: + case Intrinsic::amdgcn_image_atomic_xor: + case Intrinsic::amdgcn_image_atomic_inc: + case Intrinsic::amdgcn_image_atomic_dec: + case Intrinsic::amdgcn_image_atomic_cmpswap: + case Intrinsic::amdgcn_buffer_atomic_swap: + case Intrinsic::amdgcn_buffer_atomic_add: + case Intrinsic::amdgcn_buffer_atomic_sub: + case Intrinsic::amdgcn_buffer_atomic_smin: + case Intrinsic::amdgcn_buffer_atomic_umin: + case Intrinsic::amdgcn_buffer_atomic_smax: + case Intrinsic::amdgcn_buffer_atomic_umax: + case Intrinsic::amdgcn_buffer_atomic_and: + case Intrinsic::amdgcn_buffer_atomic_or: + case Intrinsic::amdgcn_buffer_atomic_xor: + case Intrinsic::amdgcn_buffer_atomic_cmpswap: + case Intrinsic::amdgcn_ps_live: + case Intrinsic::amdgcn_ds_swizzle: + return true; + default: + return false; + } +} } // namespace AMDGPU } // namespace llvm diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h index 650abf9bf6e..1fb81533cb7 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -382,6 +382,9 @@ int64_t getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset); /// not the encoded offset. bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset); +/// \returns true if the intrinsic is divergent +bool isIntrinsicSourceOfDivergence(unsigned IntrID); + } // end namespace AMDGPU } // end namespace llvm |