diff options
author | Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com> | 2019-06-13 23:47:36 +0000 |
---|---|---|
committer | Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com> | 2019-06-13 23:47:36 +0000 |
commit | 68a2fef9ae5beadd0ca7974d936e98caa04aa085 (patch) | |
tree | ea66f916f5ab7b128571ba39eb8227afc23e83b8 /llvm/lib | |
parent | 347ec0faa79a26dd1ba2e896b9acb18d8d05fdfc (diff) | |
download | bcm5719-llvm-68a2fef9ae5beadd0ca7974d936e98caa04aa085.tar.gz bcm5719-llvm-68a2fef9ae5beadd0ca7974d936e98caa04aa085.zip |
[AMDGPU] gfx1010 wave32 icmp/fcmp intrinsic changes for wave32
Differential Revision: https://reviews.llvm.org/D63301
llvm-svn: 363339
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp | 3 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h | 4 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp | 44 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 32 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstructions.td | 7 | ||||
-rw-r--r-- | llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp | 4 |
6 files changed, 69 insertions, 25 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp index 4756a77a7ec..1c503c29d55 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp @@ -249,7 +249,8 @@ void AMDGPUAtomicOptimizer::optimizeAtomic(Instruction &I, // We need to know how many lanes are active within the wavefront, and we do // this by doing a ballot of active lanes. CallInst *const Ballot = - B.CreateIntrinsic(Intrinsic::amdgcn_icmp, {B.getInt32Ty()}, + B.CreateIntrinsic(Intrinsic::amdgcn_icmp, + {B.getInt64Ty(), B.getInt32Ty()}, {B.getInt32(1), B.getInt32(0), B.getInt32(33)}); // We need to know how many lanes are active within the wavefront that are diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h index bccbcd4da16..ca8dc8c07c6 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -1028,6 +1028,10 @@ public: std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations) const override; + bool isWave32() const { + return WavefrontSize == 32; + } + /// \returns Maximum number of work groups per compute unit supported by the /// subtarget and limited by given \p FlatWorkGroupSize. unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override { diff --git a/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp b/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp index 15ab80b756e..b764ca7d706 100644 --- a/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp +++ b/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp @@ -12,11 +12,13 @@ //===----------------------------------------------------------------------===// #include "AMDGPU.h" +#include "AMDGPUSubtarget.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/LegacyDivergenceAnalysis.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" #include "llvm/IR/Constant.h" @@ -55,13 +57,13 @@ class SIAnnotateControlFlow : public FunctionPass { Type *Boolean; Type *Void; - Type *Int64; + Type *IntMask; Type *ReturnStruct; ConstantInt *BoolTrue; ConstantInt *BoolFalse; UndefValue *BoolUndef; - Constant *Int64Zero; + Constant *IntMaskZero; Function *If; Function *Else; @@ -74,6 +76,8 @@ class SIAnnotateControlFlow : public FunctionPass { LoopInfo *LI; + void initialize(Module &M, const GCNSubtarget &ST); + bool isUniform(BranchInst *T); bool isTopOfStack(BasicBlock *BB); @@ -103,8 +107,6 @@ public: SIAnnotateControlFlow() : FunctionPass(ID) {} - bool doInitialization(Module &M) override; - bool runOnFunction(Function &F) override; StringRef getPassName() const override { return "SI annotate control flow"; } @@ -114,6 +116,7 @@ public: AU.addRequired<DominatorTreeWrapperPass>(); AU.addRequired<LegacyDivergenceAnalysis>(); AU.addPreserved<DominatorTreeWrapperPass>(); + AU.addRequired<TargetPassConfig>(); FunctionPass::getAnalysisUsage(AU); } }; @@ -124,31 +127,34 @@ INITIALIZE_PASS_BEGIN(SIAnnotateControlFlow, DEBUG_TYPE, "Annotate SI Control Flow", false, false) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis) +INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) INITIALIZE_PASS_END(SIAnnotateControlFlow, DEBUG_TYPE, "Annotate SI Control Flow", false, false) char SIAnnotateControlFlow::ID = 0; /// Initialize all the types and constants used in the pass -bool SIAnnotateControlFlow::doInitialization(Module &M) { +void SIAnnotateControlFlow::initialize(Module &M, const GCNSubtarget &ST) { LLVMContext &Context = M.getContext(); Void = Type::getVoidTy(Context); Boolean = Type::getInt1Ty(Context); - Int64 = Type::getInt64Ty(Context); - ReturnStruct = StructType::get(Boolean, Int64); + IntMask = ST.isWave32() ? Type::getInt32Ty(Context) + : Type::getInt64Ty(Context); + ReturnStruct = StructType::get(Boolean, IntMask); BoolTrue = ConstantInt::getTrue(Context); BoolFalse = ConstantInt::getFalse(Context); BoolUndef = UndefValue::get(Boolean); - Int64Zero = ConstantInt::get(Int64, 0); - - If = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_if); - Else = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_else); - IfBreak = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_if_break); - Loop = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_loop); - EndCf = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_end_cf); - return false; + IntMaskZero = ConstantInt::get(IntMask, 0); + + If = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_if, { IntMask }); + Else = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_else, + { IntMask, IntMask }); + IfBreak = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_if_break, + { IntMask, IntMask }); + Loop = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_loop, { IntMask }); + EndCf = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_end_cf, { IntMask }); } /// Is the branch condition uniform or did the StructurizeCFG pass @@ -258,14 +264,14 @@ void SIAnnotateControlFlow::handleLoop(BranchInst *Term) { return; BasicBlock *Target = Term->getSuccessor(1); - PHINode *Broken = PHINode::Create(Int64, 0, "phi.broken", &Target->front()); + PHINode *Broken = PHINode::Create(IntMask, 0, "phi.broken", &Target->front()); Value *Cond = Term->getCondition(); Term->setCondition(BoolTrue); Value *Arg = handleLoopCondition(Cond, Broken, L, Term); for (BasicBlock *Pred : predecessors(Target)) { - Value *PHIValue = Int64Zero; + Value *PHIValue = IntMaskZero; if (Pred == BB) // Remember the value of the previous iteration. PHIValue = Arg; // If the backedge from Pred to Target could be executed before the exit @@ -316,6 +322,10 @@ bool SIAnnotateControlFlow::runOnFunction(Function &F) { DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); DA = &getAnalysis<LegacyDivergenceAnalysis>(); + TargetPassConfig &TPC = getAnalysis<TargetPassConfig>(); + const TargetMachine &TM = TPC.getTM<TargetMachine>(); + + initialize(*F.getParent(), TM.getSubtarget<GCNSubtarget>(F)); for (df_iterator<BasicBlock *> I = df_begin(&F.getEntryBlock()), E = df_end(&F.getEntryBlock()); I != E; ++I) { diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 3bb1ddc6703..92ca105af25 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -3839,7 +3839,6 @@ static SDValue lowerICMPIntrinsic(const SITargetLowering &TLI, ICmpInst::Predicate IcInput = static_cast<ICmpInst::Predicate>(CondCode); - SDValue LHS = N->getOperand(1); SDValue RHS = N->getOperand(2); @@ -3855,8 +3854,14 @@ static SDValue lowerICMPIntrinsic(const SITargetLowering &TLI, ISD::CondCode CCOpcode = getICmpCondCode(IcInput); - return DAG.getNode(AMDGPUISD::SETCC, DL, VT, LHS, RHS, - DAG.getCondCode(CCOpcode)); + unsigned WavefrontSize = TLI.getSubtarget()->getWavefrontSize(); + EVT CCVT = EVT::getIntegerVT(*DAG.getContext(), WavefrontSize); + + SDValue SetCC = DAG.getNode(AMDGPUISD::SETCC, DL, CCVT, LHS, RHS, + DAG.getCondCode(CCOpcode)); + if (VT.bitsEq(CCVT)) + return SetCC; + return DAG.getZExtOrTrunc(SetCC, DL, VT); } static SDValue lowerFCMPIntrinsic(const SITargetLowering &TLI, @@ -3882,8 +3887,13 @@ static SDValue lowerFCMPIntrinsic(const SITargetLowering &TLI, FCmpInst::Predicate IcInput = static_cast<FCmpInst::Predicate>(CondCode); ISD::CondCode CCOpcode = getFCmpCondCode(IcInput); - return DAG.getNode(AMDGPUISD::SETCC, SL, VT, Src0, - Src1, DAG.getCondCode(CCOpcode)); + unsigned WavefrontSize = TLI.getSubtarget()->getWavefrontSize(); + EVT CCVT = EVT::getIntegerVT(*DAG.getContext(), WavefrontSize); + SDValue SetCC = DAG.getNode(AMDGPUISD::SETCC, SL, CCVT, Src0, + Src1, DAG.getCondCode(CCOpcode)); + if (VT.bitsEq(CCVT)) + return SetCC; + return DAG.getZExtOrTrunc(SetCC, SL, VT); } void SITargetLowering::ReplaceNodeResults(SDNode *N, @@ -5394,6 +5404,9 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, return loadInputValue(DAG, &AMDGPU::VGPR_32RegClass, MVT::i32, SDLoc(DAG.getEntryNode()), MFI->getArgInfo().WorkItemIDZ); + case Intrinsic::amdgcn_wavefrontsize: + return DAG.getConstant(MF.getSubtarget<GCNSubtarget>().getWavefrontSize(), + SDLoc(Op), MVT::i32); case Intrinsic::amdgcn_s_buffer_load: { unsigned Cache = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue(); return lowerSBuffer(VT, DL, Op.getOperand(1), Op.getOperand(2), @@ -5598,6 +5611,11 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, case Intrinsic::amdgcn_fmad_ftz: return DAG.getNode(AMDGPUISD::FMAD_FTZ, DL, VT, Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); + + case Intrinsic::amdgcn_if_break: + return SDValue(DAG.getMachineNode(AMDGPU::SI_IF_BREAK, DL, VT, + Op->getOperand(1), Op->getOperand(2)), 0); + default: if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr = AMDGPU::getImageDimIntrinsicInfo(IntrinsicID)) @@ -6495,6 +6513,10 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op, M->getMemoryVT(), M->getMemOperand()); } + case Intrinsic::amdgcn_end_cf: + return SDValue(DAG.getMachineNode(AMDGPU::SI_END_CF, DL, MVT::Other, + Op->getOperand(2), Chain), 0); + default: { if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr = AMDGPU::getImageDimIntrinsicInfo(IntrinsicID)) diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index 4a3e8b3e36b..e6b64ecbfce 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -604,7 +604,12 @@ def : Pat < // TODO: we could add more variants for other types of conditionals def : Pat < - (int_amdgcn_icmp i1:$src, (i1 0), (i32 33)), + (i64 (int_amdgcn_icmp i1:$src, (i1 0), (i32 33))), + (COPY $src) // Return the SGPRs representing i1 src +>; + +def : Pat < + (i32 (int_amdgcn_icmp i1:$src, (i1 0), (i32 33))), (COPY $src) // Return the SGPRs representing i1 src >; diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index 2c8fa20b259..f167762b602 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -3733,7 +3733,9 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { break; Function *NewF = - Intrinsic::getDeclaration(II->getModule(), NewIID, SrcLHS->getType()); + Intrinsic::getDeclaration(II->getModule(), NewIID, + { II->getType(), + SrcLHS->getType() }); Value *Args[] = { SrcLHS, SrcRHS, ConstantInt::get(CC->getType(), SrcPred) }; CallInst *NewCall = Builder.CreateCall(NewF, Args); |