diff options
| -rw-r--r-- | llvm/include/llvm/Analysis/DivergenceAnalysis.h | 16 | ||||
| -rw-r--r-- | llvm/include/llvm/Analysis/LegacyDivergenceAnalysis.h | 16 | ||||
| -rw-r--r-- | llvm/lib/Analysis/DivergenceAnalysis.cpp | 10 | ||||
| -rw-r--r-- | llvm/lib/Analysis/LegacyDivergenceAnalysis.cpp | 29 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp | 8 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/divergence-at-use.ll | 20 |
6 files changed, 79 insertions, 20 deletions
diff --git a/llvm/include/llvm/Analysis/DivergenceAnalysis.h b/llvm/include/llvm/Analysis/DivergenceAnalysis.h index 3cfb9d13df9..2fac9c8b4b3 100644 --- a/llvm/include/llvm/Analysis/DivergenceAnalysis.h +++ b/llvm/include/llvm/Analysis/DivergenceAnalysis.h @@ -73,9 +73,12 @@ public: /// operands bool isAlwaysUniform(const Value &Val) const; - /// \brief Whether \p Val is a divergent value + /// \brief Whether \p Val is divergent at its definition. bool isDivergent(const Value &Val) const; + /// \brief Whether \p U is divergent. Uses of a uniform value can be divergent. + bool isDivergentUse(const Use &U) const; + void print(raw_ostream &OS, const Module *) const; private: @@ -189,12 +192,19 @@ public: /// The GPU kernel this analysis result is for const Function &getFunction() const { return DA.getFunction(); } - /// Whether \p V is divergent. + /// Whether \p V is divergent at its definition. bool isDivergent(const Value &V) const; - /// Whether \p V is uniform/non-divergent + /// Whether \p U is divergent. Uses of a uniform value can be divergent. + bool isDivergentUse(const Use &U) const; + + /// Whether \p V is uniform/non-divergent. bool isUniform(const Value &V) const { return !isDivergent(V); } + /// Whether \p U is uniform/non-divergent. Uses of a uniform value can be + /// divergent. + bool isUniformUse(const Use &U) const { return !isDivergentUse(U); } + /// Print all divergent values in the kernel. void print(raw_ostream &OS, const Module *) const; }; diff --git a/llvm/include/llvm/Analysis/LegacyDivergenceAnalysis.h b/llvm/include/llvm/Analysis/LegacyDivergenceAnalysis.h index 0a338b81664..e33b8f4129f 100644 --- a/llvm/include/llvm/Analysis/LegacyDivergenceAnalysis.h +++ b/llvm/include/llvm/Analysis/LegacyDivergenceAnalysis.h @@ -39,17 +39,18 @@ public: void print(raw_ostream &OS, const Module *) const override; // Returns true if V is divergent at its definition. - // - // Even if this function returns false, V may still be divergent when used - // in a different basic block. bool isDivergent(const Value *V) const; + // Returns true if U is divergent. Uses of a uniform value can be divergent. + bool isDivergentUse(const Use *U) const; + // Returns true if V is uniform/non-divergent. - // - // Even if this function returns true, V may still be divergent when used - // in a different basic block. bool isUniform(const Value *V) const { return !isDivergent(V); } + // Returns true if U is uniform/non-divergent. Uses of a uniform value can be + // divergent. + bool isUniformUse(const Use *U) const { return !isDivergentUse(U); } + // Keep the analysis results uptodate by removing an erased value. void removeValue(const Value *V) { DivergentValues.erase(V); } @@ -62,6 +63,9 @@ private: // Stores all divergent values. DenseSet<const Value *> DivergentValues; + + // Stores divergent uses of possibly uniform values. + DenseSet<const Use *> DivergentUses; }; } // End llvm namespace diff --git a/llvm/lib/Analysis/DivergenceAnalysis.cpp b/llvm/lib/Analysis/DivergenceAnalysis.cpp index 0ccd59ef2bf..3d1be1e1cce 100644 --- a/llvm/lib/Analysis/DivergenceAnalysis.cpp +++ b/llvm/lib/Analysis/DivergenceAnalysis.cpp @@ -412,6 +412,12 @@ bool DivergenceAnalysis::isDivergent(const Value &V) const { return DivergentValues.find(&V) != DivergentValues.end(); } +bool DivergenceAnalysis::isDivergentUse(const Use &U) const { + Value &V = *U.get(); + Instruction &I = *cast<Instruction>(U.getUser()); + return isDivergent(V) || isTemporalDivergent(*I.getParent(), V); +} + void DivergenceAnalysis::print(raw_ostream &OS, const Module *) const { if (DivergentValues.empty()) return; @@ -449,6 +455,10 @@ bool GPUDivergenceAnalysis::isDivergent(const Value &val) const { return DA.isDivergent(val); } +bool GPUDivergenceAnalysis::isDivergentUse(const Use &use) const { + return DA.isDivergentUse(use); +} + void GPUDivergenceAnalysis::print(raw_ostream &OS, const Module *mod) const { OS << "Divergence of kernel " << DA.getFunction().getName() << " {\n"; DA.print(OS, mod); diff --git a/llvm/lib/Analysis/LegacyDivergenceAnalysis.cpp b/llvm/lib/Analysis/LegacyDivergenceAnalysis.cpp index 52212e1c42a..2fd8b8d6e1d 100644 --- a/llvm/lib/Analysis/LegacyDivergenceAnalysis.cpp +++ b/llvm/lib/Analysis/LegacyDivergenceAnalysis.cpp @@ -93,8 +93,9 @@ namespace { class DivergencePropagator { public: DivergencePropagator(Function &F, TargetTransformInfo &TTI, DominatorTree &DT, - PostDominatorTree &PDT, DenseSet<const Value *> &DV) - : F(F), TTI(TTI), DT(DT), PDT(PDT), DV(DV) {} + PostDominatorTree &PDT, DenseSet<const Value *> &DV, + DenseSet<const Use *> &DU) + : F(F), TTI(TTI), DT(DT), PDT(PDT), DV(DV), DU(DU) {} void populateWithSourcesOfDivergence(); void propagate(); @@ -118,11 +119,14 @@ private: PostDominatorTree &PDT; std::vector<Value *> Worklist; // Stack for DFS. DenseSet<const Value *> &DV; // Stores all divergent values. + DenseSet<const Use *> &DU; // Stores divergent uses of possibly uniform + // values. }; void DivergencePropagator::populateWithSourcesOfDivergence() { Worklist.clear(); DV.clear(); + DU.clear(); for (auto &I : instructions(F)) { if (TTI.isSourceOfDivergence(&I)) { Worklist.push_back(&I); @@ -197,8 +201,10 @@ void DivergencePropagator::exploreSyncDependency(Instruction *TI) { // dominators of TI until it is outside the influence region. BasicBlock *InfluencedBB = ThisBB; while (InfluenceRegion.count(InfluencedBB)) { - for (auto &I : *InfluencedBB) - findUsersOutsideInfluenceRegion(I, InfluenceRegion); + for (auto &I : *InfluencedBB) { + if (!DV.count(&I)) + findUsersOutsideInfluenceRegion(I, InfluenceRegion); + } DomTreeNode *IDomNode = DT.getNode(InfluencedBB)->getIDom(); if (IDomNode == nullptr) break; @@ -208,9 +214,10 @@ void DivergencePropagator::exploreSyncDependency(Instruction *TI) { void DivergencePropagator::findUsersOutsideInfluenceRegion( Instruction &I, const DenseSet<BasicBlock *> &InfluenceRegion) { - for (User *U : I.users()) { - Instruction *UserInst = cast<Instruction>(U); + for (Use &Use : I.uses()) { + Instruction *UserInst = cast<Instruction>(Use.getUser()); if (!InfluenceRegion.count(UserInst->getParent())) { + DU.insert(&Use); if (DV.insert(UserInst).second) Worklist.push_back(UserInst); } @@ -320,6 +327,7 @@ bool LegacyDivergenceAnalysis::runOnFunction(Function &F) { return false; DivergentValues.clear(); + DivergentUses.clear(); gpuDA = nullptr; auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); @@ -332,7 +340,7 @@ bool LegacyDivergenceAnalysis::runOnFunction(Function &F) { } else { // run LLVM's existing DivergenceAnalysis - DivergencePropagator DP(F, TTI, DT, PDT, DivergentValues); + DivergencePropagator DP(F, TTI, DT, PDT, DivergentValues, DivergentUses); DP.populateWithSourcesOfDivergence(); DP.propagate(); } @@ -351,6 +359,13 @@ bool LegacyDivergenceAnalysis::isDivergent(const Value *V) const { return DivergentValues.count(V); } +bool LegacyDivergenceAnalysis::isDivergentUse(const Use *U) const { + if (gpuDA) { + return gpuDA->isDivergentUse(*U); + } + return DivergentValues.count(U->get()) || DivergentUses.count(U); +} + void LegacyDivergenceAnalysis::print(raw_ostream &OS, const Module *) const { if ((!gpuDA || !gpuDA->hasDivergence()) && DivergentValues.empty()) return; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp index de974011a91..92d94ee894a 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp @@ -142,11 +142,11 @@ void AMDGPUAtomicOptimizer::visitAtomicRMWInst(AtomicRMWInst &I) { // If the pointer operand is divergent, then each lane is doing an atomic // operation on a different address, and we cannot optimize that. - if (DA->isDivergent(I.getOperand(PtrIdx))) { + if (DA->isDivergentUse(&I.getOperandUse(PtrIdx))) { return; } - const bool ValDivergent = DA->isDivergent(I.getOperand(ValIdx)); + const bool ValDivergent = DA->isDivergentUse(&I.getOperandUse(ValIdx)); // If the value operand is divergent, each lane is contributing a different // value to the atomic calculation. We can only optimize divergent values if @@ -219,7 +219,7 @@ void AMDGPUAtomicOptimizer::visitIntrinsicInst(IntrinsicInst &I) { const unsigned ValIdx = 0; - const bool ValDivergent = DA->isDivergent(I.getOperand(ValIdx)); + const bool ValDivergent = DA->isDivergentUse(&I.getOperandUse(ValIdx)); // If the value operand is divergent, each lane is contributing a different // value to the atomic calculation. We can only optimize divergent values if @@ -232,7 +232,7 @@ void AMDGPUAtomicOptimizer::visitIntrinsicInst(IntrinsicInst &I) { // If any of the other arguments to the intrinsic are divergent, we can't // optimize the operation. for (unsigned Idx = 1; Idx < I.getNumOperands(); Idx++) { - if (DA->isDivergent(I.getOperand(Idx))) { + if (DA->isDivergentUse(&I.getOperandUse(Idx))) { return; } } diff --git a/llvm/test/CodeGen/AMDGPU/divergence-at-use.ll b/llvm/test/CodeGen/AMDGPU/divergence-at-use.ll new file mode 100644 index 00000000000..1aba1e4934d --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/divergence-at-use.ll @@ -0,0 +1,20 @@ +; RUN: llc -march=amdgcn -mcpu=gfx900 -amdgpu-atomic-optimizations=true < %s | FileCheck %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -amdgpu-atomic-optimizations=true < %s -use-gpu-divergence-analysis | FileCheck %s + +@local = addrspace(3) global i32 undef + +define void @reducible(i32 %x) { +; CHECK-LABEL: reducible: +; CHECK-NOT: dpp +entry: + br label %loop +loop: + %i = phi i32 [ 0, %entry ], [ %i1, %loop ] + %gep = getelementptr i32, i32 addrspace(3)* @local, i32 %i + %cond = icmp ult i32 %i, %x + %i1 = add i32 %i, 1 + br i1 %cond, label %loop, label %exit +exit: + %old = atomicrmw add i32 addrspace(3)* %gep, i32 %x acq_rel + ret void +} |

