summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp')
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp94
1 files changed, 89 insertions, 5 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp
index 16815affbf9..c011be6fa16 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp
@@ -15,7 +15,10 @@
#include "AMDGPU.h"
#include "AMDGPUIntrinsicInfo.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/Analysis/DivergenceAnalysis.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/Support/Debug.h"
@@ -30,6 +33,10 @@ namespace {
class AMDGPUAnnotateUniformValues : public FunctionPass,
public InstVisitor<AMDGPUAnnotateUniformValues> {
DivergenceAnalysis *DA;
+ MemoryDependenceResults *MDR;
+ LoopInfo *LI;
+ DenseMap<Value*, GetElementPtrInst*> noClobberClones;
+ bool isKernelFunc;
public:
static char ID;
@@ -42,12 +49,14 @@ public:
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<DivergenceAnalysis>();
+ AU.addRequired<MemoryDependenceWrapperPass>();
+ AU.addRequired<LoopInfoWrapperPass>();
AU.setPreservesAll();
}
void visitBranchInst(BranchInst &I);
void visitLoadInst(LoadInst &I);
-
+ bool isClobberedInFunction(LoadInst * Load);
};
} // End anonymous namespace
@@ -55,6 +64,8 @@ public:
INITIALIZE_PASS_BEGIN(AMDGPUAnnotateUniformValues, DEBUG_TYPE,
"Add AMDGPU uniform metadata", false, false)
INITIALIZE_PASS_DEPENDENCY(DivergenceAnalysis)
+INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_END(AMDGPUAnnotateUniformValues, DEBUG_TYPE,
"Add AMDGPU uniform metadata", false, false)
@@ -63,6 +74,46 @@ char AMDGPUAnnotateUniformValues::ID = 0;
static void setUniformMetadata(Instruction *I) {
I->setMetadata("amdgpu.uniform", MDNode::get(I->getContext(), {}));
}
+static void setNoClobberMetadata(Instruction *I) {
+ I->setMetadata("amdgpu.noclobber", MDNode::get(I->getContext(), {}));
+}
+
+static void DFS(BasicBlock *Root, SetVector<BasicBlock*> & Set) {
+ for (auto I : predecessors(Root))
+ if (Set.insert(I))
+ DFS(I, Set);
+}
+
+bool AMDGPUAnnotateUniformValues::isClobberedInFunction(LoadInst * Load) {
+ // 1. get Loop for the Load->getparent();
+ // 2. if it exists, collect all the BBs from the most outer
+ // loop and check for the writes. If NOT - start DFS over all preds.
+ // 3. Start DFS over all preds from the most outer loop header.
+ SetVector<BasicBlock *> Checklist;
+ BasicBlock *Start = Load->getParent();
+ Checklist.insert(Start);
+ const Value *Ptr = Load->getPointerOperand();
+ const Loop *L = LI->getLoopFor(Start);
+ if (L) {
+ const Loop *P = L;
+ do {
+ L = P;
+ P = P->getParentLoop();
+ } while (P);
+ Checklist.insert(L->block_begin(), L->block_end());
+ Start = L->getHeader();
+ }
+
+ DFS(Start, Checklist);
+ for (auto &BB : Checklist) {
+ BasicBlock::iterator StartIt = (BB == Load->getParent()) ?
+ BasicBlock::iterator(Load) : BB->end();
+ if (MDR->getPointerDependencyFrom(MemoryLocation(Ptr),
+ true, StartIt, BB, Load).isClobber())
+ return true;
+ }
+ return false;
+}
void AMDGPUAnnotateUniformValues::visitBranchInst(BranchInst &I) {
if (I.isUnconditional())
@@ -79,10 +130,39 @@ void AMDGPUAnnotateUniformValues::visitLoadInst(LoadInst &I) {
Value *Ptr = I.getPointerOperand();
if (!DA->isUniform(Ptr))
return;
+ auto isGlobalLoad = [](LoadInst &Load)->bool {
+ return Load.getPointerAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
+ };
+ // We're tracking up to the Function boundaries
+ // We cannot go beyond because of FunctionPass restrictions
+ // Thus we can ensure that memory not clobbered for memory
+ // operations that live in kernel only.
+ bool NotClobbered = isKernelFunc && !isClobberedInFunction(&I);
+ Instruction *PtrI = dyn_cast<Instruction>(Ptr);
+ if (!PtrI && NotClobbered && isGlobalLoad(I)) {
+ if (isa<Argument>(Ptr) || isa<GlobalValue>(Ptr)) {
+ // Lookup for the existing GEP
+ if (noClobberClones.count(Ptr)) {
+ PtrI = noClobberClones[Ptr];
+ } else {
+ // Create GEP of the Value
+ Function *F = I.getParent()->getParent();
+ Value *Idx = Constant::getIntegerValue(
+ Type::getInt32Ty(Ptr->getContext()), APInt(64, 0));
+ // Insert GEP at the entry to make it dominate all uses
+ PtrI = GetElementPtrInst::Create(
+ Ptr->getType()->getPointerElementType(), Ptr,
+ ArrayRef<Value*>(Idx), Twine(""), F->getEntryBlock().getFirstNonPHI());
+ }
+ I.replaceUsesOfWith(Ptr, PtrI);
+ }
+ }
- if (Instruction *PtrI = dyn_cast<Instruction>(Ptr))
+ if (PtrI) {
setUniformMetadata(PtrI);
-
+ if (NotClobbered)
+ setNoClobberMetadata(PtrI);
+ }
}
bool AMDGPUAnnotateUniformValues::doInitialization(Module &M) {
@@ -93,9 +173,13 @@ bool AMDGPUAnnotateUniformValues::runOnFunction(Function &F) {
if (skipFunction(F))
return false;
- DA = &getAnalysis<DivergenceAnalysis>();
- visit(F);
+ DA = &getAnalysis<DivergenceAnalysis>();
+ MDR = &getAnalysis<MemoryDependenceWrapperPass>().getMemDep();
+ LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ isKernelFunc = F.getCallingConv() == CallingConv::AMDGPU_KERNEL;
+ visit(F);
+ noClobberClones.clear();
return true;
}
OpenPOWER on IntegriCloud