summary | refs | log | tree | commit | diff | stats
path: root/llvm/lib
diff options
context:
space:
mode:
author Yaxun Liu <Yaxun.Liu@amd.com> 2018-04-11 14:46:15 +0000
committer Yaxun Liu <Yaxun.Liu@amd.com> 2018-04-11 14:46:15 +0000
commit 9381ae9791d57dd09fa10c22d52a17ca2bbcd4b2 (patch)
tree bf8c73e25d171d2d6f509c9fbd925493a0e23e91 /llvm/lib
parent 2f326d453feea698996c1c3f104a92e4354cd40f (diff)
download bcm5719-llvm-9381ae9791d57dd09fa10c22d52a17ca2bbcd4b2.tar.gz
bcm5719-llvm-9381ae9791d57dd09fa10c22d52a17ca2bbcd4b2.zip
[AMDGPU] Fix lowering enqueue_kernel
Two issues were fixed: (1) The runtime has difficulty allocating memory for an external symbol of a kernel and setting the address of that external symbol; therefore, make the runtime handle of an enqueued kernel an ordinary global variable. The runtime only needs to store the address of the loaded kernel in the handle, and this approach has been verified to work. (2) Handle the situation where __enqueue_kernel* gets inlined, in which case the enqueued kernel may be used through a constant expression instead of an instruction. Differential Revision: https://reviews.llvm.org/D45187 llvm-svn: 329815
Diffstat (limited to 'llvm/lib')
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp 50
1 files changed, 30 insertions, 20 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp
index 102a88074df..514670af2d0 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp
@@ -81,14 +81,27 @@ static void collectCallers(Function *F, DenseSet<Function *> &Callers) {
for (auto U : F->users()) {
if (auto *CI = dyn_cast<CallInst>(&*U)) {
auto *Caller = CI->getParent()->getParent();
- if (Callers.count(Caller))
- continue;
- Callers.insert(Caller);
- collectCallers(Caller, Callers);
+ if (Callers.insert(Caller).second)
+ collectCallers(Caller, Callers);
}
}
}
+/// If \p U is instruction or constant, collect functions which directly or
+/// indirectly use it.
+static void collectFunctionUsers(User *U, DenseSet<Function *> &Funcs) {
+ if (auto *I = dyn_cast<Instruction>(U)) {
+ auto *F = I->getParent()->getParent();
+ if (Funcs.insert(F).second)
+ collectCallers(F, Funcs);
+ return;
+ }
+ if (!isa<Constant>(U))
+ return;
+ for (auto UU : U->users())
+ collectFunctionUsers(&*UU, Funcs);
+}
+
bool AMDGPUOpenCLEnqueuedBlockLowering::runOnModule(Module &M) {
DenseSet<Function *> Callers;
auto &C = M.getContext();
@@ -101,32 +114,28 @@ bool AMDGPUOpenCLEnqueuedBlockLowering::runOnModule(Module &M) {
M.getDataLayout());
F.setName(Name);
}
+ DEBUG(dbgs() << "found enqueued kernel: " << F.getName() << '\n');
auto RuntimeHandle = (F.getName() + ".runtime_handle").str();
+ auto T = Type::getInt8Ty(C)->getPointerTo(AMDGPUAS::GLOBAL_ADDRESS);
auto *GV = new GlobalVariable(
- M, Type::getInt8Ty(C)->getPointerTo(AMDGPUAS::GLOBAL_ADDRESS),
- /*IsConstant=*/true, GlobalValue::ExternalLinkage,
- /*Initializer=*/nullptr, RuntimeHandle, /*InsertBefore=*/nullptr,
- GlobalValue::NotThreadLocal, AMDGPUAS::GLOBAL_ADDRESS,
- /*IsExternallyInitialized=*/true);
+ M, T,
+ /*IsConstant=*/false, GlobalValue::ExternalLinkage,
+ /*Initializer=*/Constant::getNullValue(T), RuntimeHandle,
+ /*InsertBefore=*/nullptr, GlobalValue::NotThreadLocal,
+ AMDGPUAS::GLOBAL_ADDRESS,
+ /*IsExternallyInitialized=*/false);
DEBUG(dbgs() << "runtime handle created: " << *GV << '\n');
for (auto U : F.users()) {
- if (!isa<ConstantExpr>(&*U))
+ auto *UU = &*U;
+ if (!isa<ConstantExpr>(UU))
continue;
- auto *BitCast = cast<ConstantExpr>(&*U);
+ collectFunctionUsers(UU, Callers);
+ auto *BitCast = cast<ConstantExpr>(UU);
auto *NewPtr = ConstantExpr::getPointerCast(GV, BitCast->getType());
BitCast->replaceAllUsesWith(NewPtr);
F.addFnAttr("runtime-handle", RuntimeHandle);
F.setLinkage(GlobalValue::ExternalLinkage);
-
- // Collect direct or indirect callers of enqueue_kernel.
- for (auto U : NewPtr->users()) {
- if (auto *I = dyn_cast<Instruction>(&*U)) {
- auto *F = I->getParent()->getParent();
- Callers.insert(F);
- collectCallers(F, Callers);
- }
- }
Changed = true;
}
}
@@ -136,6 +145,7 @@ bool AMDGPUOpenCLEnqueuedBlockLowering::runOnModule(Module &M) {
if (F->getCallingConv() != CallingConv::AMDGPU_KERNEL)
continue;
F->addFnAttr("calls-enqueue-kernel");
+ DEBUG(dbgs() << "mark enqueue_kernel caller:" << F->getName() << '\n');
}
return Changed;
}
OpenPOWER on IntegriCloud