diff options
| author | Yaxun Liu <Yaxun.Liu@amd.com> | 2018-04-11 14:46:15 +0000 |
|---|---|---|
| committer | Yaxun Liu <Yaxun.Liu@amd.com> | 2018-04-11 14:46:15 +0000 |
| commit | 9381ae9791d57dd09fa10c22d52a17ca2bbcd4b2 (patch) | |
| tree | bf8c73e25d171d2d6f509c9fbd925493a0e23e91 /llvm/lib | |
| parent | 2f326d453feea698996c1c3f104a92e4354cd40f (diff) | |
| download | bcm5719-llvm-9381ae9791d57dd09fa10c22d52a17ca2bbcd4b2.tar.gz bcm5719-llvm-9381ae9791d57dd09fa10c22d52a17ca2bbcd4b2.zip | |
[AMDGPU] Fix lowering enqueue_kernel
Two issues were fixed:
1. The runtime has difficulty allocating memory for an external symbol of a
kernel and setting the address of that external symbol. Therefore, make the
runtime handle of an enqueued kernel an ordinary global variable. The runtime
only needs to store the address of the loaded kernel in the handle, and has
verified that this approach works.
2. Handle the situation where __enqueue_kernel* gets inlined, in which case
the enqueued kernel may be used through a constant expression instead
of an instruction.
Differential Revision: https://reviews.llvm.org/D45187
llvm-svn: 329815
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp | 50 |
1 files changed, 30 insertions, 20 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp index 102a88074df..514670af2d0 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp @@ -81,14 +81,27 @@ static void collectCallers(Function *F, DenseSet<Function *> &Callers) { for (auto U : F->users()) { if (auto *CI = dyn_cast<CallInst>(&*U)) { auto *Caller = CI->getParent()->getParent(); - if (Callers.count(Caller)) - continue; - Callers.insert(Caller); - collectCallers(Caller, Callers); + if (Callers.insert(Caller).second) + collectCallers(Caller, Callers); } } } +/// If \p U is instruction or constant, collect functions which directly or +/// indirectly use it. +static void collectFunctionUsers(User *U, DenseSet<Function *> &Funcs) { + if (auto *I = dyn_cast<Instruction>(U)) { + auto *F = I->getParent()->getParent(); + if (Funcs.insert(F).second) + collectCallers(F, Funcs); + return; + } + if (!isa<Constant>(U)) + return; + for (auto UU : U->users()) + collectFunctionUsers(&*UU, Funcs); +} + bool AMDGPUOpenCLEnqueuedBlockLowering::runOnModule(Module &M) { DenseSet<Function *> Callers; auto &C = M.getContext(); @@ -101,32 +114,28 @@ bool AMDGPUOpenCLEnqueuedBlockLowering::runOnModule(Module &M) { M.getDataLayout()); F.setName(Name); } + DEBUG(dbgs() << "found enqueued kernel: " << F.getName() << '\n'); auto RuntimeHandle = (F.getName() + ".runtime_handle").str(); + auto T = Type::getInt8Ty(C)->getPointerTo(AMDGPUAS::GLOBAL_ADDRESS); auto *GV = new GlobalVariable( - M, Type::getInt8Ty(C)->getPointerTo(AMDGPUAS::GLOBAL_ADDRESS), - /*IsConstant=*/true, GlobalValue::ExternalLinkage, - /*Initializer=*/nullptr, RuntimeHandle, /*InsertBefore=*/nullptr, - GlobalValue::NotThreadLocal, AMDGPUAS::GLOBAL_ADDRESS, - /*IsExternallyInitialized=*/true); + M, T, + /*IsConstant=*/false, GlobalValue::ExternalLinkage, + 
/*Initializer=*/Constant::getNullValue(T), RuntimeHandle, + /*InsertBefore=*/nullptr, GlobalValue::NotThreadLocal, + AMDGPUAS::GLOBAL_ADDRESS, + /*IsExternallyInitialized=*/false); DEBUG(dbgs() << "runtime handle created: " << *GV << '\n'); for (auto U : F.users()) { - if (!isa<ConstantExpr>(&*U)) + auto *UU = &*U; + if (!isa<ConstantExpr>(UU)) continue; - auto *BitCast = cast<ConstantExpr>(&*U); + collectFunctionUsers(UU, Callers); + auto *BitCast = cast<ConstantExpr>(UU); auto *NewPtr = ConstantExpr::getPointerCast(GV, BitCast->getType()); BitCast->replaceAllUsesWith(NewPtr); F.addFnAttr("runtime-handle", RuntimeHandle); F.setLinkage(GlobalValue::ExternalLinkage); - - // Collect direct or indirect callers of enqueue_kernel. - for (auto U : NewPtr->users()) { - if (auto *I = dyn_cast<Instruction>(&*U)) { - auto *F = I->getParent()->getParent(); - Callers.insert(F); - collectCallers(F, Callers); - } - } Changed = true; } } @@ -136,6 +145,7 @@ bool AMDGPUOpenCLEnqueuedBlockLowering::runOnModule(Module &M) { if (F->getCallingConv() != CallingConv::AMDGPU_KERNEL) continue; F->addFnAttr("calls-enqueue-kernel"); + DEBUG(dbgs() << "mark enqueue_kernel caller:" << F->getName() << '\n'); } return Changed; } |

