diff options
| author | Yaxun Liu <Yaxun.Liu@amd.com> | 2019-02-14 02:00:09 +0000 |
|---|---|---|
| committer | Yaxun Liu <Yaxun.Liu@amd.com> | 2019-02-14 02:00:09 +0000 |
| commit | c18e9ecd4fc1f8bce7cf1ad0a63cb64a3fc40040 (patch) | |
| tree | 7587a4dc5e7e28987dd81c73da25540c776cb260 /clang/lib | |
| parent | 1d158dd9301a0622b152b23b97e7ded56caf552f (diff) | |
| download | bcm5719-llvm-c18e9ecd4fc1f8bce7cf1ad0a63cb64a3fc40040.tar.gz bcm5719-llvm-c18e9ecd4fc1f8bce7cf1ad0a63cb64a3fc40040.zip | |
[CUDA][HIP] Use device side kernel and variable names when registering them
__hipRegisterFunction and __hipRegisterVar need to accept device side kernel and variable names
so that HIP runtime can associate kernel stub functions in host code with kernel symbols in fat binaries,
and associate shadow variables in host code with device variables in fat binaries.
Currently, clang assumes kernel functions and device variables have the same name as the kernel
stub functions and shadow variables. However, when host is compiled in windows with MSVC C++
ABI and device is compiled with Itanium C++ ABI (e.g. AMDGPU), kernels and device symbols in fat
binary are mangled differently than host.
This patch gets the device side kernel and variable name by mangling them in the mangle context
of aux target.
Differential Revision: https://reviews.llvm.org/D58163
llvm-svn: 354004
Diffstat (limited to 'clang/lib')
| -rw-r--r-- | clang/lib/AST/ASTContext.cpp | 6 | ||||
| -rw-r--r-- | clang/lib/CodeGen/CGCUDANV.cpp | 68 | ||||
| -rw-r--r-- | clang/lib/CodeGen/CGCUDARuntime.h | 4 | ||||
| -rw-r--r-- | clang/lib/CodeGen/CodeGenModule.cpp | 2 |
4 files changed, 61 insertions, 19 deletions
diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp index 9c8117d4c49..6af0cf15184 100644 --- a/clang/lib/AST/ASTContext.cpp +++ b/clang/lib/AST/ASTContext.cpp @@ -9981,8 +9981,10 @@ VTableContextBase *ASTContext::getVTableContext() { return VTContext.get(); } -MangleContext *ASTContext::createMangleContext() { - switch (Target->getCXXABI().getKind()) { +MangleContext *ASTContext::createMangleContext(const TargetInfo *T) { + if (!T) + T = Target; + switch (T->getCXXABI().getKind()) { case TargetCXXABI::GenericAArch64: case TargetCXXABI::GenericItanium: case TargetCXXABI::GenericARM: diff --git a/clang/lib/CodeGen/CGCUDANV.cpp b/clang/lib/CodeGen/CGCUDANV.cpp index 68e83b939ae..62661039a32 100644 --- a/clang/lib/CodeGen/CGCUDANV.cpp +++ b/clang/lib/CodeGen/CGCUDANV.cpp @@ -42,14 +42,25 @@ private: /// Convenience reference to the current module llvm::Module &TheModule; /// Keeps track of kernel launch stubs emitted in this module - llvm::SmallVector<llvm::Function *, 16> EmittedKernels; - llvm::SmallVector<std::pair<llvm::GlobalVariable *, unsigned>, 16> DeviceVars; + struct KernelInfo { + llvm::Function *Kernel; + const Decl *D; + }; + llvm::SmallVector<KernelInfo, 16> EmittedKernels; + struct VarInfo { + llvm::GlobalVariable *Var; + const VarDecl *D; + unsigned Flag; + }; + llvm::SmallVector<VarInfo, 16> DeviceVars; /// Keeps track of variable containing handle of GPU binary. Populated by /// ModuleCtorFunction() and used to create corresponding cleanup calls in /// ModuleDtorFunction() llvm::GlobalVariable *GpuBinaryHandle = nullptr; /// Whether we generate relocatable device code. bool RelocatableDeviceCode; + /// Mangle context for device. + std::unique_ptr<MangleContext> DeviceMC; llvm::FunctionCallee getSetupArgumentFn() const; llvm::FunctionCallee getLaunchFn() const; @@ -106,13 +117,15 @@ private: void emitDeviceStubBodyLegacy(CodeGenFunction &CGF, FunctionArgList &Args); void emitDeviceStubBodyNew(CodeGenFunction &CGF, FunctionArgList &Args); + std::string getDeviceSideName(const Decl *ND); public: CGNVCUDARuntime(CodeGenModule &CGM); void emitDeviceStub(CodeGenFunction &CGF, FunctionArgList &Args) override; - void registerDeviceVar(llvm::GlobalVariable &Var, unsigned Flags) override { - DeviceVars.push_back(std::make_pair(&Var, Flags)); + void registerDeviceVar(const VarDecl *VD, llvm::GlobalVariable &Var, + unsigned Flags) override { + DeviceVars.push_back({&Var, VD, Flags}); } /// Creates module constructor function @@ -138,7 +151,9 @@ CGNVCUDARuntime::addUnderscoredPrefixToName(StringRef FuncName) const { CGNVCUDARuntime::CGNVCUDARuntime(CodeGenModule &CGM) : CGCUDARuntime(CGM), Context(CGM.getLLVMContext()), TheModule(CGM.getModule()), - RelocatableDeviceCode(CGM.getLangOpts().GPURelocatableDeviceCode) { + RelocatableDeviceCode(CGM.getLangOpts().GPURelocatableDeviceCode), + DeviceMC(CGM.getContext().createMangleContext( + CGM.getContext().getAuxTargetInfo())) { CodeGen::CodeGenTypes &Types = CGM.getTypes(); ASTContext &Ctx = CGM.getContext(); @@ -187,9 +202,26 @@ llvm::FunctionType *CGNVCUDARuntime::getRegisterLinkedBinaryFnTy() const { return llvm::FunctionType::get(VoidTy, Params, false); } +std::string CGNVCUDARuntime::getDeviceSideName(const Decl *D) { + auto *ND = cast<const NamedDecl>(D); + std::string DeviceSideName; + if (DeviceMC->shouldMangleDeclName(ND)) { + SmallString<256> Buffer; + llvm::raw_svector_ostream Out(Buffer); + DeviceMC->mangleName(ND, Out); + DeviceSideName = Out.str(); + } else + DeviceSideName = ND->getIdentifier()->getName(); + return DeviceSideName; +} + void CGNVCUDARuntime::emitDeviceStub(CodeGenFunction &CGF, FunctionArgList &Args) { - EmittedKernels.push_back(CGF.CurFn); + assert(getDeviceSideName(CGF.CurFuncDecl) == CGF.CurFn->getName() || + CGF.CGM.getContext().getTargetInfo().getCXXABI() != + CGF.CGM.getContext().getAuxTargetInfo()->getCXXABI()); + + EmittedKernels.push_back({CGF.CurFn, CGF.CurFuncDecl}); if (CudaFeatureEnabled(CGM.getTarget().getSDKVersion(), CudaFeature::CUDA_USES_NEW_LAUNCH)) emitDeviceStubBodyNew(CGF, Args); @@ -367,13 +399,19 @@ llvm::Function *CGNVCUDARuntime::makeRegisterGlobalsFn() { // __cuda_register_globals() and generate __cudaRegisterFunction() call for // each emitted kernel. llvm::Argument &GpuBinaryHandlePtr = *RegisterKernelsFunc->arg_begin(); - for (llvm::Function *Kernel : EmittedKernels) { - llvm::Constant *KernelName = makeConstantString(Kernel->getName()); + for (auto &&I : EmittedKernels) { + llvm::Constant *KernelName = makeConstantString(getDeviceSideName(I.D)); llvm::Constant *NullPtr = llvm::ConstantPointerNull::get(VoidPtrTy); llvm::Value *Args[] = { - &GpuBinaryHandlePtr, Builder.CreateBitCast(Kernel, VoidPtrTy), - KernelName, KernelName, llvm::ConstantInt::get(IntTy, -1), NullPtr, - NullPtr, NullPtr, NullPtr, + &GpuBinaryHandlePtr, + Builder.CreateBitCast(I.Kernel, VoidPtrTy), + KernelName, + KernelName, + llvm::ConstantInt::get(IntTy, -1), + NullPtr, + NullPtr, + NullPtr, + NullPtr, llvm::ConstantPointerNull::get(IntTy->getPointerTo())}; Builder.CreateCall(RegisterFunc, Args); } @@ -386,10 +424,10 @@ llvm::Function *CGNVCUDARuntime::makeRegisterGlobalsFn() { llvm::FunctionCallee RegisterVar = CGM.CreateRuntimeFunction( llvm::FunctionType::get(IntTy, RegisterVarParams, false), addUnderscoredPrefixToName("RegisterVar")); - for (auto &Pair : DeviceVars) { - llvm::GlobalVariable *Var = Pair.first; - unsigned Flags = Pair.second; - llvm::Constant *VarName = makeConstantString(Var->getName()); + for (auto &&Info : DeviceVars) { + llvm::GlobalVariable *Var = Info.Var; + unsigned Flags = Info.Flag; + llvm::Constant *VarName = makeConstantString(getDeviceSideName(Info.D)); uint64_t VarSize = CGM.getDataLayout().getTypeAllocSize(Var->getValueType()); llvm::Value *Args[] = { diff --git a/clang/lib/CodeGen/CGCUDARuntime.h b/clang/lib/CodeGen/CGCUDARuntime.h index e0e096bdcf9..ada6734a564 100644 --- a/clang/lib/CodeGen/CGCUDARuntime.h +++ b/clang/lib/CodeGen/CGCUDARuntime.h @@ -23,6 +23,7 @@ class GlobalVariable; namespace clang { class CUDAKernelCallExpr; +class VarDecl; namespace CodeGen { @@ -52,7 +53,8 @@ public: /// Emits a kernel launch stub. virtual void emitDeviceStub(CodeGenFunction &CGF, FunctionArgList &Args) = 0; - virtual void registerDeviceVar(llvm::GlobalVariable &Var, unsigned Flags) = 0; + virtual void registerDeviceVar(const VarDecl *VD, llvm::GlobalVariable &Var, + unsigned Flags) = 0; /// Constructs and returns a module initialization function or nullptr if it's /// not needed. Must be called after all kernels have been emitted. diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index ece26deff01..972d2afa8e6 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -3635,7 +3635,7 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D, // Extern global variables will be registered in the TU where they are // defined. if (!D->hasExternalStorage()) - getCUDARuntime().registerDeviceVar(*GV, Flags); + getCUDARuntime().registerDeviceVar(D, *GV, Flags); } else if (D->hasAttr<CUDASharedAttr>()) // __shared__ variables are odd. Shadows do get created, but // they are not registered with the CUDA runtime, so they |

