diff options
author | Artem Belevich <tra@google.com> | 2016-03-02 18:28:50 +0000 |
---|---|---|
committer | Artem Belevich <tra@google.com> | 2016-03-02 18:28:50 +0000 |
commit | 42e1949b4649c2ecbc9a13ca8b56f902b5214b95 (patch) | |
tree | 039124b3490f5cb0926e371b806aa50e94fd4a2d /clang/lib/CodeGen/CGCUDANV.cpp | |
parent | cdf3a2a5be7fb4c650ae30a44200248980e214ed (diff) | |
download | bcm5719-llvm-42e1949b4649c2ecbc9a13ca8b56f902b5214b95.tar.gz bcm5719-llvm-42e1949b4649c2ecbc9a13ca8b56f902b5214b95.zip |
[CUDA] Emit host-side 'shadows' for device-side global variables
... and register them with the CUDA runtime.
This is needed for commonly used cudaMemcpy*() APIs that use the address of a
host-side shadow to access its counterpart on the device side.
Fixes PR26340
Differential Revision: http://reviews.llvm.org/D17779
llvm-svn: 262498
Diffstat (limited to 'clang/lib/CodeGen/CGCUDANV.cpp')
-rw-r--r-- | clang/lib/CodeGen/CGCUDANV.cpp | 66 |
1 files changed, 51 insertions, 15 deletions
diff --git a/clang/lib/CodeGen/CGCUDANV.cpp b/clang/lib/CodeGen/CGCUDANV.cpp index 9dd7928bcf9..f0ecb57c714 100644 --- a/clang/lib/CodeGen/CGCUDANV.cpp +++ b/clang/lib/CodeGen/CGCUDANV.cpp @@ -38,6 +38,7 @@ private: llvm::Module &TheModule; /// Keeps track of kernel launch stubs emitted in this module llvm::SmallVector<llvm::Function *, 16> EmittedKernels; + llvm::SmallVector<std::pair<llvm::GlobalVariable *, unsigned>, 16> DeviceVars; /// Keeps track of variables containing handles of GPU binaries. Populated by /// ModuleCtorFunction() and used to create corresponding cleanup calls in /// ModuleDtorFunction() @@ -47,7 +48,7 @@ private: llvm::Constant *getLaunchFn() const; /// Creates a function to register all kernel stubs generated in this module. - llvm::Function *makeRegisterKernelsFn(); + llvm::Function *makeRegisterGlobalsFn(); /// Helper function that generates a constant string and returns a pointer to /// the start of the string. The result of this function can be used anywhere @@ -68,6 +69,10 @@ public: CGNVCUDARuntime(CodeGenModule &CGM); void emitDeviceStub(CodeGenFunction &CGF, FunctionArgList &Args) override; + void registerDeviceVar(llvm::GlobalVariable &Var, unsigned Flags) override { + DeviceVars.push_back(std::make_pair(&Var, Flags)); + } + /// Creates module constructor function llvm::Function *makeModuleCtorFunction() override; /// Creates module destructor function @@ -158,19 +163,24 @@ void CGNVCUDARuntime::emitDeviceStubBody(CodeGenFunction &CGF, CGF.EmitBlock(EndBlock); } -/// Creates internal function to register all kernel stubs generated in this -/// module with the CUDA runtime. +/// Creates a function that sets up state on the host side for CUDA objects that +/// have a presence on both the host and device sides. Specifically, registers +/// the host side of kernel functions and device global variables with the CUDA +/// runtime. 
/// \code -/// void __cuda_register_kernels(void** GpuBinaryHandle) { +/// void __cuda_register_globals(void** GpuBinaryHandle) { /// __cudaRegisterFunction(GpuBinaryHandle,Kernel0,...); /// ... /// __cudaRegisterFunction(GpuBinaryHandle,KernelM,...); +/// __cudaRegisterVar(GpuBinaryHandle, GlobalVar0, ...); +/// ... +/// __cudaRegisterVar(GpuBinaryHandle, GlobalVarN, ...); /// } /// \endcode -llvm::Function *CGNVCUDARuntime::makeRegisterKernelsFn() { +llvm::Function *CGNVCUDARuntime::makeRegisterGlobalsFn() { llvm::Function *RegisterKernelsFunc = llvm::Function::Create( llvm::FunctionType::get(VoidTy, VoidPtrPtrTy, false), - llvm::GlobalValue::InternalLinkage, "__cuda_register_kernels", &TheModule); + llvm::GlobalValue::InternalLinkage, "__cuda_register_globals", &TheModule); llvm::BasicBlock *EntryBB = llvm::BasicBlock::Create(Context, "entry", RegisterKernelsFunc); CGBuilderTy Builder(CGM, Context); @@ -186,18 +196,44 @@ llvm::Function *CGNVCUDARuntime::makeRegisterKernelsFn() { "__cudaRegisterFunction"); // Extract GpuBinaryHandle passed as the first argument passed to - // __cuda_register_kernels() and generate __cudaRegisterFunction() call for + // __cuda_register_globals() and generate __cudaRegisterFunction() call for // each emitted kernel. 
llvm::Argument &GpuBinaryHandlePtr = *RegisterKernelsFunc->arg_begin(); for (llvm::Function *Kernel : EmittedKernels) { llvm::Constant *KernelName = makeConstantString(Kernel->getName()); llvm::Constant *NullPtr = llvm::ConstantPointerNull::get(VoidPtrTy); - llvm::Value *args[] = { + llvm::Value *Args[] = { &GpuBinaryHandlePtr, Builder.CreateBitCast(Kernel, VoidPtrTy), KernelName, KernelName, llvm::ConstantInt::get(IntTy, -1), NullPtr, NullPtr, NullPtr, NullPtr, llvm::ConstantPointerNull::get(IntTy->getPointerTo())}; - Builder.CreateCall(RegisterFunc, args); + Builder.CreateCall(RegisterFunc, Args); + } + + // void __cudaRegisterVar(void **, char *, char *, const char *, + // int, int, int, int) + std::vector<llvm::Type *> RegisterVarParams = { + VoidPtrPtrTy, CharPtrTy, CharPtrTy, CharPtrTy, + IntTy, IntTy, IntTy, IntTy}; + llvm::Constant *RegisterVar = CGM.CreateRuntimeFunction( + llvm::FunctionType::get(IntTy, RegisterVarParams, false), + "__cudaRegisterVar"); + for (auto &Pair : DeviceVars) { + llvm::GlobalVariable *Var = Pair.first; + unsigned Flags = Pair.second; + llvm::Constant *VarName = makeConstantString(Var->getName()); + uint64_t VarSize = + CGM.getDataLayout().getTypeAllocSize(Var->getValueType()); + llvm::Value *Args[] = { + &GpuBinaryHandlePtr, + Builder.CreateBitCast(Var, VoidPtrTy), + VarName, + VarName, + llvm::ConstantInt::get(IntTy, (Flags & ExternDeviceVar) ? 1 : 0), + llvm::ConstantInt::get(IntTy, VarSize), + llvm::ConstantInt::get(IntTy, (Flags & ConstantDeviceVar) ? 1 : 0), + llvm::ConstantInt::get(IntTy, 0)}; + Builder.CreateCall(RegisterVar, Args); } Builder.CreateRetVoid(); @@ -208,15 +244,15 @@ llvm::Function *CGNVCUDARuntime::makeRegisterKernelsFn() { /// \code /// void __cuda_module_ctor(void*) { /// Handle0 = __cudaRegisterFatBinary(GpuBinaryBlob0); -/// __cuda_register_kernels(Handle0); +/// __cuda_register_globals(Handle0); /// ... 
/// HandleN = __cudaRegisterFatBinary(GpuBinaryBlobN); -/// __cuda_register_kernels(HandleN); +/// __cuda_register_globals(HandleN); /// } /// \endcode llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() { - // void __cuda_register_kernels(void* handle); - llvm::Function *RegisterKernelsFunc = makeRegisterKernelsFn(); + // void __cuda_register_globals(void* handle); + llvm::Function *RegisterGlobalsFunc = makeRegisterGlobalsFn(); // void ** __cudaRegisterFatBinary(void *); llvm::Constant *RegisterFatbinFunc = CGM.CreateRuntimeFunction( llvm::FunctionType::get(VoidPtrPtrTy, VoidPtrTy, false), @@ -272,8 +308,8 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() { CtorBuilder.CreateAlignedStore(RegisterFatbinCall, GpuBinaryHandle, CGM.getPointerAlign()); - // Call __cuda_register_kernels(GpuBinaryHandle); - CtorBuilder.CreateCall(RegisterKernelsFunc, RegisterFatbinCall); + // Call __cuda_register_globals(GpuBinaryHandle); + CtorBuilder.CreateCall(RegisterGlobalsFunc, RegisterFatbinCall); // Save GpuBinaryHandle so we can unregister it in destructor. GpuBinaryHandles.push_back(GpuBinaryHandle); |