diff options
author | Artem Belevich <tra@google.com> | 2016-03-02 18:28:53 +0000 |
---|---|---|
committer | Artem Belevich <tra@google.com> | 2016-03-02 18:28:53 +0000 |
commit | 8c1ec1ef38e361c76a7f71f8078e21220619cca4 (patch) | |
tree | 1e1af98956cb3842e76ed6f566c612f08738d702 /clang/lib/CodeGen/CGCUDANV.cpp | |
parent | 42e1949b4649c2ecbc9a13ca8b56f902b5214b95 (diff) | |
download | bcm5719-llvm-8c1ec1ef38e361c76a7f71f8078e21220619cca4.tar.gz bcm5719-llvm-8c1ec1ef38e361c76a7f71f8078e21220619cca4.zip |
[CUDA] Do not generate unnecessary runtime init code.
Differential Revision: http://reviews.llvm.org/D17780
llvm-svn: 262499
Diffstat (limited to 'clang/lib/CodeGen/CGCUDANV.cpp')
-rw-r--r-- | clang/lib/CodeGen/CGCUDANV.cpp | 15 |
1 file changed, 14 insertions, 1 deletion
diff --git a/clang/lib/CodeGen/CGCUDANV.cpp b/clang/lib/CodeGen/CGCUDANV.cpp
index f0ecb57c714..c6788091cac 100644
--- a/clang/lib/CodeGen/CGCUDANV.cpp
+++ b/clang/lib/CodeGen/CGCUDANV.cpp
@@ -178,6 +178,10 @@ void CGNVCUDARuntime::emitDeviceStubBody(CodeGenFunction &CGF,
 /// }
 /// \endcode
 llvm::Function *CGNVCUDARuntime::makeRegisterGlobalsFn() {
+  // No need to register anything
+  if (EmittedKernels.empty() && DeviceVars.empty())
+    return nullptr;
+
   llvm::Function *RegisterKernelsFunc = llvm::Function::Create(
       llvm::FunctionType::get(VoidTy, VoidPtrPtrTy, false),
       llvm::GlobalValue::InternalLinkage, "__cuda_register_globals", &TheModule);
@@ -251,6 +255,10 @@ llvm::Function *CGNVCUDARuntime::makeRegisterGlobalsFn() {
 /// }
 /// \endcode
 llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
+  // No need to generate ctors/dtors if there are no GPU binaries.
+  if (CGM.getCodeGenOpts().CudaGpuBinaryFileNames.empty())
+    return nullptr;
+
   // void __cuda_register_globals(void* handle);
   llvm::Function *RegisterGlobalsFunc = makeRegisterGlobalsFn();
   // void ** __cudaRegisterFatBinary(void *);
@@ -309,7 +317,8 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
         CGM.getPointerAlign());
 
     // Call __cuda_register_globals(GpuBinaryHandle);
-    CtorBuilder.CreateCall(RegisterGlobalsFunc, RegisterFatbinCall);
+    if (RegisterGlobalsFunc)
+      CtorBuilder.CreateCall(RegisterGlobalsFunc, RegisterFatbinCall);
 
     // Save GpuBinaryHandle so we can unregister it in destructor.
     GpuBinaryHandles.push_back(GpuBinaryHandle);
@@ -329,6 +338,10 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
 /// }
 /// \endcode
 llvm::Function *CGNVCUDARuntime::makeModuleDtorFunction() {
+  // No need for destructor if we don't have handles to unregister.
+  if (GpuBinaryHandles.empty())
+    return nullptr;
+
   // void __cudaUnregisterFatBinary(void ** handle);
   llvm::Constant *UnregisterFatbinFunc = CGM.CreateRuntimeFunction(
       llvm::FunctionType::get(VoidTy, VoidPtrPtrTy, false),