summary | refs | log | tree | commit | diff | stats
path: root/clang/lib/CodeGen/CGCUDANV.cpp
diff options
context:
space:
mode:
author: Artem Belevich <tra@google.com> 2016-03-02 18:28:53 +0000
committer: Artem Belevich <tra@google.com> 2016-03-02 18:28:53 +0000
commit: 8c1ec1ef38e361c76a7f71f8078e21220619cca4 (patch)
tree: 1e1af98956cb3842e76ed6f566c612f08738d702 /clang/lib/CodeGen/CGCUDANV.cpp
parent: 42e1949b4649c2ecbc9a13ca8b56f902b5214b95 (diff)
download: bcm5719-llvm-8c1ec1ef38e361c76a7f71f8078e21220619cca4.tar.gz
bcm5719-llvm-8c1ec1ef38e361c76a7f71f8078e21220619cca4.zip
[CUDA] Do not generate unnecessary runtime init code.
Differential Revision: http://reviews.llvm.org/D17780
llvm-svn: 262499
Diffstat (limited to 'clang/lib/CodeGen/CGCUDANV.cpp')
-rw-r--r-- clang/lib/CodeGen/CGCUDANV.cpp 15
1 file changed, 14 insertions, 1 deletion
diff --git a/clang/lib/CodeGen/CGCUDANV.cpp b/clang/lib/CodeGen/CGCUDANV.cpp
index f0ecb57c714..c6788091cac 100644
--- a/clang/lib/CodeGen/CGCUDANV.cpp
+++ b/clang/lib/CodeGen/CGCUDANV.cpp
@@ -178,6 +178,10 @@ void CGNVCUDARuntime::emitDeviceStubBody(CodeGenFunction &CGF,
/// }
/// \endcode
llvm::Function *CGNVCUDARuntime::makeRegisterGlobalsFn() {
+ // No need to register anything
+ if (EmittedKernels.empty() && DeviceVars.empty())
+ return nullptr;
+
llvm::Function *RegisterKernelsFunc = llvm::Function::Create(
llvm::FunctionType::get(VoidTy, VoidPtrPtrTy, false),
llvm::GlobalValue::InternalLinkage, "__cuda_register_globals", &TheModule);
@@ -251,6 +255,10 @@ llvm::Function *CGNVCUDARuntime::makeRegisterGlobalsFn() {
/// }
/// \endcode
llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
+ // No need to generate ctors/dtors if there are no GPU binaries.
+ if (CGM.getCodeGenOpts().CudaGpuBinaryFileNames.empty())
+ return nullptr;
+
// void __cuda_register_globals(void* handle);
llvm::Function *RegisterGlobalsFunc = makeRegisterGlobalsFn();
// void ** __cudaRegisterFatBinary(void *);
@@ -309,7 +317,8 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
CGM.getPointerAlign());
// Call __cuda_register_globals(GpuBinaryHandle);
- CtorBuilder.CreateCall(RegisterGlobalsFunc, RegisterFatbinCall);
+ if (RegisterGlobalsFunc)
+ CtorBuilder.CreateCall(RegisterGlobalsFunc, RegisterFatbinCall);
// Save GpuBinaryHandle so we can unregister it in destructor.
GpuBinaryHandles.push_back(GpuBinaryHandle);
@@ -329,6 +338,10 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
/// }
/// \endcode
llvm::Function *CGNVCUDARuntime::makeModuleDtorFunction() {
+ // No need for destructor if we don't have handles to unregister.
+ if (GpuBinaryHandles.empty())
+ return nullptr;
+
// void __cudaUnregisterFatBinary(void ** handle);
llvm::Constant *UnregisterFatbinFunc = CGM.CreateRuntimeFunction(
llvm::FunctionType::get(VoidTy, VoidPtrPtrTy, false),
OpenPOWER on IntegriCloud