diff options
author | Aaron Enye Shi <enye.shi@gmail.com> | 2019-04-02 20:49:41 +0000 |
---|---|---|
committer | Aaron Enye Shi <enye.shi@gmail.com> | 2019-04-02 20:49:41 +0000 |
commit | 8129521318accc44c2a009647572f6ebd3fc56dd (patch) | |
tree | 6661b70fd54aaa9c86d032af4904882b1e307640 | |
parent | 19775a4c673aa94f414448818ad28d8a3235acb6 (diff) | |
download | bcm5719-llvm-8129521318accc44c2a009647572f6ebd3fc56dd.tar.gz bcm5719-llvm-8129521318accc44c2a009647572f6ebd3fc56dd.zip |
[HIP-Clang] Fat binary should not be produced for non-GPU code 2
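Also for CUDA, we need to disable producing the fat binary registration functions (the module constructor/destructor) when there is no GPU code, i.e. when no kernels were emitted and there are no device variables. For CUDA this applies only when relocatable device code is not enabled.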
Reviewers: yaxunl, tra
Differential Revision: https://reviews.llvm.org/D60141
llvm-svn: 357526
-rw-r--r-- | clang/lib/CodeGen/CGCUDANV.cpp | 4 | ||||
-rw-r--r-- | clang/test/CodeGenCUDA/device-stub.cu | 16 |
2 files changed, 8 insertions, 12 deletions
diff --git a/clang/lib/CodeGen/CGCUDANV.cpp b/clang/lib/CodeGen/CGCUDANV.cpp index cd969dc8972..913d58e0178 100644 --- a/clang/lib/CodeGen/CGCUDANV.cpp +++ b/clang/lib/CodeGen/CGCUDANV.cpp @@ -468,11 +468,13 @@ llvm::Function *CGNVCUDARuntime::makeRegisterGlobalsFn() { /// \endcode llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() { bool IsHIP = CGM.getLangOpts().HIP; + bool IsCUDA = CGM.getLangOpts().CUDA; // No need to generate ctors/dtors if there is no GPU binary. StringRef CudaGpuBinaryFileName = CGM.getCodeGenOpts().CudaGpuBinaryFileName; if (CudaGpuBinaryFileName.empty() && !IsHIP) return nullptr; - if (IsHIP && EmittedKernels.empty() && DeviceVars.empty()) + if ( (IsHIP || (IsCUDA && !RelocatableDeviceCode) ) + && EmittedKernels.empty() && DeviceVars.empty()) return nullptr; // void __{cuda|hip}_register_globals(void* handle); diff --git a/clang/test/CodeGenCUDA/device-stub.cu b/clang/test/CodeGenCUDA/device-stub.cu index 00d7fa4c5c8..9db5738cded 100644 --- a/clang/test/CodeGenCUDA/device-stub.cu +++ b/clang/test/CodeGenCUDA/device-stub.cu @@ -227,20 +227,14 @@ void hostfunc(void) { kernelfunc<<<1, 1>>>(1, 1, 1); } // There should be no __[[PREFIX]]_register_globals if we have no // device-side globals, but we still need to register GPU binary. // Skip GPU binary string first. 
-// CUDANOGLOBALS: @{{.*}} = private constant{{.*}} +// CUDANOGLOBALS-NOT: @{{.*}} = private constant{{.*}} // HIPNOGLOBALS-NOT: @{{.*}} = internal constant{{.*}} // NOGLOBALS-NOT: define internal void @__{{.*}}_register_globals -// CUDANOGLOBALS: define internal void @__[[PREFIX:cuda|hip]]_module_ctor -// CUDANOGLOBALS: call{{.*}}[[PREFIX]]RegisterFatBinary{{.*}}__[[PREFIX]]_fatbin_wrapper +// NOGLOBALS-NOT: define internal void @__[[PREFIX:cuda|hip]]_module_ctor +// NOGLOBALS-NOT: call{{.*}}[[PREFIX]]RegisterFatBinary{{.*}}__[[PREFIX]]_fatbin_wrapper // NOGLOBALS-NOT: call void @__[[PREFIX]]_register_globals -// CUDANOGLOBALS: define internal void @__[[PREFIX]]_module_dtor -// CUDANOGLOBALS: call void @__[[PREFIX]]UnregisterFatBinary - -// There should be no fat binary functions when no device-code is found for HIP. -// HIPNOGLOBALS-NOT: define internal void @__[[PREFIX:cuda|hip]]_module_ctor -// HIPNOGLOBALS-NOT: call{{.*}}[[PREFIX]]RegisterFatBinary{{.*}}__[[PREFIX]]_fatbin_wrapper -// HIPNOGLOBALS-NOT: define internal void @__[[PREFIX]]_module_dtor -// HIPNOGLOBALS-NOT: call void @__[[PREFIX]]UnregisterFatBinary +// NOGLOBALS-NOT: define internal void @__[[PREFIX]]_module_dtor +// NOGLOBALS-NOT: call void @__[[PREFIX]]UnregisterFatBinary // There should be no constructors/destructors if we have no GPU binary. // NOGPUBIN-NOT: define internal void @__[[PREFIX]]_register_globals |