diff options
author | Aaron Enye Shi <enye.shi@gmail.com> | 2019-04-02 20:10:18 +0000 |
---|---|---|
committer | Aaron Enye Shi <enye.shi@gmail.com> | 2019-04-02 20:10:18 +0000 |
commit | 13d8e929409fb1a1f8a2318250923c90f3abac7c (patch) | |
tree | 27aad49cef42e1da44ec25980327a80e50e7d4cd | |
parent | 9ca4ff2666299898b657c363e091096a78203898 (diff) | |
download | bcm5719-llvm-13d8e929409fb1a1f8a2318250923c90f3abac7c.tar.gz bcm5719-llvm-13d8e929409fb1a1f8a2318250923c90f3abac7c.zip |
[HIP-Clang] Fat binary should not be produced for non-GPU code
Skip producing the fat binary functions for HIP when no device code is present.
Reviewers: yaxunl
Differential Review: https://reviews.llvm.org/D60141
llvm-svn: 357520
-rw-r--r-- | clang/lib/CodeGen/CGCUDANV.cpp | 2 | ||||
-rw-r--r-- | clang/test/CodeGenCUDA/device-stub.cu | 16 |
2 files changed, 13 insertions, 5 deletions
diff --git a/clang/lib/CodeGen/CGCUDANV.cpp b/clang/lib/CodeGen/CGCUDANV.cpp
index 0602601f5e1..cd969dc8972 100644
--- a/clang/lib/CodeGen/CGCUDANV.cpp
+++ b/clang/lib/CodeGen/CGCUDANV.cpp
@@ -472,6 +472,8 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
   StringRef CudaGpuBinaryFileName = CGM.getCodeGenOpts().CudaGpuBinaryFileName;
   if (CudaGpuBinaryFileName.empty() && !IsHIP)
     return nullptr;
+  if (IsHIP && EmittedKernels.empty() && DeviceVars.empty())
+    return nullptr;
 
   // void __{cuda|hip}_register_globals(void* handle);
   llvm::Function *RegisterGlobalsFunc = makeRegisterGlobalsFn();
diff --git a/clang/test/CodeGenCUDA/device-stub.cu b/clang/test/CodeGenCUDA/device-stub.cu
index 387a787cceb..00d7fa4c5c8 100644
--- a/clang/test/CodeGenCUDA/device-stub.cu
+++ b/clang/test/CodeGenCUDA/device-stub.cu
@@ -228,13 +228,19 @@ void hostfunc(void) { kernelfunc<<<1, 1>>>(1, 1, 1); }
 // device-side globals, but we still need to register GPU binary.
 // Skip GPU binary string first.
 // CUDANOGLOBALS: @{{.*}} = private constant{{.*}}
-// HIPNOGLOBALS: @{{.*}} = internal constant{{.*}}
+// HIPNOGLOBALS-NOT: @{{.*}} = internal constant{{.*}}
 // NOGLOBALS-NOT: define internal void @__{{.*}}_register_globals
-// NOGLOBALS: define internal void @__[[PREFIX:cuda|hip]]_module_ctor
-// NOGLOBALS: call{{.*}}[[PREFIX]]RegisterFatBinary{{.*}}__[[PREFIX]]_fatbin_wrapper
+// CUDANOGLOBALS: define internal void @__[[PREFIX:cuda|hip]]_module_ctor
+// CUDANOGLOBALS: call{{.*}}[[PREFIX]]RegisterFatBinary{{.*}}__[[PREFIX]]_fatbin_wrapper
 // NOGLOBALS-NOT: call void @__[[PREFIX]]_register_globals
-// NOGLOBALS: define internal void @__[[PREFIX]]_module_dtor
-// NOGLOBALS: call void @__[[PREFIX]]UnregisterFatBinary
+// CUDANOGLOBALS: define internal void @__[[PREFIX]]_module_dtor
+// CUDANOGLOBALS: call void @__[[PREFIX]]UnregisterFatBinary
+
+// There should be no fat binary functions when no device-code is found for HIP.
+// HIPNOGLOBALS-NOT: define internal void @__[[PREFIX:cuda|hip]]_module_ctor
+// HIPNOGLOBALS-NOT: call{{.*}}[[PREFIX]]RegisterFatBinary{{.*}}__[[PREFIX]]_fatbin_wrapper
+// HIPNOGLOBALS-NOT: define internal void @__[[PREFIX]]_module_dtor
+// HIPNOGLOBALS-NOT: call void @__[[PREFIX]]UnregisterFatBinary
 
 // There should be no constructors/destructors if we have no GPU binary.
 // NOGPUBIN-NOT: define internal void @__[[PREFIX]]_register_globals