summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAaron Enye Shi <enye.shi@gmail.com>2019-04-02 20:10:18 +0000
committerAaron Enye Shi <enye.shi@gmail.com>2019-04-02 20:10:18 +0000
commit13d8e929409fb1a1f8a2318250923c90f3abac7c (patch)
tree27aad49cef42e1da44ec25980327a80e50e7d4cd
parent9ca4ff2666299898b657c363e091096a78203898 (diff)
downloadbcm5719-llvm-13d8e929409fb1a1f8a2318250923c90f3abac7c.tar.gz
bcm5719-llvm-13d8e929409fb1a1f8a2318250923c90f3abac7c.zip
[HIP-Clang] Fat binary should not be produced for non GPU code
Skip producing the fat binary functions for HIP when no device code is present. Reviewers: yaxunl Differential Review: https://reviews.llvm.org/D60141 llvm-svn: 357520
-rw-r--r--clang/lib/CodeGen/CGCUDANV.cpp2
-rw-r--r--clang/test/CodeGenCUDA/device-stub.cu16
2 files changed, 13 insertions, 5 deletions
diff --git a/clang/lib/CodeGen/CGCUDANV.cpp b/clang/lib/CodeGen/CGCUDANV.cpp
index 0602601f5e1..cd969dc8972 100644
--- a/clang/lib/CodeGen/CGCUDANV.cpp
+++ b/clang/lib/CodeGen/CGCUDANV.cpp
@@ -472,6 +472,8 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
StringRef CudaGpuBinaryFileName = CGM.getCodeGenOpts().CudaGpuBinaryFileName;
if (CudaGpuBinaryFileName.empty() && !IsHIP)
return nullptr;
+ if (IsHIP && EmittedKernels.empty() && DeviceVars.empty())
+ return nullptr;
// void __{cuda|hip}_register_globals(void* handle);
llvm::Function *RegisterGlobalsFunc = makeRegisterGlobalsFn();
diff --git a/clang/test/CodeGenCUDA/device-stub.cu b/clang/test/CodeGenCUDA/device-stub.cu
index 387a787cceb..00d7fa4c5c8 100644
--- a/clang/test/CodeGenCUDA/device-stub.cu
+++ b/clang/test/CodeGenCUDA/device-stub.cu
@@ -228,13 +228,19 @@ void hostfunc(void) { kernelfunc<<<1, 1>>>(1, 1, 1); }
// device-side globals, but we still need to register GPU binary.
// Skip GPU binary string first.
// CUDANOGLOBALS: @{{.*}} = private constant{{.*}}
-// HIPNOGLOBALS: @{{.*}} = internal constant{{.*}}
+// HIPNOGLOBALS-NOT: @{{.*}} = internal constant{{.*}}
// NOGLOBALS-NOT: define internal void @__{{.*}}_register_globals
-// NOGLOBALS: define internal void @__[[PREFIX:cuda|hip]]_module_ctor
-// NOGLOBALS: call{{.*}}[[PREFIX]]RegisterFatBinary{{.*}}__[[PREFIX]]_fatbin_wrapper
+// CUDANOGLOBALS: define internal void @__[[PREFIX:cuda|hip]]_module_ctor
+// CUDANOGLOBALS: call{{.*}}[[PREFIX]]RegisterFatBinary{{.*}}__[[PREFIX]]_fatbin_wrapper
// NOGLOBALS-NOT: call void @__[[PREFIX]]_register_globals
-// NOGLOBALS: define internal void @__[[PREFIX]]_module_dtor
-// NOGLOBALS: call void @__[[PREFIX]]UnregisterFatBinary
+// CUDANOGLOBALS: define internal void @__[[PREFIX]]_module_dtor
+// CUDANOGLOBALS: call void @__[[PREFIX]]UnregisterFatBinary
+
+// There should be no fat binary functions when no device-code is found for HIP.
+// HIPNOGLOBALS-NOT: define internal void @__[[PREFIX:cuda|hip]]_module_ctor
+// HIPNOGLOBALS-NOT: call{{.*}}[[PREFIX]]RegisterFatBinary{{.*}}__[[PREFIX]]_fatbin_wrapper
+// HIPNOGLOBALS-NOT: define internal void @__[[PREFIX]]_module_dtor
+// HIPNOGLOBALS-NOT: call void @__[[PREFIX]]UnregisterFatBinary
// There should be no constructors/destructors if we have no GPU binary.
// NOGPUBIN-NOT: define internal void @__[[PREFIX]]_register_globals
OpenPOWER on IntegriCloud