diff options
author | Justin Lebar <jlebar@google.com> | 2016-06-30 18:41:33 +0000 |
---|---|---|
committer | Justin Lebar <jlebar@google.com> | 2016-06-30 18:41:33 +0000 |
commit | 27ee130e3818f39a8ecffd5d1212c52e1411feda (patch) | |
tree | 21908c1058960e498b3c45174a772ff968b390f0 /clang | |
parent | cc4bb63351e527682d13a0b06a3ff3e32eaa1c37 (diff) | |
download | bcm5719-llvm-27ee130e3818f39a8ecffd5d1212c52e1411feda.tar.gz bcm5719-llvm-27ee130e3818f39a8ecffd5d1212c52e1411feda.zip |
[CUDA] Give templated device functions internal linkage, templated kernels external linkage.
Summary:
This lets LLVM perform IPO over these functions. In particular, it
allows LLVM to emit ld.global.nc for loads to __restrict pointers in
kernels that are never written to.
Reviewers: rsmith
Subscribers: cfe-commits, tra
Differential Revision: http://reviews.llvm.org/D21337
llvm-svn: 274261
Diffstat (limited to 'clang')
-rw-r--r-- | clang/lib/CodeGen/CodeGenModule.cpp | 15 | ||||
-rw-r--r-- | clang/test/CodeGenCUDA/ptx-kernels.cu | 4 |
2 files changed, 14 insertions, 5 deletions
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index bbec03842cb..bf2e66da692 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -2671,9 +2671,18 @@ llvm::GlobalValue::LinkageTypes CodeGenModule::getLLVMLinkageForDeclarator( // explicit instantiations can occur in multiple translation units // and must all be equivalent. However, we are not allowed to // throw away these explicit instantiations. - if (Linkage == GVA_StrongODR) - return !Context.getLangOpts().AppleKext ? llvm::Function::WeakODRLinkage - : llvm::Function::ExternalLinkage; + // + // We don't currently support CUDA device code spread out across multiple TUs, + // so say that CUDA templates are either external (for kernels) or internal. + // This lets llvm perform aggressive inter-procedural optimizations. + if (Linkage == GVA_StrongODR) { + if (Context.getLangOpts().AppleKext) + return llvm::Function::ExternalLinkage; + if (Context.getLangOpts().CUDA && Context.getLangOpts().CUDAIsDevice) + return D->hasAttr<CUDAGlobalAttr>() ? llvm::Function::ExternalLinkage + : llvm::Function::InternalLinkage; + return llvm::Function::WeakODRLinkage; + } // C++ doesn't have tentative definitions and thus cannot have common // linkage. diff --git a/clang/test/CodeGenCUDA/ptx-kernels.cu b/clang/test/CodeGenCUDA/ptx-kernels.cu index 034cef6b60f..1d330bdf6a4 100644 --- a/clang/test/CodeGenCUDA/ptx-kernels.cu +++ b/clang/test/CodeGenCUDA/ptx-kernels.cu @@ -19,11 +19,11 @@ __global__ void global_function() { // Make sure host-instantiated kernels are preserved on device side. template <typename T> __global__ void templated_kernel(T param) {} -// CHECK-DAG: define weak_odr void @_Z16templated_kernelIiEvT_( +// CHECK-DAG: define void @_Z16templated_kernelIiEvT_( namespace { __global__ void anonymous_ns_kernel() {} -// CHECK-DAG: define weak_odr void @_ZN12_GLOBAL__N_119anonymous_ns_kernelEv( +// CHECK-DAG: define void @_ZN12_GLOBAL__N_119anonymous_ns_kernelEv( } void host_function() { |