summary | refs | log | tree | commit | diff | stats
path: root/clang/lib/CodeGen/CodeGenModule.cpp
diff options
context:
space:
mode:
author: Justin Lebar <jlebar@google.com> 2016-06-30 18:41:33 +0000
committer: Justin Lebar <jlebar@google.com> 2016-06-30 18:41:33 +0000
commit: 27ee130e3818f39a8ecffd5d1212c52e1411feda (patch)
tree: 21908c1058960e498b3c45174a772ff968b390f0 /clang/lib/CodeGen/CodeGenModule.cpp
parent: cc4bb63351e527682d13a0b06a3ff3e32eaa1c37 (diff)
download: bcm5719-llvm-27ee130e3818f39a8ecffd5d1212c52e1411feda.tar.gz
download: bcm5719-llvm-27ee130e3818f39a8ecffd5d1212c52e1411feda.zip
[CUDA] Give templated device functions internal linkage, templated kernels external linkage.
Summary: This lets LLVM perform IPO (inter-procedural optimization) over these functions. In particular, it allows LLVM to emit ld.global.nc for loads to __restrict pointers in kernels that are never written to.

Reviewers: rsmith

Subscribers: cfe-commits, tra

Differential Revision: http://reviews.llvm.org/D21337

llvm-svn: 274261
Diffstat (limited to 'clang/lib/CodeGen/CodeGenModule.cpp')
-rw-r--r-- clang/lib/CodeGen/CodeGenModule.cpp 15
1 file changed, 12 insertions, 3 deletions
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index bbec03842cb..bf2e66da692 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -2671,9 +2671,18 @@ llvm::GlobalValue::LinkageTypes CodeGenModule::getLLVMLinkageForDeclarator(
// explicit instantiations can occur in multiple translation units
// and must all be equivalent. However, we are not allowed to
// throw away these explicit instantiations.
- if (Linkage == GVA_StrongODR)
- return !Context.getLangOpts().AppleKext ? llvm::Function::WeakODRLinkage
- : llvm::Function::ExternalLinkage;
+ //
+ // We don't currently support CUDA device code spread out across multiple TUs,
+ // so say that CUDA templates are either external (for kernels) or internal.
+ // This lets llvm perform aggressive inter-procedural optimizations.
+ if (Linkage == GVA_StrongODR) {
+ if (Context.getLangOpts().AppleKext)
+ return llvm::Function::ExternalLinkage;
+ if (Context.getLangOpts().CUDA && Context.getLangOpts().CUDAIsDevice)
+ return D->hasAttr<CUDAGlobalAttr>() ? llvm::Function::ExternalLinkage
+ : llvm::Function::InternalLinkage;
+ return llvm::Function::WeakODRLinkage;
+ }
// C++ doesn't have tentative definitions and thus cannot have common
// linkage.
OpenPOWER on IntegriCloud