From 27ee130e3818f39a8ecffd5d1212c52e1411feda Mon Sep 17 00:00:00 2001 From: Justin Lebar Date: Thu, 30 Jun 2016 18:41:33 +0000 Subject: [CUDA] Give templated device functions internal linkage, templated kernels external linkage. Summary: This lets LLVM perform IPO over these functions. In particular, it allows LLVM to emit ld.global.nc for loads to __restrict pointers in kernels that are never written to. Reviewers: rsmith Subscribers: cfe-commits, tra Differential Revision: http://reviews.llvm.org/D21337 llvm-svn: 274261 --- clang/lib/CodeGen/CodeGenModule.cpp | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) (limited to 'clang/lib/CodeGen/CodeGenModule.cpp') diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index bbec03842cb..bf2e66da692 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -2671,9 +2671,18 @@ llvm::GlobalValue::LinkageTypes CodeGenModule::getLLVMLinkageForDeclarator( // explicit instantiations can occur in multiple translation units // and must all be equivalent. However, we are not allowed to // throw away these explicit instantiations. - if (Linkage == GVA_StrongODR) - return !Context.getLangOpts().AppleKext ? llvm::Function::WeakODRLinkage - : llvm::Function::ExternalLinkage; + // + // We don't currently support CUDA device code spread out across multiple TUs, + // so say that CUDA templates are either external (for kernels) or internal. + // This lets llvm perform aggressive inter-procedural optimizations. + if (Linkage == GVA_StrongODR) { + if (Context.getLangOpts().AppleKext) + return llvm::Function::ExternalLinkage; + if (Context.getLangOpts().CUDA && Context.getLangOpts().CUDAIsDevice) + return D->hasAttr() ? llvm::Function::ExternalLinkage + : llvm::Function::InternalLinkage; + return llvm::Function::WeakODRLinkage; + } // C++ doesn't have tentative definitions and thus cannot have common // linkage. -- cgit v1.2.3