summaryrefslogtreecommitdiffstats
path: root/clang/lib/CodeGen/CodeGenModule.cpp
diff options
context:
space:
mode:
authorJingyue Wu <jingyue@google.com>2015-08-22 05:49:28 +0000
committerJingyue Wu <jingyue@google.com>2015-08-22 05:49:28 +0000
commit284ebe237ff277625caa23192f159a1be7d0a6d4 (patch)
tree2c0c5535653c752cdc6f108496ed8e5730509a24 /clang/lib/CodeGen/CodeGenModule.cpp
parentfcec09866a12114cbcb2c0dd0457c36e83bd8d50 (diff)
downloadbcm5719-llvm-284ebe237ff277625caa23192f159a1be7d0a6d4.tar.gz
bcm5719-llvm-284ebe237ff277625caa23192f159a1be7d0a6d4.zip
[CUDA] Change initializer for CUDA device code based on CUDA documentation.
Summary: According to CUDA documentation, global variables declared with __device__, __constant__ can be initialized from host code, so mark them as externally initialized. Because __shared__ variables cannot have an initialization as part of their declaration and since the value maybe kept across different kernel invocation, the value of __shared__ is effectively undefined instead of zero initialized. Wrongly using zero initializer may cause illegitimate optimization, e.g. removing unused __constant__ variable because it's not updated in the device code and the value is initialized with zero. Test Plan: test/CodeGenCUDA/address-spaces.cu Patch by Xuetian Weng Reviewers: jholewinski, eliben, tra, jingyue Subscribers: llvm-commits Differential Revision: http://reviews.llvm.org/D12241 llvm-svn: 245786
Diffstat (limited to 'clang/lib/CodeGen/CodeGenModule.cpp')
-rw-r--r--clang/lib/CodeGen/CodeGenModule.cpp22
1 files changed, 21 insertions, 1 deletions
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index 6352646f19c..05258693f1f 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -1990,7 +1990,16 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D) {
const VarDecl *InitDecl;
const Expr *InitExpr = D->getAnyInitializer(InitDecl);
- if (!InitExpr) {
+ // CUDA E.2.4.1 "__shared__ variables cannot have an initialization as part
+ // of their declaration."
+ if (getLangOpts().CPlusPlus && getLangOpts().CUDAIsDevice
+ && D->hasAttr<CUDASharedAttr>()) {
+ if (InitExpr) {
+ Error(D->getLocation(),
+ "__shared__ variable cannot have an initialization.");
+ }
+ Init = llvm::UndefValue::get(getTypes().ConvertType(ASTTy));
+ } else if (!InitExpr) {
// This is a tentative definition; tentative definitions are
// implicitly initialized with { 0 }.
//
@@ -2076,6 +2085,17 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D) {
if (D->hasAttr<AnnotateAttr>())
AddGlobalAnnotations(D, GV);
+ // CUDA B.2.1 "The __device__ qualifier declares a variable that resides on
+ // the device. [...]"
+ // CUDA B.2.2 "The __constant__ qualifier, optionally used together with
+ // __device__, declares a variable that: [...]
+ // Is accessible from all the threads within the grid and from the host
+ // through the runtime library (cudaGetSymbolAddress() / cudaGetSymbolSize()
+ // / cudaMemcpyToSymbol() / cudaMemcpyFromSymbol())."
+ if (GV && LangOpts.CUDA && LangOpts.CUDAIsDevice &&
+ (D->hasAttr<CUDAConstantAttr>() || D->hasAttr<CUDADeviceAttr>())) {
+ GV->setExternallyInitialized(true);
+ }
GV->setInitializer(Init);
// If it is safe to mark the global 'constant', do so now.
OpenPOWER on IntegriCloud