summaryrefslogtreecommitdiffstats
path: root/clang/test/CodeGenCUDA
diff options
context:
space:
mode:
author Jingyue Wu <jingyue@google.com> 2015-08-22 05:49:28 +0000
committer Jingyue Wu <jingyue@google.com> 2015-08-22 05:49:28 +0000
commit 284ebe237ff277625caa23192f159a1be7d0a6d4 (patch)
tree 2c0c5535653c752cdc6f108496ed8e5730509a24 /clang/test/CodeGenCUDA
parent fcec09866a12114cbcb2c0dd0457c36e83bd8d50 (diff)
download bcm5719-llvm-284ebe237ff277625caa23192f159a1be7d0a6d4.tar.gz
bcm5719-llvm-284ebe237ff277625caa23192f159a1be7d0a6d4.zip
[CUDA] Change initializer for CUDA device code based on CUDA documentation.
Summary: According to the CUDA documentation, global variables declared with __device__ or __constant__ can be initialized from host code, so mark them as externally initialized. Because __shared__ variables cannot have an initializer as part of their declaration, and since their value may be kept across different kernel invocations, the value of a __shared__ variable is effectively undefined rather than zero-initialized. Wrongly using a zero initializer may cause illegitimate optimizations, e.g. removing an unused __constant__ variable because it is not updated in the device code and its value is initialized with zero. Test Plan: test/CodeGenCUDA/address-spaces.cu Patch by Xuetian Weng Reviewers: jholewinski, eliben, tra, jingyue Subscribers: llvm-commits Differential Revision: http://reviews.llvm.org/D12241 llvm-svn: 245786
Diffstat (limited to 'clang/test/CodeGenCUDA')
-rw-r--r-- clang/test/CodeGenCUDA/address-spaces.cu 6
-rw-r--r-- clang/test/CodeGenCUDA/filter-decl.cu 8
2 files changed, 7 insertions, 7 deletions
diff --git a/clang/test/CodeGenCUDA/address-spaces.cu b/clang/test/CodeGenCUDA/address-spaces.cu
index b0ef3558e2d..5e7ff66b699 100644
--- a/clang/test/CodeGenCUDA/address-spaces.cu
+++ b/clang/test/CodeGenCUDA/address-spaces.cu
@@ -5,10 +5,10 @@
#include "Inputs/cuda.h"
-// CHECK: @i = addrspace(1) global
+// CHECK: @i = addrspace(1) externally_initialized global
__device__ int i;
-// CHECK: @j = addrspace(4) global
+// CHECK: @j = addrspace(4) externally_initialized global
__constant__ int j;
// CHECK: @k = addrspace(3) global
@@ -24,7 +24,7 @@ struct MyStruct {
// CHECK: @_ZZ5func2vE1a = internal addrspace(3) global [256 x float] zeroinitializer
// CHECK: @_ZZ5func3vE1a = internal addrspace(3) global float 0.000000e+00
// CHECK: @_ZZ5func4vE1a = internal addrspace(3) global float 0.000000e+00
-// CHECK: @b = addrspace(3) global float 0.000000e+00
+// CHECK: @b = addrspace(3) global float undef
__device__ void foo() {
// CHECK: load i32, i32* addrspacecast (i32 addrspace(1)* @i to i32*)
diff --git a/clang/test/CodeGenCUDA/filter-decl.cu b/clang/test/CodeGenCUDA/filter-decl.cu
index e69473f3e84..023ae61f3af 100644
--- a/clang/test/CodeGenCUDA/filter-decl.cu
+++ b/clang/test/CodeGenCUDA/filter-decl.cu
@@ -9,12 +9,12 @@
// CHECK-DEVICE-NOT: module asm "file scope asm is host only"
__asm__("file scope asm is host only");
-// CHECK-HOST-NOT: constantdata = global
-// CHECK-DEVICE: constantdata = global
+// CHECK-HOST-NOT: constantdata = externally_initialized global
+// CHECK-DEVICE: constantdata = externally_initialized global
__constant__ char constantdata[256];
-// CHECK-HOST-NOT: devicedata = global
-// CHECK-DEVICE: devicedata = global
+// CHECK-HOST-NOT: devicedata = externally_initialized global
+// CHECK-DEVICE: devicedata = externally_initialized global
__device__ char devicedata[256];
// CHECK-HOST-NOT: shareddata = global
OpenPOWER on IntegriCloud