summaryrefslogtreecommitdiffstats
path: root/clang/test
diff options
context:
space:
mode:
authorArtem Belevich <tra@google.com>2016-03-02 18:28:50 +0000
committerArtem Belevich <tra@google.com>2016-03-02 18:28:50 +0000
commit42e1949b4649c2ecbc9a13ca8b56f902b5214b95 (patch)
tree039124b3490f5cb0926e371b806aa50e94fd4a2d /clang/test
parentcdf3a2a5be7fb4c650ae30a44200248980e214ed (diff)
downloadbcm5719-llvm-42e1949b4649c2ecbc9a13ca8b56f902b5214b95.tar.gz
bcm5719-llvm-42e1949b4649c2ecbc9a13ca8b56f902b5214b95.zip
[CUDA] Emit host-side 'shadows' for device-side global variables
... and register them with CUDA runtime. This is needed for commonly used cudaMemcpy*() APIs that use address of host-side shadow to access their counterparts on device side. Fixes PR26340 Differential Revision: http://reviews.llvm.org/D17779 llvm-svn: 262498
Diffstat (limited to 'clang/test')
-rw-r--r--clang/test/CodeGenCUDA/device-stub.cu47
-rw-r--r--clang/test/CodeGenCUDA/filter-decl.cu6
2 files changed, 46 insertions, 7 deletions
diff --git a/clang/test/CodeGenCUDA/device-stub.cu b/clang/test/CodeGenCUDA/device-stub.cu
index 7f5e159151c..81d23a2990d 100644
--- a/clang/test/CodeGenCUDA/device-stub.cu
+++ b/clang/test/CodeGenCUDA/device-stub.cu
@@ -2,6 +2,40 @@
#include "Inputs/cuda.h"
+// CHECK-DAG: @device_var = internal global i32
+__device__ int device_var;
+
+// CHECK-DAG: @constant_var = internal global i32
+__constant__ int constant_var;
+
+// CHECK-DAG: @shared_var = internal global i32
+__shared__ int shared_var;
+
+// Make sure host globals don't get internalized...
+// CHECK-DAG: @host_var = global i32
+int host_var;
+// ... and that extern vars remain external.
+// CHECK-DAG: @ext_host_var = external global i32
+extern int ext_host_var;
+
+// Shadows for external device-side variables are *definitions* of
+// those variables.
+// CHECK-DAG: @ext_device_var = internal global i32
+extern __device__ int ext_device_var;
+// CHECK-DAG: @ext_device_var = internal global i32
+extern __constant__ int ext_constant_var;
+
+void use_pointers() {
+ int *p;
+ p = &device_var;
+ p = &constant_var;
+ p = &shared_var;
+ p = &host_var;
+ p = &ext_device_var;
+ p = &ext_constant_var;
+ p = &ext_host_var;
+}
+
// Make sure that all parts of GPU code init/cleanup are there:
// * constant unnamed string with the kernel name
// CHECK: private unnamed_addr constant{{.*}}kernelfunc{{.*}}\00"
@@ -32,9 +66,14 @@ __global__ void kernelfunc(int i, int j, int k) {}
// CHECK: call{{.*}}kernelfunc
void hostfunc(void) { kernelfunc<<<1, 1>>>(1, 1, 1); }
-// Test that we've built a function to register kernels
-// CHECK: define internal void @__cuda_register_kernels
+// Test that we've built a function to register kernels and global vars.
+// CHECK: define internal void @__cuda_register_globals
// CHECK: call{{.*}}cudaRegisterFunction(i8** %0, {{.*}}kernelfunc
+// CHECK-DAG: call{{.*}}cudaRegisterVar(i8** %0, {{.*}}device_var{{.*}}i32 0, i32 4, i32 0, i32 0
+// CHECK-DAG: call{{.*}}cudaRegisterVar(i8** %0, {{.*}}constant_var{{.*}}i32 0, i32 4, i32 1, i32 0
+// CHECK-DAG: call{{.*}}cudaRegisterVar(i8** %0, {{.*}}ext_device_var{{.*}}i32 1, i32 4, i32 0, i32 0
+// CHECK-DAG: call{{.*}}cudaRegisterVar(i8** %0, {{.*}}ext_constant_var{{.*}}i32 1, i32 4, i32 1, i32 0
+// CHECK: ret void
// Test that we've built contructor..
// CHECK: define internal void @__cuda_module_ctor
@@ -42,8 +81,8 @@ void hostfunc(void) { kernelfunc<<<1, 1>>>(1, 1, 1); }
// CHECK: call{{.*}}cudaRegisterFatBinary{{.*}}__cuda_fatbin_wrapper
// .. stores return value in __cuda_gpubin_handle
// CHECK-NEXT: store{{.*}}__cuda_gpubin_handle
-// .. and then calls __cuda_register_kernels
-// CHECK-NEXT: call void @__cuda_register_kernels
+// .. and then calls __cuda_register_globals
+// CHECK-NEXT: call void @__cuda_register_globals
// Test that we've created destructor.
// CHECK: define internal void @__cuda_module_dtor
diff --git a/clang/test/CodeGenCUDA/filter-decl.cu b/clang/test/CodeGenCUDA/filter-decl.cu
index 023ae61f3af..bc744a07a33 100644
--- a/clang/test/CodeGenCUDA/filter-decl.cu
+++ b/clang/test/CodeGenCUDA/filter-decl.cu
@@ -9,15 +9,15 @@
// CHECK-DEVICE-NOT: module asm "file scope asm is host only"
__asm__("file scope asm is host only");
-// CHECK-HOST-NOT: constantdata = externally_initialized global
+// CHECK-HOST: constantdata = internal global
// CHECK-DEVICE: constantdata = externally_initialized global
__constant__ char constantdata[256];
-// CHECK-HOST-NOT: devicedata = externally_initialized global
+// CHECK-HOST: devicedata = internal global
// CHECK-DEVICE: devicedata = externally_initialized global
__device__ char devicedata[256];
-// CHECK-HOST-NOT: shareddata = global
+// CHECK-HOST: shareddata = internal global
// CHECK-DEVICE: shareddata = global
__shared__ char shareddata[256];
OpenPOWER on IntegriCloud