diff options
| author | Artem Belevich <tra@google.com> | 2016-03-02 18:28:50 +0000 |
|---|---|---|
| committer | Artem Belevich <tra@google.com> | 2016-03-02 18:28:50 +0000 |
| commit | 42e1949b4649c2ecbc9a13ca8b56f902b5214b95 (patch) | |
| tree | 039124b3490f5cb0926e371b806aa50e94fd4a2d /clang/test | |
| parent | cdf3a2a5be7fb4c650ae30a44200248980e214ed (diff) | |
| download | bcm5719-llvm-42e1949b4649c2ecbc9a13ca8b56f902b5214b95.tar.gz bcm5719-llvm-42e1949b4649c2ecbc9a13ca8b56f902b5214b95.zip | |
[CUDA] Emit host-side 'shadows' for device-side global variables
... and register them with CUDA runtime.
This is needed for commonly used cudaMemcpy*() APIs that use address of
host-side shadow to access their counterparts on device side.
Fixes PR26340
Differential Revision: http://reviews.llvm.org/D17779
llvm-svn: 262498
Diffstat (limited to 'clang/test')
| -rw-r--r-- | clang/test/CodeGenCUDA/device-stub.cu | 47 | ||||
| -rw-r--r-- | clang/test/CodeGenCUDA/filter-decl.cu | 6 |
2 files changed, 46 insertions, 7 deletions
diff --git a/clang/test/CodeGenCUDA/device-stub.cu b/clang/test/CodeGenCUDA/device-stub.cu index 7f5e159151c..81d23a2990d 100644 --- a/clang/test/CodeGenCUDA/device-stub.cu +++ b/clang/test/CodeGenCUDA/device-stub.cu @@ -2,6 +2,40 @@ #include "Inputs/cuda.h" +// CHECK-DAG: @device_var = internal global i32 +__device__ int device_var; + +// CHECK-DAG: @constant_var = internal global i32 +__constant__ int constant_var; + +// CHECK-DAG: @shared_var = internal global i32 +__shared__ int shared_var; + +// Make sure host globals don't get internalized... +// CHECK-DAG: @host_var = global i32 +int host_var; +// ... and that extern vars remain external. +// CHECK-DAG: @ext_host_var = external global i32 +extern int ext_host_var; + +// Shadows for external device-side variables are *definitions* of +// those variables. +// CHECK-DAG: @ext_device_var = internal global i32 +extern __device__ int ext_device_var; +// CHECK-DAG: @ext_device_var = internal global i32 +extern __constant__ int ext_constant_var; + +void use_pointers() { + int *p; + p = &device_var; + p = &constant_var; + p = &shared_var; + p = &host_var; + p = &ext_device_var; + p = &ext_constant_var; + p = &ext_host_var; +} + // Make sure that all parts of GPU code init/cleanup are there: // * constant unnamed string with the kernel name // CHECK: private unnamed_addr constant{{.*}}kernelfunc{{.*}}\00" @@ -32,9 +66,14 @@ __global__ void kernelfunc(int i, int j, int k) {} // CHECK: call{{.*}}kernelfunc void hostfunc(void) { kernelfunc<<<1, 1>>>(1, 1, 1); } -// Test that we've built a function to register kernels -// CHECK: define internal void @__cuda_register_kernels +// Test that we've built a function to register kernels and global vars. +// CHECK: define internal void @__cuda_register_globals // CHECK: call{{.*}}cudaRegisterFunction(i8** %0, {{.*}}kernelfunc +// CHECK-DAG: call{{.*}}cudaRegisterVar(i8** %0, {{.*}}device_var{{.*}}i32 0, i32 4, i32 0, i32 0 +// CHECK-DAG: call{{.*}}cudaRegisterVar(i8** %0, {{.*}}constant_var{{.*}}i32 0, i32 4, i32 1, i32 0 +// CHECK-DAG: call{{.*}}cudaRegisterVar(i8** %0, {{.*}}ext_device_var{{.*}}i32 1, i32 4, i32 0, i32 0 +// CHECK-DAG: call{{.*}}cudaRegisterVar(i8** %0, {{.*}}ext_constant_var{{.*}}i32 1, i32 4, i32 1, i32 0 +// CHECK: ret void // Test that we've built contructor.. // CHECK: define internal void @__cuda_module_ctor @@ -42,8 +81,8 @@ void hostfunc(void) { kernelfunc<<<1, 1>>>(1, 1, 1); } // CHECK: call{{.*}}cudaRegisterFatBinary{{.*}}__cuda_fatbin_wrapper // .. stores return value in __cuda_gpubin_handle // CHECK-NEXT: store{{.*}}__cuda_gpubin_handle -// .. and then calls __cuda_register_kernels -// CHECK-NEXT: call void @__cuda_register_kernels +// .. and then calls __cuda_register_globals +// CHECK-NEXT: call void @__cuda_register_globals // Test that we've created destructor. // CHECK: define internal void @__cuda_module_dtor diff --git a/clang/test/CodeGenCUDA/filter-decl.cu b/clang/test/CodeGenCUDA/filter-decl.cu index 023ae61f3af..bc744a07a33 100644 --- a/clang/test/CodeGenCUDA/filter-decl.cu +++ b/clang/test/CodeGenCUDA/filter-decl.cu @@ -9,15 +9,15 @@ // CHECK-DEVICE-NOT: module asm "file scope asm is host only" __asm__("file scope asm is host only"); -// CHECK-HOST-NOT: constantdata = externally_initialized global +// CHECK-HOST: constantdata = internal global // CHECK-DEVICE: constantdata = externally_initialized global __constant__ char constantdata[256]; -// CHECK-HOST-NOT: devicedata = externally_initialized global +// CHECK-HOST: devicedata = internal global // CHECK-DEVICE: devicedata = externally_initialized global __device__ char devicedata[256]; -// CHECK-HOST-NOT: shareddata = global +// CHECK-HOST: shareddata = internal global // CHECK-DEVICE: shareddata = global __shared__ char shareddata[256]; |

