summaryrefslogtreecommitdiffstats
path: root/clang/test/CodeGenCUDA
diff options
context:
space:
mode:
authorArtem Belevich <tra@google.com>2015-08-10 20:57:02 +0000
committerArtem Belevich <tra@google.com>2015-08-10 20:57:02 +0000
commitb7e4aab40cd4eb66c07f496c358a0cc0d716ce34 (patch)
tree072e0336b260bfe9bbf5d528234057f6a24c4cb7 /clang/test/CodeGenCUDA
parenta01ff22bb1ef303107c1ce155be5d03ae82f5080 (diff)
downloadbcm5719-llvm-b7e4aab40cd4eb66c07f496c358a0cc0d716ce34.tar.gz
bcm5719-llvm-b7e4aab40cd4eb66c07f496c358a0cc0d716ce34.zip
[CUDA] Add implicit __attribute__((used)) to all __global__ functions.
This allows emitting kernels that were instantiated from the host code and which would never be explicitly referenced otherwise. Differential Revision: http://reviews.llvm.org/D11666 llvm-svn: 244501
Diffstat (limited to 'clang/test/CodeGenCUDA')
-rw-r--r--clang/test/CodeGenCUDA/ptx-kernels.cu15
1 files changed, 15 insertions, 0 deletions
diff --git a/clang/test/CodeGenCUDA/ptx-kernels.cu b/clang/test/CodeGenCUDA/ptx-kernels.cu
index 658b3488fc1..bf3b14d3a28 100644
--- a/clang/test/CodeGenCUDA/ptx-kernels.cu
+++ b/clang/test/CodeGenCUDA/ptx-kernels.cu
@@ -1,7 +1,16 @@
+// Make sure that __global__ functions are emitted along with correct
+// annotations and are added to @llvm.used to prevent their elimination.
+// REQUIRES: nvptx-registered-target
+//
// RUN: %clang_cc1 %s -triple nvptx-unknown-unknown -fcuda-is-device -emit-llvm -o - | FileCheck %s
#include "Inputs/cuda.h"
+// Make sure that all __global__ functions are added to @llvm.used
+// CHECK: @llvm.used = appending global
+// CHECK-SAME: @global_function
+// CHECK-SAME: @_Z16templated_kernelIiEvT_
+
// CHECK-LABEL: define void @device_function
extern "C"
__device__ void device_function() {}
@@ -13,4 +22,10 @@ __global__ void global_function() {
device_function();
}
+// Make sure host-instantiated kernels are preserved on device side.
+template <typename T> __global__ void templated_kernel(T param) {}
+// CHECK-LABEL: define linkonce_odr void @_Z16templated_kernelIiEvT_
+void host_function() { templated_kernel<<<0,0>>>(0); }
+
// CHECK: !{{[0-9]+}} = !{void ()* @global_function, !"kernel", i32 1}
+// CHECK: !{{[0-9]+}} = !{void (i32)* @_Z16templated_kernelIiEvT_, !"kernel", i32 1}
OpenPOWER on IntegriCloud