summaryrefslogtreecommitdiffstats
path: root/clang/test/CodeGenCUDA
diff options
context:
space:
mode:
author Artem Belevich <tra@google.com> 2015-04-21 22:55:54 +0000
committer Artem Belevich <tra@google.com> 2015-04-21 22:55:54 +0000
commit 7093e4064116b710a3b61aebdca60c215f5c29fe (patch)
tree eb60e35036d16b30ce51846a93bdf935e1d2b022 /clang/test/CodeGenCUDA
parent 6e3344ed98a3432bf59a9d9b28b7bb869ca10912 (diff)
download bcm5719-llvm-7093e4064116b710a3b61aebdca60c215f5c29fe.tar.gz
bcm5719-llvm-7093e4064116b710a3b61aebdca60c215f5c29fe.zip
[cuda] Allow using integral non-type template parameters as launch_bounds attribute arguments.
- Changed CUDALaunchBounds arguments from integers to Expr* so they can be saved in AST for instantiation. - Added support for template instantiation of launch_bounds attribute. - Moved evaluation of launch_bounds arguments to NVPTXTargetCodeGenInfo::SetTargetAttributes() where it can be done after template instantiation. - Added a warning on negative launch_bounds arguments. - Amended test cases. Differential Revision: http://reviews.llvm.org/D8985 llvm-svn: 235452
Diffstat (limited to 'clang/test/CodeGenCUDA')
-rw-r--r-- clang/test/CodeGenCUDA/launch-bounds.cu 51
1 files changed, 51 insertions, 0 deletions
diff --git a/clang/test/CodeGenCUDA/launch-bounds.cu b/clang/test/CodeGenCUDA/launch-bounds.cu
index 6f4102ea007..ecbd0ad7058 100644
--- a/clang/test/CodeGenCUDA/launch-bounds.cu
+++ b/clang/test/CodeGenCUDA/launch-bounds.cu
@@ -28,3 +28,54 @@ Kernel2()
}
// CHECK: !{{[0-9]+}} = !{void ()* @Kernel2, !"maxntidx", i32 256}
+
+template <int max_threads_per_block>
+__global__ void
+__launch_bounds__(max_threads_per_block)
+Kernel3()
+{
+}
+
+template void Kernel3<MAX_THREADS_PER_BLOCK>();
+// CHECK: !{{[0-9]+}} = !{void ()* @{{.*}}Kernel3{{.*}}, !"maxntidx", i32 256}
+
+template <int max_threads_per_block, int min_blocks_per_mp>
+__global__ void
+__launch_bounds__(max_threads_per_block, min_blocks_per_mp)
+Kernel4()
+{
+}
+template void Kernel4<MAX_THREADS_PER_BLOCK, MIN_BLOCKS_PER_MP>();
+
+// CHECK: !{{[0-9]+}} = !{void ()* @{{.*}}Kernel4{{.*}}, !"maxntidx", i32 256}
+// CHECK: !{{[0-9]+}} = !{void ()* @{{.*}}Kernel4{{.*}}, !"minctasm", i32 2}
+
+const int constint = 100;
+template <int max_threads_per_block, int min_blocks_per_mp>
+__global__ void
+__launch_bounds__(max_threads_per_block + constint,
+ min_blocks_per_mp + max_threads_per_block)
+Kernel5()
+{
+}
+template void Kernel5<MAX_THREADS_PER_BLOCK, MIN_BLOCKS_PER_MP>();
+
+// CHECK: !{{[0-9]+}} = !{void ()* @{{.*}}Kernel5{{.*}}, !"maxntidx", i32 356}
+// CHECK: !{{[0-9]+}} = !{void ()* @{{.*}}Kernel5{{.*}}, !"minctasm", i32 258}
+
+// Make sure we don't emit negative launch bounds values.
+__global__ void
+__launch_bounds__( -MAX_THREADS_PER_BLOCK, MIN_BLOCKS_PER_MP )
+Kernel6()
+{
+}
+// CHECK-NOT: !{{[0-9]+}} = !{void ()* @{{.*}}Kernel6{{.*}}, !"maxntidx",
+// CHECK: !{{[0-9]+}} = !{void ()* @{{.*}}Kernel6{{.*}}, !"minctasm",
+
+__global__ void
+__launch_bounds__( MAX_THREADS_PER_BLOCK, -MIN_BLOCKS_PER_MP )
+Kernel7()
+{
+}
+// CHECK: !{{[0-9]+}} = !{void ()* @{{.*}}Kernel7{{.*}}, !"maxntidx",
+// CHECK-NOT: !{{[0-9]+}} = !{void ()* @{{.*}}Kernel7{{.*}}, !"minctasm",
OpenPOWER on IntegriCloud