[cuda] Allow using integral non-type template parameters as launch_bounds attribute arguments.

- Changed CUDALaunchBounds arguments from integers to Expr* so they can be saved in the AST for instantiation.
- Added support for template instantiation of the launch_bounds attribute.
- Moved evaluation of launch_bounds arguments to NVPTXTargetCodeGenInfo::SetTargetAttributes(), where it can be done after template instantiation.
- Added a warning on negative launch_bounds arguments.
- Amended test cases.

Differential Revision: http://reviews.llvm.org/D8985
llvm-svn: 235452
author: Artem Belevich <tra@google.com> 2015-04-21 22:55:54 +0000
committer: Artem Belevich <tra@google.com> 2015-04-21 22:55:54 +0000
commit: 7093e4064116b710a3b61aebdca60c215f5c29fe (patch)
tree: eb60e35036d16b30ce51846a93bdf935e1d2b022 /clang/test/CodeGenCUDA
parent: 6e3344ed98a3432bf59a9d9b28b7bb869ca10912 (diff)
download: bcm5719-llvm-7093e4064116b710a3b61aebdca60c215f5c29fe.tar.gz
bcm5719-llvm-7093e4064116b710a3b61aebdca60c215f5c29fe.zip
1 file changed, 51 insertions, 0 deletions
diff --git a/clang/test/CodeGenCUDA/launch-bounds.cu b/clang/test/CodeGenCUDA/launch-bounds.cu
index 6f4102ea007..ecbd0ad7058 100644
--- a/clang/test/CodeGenCUDA/launch-bounds.cu
+++ b/clang/test/CodeGenCUDA/launch-bounds.cu
@@ -28,3 +28,54 @@ Kernel2()
 }
 
 // CHECK: !{{[0-9]+}} = !{void ()* @Kernel2, !"maxntidx", i32 256}
+
+template <int max_threads_per_block>
+__global__ void
+__launch_bounds__(max_threads_per_block)
+Kernel3()
+{
+}
+
+template void Kernel3<MAX_THREADS_PER_BLOCK>();
+// CHECK: !{{[0-9]+}} = !{void ()* @{{.*}}Kernel3{{.*}}, !"maxntidx", i32 256}
+
+template <int max_threads_per_block, int min_blocks_per_mp>
+__global__ void
+__launch_bounds__(max_threads_per_block, min_blocks_per_mp)
+Kernel4()
+{
+}
+template void Kernel4<MAX_THREADS_PER_BLOCK, MIN_BLOCKS_PER_MP>();
+
+// CHECK: !{{[0-9]+}} = !{void ()* @{{.*}}Kernel4{{.*}}, !"maxntidx", i32 256}
+// CHECK: !{{[0-9]+}} = !{void ()* @{{.*}}Kernel4{{.*}}, !"minctasm", i32 2}
+
+const int constint = 100;
+template <int max_threads_per_block, int min_blocks_per_mp>
+__global__ void
+__launch_bounds__(max_threads_per_block + constint,
+                  min_blocks_per_mp + max_threads_per_block)
+Kernel5()
+{
+}
+template void Kernel5<MAX_THREADS_PER_BLOCK, MIN_BLOCKS_PER_MP>();
+
+// CHECK: !{{[0-9]+}} = !{void ()* @{{.*}}Kernel5{{.*}}, !"maxntidx", i32 356}
+// CHECK: !{{[0-9]+}} = !{void ()* @{{.*}}Kernel5{{.*}}, !"minctasm", i32 258}
+
+// Make sure we don't emit negative launch bounds values.
+__global__ void
+__launch_bounds__( -MAX_THREADS_PER_BLOCK, MIN_BLOCKS_PER_MP )
+Kernel6()
+{
+}
+// CHECK-NOT: !{{[0-9]+}} = !{void ()* @{{.*}}Kernel6{{.*}}, !"maxntidx",
+// CHECK:     !{{[0-9]+}} = !{void ()* @{{.*}}Kernel6{{.*}}, !"minctasm",
+
+__global__ void
+__launch_bounds__( MAX_THREADS_PER_BLOCK, -MIN_BLOCKS_PER_MP )
+Kernel7()
+{
+}
+// CHECK:     !{{[0-9]+}} = !{void ()* @{{.*}}Kernel7{{.*}}, !"maxntidx",
+// CHECK-NOT: !{{[0-9]+}} = !{void ()* @{{.*}}Kernel7{{.*}}, !"minctasm",
author	Artem Belevich <tra@google.com>	2015-04-21 22:55:54 +0000
committer	Artem Belevich <tra@google.com>	2015-04-21 22:55:54 +0000
commit	7093e4064116b710a3b61aebdca60c215f5c29fe (patch)
tree	eb60e35036d16b30ce51846a93bdf935e1d2b022 /clang/test/CodeGenCUDA
parent	6e3344ed98a3432bf59a9d9b28b7bb869ca10912 (diff)
download	bcm5719-llvm-7093e4064116b710a3b61aebdca60c215f5c29fe.tar.gz bcm5719-llvm-7093e4064116b710a3b61aebdca60c215f5c29fe.zip