diff options
| author | Artem Belevich <tra@google.com> | 2015-04-21 22:55:54 +0000 |
|---|---|---|
| committer | Artem Belevich <tra@google.com> | 2015-04-21 22:55:54 +0000 |
| commit | 7093e4064116b710a3b61aebdca60c215f5c29fe (patch) | |
| tree | eb60e35036d16b30ce51846a93bdf935e1d2b022 /clang/test/CodeGenCUDA | |
| parent | 6e3344ed98a3432bf59a9d9b28b7bb869ca10912 (diff) | |
| download | bcm5719-llvm-7093e4064116b710a3b61aebdca60c215f5c29fe.tar.gz bcm5719-llvm-7093e4064116b710a3b61aebdca60c215f5c29fe.zip | |
[cuda] Allow using integral non-type template parameters as launch_bounds attribute arguments.
- Changed CUDALaunchBounds arguments from integers to Expr* so they can
be saved in AST for instantiation.
- Added support for template instantiation of launch_bounds attribute.
- Moved evaluation of launch_bounds arguments to NVPTXTargetCodeGenInfo::
SetTargetAttributes() where it can be done after template instantiation.
- Added a warning on negative launch_bounds arguments.
- Amended test cases.
Differential Revision: http://reviews.llvm.org/D8985
llvm-svn: 235452
Diffstat (limited to 'clang/test/CodeGenCUDA')
| -rw-r--r-- | clang/test/CodeGenCUDA/launch-bounds.cu | 51 |
1 files changed, 51 insertions, 0 deletions
diff --git a/clang/test/CodeGenCUDA/launch-bounds.cu b/clang/test/CodeGenCUDA/launch-bounds.cu index 6f4102ea007..ecbd0ad7058 100644 --- a/clang/test/CodeGenCUDA/launch-bounds.cu +++ b/clang/test/CodeGenCUDA/launch-bounds.cu @@ -28,3 +28,54 @@ Kernel2() } // CHECK: !{{[0-9]+}} = !{void ()* @Kernel2, !"maxntidx", i32 256} + +template <int max_threads_per_block> +__global__ void +__launch_bounds__(max_threads_per_block) +Kernel3() +{ +} + +template void Kernel3<MAX_THREADS_PER_BLOCK>(); +// CHECK: !{{[0-9]+}} = !{void ()* @{{.*}}Kernel3{{.*}}, !"maxntidx", i32 256} + +template <int max_threads_per_block, int min_blocks_per_mp> +__global__ void +__launch_bounds__(max_threads_per_block, min_blocks_per_mp) +Kernel4() +{ +} +template void Kernel4<MAX_THREADS_PER_BLOCK, MIN_BLOCKS_PER_MP>(); + +// CHECK: !{{[0-9]+}} = !{void ()* @{{.*}}Kernel4{{.*}}, !"maxntidx", i32 256} +// CHECK: !{{[0-9]+}} = !{void ()* @{{.*}}Kernel4{{.*}}, !"minctasm", i32 2} + +const int constint = 100; +template <int max_threads_per_block, int min_blocks_per_mp> +__global__ void +__launch_bounds__(max_threads_per_block + constint, + min_blocks_per_mp + max_threads_per_block) +Kernel5() +{ +} +template void Kernel5<MAX_THREADS_PER_BLOCK, MIN_BLOCKS_PER_MP>(); + +// CHECK: !{{[0-9]+}} = !{void ()* @{{.*}}Kernel5{{.*}}, !"maxntidx", i32 356} +// CHECK: !{{[0-9]+}} = !{void ()* @{{.*}}Kernel5{{.*}}, !"minctasm", i32 258} + +// Make sure we don't emit negative launch bounds values. +__global__ void +__launch_bounds__( -MAX_THREADS_PER_BLOCK, MIN_BLOCKS_PER_MP ) +Kernel6() +{ +} +// CHECK-NOT: !{{[0-9]+}} = !{void ()* @{{.*}}Kernel6{{.*}}, !"maxntidx", +// CHECK: !{{[0-9]+}} = !{void ()* @{{.*}}Kernel6{{.*}}, !"minctasm", + +__global__ void +__launch_bounds__( MAX_THREADS_PER_BLOCK, -MIN_BLOCKS_PER_MP ) +Kernel7() +{ +} +// CHECK: !{{[0-9]+}} = !{void ()* @{{.*}}Kernel7{{.*}}, !"maxntidx", +// CHECK-NOT: !{{[0-9]+}} = !{void ()* @{{.*}}Kernel7{{.*}}, !"minctasm", |

