diff options
| author | Artem Belevich <tra@google.com> | 2016-05-26 17:02:56 +0000 |
|---|---|---|
| committer | Artem Belevich <tra@google.com> | 2016-05-26 17:02:56 +0000 |
| commit | 49e9a8123676041262c59ca1d28c42424b2c4c55 (patch) | |
| tree | 5e126bc7856663d99f18fa0433563e016a9691d3 /llvm/test | |
| parent | 6edc135d0f4e4a5636bf0707971b6e619d5dc0c6 (diff) | |
| download | bcm5719-llvm-49e9a8123676041262c59ca1d28c42424b2c4c55.tar.gz bcm5719-llvm-49e9a8123676041262c59ca1d28c42424b2c4c55.zip | |
[NVPTX] Added NVVMIntrRange pass
NVVMIntrRange adds !range metadata to calls of NVVM intrinsics
that return values within a known, limited range.
This allows LLVM to generate optimal code for indexing arrays
based on tid/ctaid, which is a frequently used pattern in CUDA code.
Differential Revision: http://reviews.llvm.org/D20644
llvm-svn: 270872
Diffstat (limited to 'llvm/test')
| -rw-r--r-- | llvm/test/CodeGen/NVPTX/intrinsic-old.ll | 67 |
1 file changed, 54 insertions, 13 deletions
diff --git a/llvm/test/CodeGen/NVPTX/intrinsic-old.ll b/llvm/test/CodeGen/NVPTX/intrinsic-old.ll index 3c51776c0ec..5c73f44d075 100644 --- a/llvm/test/CodeGen/NVPTX/intrinsic-old.ll +++ b/llvm/test/CodeGen/NVPTX/intrinsic-old.ll @@ -1,8 +1,14 @@ ; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s ; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s +; RUN: opt < %s -S -mtriple=nvptx-nvidia-cuda -nvvm-intr-range \ +; RUN: | FileCheck --check-prefix=RANGE --check-prefix=RANGE_20 %s +; RUN: opt < %s -S -mtriple=nvptx-nvidia-cuda \ +; RUN: -nvvm-intr-range -nvvm-intr-range-sm=30 \ +; RUN: | FileCheck --check-prefix=RANGE --check-prefix=RANGE_30 %s define ptx_device i32 @test_tid_x() { ; CHECK: mov.u32 %r{{[0-9]+}}, %tid.x; +; RANGE: call i32 @llvm.ptx.read.tid.x(), !range ![[BLK_IDX_XY:[0-9]+]] ; CHECK: ret; %x = call i32 @llvm.ptx.read.tid.x() ret i32 %x @@ -10,6 +16,7 @@ define ptx_device i32 @test_tid_x() { define ptx_device i32 @test_tid_y() { ; CHECK: mov.u32 %r{{[0-9]+}}, %tid.y; +; RANGE: call i32 @llvm.ptx.read.tid.y(), !range ![[BLK_IDX_XY]] ; CHECK: ret; %x = call i32 @llvm.ptx.read.tid.y() ret i32 %x @@ -17,6 +24,7 @@ define ptx_device i32 @test_tid_y() { define ptx_device i32 @test_tid_z() { ; CHECK: mov.u32 %r{{[0-9]+}}, %tid.z; +; RANGE: call i32 @llvm.ptx.read.tid.z(), !range ![[BLK_IDX_Z:[0-9]+]] ; CHECK: ret; %x = call i32 @llvm.ptx.read.tid.z() ret i32 %x @@ -31,6 +39,7 @@ define ptx_device i32 @test_tid_w() { define ptx_device i32 @test_ntid_x() { ; CHECK: mov.u32 %r{{[0-9]+}}, %ntid.x; +; RANGE: call i32 @llvm.ptx.read.ntid.x(), !range ![[BLK_SIZE_XY:[0-9]+]] ; CHECK: ret; %x = call i32 @llvm.ptx.read.ntid.x() ret i32 %x @@ -38,6 +47,7 @@ define ptx_device i32 @test_ntid_x() { define ptx_device i32 @test_ntid_y() { ; CHECK: mov.u32 %r{{[0-9]+}}, %ntid.y; +; RANGE: call i32 @llvm.ptx.read.ntid.y(), !range ![[BLK_SIZE_XY]] ; CHECK: ret; %x = call i32 @llvm.ptx.read.ntid.y() ret i32 %x @@ -45,6 +55,7 @@ define ptx_device i32 @test_ntid_y() { 
define ptx_device i32 @test_ntid_z() { ; CHECK: mov.u32 %r{{[0-9]+}}, %ntid.z; +; RANGE: call i32 @llvm.ptx.read.ntid.z(), !range ![[BLK_SIZE_Z:[0-9]+]] ; CHECK: ret; %x = call i32 @llvm.ptx.read.ntid.z() ret i32 %x @@ -59,11 +70,20 @@ define ptx_device i32 @test_ntid_w() { define ptx_device i32 @test_laneid() { ; CHECK: mov.u32 %r{{[0-9]+}}, %laneid; +; RANGE: call i32 @llvm.ptx.read.laneid(), !range ![[LANEID:[0-9]+]] ; CHECK: ret; %x = call i32 @llvm.ptx.read.laneid() ret i32 %x } +define ptx_device i32 @test_warpsize() { +; CHECK: mov.u32 %r{{[0-9]+}}, WARP_SZ; +; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.warpsize(), !range ![[WARPSIZE:[0-9]+]] +; CHECK: ret; + %x = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() + ret i32 %x +} + define ptx_device i32 @test_warpid() { ; CHECK: mov.u32 %r{{[0-9]+}}, %warpid; ; CHECK: ret; @@ -78,15 +98,9 @@ define ptx_device i32 @test_nwarpid() { ret i32 %x } -define ptx_device i32 @test_ctaid_x() { -; CHECK: mov.u32 %r{{[0-9]+}}, %ctaid.x; -; CHECK: ret; - %x = call i32 @llvm.ptx.read.ctaid.x() - ret i32 %x -} - define ptx_device i32 @test_ctaid_y() { ; CHECK: mov.u32 %r{{[0-9]+}}, %ctaid.y; +; RANGE: call i32 @llvm.ptx.read.ctaid.y(), !range ![[GRID_IDX_YZ:[0-9]+]] ; CHECK: ret; %x = call i32 @llvm.ptx.read.ctaid.y() ret i32 %x @@ -94,27 +108,31 @@ define ptx_device i32 @test_ctaid_y() { define ptx_device i32 @test_ctaid_z() { ; CHECK: mov.u32 %r{{[0-9]+}}, %ctaid.z; +; RANGE: call i32 @llvm.ptx.read.ctaid.z(), !range ![[GRID_IDX_YZ]] ; CHECK: ret; %x = call i32 @llvm.ptx.read.ctaid.z() ret i32 %x } -define ptx_device i32 @test_ctaid_w() { -; CHECK: mov.u32 %r{{[0-9]+}}, %ctaid.w; +define ptx_device i32 @test_ctaid_x() { +; CHECK: mov.u32 %r{{[0-9]+}}, %ctaid.x; +; RANGE_30: call i32 @llvm.ptx.read.ctaid.x(), !range ![[GRID_IDX_X:[0-9]+]] +; RANGE_20: call i32 @llvm.ptx.read.ctaid.x(), !range ![[GRID_IDX_YZ]] ; CHECK: ret; - %x = call i32 @llvm.ptx.read.ctaid.w() + %x = call i32 @llvm.ptx.read.ctaid.x() ret i32 %x } -define 
ptx_device i32 @test_nctaid_x() { -; CHECK: mov.u32 %r{{[0-9]+}}, %nctaid.x; +define ptx_device i32 @test_ctaid_w() { +; CHECK: mov.u32 %r{{[0-9]+}}, %ctaid.w; ; CHECK: ret; - %x = call i32 @llvm.ptx.read.nctaid.x() + %x = call i32 @llvm.ptx.read.ctaid.w() ret i32 %x } define ptx_device i32 @test_nctaid_y() { ; CHECK: mov.u32 %r{{[0-9]+}}, %nctaid.y; +; RANGE: call i32 @llvm.ptx.read.nctaid.y(), !range ![[GRID_SIZE_YZ:[0-9]+]] ; CHECK: ret; %x = call i32 @llvm.ptx.read.nctaid.y() ret i32 %x @@ -122,11 +140,22 @@ define ptx_device i32 @test_nctaid_y() { define ptx_device i32 @test_nctaid_z() { ; CHECK: mov.u32 %r{{[0-9]+}}, %nctaid.z; +; RANGE: call i32 @llvm.ptx.read.nctaid.z(), !range ![[GRID_SIZE_YZ]] ; CHECK: ret; %x = call i32 @llvm.ptx.read.nctaid.z() ret i32 %x } +define ptx_device i32 @test_nctaid_x() { +; CHECK: mov.u32 %r{{[0-9]+}}, %nctaid.x; +; RANGE_30: call i32 @llvm.ptx.read.nctaid.x(), !range ![[GRID_SIZE_X:[0-9]+]] +; RANGE_20: call i32 @llvm.ptx.read.nctaid.x(), !range ![[GRID_SIZE_YZ]] +; CHECK: ret; + %x = call i32 @llvm.ptx.read.nctaid.x() + ret i32 %x +} + + define ptx_device i32 @test_nctaid_w() { ; CHECK: mov.u32 %r{{[0-9]+}}, %nctaid.w; ; CHECK: ret; @@ -248,6 +277,7 @@ declare i32 @llvm.ptx.read.ntid.y() declare i32 @llvm.ptx.read.ntid.z() declare i32 @llvm.ptx.read.ntid.w() +declare i32 @llvm.nvvm.read.ptx.sreg.warpsize() declare i32 @llvm.ptx.read.laneid() declare i32 @llvm.ptx.read.warpid() declare i32 @llvm.ptx.read.nwarpid() @@ -280,3 +310,14 @@ declare i32 @llvm.ptx.read.pm2() declare i32 @llvm.ptx.read.pm3() declare void @llvm.ptx.bar.sync(i32 %i) + +; RANGE-DAG: ![[BLK_IDX_XY]] = !{i32 0, i32 1024} +; RANGE-DAG: ![[BLK_IDX_Z]] = !{i32 0, i32 64} +; RANGE-DAG: ![[BLK_SIZE_XY]] = !{i32 1, i32 1025} +; RANGE-DAG: ![[BLK_SIZE_Z]] = !{i32 1, i32 65} +; RANGE-DAG: ![[LANEID]] = !{i32 0, i32 32} +; RANGE-DAG: ![[WARPSIZE]] = !{i32 32, i32 33} +; RANGE_30-DAG: ![[GRID_IDX_X]] = !{i32 0, i32 2147483647} +; RANGE-DAG: ![[GRID_IDX_YZ]] = !{i32 
0, i32 65535} +; RANGE_30-DAG: ![[GRID_SIZE_X]] = !{i32 1, i32 -2147483648} +; RANGE-DAG: ![[GRID_SIZE_YZ]] = !{i32 1, i32 65536} |

