summaryrefslogtreecommitdiffstats
path: root/llvm/test
diff options
context:
space:
mode:
authorArtem Belevich <tra@google.com>2016-05-26 17:02:56 +0000
committerArtem Belevich <tra@google.com>2016-05-26 17:02:56 +0000
commit49e9a8123676041262c59ca1d28c42424b2c4c55 (patch)
tree5e126bc7856663d99f18fa0433563e016a9691d3 /llvm/test
parent6edc135d0f4e4a5636bf0707971b6e619d5dc0c6 (diff)
downloadbcm5719-llvm-49e9a8123676041262c59ca1d28c42424b2c4c55.tar.gz
bcm5719-llvm-49e9a8123676041262c59ca1d28c42424b2c4c55.zip
[NVPTX] Added NVVMIntrRange pass
NVVMIntrRange adds !range metadata to calls of NVVM intrinsics that return values within known limited range. This allows LLVM to generate optimal code for indexing arrays based on tid/ctaid which is a frequently used pattern in CUDA code. Differential Revision: http://reviews.llvm.org/D20644 llvm-svn: 270872
Diffstat (limited to 'llvm/test')
-rw-r--r--llvm/test/CodeGen/NVPTX/intrinsic-old.ll67
1 files changed, 54 insertions, 13 deletions
diff --git a/llvm/test/CodeGen/NVPTX/intrinsic-old.ll b/llvm/test/CodeGen/NVPTX/intrinsic-old.ll
index 3c51776c0ec..5c73f44d075 100644
--- a/llvm/test/CodeGen/NVPTX/intrinsic-old.ll
+++ b/llvm/test/CodeGen/NVPTX/intrinsic-old.ll
@@ -1,8 +1,14 @@
; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
+; RUN: opt < %s -S -mtriple=nvptx-nvidia-cuda -nvvm-intr-range \
+; RUN: | FileCheck --check-prefix=RANGE --check-prefix=RANGE_20 %s
+; RUN: opt < %s -S -mtriple=nvptx-nvidia-cuda \
+; RUN: -nvvm-intr-range -nvvm-intr-range-sm=30 \
+; RUN: | FileCheck --check-prefix=RANGE --check-prefix=RANGE_30 %s
define ptx_device i32 @test_tid_x() {
; CHECK: mov.u32 %r{{[0-9]+}}, %tid.x;
+; RANGE: call i32 @llvm.ptx.read.tid.x(), !range ![[BLK_IDX_XY:[0-9]+]]
; CHECK: ret;
%x = call i32 @llvm.ptx.read.tid.x()
ret i32 %x
@@ -10,6 +16,7 @@ define ptx_device i32 @test_tid_x() {
define ptx_device i32 @test_tid_y() {
; CHECK: mov.u32 %r{{[0-9]+}}, %tid.y;
+; RANGE: call i32 @llvm.ptx.read.tid.y(), !range ![[BLK_IDX_XY]]
; CHECK: ret;
%x = call i32 @llvm.ptx.read.tid.y()
ret i32 %x
@@ -17,6 +24,7 @@ define ptx_device i32 @test_tid_y() {
define ptx_device i32 @test_tid_z() {
; CHECK: mov.u32 %r{{[0-9]+}}, %tid.z;
+; RANGE: call i32 @llvm.ptx.read.tid.z(), !range ![[BLK_IDX_Z:[0-9]+]]
; CHECK: ret;
%x = call i32 @llvm.ptx.read.tid.z()
ret i32 %x
@@ -31,6 +39,7 @@ define ptx_device i32 @test_tid_w() {
define ptx_device i32 @test_ntid_x() {
; CHECK: mov.u32 %r{{[0-9]+}}, %ntid.x;
+; RANGE: call i32 @llvm.ptx.read.ntid.x(), !range ![[BLK_SIZE_XY:[0-9]+]]
; CHECK: ret;
%x = call i32 @llvm.ptx.read.ntid.x()
ret i32 %x
@@ -38,6 +47,7 @@ define ptx_device i32 @test_ntid_x() {
define ptx_device i32 @test_ntid_y() {
; CHECK: mov.u32 %r{{[0-9]+}}, %ntid.y;
+; RANGE: call i32 @llvm.ptx.read.ntid.y(), !range ![[BLK_SIZE_XY]]
; CHECK: ret;
%x = call i32 @llvm.ptx.read.ntid.y()
ret i32 %x
@@ -45,6 +55,7 @@ define ptx_device i32 @test_ntid_y() {
define ptx_device i32 @test_ntid_z() {
; CHECK: mov.u32 %r{{[0-9]+}}, %ntid.z;
+; RANGE: call i32 @llvm.ptx.read.ntid.z(), !range ![[BLK_SIZE_Z:[0-9]+]]
; CHECK: ret;
%x = call i32 @llvm.ptx.read.ntid.z()
ret i32 %x
@@ -59,11 +70,20 @@ define ptx_device i32 @test_ntid_w() {
define ptx_device i32 @test_laneid() {
; CHECK: mov.u32 %r{{[0-9]+}}, %laneid;
+; RANGE: call i32 @llvm.ptx.read.laneid(), !range ![[LANEID:[0-9]+]]
; CHECK: ret;
%x = call i32 @llvm.ptx.read.laneid()
ret i32 %x
}
+define ptx_device i32 @test_warpsize() {
+; CHECK: mov.u32 %r{{[0-9]+}}, WARP_SZ;
+; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.warpsize(), !range ![[WARPSIZE:[0-9]+]]
+; CHECK: ret;
+ %x = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
+ ret i32 %x
+}
+
define ptx_device i32 @test_warpid() {
; CHECK: mov.u32 %r{{[0-9]+}}, %warpid;
; CHECK: ret;
@@ -78,15 +98,9 @@ define ptx_device i32 @test_nwarpid() {
ret i32 %x
}
-define ptx_device i32 @test_ctaid_x() {
-; CHECK: mov.u32 %r{{[0-9]+}}, %ctaid.x;
-; CHECK: ret;
- %x = call i32 @llvm.ptx.read.ctaid.x()
- ret i32 %x
-}
-
define ptx_device i32 @test_ctaid_y() {
; CHECK: mov.u32 %r{{[0-9]+}}, %ctaid.y;
+; RANGE: call i32 @llvm.ptx.read.ctaid.y(), !range ![[GRID_IDX_YZ:[0-9]+]]
; CHECK: ret;
%x = call i32 @llvm.ptx.read.ctaid.y()
ret i32 %x
@@ -94,27 +108,31 @@ define ptx_device i32 @test_ctaid_y() {
define ptx_device i32 @test_ctaid_z() {
; CHECK: mov.u32 %r{{[0-9]+}}, %ctaid.z;
+; RANGE: call i32 @llvm.ptx.read.ctaid.z(), !range ![[GRID_IDX_YZ]]
; CHECK: ret;
%x = call i32 @llvm.ptx.read.ctaid.z()
ret i32 %x
}
-define ptx_device i32 @test_ctaid_w() {
-; CHECK: mov.u32 %r{{[0-9]+}}, %ctaid.w;
+define ptx_device i32 @test_ctaid_x() {
+; CHECK: mov.u32 %r{{[0-9]+}}, %ctaid.x;
+; RANGE_30: call i32 @llvm.ptx.read.ctaid.x(), !range ![[GRID_IDX_X:[0-9]+]]
+; RANGE_20: call i32 @llvm.ptx.read.ctaid.x(), !range ![[GRID_IDX_YZ]]
; CHECK: ret;
- %x = call i32 @llvm.ptx.read.ctaid.w()
+ %x = call i32 @llvm.ptx.read.ctaid.x()
ret i32 %x
}
-define ptx_device i32 @test_nctaid_x() {
-; CHECK: mov.u32 %r{{[0-9]+}}, %nctaid.x;
+define ptx_device i32 @test_ctaid_w() {
+; CHECK: mov.u32 %r{{[0-9]+}}, %ctaid.w;
; CHECK: ret;
- %x = call i32 @llvm.ptx.read.nctaid.x()
+ %x = call i32 @llvm.ptx.read.ctaid.w()
ret i32 %x
}
define ptx_device i32 @test_nctaid_y() {
; CHECK: mov.u32 %r{{[0-9]+}}, %nctaid.y;
+; RANGE: call i32 @llvm.ptx.read.nctaid.y(), !range ![[GRID_SIZE_YZ:[0-9]+]]
; CHECK: ret;
%x = call i32 @llvm.ptx.read.nctaid.y()
ret i32 %x
@@ -122,11 +140,22 @@ define ptx_device i32 @test_nctaid_y() {
define ptx_device i32 @test_nctaid_z() {
; CHECK: mov.u32 %r{{[0-9]+}}, %nctaid.z;
+; RANGE: call i32 @llvm.ptx.read.nctaid.z(), !range ![[GRID_SIZE_YZ]]
; CHECK: ret;
%x = call i32 @llvm.ptx.read.nctaid.z()
ret i32 %x
}
+define ptx_device i32 @test_nctaid_x() {
+; CHECK: mov.u32 %r{{[0-9]+}}, %nctaid.x;
+; RANGE_30: call i32 @llvm.ptx.read.nctaid.x(), !range ![[GRID_SIZE_X:[0-9]+]]
+; RANGE_20: call i32 @llvm.ptx.read.nctaid.x(), !range ![[GRID_SIZE_YZ]]
+; CHECK: ret;
+ %x = call i32 @llvm.ptx.read.nctaid.x()
+ ret i32 %x
+}
+
+
define ptx_device i32 @test_nctaid_w() {
; CHECK: mov.u32 %r{{[0-9]+}}, %nctaid.w;
; CHECK: ret;
@@ -248,6 +277,7 @@ declare i32 @llvm.ptx.read.ntid.y()
declare i32 @llvm.ptx.read.ntid.z()
declare i32 @llvm.ptx.read.ntid.w()
+declare i32 @llvm.nvvm.read.ptx.sreg.warpsize()
declare i32 @llvm.ptx.read.laneid()
declare i32 @llvm.ptx.read.warpid()
declare i32 @llvm.ptx.read.nwarpid()
@@ -280,3 +310,14 @@ declare i32 @llvm.ptx.read.pm2()
declare i32 @llvm.ptx.read.pm3()
declare void @llvm.ptx.bar.sync(i32 %i)
+
+; RANGE-DAG: ![[BLK_IDX_XY]] = !{i32 0, i32 1024}
+; RANGE-DAG: ![[BLK_IDX_Z]] = !{i32 0, i32 64}
+; RANGE-DAG: ![[BLK_SIZE_XY]] = !{i32 1, i32 1025}
+; RANGE-DAG: ![[BLK_SIZE_Z]] = !{i32 1, i32 65}
+; RANGE-DAG: ![[LANEID]] = !{i32 0, i32 32}
+; RANGE-DAG: ![[WARPSIZE]] = !{i32 32, i32 33}
+; RANGE_30-DAG: ![[GRID_IDX_X]] = !{i32 0, i32 2147483647}
+; RANGE-DAG: ![[GRID_IDX_YZ]] = !{i32 0, i32 65535}
+; RANGE_30-DAG: ![[GRID_SIZE_X]] = !{i32 1, i32 -2147483648}
+; RANGE-DAG: ![[GRID_SIZE_YZ]] = !{i32 1, i32 65536}
OpenPOWER on IntegriCloud