AMDGPU: use ComplexPattern for offsets in llvm.amdgcn.buffer.load/store.format

Summary:
We cannot easily deduce that an offset is in an SGPR, but the Mesa frontend cannot easily make use of an explicit soffset parameter either. Furthermore, it is likely that in the future, LLVM will be in a better position than the frontend to choose an SGPR offset if possible.

Since there aren't any frontend uses of these intrinsics in upstream repositories yet, I would like to take this opportunity to change the intrinsic signatures to a single offset parameter, which is then selected to immediate offsets or voffsets using a ComplexPattern.

Reviewers: arsenm, tstellarAMD, mareko

Subscribers: arsenm, llvm-commits

Differential Revision: http://reviews.llvm.org/D18218

llvm-svn: 263790
author: Nicolai Haehnle <nhaehnle@gmail.com> 2016-03-18 16:24:20 +0000
committer: Nicolai Haehnle <nhaehnle@gmail.com> 2016-03-18 16:24:20 +0000
commit: 3003ba00a3260bdee71dd802bcfa970c3580e6bb (patch)
tree: 2ee90f91725b6c097dbf0def5e621966580566e2 /llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.format.ll
parent: a74cd526e9b273f1ca99793e62dfb6d1396bf6b3 (diff)
download: bcm5719-llvm-3003ba00a3260bdee71dd802bcfa970c3580e6bb.tar.gz
bcm5719-llvm-3003ba00a3260bdee71dd802bcfa970c3580e6bb.zip
1 file changed, 56 insertions, 15 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.format.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.format.ll
index d8ee315cfb8..c6222f426b3 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.format.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.format.ll
@@ -2,15 +2,15 @@
 ;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s
 
 ;CHECK-LABEL: {{^}}buffer_load:
-;CHECK: buffer_load_format_xyzw v[0:3], s[0:3], s4
-;CHECK: buffer_load_format_xyzw v[4:7], s[0:3], s4 glc
-;CHECK: buffer_load_format_xyzw v[8:11], s[0:3], s4 slc
+;CHECK: buffer_load_format_xyzw v[0:3], s[0:3], 0
+;CHECK: buffer_load_format_xyzw v[4:7], s[0:3], 0 glc
+;CHECK: buffer_load_format_xyzw v[8:11], s[0:3], 0 slc
 ;CHECK: s_waitcnt
-define {<4 x float>, <4 x float>, <4 x float>} @buffer_load(<4 x i32> inreg, i32 inreg) #0 {
+define {<4 x float>, <4 x float>, <4 x float>} @buffer_load(<4 x i32> inreg) #0 {
 main_body:
-  %data = call <4 x float> @llvm.amdgcn.buffer.load.format(<4 x i32> %0, i32 %1, i32 0, i32 0, i32 0, i1 0, i1 0)
-  %data_glc = call <4 x float> @llvm.amdgcn.buffer.load.format(<4 x i32> %0, i32 %1, i32 0, i32 0, i32 0, i1 1, i1 0)
-  %data_slc = call <4 x float> @llvm.amdgcn.buffer.load.format(<4 x i32> %0, i32 %1, i32 0, i32 0, i32 0, i1 0, i1 1)
+  %data = call <4 x float> @llvm.amdgcn.buffer.load.format(<4 x i32> %0, i32 0, i32 0, i1 0, i1 0)
+  %data_glc = call <4 x float> @llvm.amdgcn.buffer.load.format(<4 x i32> %0, i32 0, i32 0, i1 1, i1 0)
+  %data_slc = call <4 x float> @llvm.amdgcn.buffer.load.format(<4 x i32> %0, i32 0, i32 0, i1 0, i1 1)
   %r0 = insertvalue {<4 x float>, <4 x float>, <4 x float>} undef, <4 x float> %data, 0
   %r1 = insertvalue {<4 x float>, <4 x float>, <4 x float>} %r0, <4 x float> %data_glc, 1
   %r2 = insertvalue {<4 x float>, <4 x float>, <4 x float>} %r1, <4 x float> %data_slc, 2
@@ -18,11 +18,42 @@ main_body:
 }
 
 ;CHECK-LABEL: {{^}}buffer_load_immoffs:
-;CHECK: buffer_load_format_xyzw v[0:3], s[0:3], s4 offset:42
+;CHECK: buffer_load_format_xyzw v[0:3], s[0:3], 0 offset:42
 ;CHECK: s_waitcnt
-define <4 x float> @buffer_load_immoffs(<4 x i32> inreg, i32 inreg) #0 {
+define <4 x float> @buffer_load_immoffs(<4 x i32> inreg) #0 {
 main_body:
-  %data = call <4 x float> @llvm.amdgcn.buffer.load.format(<4 x i32> %0, i32 %1, i32 42, i32 0, i32 0, i1 0, i1 0)
+  %data = call <4 x float> @llvm.amdgcn.buffer.load.format(<4 x i32> %0, i32 0, i32 42, i1 0, i1 0)
+  ret <4 x float> %data
+}
+
+;CHECK-LABEL: {{^}}buffer_load_immoffs_large:
+;CHECK-DAG: buffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, s[0:3], 61 offset:4095
+;CHECK-DAG: s_movk_i32 [[OFS1:s[0-9]+]], 0x7fff
+;CHECK: buffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, s[0:3], [[OFS1]] offset:4093
+;CHECK: s_mov_b32 [[OFS2:s[0-9]+]], 0x8fff
+;CHECK: buffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, s[0:3], [[OFS2]] offset:1
+;CHECK: s_waitcnt
+define <4 x float> @buffer_load_immoffs_large(<4 x i32> inreg) #0 {
+main_body:
+  %d.0 = call <4 x float> @llvm.amdgcn.buffer.load.format(<4 x i32> %0, i32 0, i32 4156, i1 0, i1 0)
+  %d.1 = call <4 x float> @llvm.amdgcn.buffer.load.format(<4 x i32> %0, i32 0, i32 36860, i1 0, i1 0)
+  %d.2 = call <4 x float> @llvm.amdgcn.buffer.load.format(<4 x i32> %0, i32 0, i32 36864, i1 0, i1 0)
+  %d.3 = fadd <4 x float> %d.0, %d.1
+  %data = fadd <4 x float> %d.2, %d.3
+  ret <4 x float> %data
+}
+
+;CHECK-LABEL: {{^}}buffer_load_immoffs_reuse:
+;CHECK: s_movk_i32 [[OFS:s[0-9]+]], 0xfff
+;CHECK: buffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, s[0:3], [[OFS]] offset:65
+;CHECK-NOT: s_mov
+;CHECK: buffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, s[0:3], [[OFS]] offset:81
+;CHECK: s_waitcnt
+define <4 x float> @buffer_load_immoffs_reuse(<4 x i32> inreg) #0 {
+main_body:
+  %d.0 = call <4 x float> @llvm.amdgcn.buffer.load.format(<4 x i32> %0, i32 0, i32 4160, i1 0, i1 0)
+  %d.1 = call <4 x float> @llvm.amdgcn.buffer.load.format(<4 x i32> %0, i32 0, i32 4176, i1 0, i1 0)
+  %data = fadd <4 x float> %d.0, %d.1
   ret <4 x float> %data
 }
 
@@ -31,7 +62,7 @@ main_body:
 ;CHECK: s_waitcnt
 define <4 x float> @buffer_load_idx(<4 x i32> inreg, i32) #0 {
 main_body:
-  %data = call <4 x float> @llvm.amdgcn.buffer.load.format(<4 x i32> %0, i32 0, i32 0, i32 %1, i32 0, i1 0, i1 0)
+  %data = call <4 x float> @llvm.amdgcn.buffer.load.format(<4 x i32> %0, i32 %1, i32 0, i1 0, i1 0)
   ret <4 x float> %data
 }
 
@@ -40,7 +71,17 @@ main_body:
 ;CHECK: s_waitcnt
 define <4 x float> @buffer_load_ofs(<4 x i32> inreg, i32) #0 {
 main_body:
-  %data = call <4 x float> @llvm.amdgcn.buffer.load.format(<4 x i32> %0, i32 0, i32 0, i32 0, i32 %1, i1 0, i1 0)
+  %data = call <4 x float> @llvm.amdgcn.buffer.load.format(<4 x i32> %0, i32 0, i32 %1, i1 0, i1 0)
+  ret <4 x float> %data
+}
+
+;CHECK-LABEL: {{^}}buffer_load_ofs_imm:
+;CHECK: buffer_load_format_xyzw v[0:3], v0, s[0:3], 0 offen offset:58
+;CHECK: s_waitcnt
+define <4 x float> @buffer_load_ofs_imm(<4 x i32> inreg, i32) #0 {
+main_body:
+  %ofs = add i32 %1, 58
+  %data = call <4 x float> @llvm.amdgcn.buffer.load.format(<4 x i32> %0, i32 0, i32 %ofs, i1 0, i1 0)
   ret <4 x float> %data
 }
 
@@ -49,7 +90,7 @@ main_body:
 ;CHECK: s_waitcnt
 define <4 x float> @buffer_load_both(<4 x i32> inreg, i32, i32) #0 {
 main_body:
-  %data = call <4 x float> @llvm.amdgcn.buffer.load.format(<4 x i32> %0, i32 0, i32 0, i32 %1, i32 %2, i1 0, i1 0)
+  %data = call <4 x float> @llvm.amdgcn.buffer.load.format(<4 x i32> %0, i32 %1, i32 %2, i1 0, i1 0)
   ret <4 x float> %data
 }
 
@@ -59,11 +100,11 @@ main_body:
 ;CHECK: s_waitcnt
 define <4 x float> @buffer_load_both_reversed(<4 x i32> inreg, i32, i32) #0 {
 main_body:
-  %data = call <4 x float> @llvm.amdgcn.buffer.load.format(<4 x i32> %0, i32 0, i32 0, i32 %2, i32 %1, i1 0, i1 0)
+  %data = call <4 x float> @llvm.amdgcn.buffer.load.format(<4 x i32> %0, i32 %2, i32 %1, i1 0, i1 0)
   ret <4 x float> %data
 }
 
-declare <4 x float> @llvm.amdgcn.buffer.load.format(<4 x i32>, i32, i32, i32, i32, i1, i1) #1
+declare <4 x float> @llvm.amdgcn.buffer.load.format(<4 x i32>, i32, i32, i1, i1) #1
 
 attributes #0 = { "ShaderType"="0" }
 attributes #1 = { nounwind readonly }
author	Nicolai Haehnle <nhaehnle@gmail.com>	2016-03-18 16:24:20 +0000
committer	Nicolai Haehnle <nhaehnle@gmail.com>	2016-03-18 16:24:20 +0000
commit	3003ba00a3260bdee71dd802bcfa970c3580e6bb (patch)
tree	2ee90f91725b6c097dbf0def5e621966580566e2 /llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.format.ll
parent	a74cd526e9b273f1ca99793e62dfb6d1396bf6b3 (diff)
download	bcm5719-llvm-3003ba00a3260bdee71dd802bcfa970c3580e6bb.tar.gz bcm5719-llvm-3003ba00a3260bdee71dd802bcfa970c3580e6bb.zip