[AMDGPU] Add buffer store/load 8/16-bit overloaded intrinsics

Summary: Add buffer store/load 8/16-bit overloaded intrinsics for buffer, raw_buffer and struct_buffer.

Change-Id: I166a29f071b2ff4e4683fb0392564b1f223ac61d
Subscribers: arsenm, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D59265
llvm-svn: 356465
author: Ryan Taylor <rtayl@amd.com> 2019-03-19 16:07:00 +0000
committer: Ryan Taylor <rtayl@amd.com> 2019-03-19 16:07:00 +0000
commit: 00e063ab92345a00fe89b27c857ceaa281077166 (patch)
tree: 1ed3d628638ac18918e1b0c4c00a4714376a6bb0 /llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.ll
parent: e85f6bd64fbe0d0c21c64c60a7b34f7e173d009b (diff)
download: bcm5719-llvm-00e063ab92345a00fe89b27c857ceaa281077166.tar.gz
bcm5719-llvm-00e063ab92345a00fe89b27c857ceaa281077166.zip
1 files changed, 58 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.ll
index 5484c8b86b7..4ac34286b57 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.ll
@@ -144,6 +144,62 @@ main_body:
   ret {<4 x float>, <2 x float>, float} %r2
 }
 
+;CHECK-LABEL: {{^}}struct_buffer_load_ubyte:
+;CHECK-NEXT: %bb.
+;CHECK-NEXT: buffer_load_ubyte v{{[0-9]}}, v[0:1], s[0:3], 0 idxen offen
+;CHECK-NEXT: s_waitcnt vmcnt(0)
+;CHECK-NEXT: v_cvt_f32_ubyte0_e32 v0, v0
+;CHECK-NEXT: ; return to shader part epilog
+define amdgpu_ps float @struct_buffer_load_ubyte(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) {
+main_body:
+  %tmp = call i8 @llvm.amdgcn.struct.buffer.load.i8(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 0, i32 0) ; i8 overload of the struct buffer load
+  %tmp2 = zext i8 %tmp to i32 ; zero-extend, so the checks above expect the unsigned byte load
+  %val = uitofp i32 %tmp2 to float ; unsigned int-to-float; checked above as v_cvt_f32_ubyte0_e32
+  ret float %val
+}
+
+;CHECK-LABEL: {{^}}struct_buffer_load_ushort:
+;CHECK-NEXT: %bb.
+;CHECK-NEXT: buffer_load_ushort v{{[0-9]}}, v[0:1], s[0:3], 0 idxen offen
+;CHECK-NEXT: s_waitcnt vmcnt(0)
+;CHECK-NEXT: v_cvt_f32_u32_e32 v0, v0
+;CHECK-NEXT: ; return to shader part epilog
+define amdgpu_ps float @struct_buffer_load_ushort(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) {
+main_body:
+  %raw = call i16 @llvm.amdgcn.struct.buffer.load.i16(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 0, i32 0) ; i16 overload of the struct buffer load
+  %wide = zext i16 %raw to i32 ; zero-extend, so the checks above expect the unsigned short load
+  %res = uitofp i32 %wide to float ; unsigned int-to-float; checked above as v_cvt_f32_u32_e32
+  ret float %res
+}
+
+;CHECK-LABEL: {{^}}struct_buffer_load_sbyte:
+;CHECK-NEXT: %bb.
+;CHECK-NEXT: buffer_load_sbyte v{{[0-9]}}, v[0:1], s[0:3], 0 idxen offen
+;CHECK-NEXT: s_waitcnt vmcnt(0)
+;CHECK-NEXT: v_cvt_f32_i32_e32 v0, v0
+;CHECK-NEXT: ; return to shader part epilog
+define amdgpu_ps float @struct_buffer_load_sbyte(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) {
+main_body:
+  %raw = call i8 @llvm.amdgcn.struct.buffer.load.i8(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 0, i32 0) ; i8 overload of the struct buffer load
+  %wide = sext i8 %raw to i32 ; sign-extend, so the checks above expect the signed byte load
+  %res = sitofp i32 %wide to float ; signed int-to-float; checked above as v_cvt_f32_i32_e32
+  ret float %res
+}
+
+;CHECK-LABEL: {{^}}struct_buffer_load_sshort:
+;CHECK-NEXT: %bb.
+;CHECK-NEXT: buffer_load_sshort v{{[0-9]}}, v[0:1], s[0:3], 0 idxen offen
+;CHECK-NEXT: s_waitcnt vmcnt(0)
+;CHECK-NEXT: v_cvt_f32_i32_e32 v0, v0
+;CHECK-NEXT: ; return to shader part epilog
+define amdgpu_ps float @struct_buffer_load_sshort(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) {
+main_body:
+  %raw = call i16 @llvm.amdgcn.struct.buffer.load.i16(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 0, i32 0) ; i16 overload of the struct buffer load
+  %wide = sext i16 %raw to i32 ; sign-extend, so the checks above expect the signed short load
+  %res = sitofp i32 %wide to float ; signed int-to-float; checked above as v_cvt_f32_i32_e32
+  ret float %res
+}
+
 declare float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32>, i32, i32, i32, i32) #0
 declare <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32>, i32, i32, i32, i32) #0
 declare <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32>, i32, i32, i32, i32) #0
@@ -151,5 +207,7 @@ declare i32 @llvm.amdgcn.struct.buffer.load.i32(<4 x i32>, i32, i32, i32, i32) #
 declare <2 x i32> @llvm.amdgcn.struct.buffer.load.v2i32(<4 x i32>, i32, i32, i32, i32) #0
 declare <4 x i32> @llvm.amdgcn.struct.buffer.load.v4i32(<4 x i32>, i32, i32, i32, i32) #0
 declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
+declare i8 @llvm.amdgcn.struct.buffer.load.i8(<4 x i32>, i32, i32, i32, i32) #0
+declare i16 @llvm.amdgcn.struct.buffer.load.i16(<4 x i32>, i32, i32, i32, i32) #0
 
 attributes #0 = { nounwind readonly }
author	Ryan Taylor <rtayl@amd.com>	2019-03-19 16:07:00 +0000
committer	Ryan Taylor <rtayl@amd.com>	2019-03-19 16:07:00 +0000
commit	00e063ab92345a00fe89b27c857ceaa281077166 (patch)
tree	1ed3d628638ac18918e1b0c4c00a4714376a6bb0 /llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.ll
parent	e85f6bd64fbe0d0c21c64c60a7b34f7e173d009b (diff)
download	bcm5719-llvm-00e063ab92345a00fe89b27c857ceaa281077166.tar.gz bcm5719-llvm-00e063ab92345a00fe89b27c857ceaa281077166.zip