summary | refs | log | tree | commit | diff | stats
path: root/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.ll
diff options
context:
space:
mode:
author: Ryan Taylor <rtayl@amd.com>, 2019-03-19 16:07:00 +0000
committer: Ryan Taylor <rtayl@amd.com>, 2019-03-19 16:07:00 +0000
commit: 00e063ab92345a00fe89b27c857ceaa281077166 (patch)
tree: 1ed3d628638ac18918e1b0c4c00a4714376a6bb0 /llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.ll
parent: e85f6bd64fbe0d0c21c64c60a7b34f7e173d009b (diff)
download: bcm5719-llvm-00e063ab92345a00fe89b27c857ceaa281077166.tar.gz
bcm5719-llvm-00e063ab92345a00fe89b27c857ceaa281077166.zip
[AMDGPU] Add buffer/load 8/16 bit overloaded intrinsics
Summary: Add buffer store/load 8/16 overloaded intrinsics for buffer, raw_buffer and struct_buffer.
Change-Id: I166a29f071b2ff4e4683fb0392564b1f223ac61d
Subscribers: arsenm, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D59265
llvm-svn: 356465
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.ll')
-rw-r--r-- llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.ll | 58
1 files changed, 58 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.ll
index 5484c8b86b7..4ac34286b57 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.ll
@@ -144,6 +144,62 @@ main_body:
ret {<4 x float>, <2 x float>, float} %r2
}
+; struct_buffer_load_ubyte: loads an i8 through
+; llvm.amdgcn.struct.buffer.load.i8, zero-extends it to i32 and converts the
+; result to float with uitofp. The expected output is one buffer_load_ubyte
+; using both idxen and offen, directly followed by the vmcnt wait and a
+; v_cvt_f32_ubyte0_e32 conversion, with no other instruction in between
+; (same strictness as the ushort/sbyte/sshort tests in this file).
+;CHECK-LABEL: {{^}}struct_buffer_load_ubyte:
+;CHECK-NEXT: %bb.
+;CHECK-NEXT: buffer_load_ubyte v{{[0-9]}}, v[0:1], s[0:3], 0 idxen offen
+;CHECK-NEXT: s_waitcnt vmcnt(0)
+;CHECK-NEXT: v_cvt_f32_ubyte0_e32 v0, v0
+;CHECK-NEXT: ; return to shader part epilog
+define amdgpu_ps float @struct_buffer_load_ubyte(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) {
+main_body:
+ %tmp = call i8 @llvm.amdgcn.struct.buffer.load.i8(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 0, i32 0)
+ %tmp2 = zext i8 %tmp to i32
+ %val = uitofp i32 %tmp2 to float
+ ret float %val
+}
+
+; struct_buffer_load_ushort: loads an i16 through
+; llvm.amdgcn.struct.buffer.load.i16, zero-extends it to i32 and converts the
+; result to float with uitofp. The expected output is one buffer_load_ushort
+; using both idxen and offen, directly followed by the vmcnt wait and a
+; v_cvt_f32_u32_e32 conversion, with no other instruction in between.
+;CHECK-LABEL: {{^}}struct_buffer_load_ushort:
+;CHECK-NEXT: %bb.
+;CHECK-NEXT: buffer_load_ushort v{{[0-9]}}, v[0:1], s[0:3], 0 idxen offen
+;CHECK-NEXT: s_waitcnt vmcnt(0)
+;CHECK-NEXT: v_cvt_f32_u32_e32 v0, v0
+;CHECK-NEXT: ; return to shader part epilog
+define amdgpu_ps float @struct_buffer_load_ushort(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) {
+main_body:
+ %tmp = call i16 @llvm.amdgcn.struct.buffer.load.i16(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 0, i32 0)
+ %tmp2 = zext i16 %tmp to i32
+ %val = uitofp i32 %tmp2 to float
+ ret float %val
+}
+
+; struct_buffer_load_sbyte: loads an i8 through
+; llvm.amdgcn.struct.buffer.load.i8, sign-extends it to i32 and converts the
+; result to float with sitofp. The expected output is one buffer_load_sbyte
+; using both idxen and offen, directly followed by the vmcnt wait and a
+; v_cvt_f32_i32_e32 conversion, with no other instruction in between.
+;CHECK-LABEL: {{^}}struct_buffer_load_sbyte:
+;CHECK-NEXT: %bb.
+;CHECK-NEXT: buffer_load_sbyte v{{[0-9]}}, v[0:1], s[0:3], 0 idxen offen
+;CHECK-NEXT: s_waitcnt vmcnt(0)
+;CHECK-NEXT: v_cvt_f32_i32_e32 v0, v0
+;CHECK-NEXT: ; return to shader part epilog
+define amdgpu_ps float @struct_buffer_load_sbyte(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) {
+main_body:
+ %tmp = call i8 @llvm.amdgcn.struct.buffer.load.i8(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 0, i32 0)
+ %tmp2 = sext i8 %tmp to i32
+ %val = sitofp i32 %tmp2 to float
+ ret float %val
+}
+
+; struct_buffer_load_sshort: loads an i16 through
+; llvm.amdgcn.struct.buffer.load.i16, sign-extends it to i32 and converts the
+; result to float with sitofp. The expected output is one buffer_load_sshort
+; using both idxen and offen, directly followed by the vmcnt wait and a
+; v_cvt_f32_i32_e32 conversion, with no other instruction in between.
+;CHECK-LABEL: {{^}}struct_buffer_load_sshort:
+;CHECK-NEXT: %bb.
+;CHECK-NEXT: buffer_load_sshort v{{[0-9]}}, v[0:1], s[0:3], 0 idxen offen
+;CHECK-NEXT: s_waitcnt vmcnt(0)
+;CHECK-NEXT: v_cvt_f32_i32_e32 v0, v0
+;CHECK-NEXT: ; return to shader part epilog
+define amdgpu_ps float @struct_buffer_load_sshort(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) {
+main_body:
+ %tmp = call i16 @llvm.amdgcn.struct.buffer.load.i16(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 0, i32 0)
+ %tmp2 = sext i16 %tmp to i32
+ %val = sitofp i32 %tmp2 to float
+ ret float %val
+}
+
declare float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32>, i32, i32, i32, i32) #0
declare <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32>, i32, i32, i32, i32) #0
declare <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32>, i32, i32, i32, i32) #0
@@ -151,5 +207,7 @@ declare i32 @llvm.amdgcn.struct.buffer.load.i32(<4 x i32>, i32, i32, i32, i32) #
declare <2 x i32> @llvm.amdgcn.struct.buffer.load.v2i32(<4 x i32>, i32, i32, i32, i32) #0
declare <4 x i32> @llvm.amdgcn.struct.buffer.load.v4i32(<4 x i32>, i32, i32, i32, i32) #0
declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
+declare i8 @llvm.amdgcn.struct.buffer.load.i8(<4 x i32>, i32, i32, i32, i32) #0
+declare i16 @llvm.amdgcn.struct.buffer.load.i16(<4 x i32>, i32, i32, i32, i32) #0
attributes #0 = { nounwind readonly }
OpenPOWER on IntegriCloud