diff options
author | Ryan Taylor <rtayl@amd.com> | 2019-03-19 16:07:00 +0000 |
---|---|---|
committer | Ryan Taylor <rtayl@amd.com> | 2019-03-19 16:07:00 +0000 |
commit | 00e063ab92345a00fe89b27c857ceaa281077166 (patch) | |
tree | 1ed3d628638ac18918e1b0c4c00a4714376a6bb0 /llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.ll | |
parent | e85f6bd64fbe0d0c21c64c60a7b34f7e173d009b (diff) | |
download | bcm5719-llvm-00e063ab92345a00fe89b27c857ceaa281077166.tar.gz bcm5719-llvm-00e063ab92345a00fe89b27c857ceaa281077166.zip |
[AMDGPU] Add buffer store/load 8/16-bit overloaded intrinsics
Summary:
Add buffer store/load 8/16-bit overloaded intrinsics for buffer, raw_buffer and struct_buffer.
Change-Id: I166a29f071b2ff4e4683fb0392564b1f223ac61d
Subscribers: arsenm, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D59265
llvm-svn: 356465
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.ll')
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.ll | 58 |
1 files changed, 58 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.ll index 5484c8b86b7..4ac34286b57 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.ll @@ -144,6 +144,62 @@ main_body: ret {<4 x float>, <2 x float>, float} %r2 } +;CHECK-LABEL: {{^}}struct_buffer_load_ubyte: +;CHECK-NEXT: %bb. +;CHECK-NEXT: buffer_load_ubyte v{{[0-9]}}, v[0:1], s[0:3], 0 idxen offen +;CHECK: s_waitcnt vmcnt(0) +;CHECK-NEXT: v_cvt_f32_ubyte0_e32 v0, v0 +;CHECK-NEXT: ; return to shader part epilog +define amdgpu_ps float @struct_buffer_load_ubyte(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) { +main_body: + %tmp = call i8 @llvm.amdgcn.struct.buffer.load.i8(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 0, i32 0) + %tmp2 = zext i8 %tmp to i32 + %val = uitofp i32 %tmp2 to float + ret float %val +} + +;CHECK-LABEL: {{^}}struct_buffer_load_ushort: +;CHECK-NEXT: %bb. +;CHECK-NEXT: buffer_load_ushort v{{[0-9]}}, v[0:1], s[0:3], 0 idxen offen +;CHECK-NEXT: s_waitcnt vmcnt(0) +;CHECK-NEXT: v_cvt_f32_u32_e32 v0, v0 +;CHECK-NEXT: ; return to shader part epilog +define amdgpu_ps float @struct_buffer_load_ushort(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) { +main_body: + %tmp = call i16 @llvm.amdgcn.struct.buffer.load.i16(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 0, i32 0) + %tmp2 = zext i16 %tmp to i32 + %val = uitofp i32 %tmp2 to float + ret float %val +} + +;CHECK-LABEL: {{^}}struct_buffer_load_sbyte: +;CHECK-NEXT: %bb. 
+;CHECK-NEXT: buffer_load_sbyte v{{[0-9]}}, v[0:1], s[0:3], 0 idxen offen +;CHECK-NEXT: s_waitcnt vmcnt(0) +;CHECK-NEXT: v_cvt_f32_i32_e32 v0, v0 +;CHECK-NEXT: ; return to shader part epilog +define amdgpu_ps float @struct_buffer_load_sbyte(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) { +main_body: + %tmp = call i8 @llvm.amdgcn.struct.buffer.load.i8(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 0, i32 0) + %tmp2 = sext i8 %tmp to i32 + %val = sitofp i32 %tmp2 to float + ret float %val +} + +;CHECK-LABEL: {{^}}struct_buffer_load_sshort: +;CHECK-NEXT: %bb. +;CHECK-NEXT: buffer_load_sshort v{{[0-9]}}, v[0:1], s[0:3], 0 idxen offen +;CHECK-NEXT: s_waitcnt vmcnt(0) +;CHECK-NEXT: v_cvt_f32_i32_e32 v0, v0 +;CHECK-NEXT: ; return to shader part epilog +define amdgpu_ps float @struct_buffer_load_sshort(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) { +main_body: + %tmp = call i16 @llvm.amdgcn.struct.buffer.load.i16(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 0, i32 0) + %tmp2 = sext i16 %tmp to i32 + %val = sitofp i32 %tmp2 to float + ret float %val +} + declare float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32>, i32, i32, i32, i32) #0 declare <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32>, i32, i32, i32, i32) #0 declare <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32>, i32, i32, i32, i32) #0 @@ -151,5 +207,7 @@ declare i32 @llvm.amdgcn.struct.buffer.load.i32(<4 x i32>, i32, i32, i32, i32) # declare <2 x i32> @llvm.amdgcn.struct.buffer.load.v2i32(<4 x i32>, i32, i32, i32, i32) #0 declare <4 x i32> @llvm.amdgcn.struct.buffer.load.v4i32(<4 x i32>, i32, i32, i32, i32) #0 declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0 +declare i8 @llvm.amdgcn.struct.buffer.load.i8(<4 x i32>, i32, i32, i32, i32) #0 +declare i16 @llvm.amdgcn.struct.buffer.load.i16(<4 x i32>, i32, i32, i32, i32) #0 attributes #0 = { nounwind readonly } |