diff options
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU')
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.format.ll | 71 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.store.format.ll | 48 |
2 files changed, 80 insertions, 39 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.format.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.format.ll index d8ee315cfb8..c6222f426b3 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.format.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.format.ll @@ -2,15 +2,15 @@ ;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s ;CHECK-LABEL: {{^}}buffer_load: -;CHECK: buffer_load_format_xyzw v[0:3], s[0:3], s4 -;CHECK: buffer_load_format_xyzw v[4:7], s[0:3], s4 glc -;CHECK: buffer_load_format_xyzw v[8:11], s[0:3], s4 slc +;CHECK: buffer_load_format_xyzw v[0:3], s[0:3], 0 +;CHECK: buffer_load_format_xyzw v[4:7], s[0:3], 0 glc +;CHECK: buffer_load_format_xyzw v[8:11], s[0:3], 0 slc ;CHECK: s_waitcnt -define {<4 x float>, <4 x float>, <4 x float>} @buffer_load(<4 x i32> inreg, i32 inreg) #0 { +define {<4 x float>, <4 x float>, <4 x float>} @buffer_load(<4 x i32> inreg) #0 { main_body: - %data = call <4 x float> @llvm.amdgcn.buffer.load.format(<4 x i32> %0, i32 %1, i32 0, i32 0, i32 0, i1 0, i1 0) - %data_glc = call <4 x float> @llvm.amdgcn.buffer.load.format(<4 x i32> %0, i32 %1, i32 0, i32 0, i32 0, i1 1, i1 0) - %data_slc = call <4 x float> @llvm.amdgcn.buffer.load.format(<4 x i32> %0, i32 %1, i32 0, i32 0, i32 0, i1 0, i1 1) + %data = call <4 x float> @llvm.amdgcn.buffer.load.format(<4 x i32> %0, i32 0, i32 0, i1 0, i1 0) + %data_glc = call <4 x float> @llvm.amdgcn.buffer.load.format(<4 x i32> %0, i32 0, i32 0, i1 1, i1 0) + %data_slc = call <4 x float> @llvm.amdgcn.buffer.load.format(<4 x i32> %0, i32 0, i32 0, i1 0, i1 1) %r0 = insertvalue {<4 x float>, <4 x float>, <4 x float>} undef, <4 x float> %data, 0 %r1 = insertvalue {<4 x float>, <4 x float>, <4 x float>} %r0, <4 x float> %data_glc, 1 %r2 = insertvalue {<4 x float>, <4 x float>, <4 x float>} %r1, <4 x float> %data_slc, 2 @@ -18,11 +18,42 @@ main_body: } ;CHECK-LABEL: {{^}}buffer_load_immoffs: -;CHECK: buffer_load_format_xyzw v[0:3], s[0:3], s4 offset:42 +;CHECK: buffer_load_format_xyzw v[0:3], s[0:3], 0 offset:42 ;CHECK: s_waitcnt -define <4 x float> @buffer_load_immoffs(<4 x i32> inreg, i32 inreg) #0 { +define <4 x float> @buffer_load_immoffs(<4 x i32> inreg) #0 { main_body: - %data = call <4 x float> @llvm.amdgcn.buffer.load.format(<4 x i32> %0, i32 %1, i32 42, i32 0, i32 0, i1 0, i1 0) + %data = call <4 x float> @llvm.amdgcn.buffer.load.format(<4 x i32> %0, i32 0, i32 42, i1 0, i1 0) + ret <4 x float> %data +} + +;CHECK-LABEL: {{^}}buffer_load_immoffs_large: +;CHECK-DAG: buffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, s[0:3], 61 offset:4095 +;CHECK-DAG: s_movk_i32 [[OFS1:s[0-9]+]], 0x7fff +;CHECK: buffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, s[0:3], [[OFS1]] offset:4093 +;CHECK: s_mov_b32 [[OFS2:s[0-9]+]], 0x8fff +;CHECK: buffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, s[0:3], [[OFS2]] offset:1 +;CHECK: s_waitcnt +define <4 x float> @buffer_load_immoffs_large(<4 x i32> inreg) #0 { +main_body: + %d.0 = call <4 x float> @llvm.amdgcn.buffer.load.format(<4 x i32> %0, i32 0, i32 4156, i1 0, i1 0) + %d.1 = call <4 x float> @llvm.amdgcn.buffer.load.format(<4 x i32> %0, i32 0, i32 36860, i1 0, i1 0) + %d.2 = call <4 x float> @llvm.amdgcn.buffer.load.format(<4 x i32> %0, i32 0, i32 36864, i1 0, i1 0) + %d.3 = fadd <4 x float> %d.0, %d.1 + %data = fadd <4 x float> %d.2, %d.3 + ret <4 x float> %data +} + +;CHECK-LABEL: {{^}}buffer_load_immoffs_reuse: +;CHECK: s_movk_i32 [[OFS:s[0-9]+]], 0xfff +;CHECK: buffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, s[0:3], [[OFS]] offset:65 +;CHECK-NOT: s_mov +;CHECK: buffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, s[0:3], [[OFS]] offset:81 +;CHECK: s_waitcnt +define <4 x float> @buffer_load_immoffs_reuse(<4 x i32> inreg) #0 { +main_body: + %d.0 = call <4 x float> @llvm.amdgcn.buffer.load.format(<4 x i32> %0, i32 0, i32 4160, i1 0, i1 0) + %d.1 = call <4 x float> @llvm.amdgcn.buffer.load.format(<4 x i32> %0, i32 0, i32 4176, i1 0, i1 0) + %data = fadd <4 x float> %d.0, %d.1 ret <4 x float> %data } @@ -31,7 +62,7 @@ main_body: ;CHECK: s_waitcnt define <4 x float> @buffer_load_idx(<4 x i32> inreg, i32) #0 { main_body: - %data = call <4 x float> @llvm.amdgcn.buffer.load.format(<4 x i32> %0, i32 0, i32 0, i32 %1, i32 0, i1 0, i1 0) + %data = call <4 x float> @llvm.amdgcn.buffer.load.format(<4 x i32> %0, i32 %1, i32 0, i1 0, i1 0) ret <4 x float> %data } @@ -40,7 +71,17 @@ main_body: ;CHECK: s_waitcnt define <4 x float> @buffer_load_ofs(<4 x i32> inreg, i32) #0 { main_body: - %data = call <4 x float> @llvm.amdgcn.buffer.load.format(<4 x i32> %0, i32 0, i32 0, i32 0, i32 %1, i1 0, i1 0) + %data = call <4 x float> @llvm.amdgcn.buffer.load.format(<4 x i32> %0, i32 0, i32 %1, i1 0, i1 0) + ret <4 x float> %data +} + +;CHECK-LABEL: {{^}}buffer_load_ofs_imm: +;CHECK: buffer_load_format_xyzw v[0:3], v0, s[0:3], 0 offen offset:58 +;CHECK: s_waitcnt +define <4 x float> @buffer_load_ofs_imm(<4 x i32> inreg, i32) #0 { +main_body: + %ofs = add i32 %1, 58 + %data = call <4 x float> @llvm.amdgcn.buffer.load.format(<4 x i32> %0, i32 0, i32 %ofs, i1 0, i1 0) ret <4 x float> %data } @@ -49,7 +90,7 @@ main_body: ;CHECK: s_waitcnt define <4 x float> @buffer_load_both(<4 x i32> inreg, i32, i32) #0 { main_body: - %data = call <4 x float> @llvm.amdgcn.buffer.load.format(<4 x i32> %0, i32 0, i32 0, i32 %1, i32 %2, i1 0, i1 0) + %data = call <4 x float> @llvm.amdgcn.buffer.load.format(<4 x i32> %0, i32 %1, i32 %2, i1 0, i1 0) ret <4 x float> %data } @@ -59,11 +100,11 @@ main_body: ;CHECK: s_waitcnt define <4 x float> @buffer_load_both_reversed(<4 x i32> inreg, i32, i32) #0 { main_body: - %data = call <4 x float> @llvm.amdgcn.buffer.load.format(<4 x i32> %0, i32 0, i32 0, i32 %2, i32 %1, i1 0, i1 0) + %data = call <4 x float> @llvm.amdgcn.buffer.load.format(<4 x i32> %0, i32 %2, i32 %1, i1 0, i1 0) ret <4 x float> %data } -declare <4 x float> @llvm.amdgcn.buffer.load.format(<4 x i32>, i32, i32, i32, i32, i1, i1) #1 +declare <4 x float> @llvm.amdgcn.buffer.load.format(<4 x i32>, i32, i32, i1, i1) #1 attributes #0 = { "ShaderType"="0" } attributes #1 = { nounwind readonly } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.store.format.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.store.format.ll index 87e6e6da32c..7e254efdcca 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.store.format.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.store.format.ll @@ -2,55 +2,55 @@ ;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s ;CHECK-LABEL: {{^}}buffer_store: -;CHECK: buffer_store_format_xyzw v[0:3], s[0:3], s4 -;CHECK: buffer_store_format_xyzw v[4:7], s[0:3], s4 glc -;CHECK: buffer_store_format_xyzw v[8:11], s[0:3], s4 slc -define void @buffer_store(<4 x i32> inreg, i32 inreg, <4 x float>, <4 x float>, <4 x float>) #0 { +;CHECK: buffer_store_format_xyzw v[0:3], s[0:3], 0 +;CHECK: buffer_store_format_xyzw v[4:7], s[0:3], 0 glc +;CHECK: buffer_store_format_xyzw v[8:11], s[0:3], 0 slc +define void @buffer_store(<4 x i32> inreg, <4 x float>, <4 x float>, <4 x float>) #0 { main_body: - call void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float> %2, <4 x i32> %0, i32 %1, i32 0, i32 0, i32 0, i1 0, i1 0) - call void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float> %3, <4 x i32> %0, i32 %1, i32 0, i32 0, i32 0, i1 1, i1 0) - call void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float> %4, <4 x i32> %0, i32 %1, i32 0, i32 0, i32 0, i1 0, i1 1) + call void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float> %1, <4 x i32> %0, i32 0, i32 0, i1 0, i1 0) + call void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float> %2, <4 x i32> %0, i32 0, i32 0, i1 1, i1 0) + call void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float> %3, <4 x i32> %0, i32 0, i32 0, i1 0, i1 1) ret void } ;CHECK-LABEL: {{^}}buffer_store_immoffs: -;CHECK: buffer_store_format_xyzw v[0:3], s[0:3], s4 offset:42 -define void @buffer_store_immoffs(<4 x i32> inreg, i32 inreg, <4 x float>) #0 { +;CHECK: buffer_store_format_xyzw v[0:3], s[0:3], 0 offset:42 +define void @buffer_store_immoffs(<4 x i32> inreg, <4 x float>) #0 { main_body: - call void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float> %2, <4 x i32> %0, i32 %1, i32 42, i32 0, i32 0, i1 0, i1 0) + call void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float> %1, <4 x i32> %0, i32 0, i32 42, i1 0, i1 0) ret void } ;CHECK-LABEL: {{^}}buffer_store_idx: ;CHECK: buffer_store_format_xyzw v[0:3], v4, s[0:3], 0 idxen -define void @buffer_store_idx(<4 x i32> inreg, i32 inreg, <4 x float>, i32) #0 { +define void @buffer_store_idx(<4 x i32> inreg, <4 x float>, i32) #0 { main_body: - call void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float> %2, <4 x i32> %0, i32 0, i32 0, i32 %3, i32 0, i1 0, i1 0) + call void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float> %1, <4 x i32> %0, i32 %2, i32 0, i1 0, i1 0) ret void } ;CHECK-LABEL: {{^}}buffer_store_ofs: ;CHECK: buffer_store_format_xyzw v[0:3], v4, s[0:3], 0 offen -define void @buffer_store_ofs(<4 x i32> inreg, i32 inreg, <4 x float>, i32) #0 { +define void @buffer_store_ofs(<4 x i32> inreg, <4 x float>, i32) #0 { main_body: - call void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float> %2, <4 x i32> %0, i32 0, i32 0, i32 0, i32 %3, i1 0, i1 0) + call void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float> %1, <4 x i32> %0, i32 0, i32 %2, i1 0, i1 0) ret void } ;CHECK-LABEL: {{^}}buffer_store_both: ;CHECK: buffer_store_format_xyzw v[0:3], v[4:5], s[0:3], 0 idxen offen -define void @buffer_store_both(<4 x i32> inreg, i32 inreg, <4 x float>, i32, i32) #0 { +define void @buffer_store_both(<4 x i32> inreg, <4 x float>, i32, i32) #0 { main_body: - call void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float> %2, <4 x i32> %0, i32 0, i32 0, i32 %3, i32 %4, i1 0, i1 0) + call void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float> %1, <4 x i32> %0, i32 %2, i32 %3, i1 0, i1 0) ret void } ;CHECK-LABEL: {{^}}buffer_store_both_reversed: ;CHECK: v_mov_b32_e32 v6, v4 ;CHECK: buffer_store_format_xyzw v[0:3], v[5:6], s[0:3], 0 idxen offen -define void @buffer_store_both_reversed(<4 x i32> inreg, i32 inreg, <4 x float>, i32, i32) #0 { +define void @buffer_store_both_reversed(<4 x i32> inreg, <4 x float>, i32, i32) #0 { main_body: - call void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float> %2, <4 x i32> %0, i32 0, i32 0, i32 %4, i32 %3, i1 0, i1 0) + call void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float> %1, <4 x i32> %0, i32 %3, i32 %2, i1 0, i1 0) ret void } @@ -62,16 +62,16 @@ main_body: ;CHECK: buffer_load_format_xyzw v[0:3], v5, s[0:3], 0 idxen ;CHECK: s_waitcnt vmcnt(0) ;CHECK: buffer_store_format_xyzw v[0:3], v6, s[0:3], 0 idxen -define void @buffer_store_wait(<4 x i32> inreg, i32 inreg, <4 x float>, i32, i32, i32) #0 { +define void @buffer_store_wait(<4 x i32> inreg, <4 x float>, i32, i32, i32) #0 { main_body: - call void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float> %2, <4 x i32> %0, i32 0, i32 0, i32 %3, i32 0, i1 0, i1 0) - %data = call <4 x float> @llvm.amdgcn.buffer.load.format(<4 x i32> %0, i32 0, i32 0, i32 %4, i32 0, i1 0, i1 0) - call void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float> %data, <4 x i32> %0, i32 0, i32 0, i32 %5, i32 0, i1 0, i1 0) + call void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float> %1, <4 x i32> %0, i32 %2, i32 0, i1 0, i1 0) + %data = call <4 x float> @llvm.amdgcn.buffer.load.format(<4 x i32> %0, i32 %3, i32 0, i1 0, i1 0) + call void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float> %data, <4 x i32> %0, i32 %4, i32 0, i1 0, i1 0) ret void } -declare void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float>, <4 x i32>, i32, i32, i32, i32, i1, i1) #1 -declare <4 x float> @llvm.amdgcn.buffer.load.format(<4 x i32>, i32, i32, i32, i32, i1, i1) #2 +declare void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float>, <4 x i32>, i32, i32, i1, i1) #1 +declare <4 x float> @llvm.amdgcn.buffer.load.format(<4 x i32>, i32, i32, i1, i1) #2 attributes #0 = { "ShaderType"="0" } attributes #1 = { nounwind } |

