diff options
Diffstat (limited to 'llvm/test/CodeGen')
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/coalescer-subrange-crash.ll | 66 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.ll | 130 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.d16.ll | 123 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.d16.ll | 102 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.ll | 362 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.getlod.ll | 53 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.ll | 210 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.d16.ll | 137 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.ll | 435 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.o.ll | 427 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/si-lod-bias.ll | 59 |
11 files changed, 0 insertions, 2104 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/coalescer-subrange-crash.ll b/llvm/test/CodeGen/AMDGPU/coalescer-subrange-crash.ll deleted file mode 100644 index ef1b3d25f88..00000000000 --- a/llvm/test/CodeGen/AMDGPU/coalescer-subrange-crash.ll +++ /dev/null @@ -1,66 +0,0 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s -; -; This testcase used to cause the following crash: -; -; *** Couldn't join subrange! -; -; UNREACHABLE executed at lib/CodeGen/RegisterCoalescer.cpp:2666! -; -; The insertelement instructions became subregister definitions: one virtual -; register was defined and re-defined by one group of the consecutive insert- -; elements, and another was defined by the second group. -; Since a copy between the two full registers was present in the program, -; the coalescer tried to merge them. The join algorithm for the main range -; decided that it was correct to do so, while the subrange join unexpectedly -; failed. This was caused by the live interval subranges not being computed -; correctly: subregister defs are not uses for the purpose of subranges. -; -; Test for a valid output: -; CHECK: image_sample_c_d_o -define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([17 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615) %arg, [16 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615) %arg1, [32 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615) %arg2, [16 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615) %arg3, [16 x <4 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615) %arg4, float inreg %arg5, i32 inreg %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <3 x i32> %arg10, <2 x i32> %arg11, <2 x i32> %arg12, <2 x i32> %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, i32 %arg19, i32 %arg20, float %arg21, i32 %arg22) #0 { -main_body: - %i.i = extractelement <2 x i32> %arg8, i32 0 - %j.i = extractelement <2 x i32> %arg8, i32 1 - %i.f.i = bitcast i32 %i.i to float - %j.f.i = bitcast i32 %j.i to float - %p1.i = call float @llvm.amdgcn.interp.p1(float %i.f.i, i32 3, i32 0, i32 %arg6) #1 - %p2.i = call float @llvm.amdgcn.interp.p2(float %p1.i, float %j.f.i, i32 3, i32 0, i32 %arg6) #1 - %tmp23 = fadd float %p2.i, 0xBFA99999A0000000 - %tmp24 = fadd float %p2.i, 0x3FA99999A0000000 - %tmp25 = bitcast float %tmp23 to i32 - %tmp26 = insertelement <16 x i32> <i32 212739, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>, i32 %tmp25, i32 1 - %tmp27 = insertelement <16 x i32> %tmp26, i32 undef, i32 2 - %tmp28 = insertelement <16 x i32> %tmp27, i32 undef, i32 3 - %tmp29 = insertelement <16 x i32> %tmp28, i32 undef, i32 4 - %tmp30 = insertelement <16 x i32> %tmp29, i32 0, i32 5 - %tmp31 = insertelement <16 x i32> %tmp30, i32 undef, i32 6 - %tmp32 = insertelement <16 x i32> %tmp31, i32 undef, i32 7 - %tmp33 = insertelement <16 x i32> %tmp32, i32 undef, i32 8 - %tmp33.bc = bitcast <16 x i32> %tmp33 to <16 x float> - %tmp34 = call <4 x float> @llvm.amdgcn.image.sample.c.d.o.v4f32.v16f32.v8i32(<16 x float> %tmp33.bc, <8 x i32> undef, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 true) - %tmp35 = extractelement <4 x float> %tmp34, i32 0 - %tmp36 = bitcast float %tmp24 to i32 - %tmp37 = insertelement <16 x i32> <i32 212739, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>, i32 %tmp36, i32 1 - %tmp38 = insertelement <16 x i32> %tmp37, i32 undef, i32 2 - %tmp39 = insertelement <16 x i32> %tmp38, i32 undef, i32 3 - %tmp40 = insertelement <16 x i32> %tmp39, i32 undef, i32 4 - %tmp41 = insertelement <16 x i32> %tmp40, i32 0, i32 5 - %tmp42 = insertelement <16 x i32> %tmp41, i32 undef, i32 6 - %tmp43 = insertelement <16 x i32> %tmp42, i32 undef, i32 7 - %tmp44 = insertelement <16 x i32> %tmp43, i32 undef, i32 8 - %tmp44.bc = bitcast <16 x i32> %tmp44 to <16 x float> - %tmp45 = call <4 x float> @llvm.amdgcn.image.sample.c.d.o.v4f32.v16f32.v8i32(<16 x float> %tmp44.bc, <8 x i32> undef, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 true) - %tmp46 = extractelement <4 x float> %tmp45, i32 0 - %tmp47 = fmul float %tmp35, %tmp46 - %tmp48 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, float %tmp47, 14 - %tmp49 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %tmp48, float %arg21, 24 - ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %tmp49 -} - -declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #1 -declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.sample.c.d.o.v4f32.v16f32.v8i32(<16 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #2 - -attributes #0 = { nounwind } -attributes #1 = { nounwind readnone } -attributes #2 = { nounwind readonly } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.ll deleted file mode 100644 index 5dec4ad9c1e..00000000000 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.ll +++ /dev/null @@ -1,130 +0,0 @@ -;RUN: llc < %s -march=amdgcn -mcpu=verde -show-mc-encoding -verify-machineinstrs | FileCheck %s --check-prefix=CHECK --check-prefix=SI -;RUN: llc < %s -march=amdgcn -mcpu=tonga -show-mc-encoding -verify-machineinstrs | FileCheck %s --check-prefix=CHECK --check-prefix=VI - -;CHECK-LABEL: {{^}}image_atomic_swap: -;CHECK-NOT: s_waitcnt -;SI: image_atomic_swap v4, v[0:3], s[0:7] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x3c,0xf0,0x00,0x04,0x00,0x00] -;VI: image_atomic_swap v4, v[0:3], s[0:7] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x40,0xf0,0x00,0x04,0x00,0x00] -;CHECK: s_waitcnt vmcnt(0) -define amdgpu_ps float @image_atomic_swap(<8 x i32> inreg, <4 x i32>, i32) { -main_body: - %orig = call i32 @llvm.amdgcn.image.atomic.swap.v4i32(i32 %2, <4 x i32> %1, <8 x i32> %0, i1 0, i1 0, i1 0) - %orig.f = bitcast i32 %orig to float - ret float %orig.f -} - -;CHECK-LABEL: {{^}}image_atomic_swap_v2i32: -;CHECK-NOT: s_waitcnt -;SI: image_atomic_swap v2, v[0:1], s[0:7] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x3c,0xf0,0x00,0x02,0x00,0x00] -;VI: image_atomic_swap v2, v[0:1], s[0:7] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x40,0xf0,0x00,0x02,0x00,0x00] -;CHECK: s_waitcnt vmcnt(0) -define amdgpu_ps float @image_atomic_swap_v2i32(<8 x i32> inreg, <2 x i32>, i32) { -main_body: - %orig = call i32 @llvm.amdgcn.image.atomic.swap.v2i32(i32 %2, <2 x i32> %1, <8 x i32> %0, i1 0, i1 0, i1 0) - %orig.f = bitcast i32 %orig to float - ret float %orig.f -} - -;CHECK-LABEL: {{^}}image_atomic_swap_i32: -;CHECK-NOT: s_waitcnt -;SI: image_atomic_swap v1, v0, s[0:7] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x3c,0xf0,0x00,0x01,0x00,0x00] -;VI: image_atomic_swap v1, v0, s[0:7] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x40,0xf0,0x00,0x01,0x00,0x00] -;CHECK: s_waitcnt vmcnt(0) -define amdgpu_ps float @image_atomic_swap_i32(<8 x i32> inreg, i32, i32) { -main_body: - %orig = call i32 @llvm.amdgcn.image.atomic.swap.i32(i32 %2, i32 %1, <8 x i32> %0, i1 0, i1 0, i1 0) - %orig.f = bitcast i32 %orig to float - ret float %orig.f -} - -;CHECK-LABEL: {{^}}image_atomic_cmpswap: -;CHECK-NOT: s_waitcnt -;SI: image_atomic_cmpswap v[4:5], v[0:3], s[0:7] dmask:0x3 unorm glc ; encoding: [0x00,0x33,0x40,0xf0,0x00,0x04,0x00,0x00] -;VI: image_atomic_cmpswap v[4:5], v[0:3], s[0:7] dmask:0x3 unorm glc ; encoding: [0x00,0x33,0x44,0xf0,0x00,0x04,0x00,0x00] -;CHECK: s_waitcnt vmcnt(0) -;CHECK: v_mov_b32_e32 v0, v4 -define amdgpu_ps float @image_atomic_cmpswap(<8 x i32> inreg, <4 x i32>, i32, i32) { -main_body: - %orig = call i32 @llvm.amdgcn.image.atomic.cmpswap.v4i32(i32 %2, i32 %3, <4 x i32> %1, <8 x i32> %0, i1 0, i1 0, i1 0) - %orig.f = bitcast i32 %orig to float - ret float %orig.f -} - -;CHECK-LABEL: {{^}}image_atomic_add: -;CHECK-NOT: s_waitcnt -;SI: image_atomic_add v4, v[0:3], s[0:7] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x44,0xf0,0x00,0x04,0x00,0x00] -;VI: image_atomic_add v4, v[0:3], s[0:7] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x48,0xf0,0x00,0x04,0x00,0x00] -;CHECK: s_waitcnt vmcnt(0) -define amdgpu_ps float @image_atomic_add(<8 x i32> inreg, <4 x i32>, i32) { -main_body: - %orig = call i32 @llvm.amdgcn.image.atomic.add.v4i32(i32 %2, <4 x i32> %1, <8 x i32> %0, i1 0, i1 0, i1 0) - %orig.f = bitcast i32 %orig to float - ret float %orig.f -} - -;CHECK-LABEL: {{^}}image_atomic_sub: -;CHECK-NOT: s_waitcnt -;SI: image_atomic_sub v4, v[0:3], s[0:7] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x48,0xf0,0x00,0x04,0x00,0x00] -;VI: image_atomic_sub v4, v[0:3], s[0:7] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x4c,0xf0,0x00,0x04,0x00,0x00] -;CHECK: s_waitcnt vmcnt(0) -define amdgpu_ps float @image_atomic_sub(<8 x i32> inreg, <4 x i32>, i32) { -main_body: - %orig = call i32 @llvm.amdgcn.image.atomic.sub.v4i32(i32 %2, <4 x i32> %1, <8 x i32> %0, i1 0, i1 0, i1 0) - %orig.f = bitcast i32 %orig to float - ret float %orig.f -} - -;CHECK-LABEL: {{^}}image_atomic_unchanged: -;CHECK-NOT: s_waitcnt -;CHECK: image_atomic_smin v4, v[0:3], s[0:7] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x50,0xf0,0x00,0x04,0x00,0x00] -;CHECK: s_waitcnt vmcnt(0) -;CHECK: image_atomic_umin v4, v[0:3], s[0:7] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x54,0xf0,0x00,0x04,0x00,0x00] -;CHECK: s_waitcnt vmcnt(0) -;CHECK: image_atomic_smax v4, v[0:3], s[0:7] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x58,0xf0,0x00,0x04,0x00,0x00] -;CHECK: s_waitcnt vmcnt(0) -;CHECK: image_atomic_umax v4, v[0:3], s[0:7] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x5c,0xf0,0x00,0x04,0x00,0x00] -;CHECK: s_waitcnt vmcnt(0) -;CHECK: image_atomic_and v4, v[0:3], s[0:7] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x60,0xf0,0x00,0x04,0x00,0x00] -;CHECK: s_waitcnt vmcnt(0) -;CHECK: image_atomic_or v4, v[0:3], s[0:7] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x64,0xf0,0x00,0x04,0x00,0x00] -;CHECK: s_waitcnt vmcnt(0) -;CHECK: image_atomic_xor v4, v[0:3], s[0:7] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x68,0xf0,0x00,0x04,0x00,0x00] -;CHECK: s_waitcnt vmcnt(0) -;CHECK: image_atomic_inc v4, v[0:3], s[0:7] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x6c,0xf0,0x00,0x04,0x00,0x00] -;CHECK: s_waitcnt vmcnt(0) -;CHECK: image_atomic_dec v4, v[0:3], s[0:7] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x70,0xf0,0x00,0x04,0x00,0x00] -;CHECK: s_waitcnt vmcnt(0) -define amdgpu_ps float @image_atomic_unchanged(<8 x i32> inreg, <4 x i32>, i32) { -main_body: - %t0 = call i32 @llvm.amdgcn.image.atomic.smin.v4i32(i32 %2, <4 x i32> %1, <8 x i32> %0, i1 0, i1 0, i1 0) - %t1 = call i32 @llvm.amdgcn.image.atomic.umin.v4i32(i32 %t0, <4 x i32> %1, <8 x i32> %0, i1 0, i1 0, i1 0) - %t2 = call i32 @llvm.amdgcn.image.atomic.smax.v4i32(i32 %t1, <4 x i32> %1, <8 x i32> %0, i1 0, i1 0, i1 0) - %t3 = call i32 @llvm.amdgcn.image.atomic.umax.v4i32(i32 %t2, <4 x i32> %1, <8 x i32> %0, i1 0, i1 0, i1 0) - %t4 = call i32 @llvm.amdgcn.image.atomic.and.v4i32(i32 %t3, <4 x i32> %1, <8 x i32> %0, i1 0, i1 0, i1 0) - %t5 = call i32 @llvm.amdgcn.image.atomic.or.v4i32(i32 %t4, <4 x i32> %1, <8 x i32> %0, i1 0, i1 0, i1 0) - %t6 = call i32 @llvm.amdgcn.image.atomic.xor.v4i32(i32 %t5, <4 x i32> %1, <8 x i32> %0, i1 0, i1 0, i1 0) - %t7 = call i32 @llvm.amdgcn.image.atomic.inc.v4i32(i32 %t6, <4 x i32> %1, <8 x i32> %0, i1 0, i1 0, i1 0) - %t8 = call i32 @llvm.amdgcn.image.atomic.dec.v4i32(i32 %t7, <4 x i32> %1, <8 x i32> %0, i1 0, i1 0, i1 0) - %out = bitcast i32 %t8 to float - ret float %out -} - -declare i32 @llvm.amdgcn.image.atomic.swap.i32(i32, i32, <8 x i32>, i1, i1, i1) #0 -declare i32 @llvm.amdgcn.image.atomic.swap.v2i32(i32, <2 x i32>, <8 x i32>, i1, i1, i1) #0 -declare i32 @llvm.amdgcn.image.atomic.swap.v4i32(i32, <4 x i32>, <8 x i32>, i1, i1, i1) #0 - -declare i32 @llvm.amdgcn.image.atomic.cmpswap.v4i32(i32, i32, <4 x i32>, <8 x i32>,i1, i1, i1) #0 - -declare i32 @llvm.amdgcn.image.atomic.add.v4i32(i32, <4 x i32>, <8 x i32>, i1, i1, i1) #0 -declare i32 @llvm.amdgcn.image.atomic.sub.v4i32(i32, <4 x i32>, <8 x i32>, i1, i1, i1) #0 -declare i32 @llvm.amdgcn.image.atomic.smin.v4i32(i32, <4 x i32>, <8 x i32>, i1, i1, i1) #0 -declare i32 @llvm.amdgcn.image.atomic.umin.v4i32(i32, <4 x i32>, <8 x i32>, i1, i1, i1) #0 -declare i32 @llvm.amdgcn.image.atomic.smax.v4i32(i32, <4 x i32>, <8 x i32>, i1, i1, i1) #0 -declare i32 @llvm.amdgcn.image.atomic.umax.v4i32(i32, <4 x i32>, <8 x i32>, i1, i1, i1) #0 -declare i32 @llvm.amdgcn.image.atomic.and.v4i32(i32, <4 x i32>, <8 x i32>, i1, i1, i1) #0 -declare i32 @llvm.amdgcn.image.atomic.or.v4i32(i32, <4 x i32>, <8 x i32>, i1, i1, i1) #0 -declare i32 @llvm.amdgcn.image.atomic.xor.v4i32(i32, <4 x i32>, <8 x i32>, i1, i1, i1) #0 -declare i32 @llvm.amdgcn.image.atomic.inc.v4i32(i32, <4 x i32>, <8 x i32>, i1, i1, i1) #0 -declare i32 @llvm.amdgcn.image.atomic.dec.v4i32(i32, <4 x i32>, <8 x i32>, i1, i1, i1) #0 - -attributes #0 = { nounwind } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.d16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.d16.ll deleted file mode 100644 index 6ed4548487f..00000000000 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.d16.ll +++ /dev/null @@ -1,123 +0,0 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=UNPACKED %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx810 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=PACKED -check-prefix=GFX81 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=PACKED -check-prefix=GFX9 %s - -; GCN-LABEL: {{^}}image_load_f16 -; GCN: image_load v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0x1 unorm d16 -define half @image_load_f16(<4 x i32> %coords, <8 x i32> inreg %rsrc) { -main_body: - %tex = call half @llvm.amdgcn.image.load.f16.v4i32.v8i32(<4 x i32> %coords, <8 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false) - ret half %tex -} - -; GCN-LABEL: {{^}}image_load_v2f16: -; UNPACKED: image_load v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0x3 unorm d16 -; UNPACKED: v_mov_b32_e32 v{{[0-9]+}}, v[[HI]] - -; PACKED: image_load v[[HI:[0-9]+]], v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0x3 unorm d16 -; PACKED: v_lshrrev_b32_e32 v{{[0-9]+}}, 16, v[[HI]] -define half @image_load_v2f16(<4 x i32> %coords, <8 x i32> inreg %rsrc) { -main_body: - %tex = call <2 x half> @llvm.amdgcn.image.load.v2f16.v4i32.v8i32(<4 x i32> %coords, <8 x i32> %rsrc, i32 3, i1 false, i1 false, i1 false, i1 false) - %elt = extractelement <2 x half> %tex, i32 1 - ret half %elt -} - -; GCN-LABEL: {{^}}image_load_v4f16: -; UNPACKED: image_load v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf unorm d16 -; UNPACKED: v_mov_b32_e32 v{{[0-9]+}}, v[[HI]] - -; PACKED: image_load v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf unorm d16 -; PACKED: v_lshrrev_b32_e32 v{{[0-9]+}}, 16, v[[HI]] -define half @image_load_v4f16(<4 x i32> %coords, <8 x i32> inreg %rsrc) { -main_body: - %tex = call <4 x half> @llvm.amdgcn.image.load.v4f16.v4i32.v8i32(<4 x i32> %coords, <8 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false) - %elt = extractelement <4 x half> %tex, i32 3 - ret half %elt -} - -; GCN-LABEL: {{^}}image_load_mip_v4f16: -; UNPACKED: image_load_mip v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf unorm d16 -; UNPACKED: v_mov_b32_e32 v{{[0-9]+}}, v[[HI]] - -; PACKED: image_load_mip v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf unorm d16 -; PACKED: v_lshrrev_b32_e32 v{{[0-9]+}}, 16, v[[HI]] -define half @image_load_mip_v4f16(<4 x i32> %coords, <8 x i32> inreg %rsrc) { -main_body: - %tex = call <4 x half> @llvm.amdgcn.image.load.mip.v4f16.v4i32.v8i32(<4 x i32> %coords, <8 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false) - %elt = extractelement <4 x half> %tex, i32 3 - ret half %elt -} - -; GCN-LABEL: {{^}}image_store_f16 -; GCN: s_load_dword s[[LO:[0-9]+]], -; GCN: v_mov_b32_e32 v[[V_LO:[0-9]+]], s[[LO]] -; GCN: image_store v[[V_LO]], v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0x1 unorm d16 -define amdgpu_kernel void @image_store_f16(half %data, <4 x i32> %coords, <8 x i32> inreg %rsrc) { -main_body: - call void @llvm.amdgcn.image.store.f16.v4i32.v8i32(half %data, <4 x i32> %coords, <8 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false) - ret void -} - -; FIXME: Eliminate and to get low bits -; GCN-LABEL: {{^}}image_store_v2f16: -; UNPACKED: s_load_dword [[DATA:s[0-9]+]] -; UNPACKED-DAG: s_lshr_b32 [[UNPACK_1:s[0-9]+]], [[DATA]], 16 -; UNPACKED-DAG: s_and_b32 [[UNPACK_0:s[0-9]+]], [[DATA]], 0xffff -; UNPACKED-DAG: v_mov_b32_e32 v[[V_UNPACK_0:[0-9]+]], [[UNPACK_0]] -; UNPACKED-DAG: v_mov_b32_e32 v[[V_UNPACK_1:[0-9]+]], [[UNPACK_1]] -; UNPACKED: image_store v{{\[}}[[V_UNPACK_0]]:[[V_UNPACK_1]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0x3 unorm d16 - -; PACKED: image_store v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0x3 unorm d16 -define amdgpu_kernel void @image_store_v2f16(<2 x half> %data, <4 x i32> %coords, <8 x i32> inreg %rsrc) { -main_body: - call void @llvm.amdgcn.image.store.v2f16.v4i32.v8i32(<2 x half> %data, <4 x i32> %coords, <8 x i32> %rsrc, i32 3, i1 false, i1 false, i1 false, i1 false) - ret void -} - -; GCN-LABEL: {{^}}image_store_v4f16: -; UNPACKED: s_load_dwordx2 s{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}} -; UNPACKED-DAG: s_lshr_b32 s{{[0-9]+}}, s[[LO]], 16 -; UNPACKED-DAG: s_lshr_b32 s{{[0-9]+}}, s[[HI]], 16 -; UNPACKED: s_and_b32 -; UNPACKED: s_and_b32 -; UNPACKED: image_store v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf unorm d16 - -; PACKED: s_load_dwordx2 s{{\[}}[[DATA0:[0-9]+]]:[[DATA1:[0-9]+]]{{\]}} -; PACKED: v_mov_b32_e32 v[[V_LO:[0-9]+]], s[[DATA0]] -; PACKED: v_mov_b32_e32 v[[V_HI:[0-9]+]], s[[DATA1]] -; PACKED: image_store v{{\[}}[[V_LO]]:[[V_HI]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf unorm d16 -define amdgpu_kernel void @image_store_v4f16(<4 x half> %data, <4 x i32> %coords, <8 x i32> inreg %rsrc) { -main_body: - call void @llvm.amdgcn.image.store.v4f16.v4i32.v8i32(<4 x half> %data, <4 x i32> %coords, <8 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false) - ret void -} - -; GCN-LABEL: {{^}}image_store_mip_v4f16: -; UNPACKED: s_load_dwordx2 s{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}} -; UNPACKED-DAG: s_lshr_b32 s{{[0-9]+}}, s[[LO]], 16 -; UNPACKED-DAG: s_lshr_b32 s{{[0-9]+}}, s[[HI]], 16 -; UNPACKED: s_and_b32 -; UNPACKED: s_and_b32 -; UNPACKED: image_store_mip v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf unorm d16 - -; PACKED: s_load_dwordx2 s{{\[}}[[DATA0:[0-9]+]]:[[DATA1:[0-9]+]]{{\]}} -; PACKED: v_mov_b32_e32 v[[V_LO:[0-9]+]], s[[DATA0]] -; PACKED: v_mov_b32_e32 v[[V_HI:[0-9]+]], s[[DATA1]] -; PACKED: image_store_mip v{{\[}}[[V_LO]]:[[V_HI]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf unorm d16 -define amdgpu_kernel void @image_store_mip_v4f16(<4 x half> %data, <4 x i32> %coords, <8 x i32> inreg %rsrc) { -main_body: - call void @llvm.amdgcn.image.store.mip.v4f16.v4i32.v8i32(<4 x half> %data, <4 x i32> %coords, <8 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false) - ret void -} - - -declare half @llvm.amdgcn.image.load.f16.v4i32.v8i32(<4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) -declare <2 x half> @llvm.amdgcn.image.load.v2f16.v4i32.v8i32(<4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) -declare <4 x half> @llvm.amdgcn.image.load.v4f16.v4i32.v8i32(<4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) -declare <4 x half> @llvm.amdgcn.image.load.mip.v4f16.v4i32.v8i32(<4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) - -declare void @llvm.amdgcn.image.store.f16.v4i32.v8i32(half, <4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) -declare void @llvm.amdgcn.image.store.v2f16.v4i32.v8i32(<2 x half>, <4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) -declare void @llvm.amdgcn.image.store.v4f16.v4i32.v8i32(<4 x half>, <4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) -declare void @llvm.amdgcn.image.store.mip.v4f16.v4i32.v8i32(<4 x half>, <4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.d16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.d16.ll deleted file mode 100644 index 2c52b61b218..00000000000 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.d16.ll +++ /dev/null @@ -1,102 +0,0 @@ -; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck -check-prefix=GCN -check-prefix=UNPACKED %s -; RUN: llc < %s -march=amdgcn -mcpu=gfx810 -verify-machineinstrs | FileCheck -check-prefix=GCN -check-prefix=PACKED -check-prefix=GFX81 %s -; RUN: llc < %s -march=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck -check-prefix=GCN -check-prefix=PACKED -check-prefix=GFX9 %s - - -; GCN-LABEL: {{^}}image_gather4_v4f16: -; UNPACKED: image_gather4 v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf d16 -; UNPACKED: flat_store_short v[{{[0-9]+:[0-9]+}}], v[[HI]] - -; PACKED: image_gather4 v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf d16 - -; GFX81: v_lshrrev_b32_e32 v[[HALF:[0-9]+]], 16, v[[HI]] -; GFX81: flat_store_short v[{{[0-9]+:[0-9]+}}], v[[HALF]] - -; GFX9: global_store_short_d16_hi v[{{[0-9]+:[0-9]+}}], v[[HI]], off -define amdgpu_kernel void @image_gather4_v4f16(<4 x float> %coords, <8 x i32> inreg %rsrc, <4 x i32> inreg %sample, half addrspace(1)* %out) { -main_body: - %tex = call <4 x half> @llvm.amdgcn.image.gather4.v4f16.v4f32.v8i32(<4 x float> %coords, <8 x i32> %rsrc, <4 x i32> %sample, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0) - %elt = extractelement <4 x half> %tex, i32 3 - store half %elt, half addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}image_gather4_cl_v4f16: -; UNPACKED: image_gather4_cl v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf d16 -; UNPACKED: flat_store_short v[{{[0-9]+:[0-9]+}}], v[[HI]] - -; PACKED: image_gather4_cl v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf d16 - -; GFX81: v_lshrrev_b32_e32 v[[HALF:[0-9]+]], 16, v[[HI]] -; GFX81: flat_store_short v[{{[0-9]+:[0-9]+}}], v[[HALF]] - -; GFX9: global_store_short_d16_hi v[{{[0-9]+:[0-9]+}}], v[[HI]], off -define amdgpu_kernel void @image_gather4_cl_v4f16(<4 x float> %coords, <8 x i32> inreg %rsrc, <4 x i32> inreg %sample, half addrspace(1)* %out) { -main_body: - %tex = call <4 x half> @llvm.amdgcn.image.gather4.cl.v4f16.v4f32.v8i32(<4 x float> %coords, <8 x i32> %rsrc, <4 x i32> %sample, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0) - %elt = extractelement <4 x half> %tex, i32 3 - store half %elt, half addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}image_gather4_c_v4f16: -; UNPACKED: image_gather4_c v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf d16 -; UNPACKED: flat_store_short v[{{[0-9]+:[0-9]+}}], v[[HI]] - -; PACKED: image_gather4_c v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf d16 - -; GFX81: v_lshrrev_b32_e32 v[[HALF:[0-9]+]], 16, v[[HI]] -; GFX81: flat_store_short v[{{[0-9]+:[0-9]+}}], v[[HALF]] - -; GFX9: global_store_short_d16_hi v[{{[0-9]+:[0-9]+}}], v[[HI]], off -define amdgpu_kernel void @image_gather4_c_v4f16(<4 x float> %coords, <8 x i32> inreg %rsrc, <4 x i32> inreg %sample, half addrspace(1)* %out) { -main_body: - %tex = call <4 x half> @llvm.amdgcn.image.gather4.c.v4f16.v4f32.v8i32(<4 x float> %coords, <8 x i32> %rsrc, <4 x i32> %sample, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0) - %elt = extractelement <4 x half> %tex, i32 3 - store half %elt, half addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}image_gather4_o_v4f16: -; UNPACKED: image_gather4_o v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf d16 -; UNPACKED: flat_store_short v[{{[0-9]+:[0-9]+}}], v[[HI]] - -; PACKED: image_gather4_o v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf d16 - -; GFX81: v_lshrrev_b32_e32 v[[HALF:[0-9]+]], 16, v[[HI]] -; GFX81: flat_store_short v[{{[0-9]+:[0-9]+}}], v[[HALF]] - -; GFX9: global_store_short_d16_hi v[{{[0-9]+:[0-9]+}}], v[[HI]], off -define amdgpu_kernel void @image_gather4_o_v4f16(<4 x float> %coords, <8 x i32> inreg %rsrc, <4 x i32> inreg %sample, half addrspace(1)* %out) { -main_body: - %tex = call <4 x half> @llvm.amdgcn.image.gather4.o.v4f16.v4f32.v8i32(<4 x float> %coords, <8 x i32> %rsrc, <4 x i32> %sample, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0) - %elt = extractelement <4 x half> %tex, i32 3 - store half %elt, half addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}image_gather4_c_o_v4f16: -; UNPACKED: image_gather4_c_o v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf d16 -; UNPACKED: flat_store_short v[{{[0-9]+:[0-9]+}}], v[[HI]] - -; PACKED: image_gather4_c_o v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf d16 - -; GFX81: v_lshrrev_b32_e32 v[[HALF:[0-9]+]], 16, v[[HI]] -; GFX81: flat_store_short v[{{[0-9]+:[0-9]+}}], v[[HALF]] - -; GFX9: global_store_short_d16_hi v[{{[0-9]+:[0-9]+}}], v[[HI]], off -define amdgpu_kernel void @image_gather4_c_o_v4f16(<4 x float> %coords, <8 x i32> inreg %rsrc, <4 x i32> inreg %sample, half addrspace(1)* %out) { -main_body: - %tex = call <4 x half> @llvm.amdgcn.image.gather4.c.o.v4f16.v4f32.v8i32(<4 x float> %coords, <8 x i32> %rsrc, <4 x i32> %sample, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0) - %elt = extractelement <4 x half> %tex, i32 3 - store half %elt, half addrspace(1)* %out - ret void -} - -declare <4 x half> @llvm.amdgcn.image.gather4.v4f16.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) - - -declare <4 x half> @llvm.amdgcn.image.gather4.cl.v4f16.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) -declare <4 x half> @llvm.amdgcn.image.gather4.c.v4f16.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) -declare <4 x half> @llvm.amdgcn.image.gather4.o.v4f16.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) -declare <4 x half> @llvm.amdgcn.image.gather4.c.o.v4f16.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.ll deleted file mode 100644 index 63b8012bb3f..00000000000 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.ll +++ /dev/null @@ -1,362 +0,0 @@ -; RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=GCN %s -; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=GCN %s - -; GCN-LABEL: {{^}}gather4_v2: -; GCN: image_gather4 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da -define amdgpu_kernel void @gather4_v2(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.gather4.v4f32.v2f32.v8i32(<2 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i1 0, i1 0, i1 0, i1 0, i1 1) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}gather4: -; GCN: image_gather4 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da -define amdgpu_kernel void @gather4(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.gather4.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i1 0, i1 0, i1 0, i1 0, i1 1) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}gather4_cl: -; GCN: image_gather4_cl {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da -define amdgpu_kernel void @gather4_cl(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.gather4.cl.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i1 0, i1 0, i1 0, i1 0, i1 1) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}gather4_l: -; GCN: image_gather4_l {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da -define amdgpu_kernel void @gather4_l(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.gather4.l.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i1 0, i1 0, i1 0, i1 0, i1 1) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}gather4_b: -; GCN: image_gather4_b {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da -define amdgpu_kernel void @gather4_b(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.gather4.b.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i1 0, i1 0, i1 0, i1 0, i1 1) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}gather4_b_cl: -; GCN: image_gather4_b_cl {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da -define amdgpu_kernel void @gather4_b_cl(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.gather4.b.cl.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i1 0, i1 0, i1 0, i1 0, i1 1) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}gather4_b_cl_v8: -; GCN: image_gather4_b_cl {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da -define amdgpu_kernel void @gather4_b_cl_v8(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.gather4.b.cl.v4f32.v8f32.v8i32(<8 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i1 0, i1 0, i1 0, i1 0, i1 1) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}gather4_lz_v2: -; GCN: image_gather4_lz {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da -define amdgpu_kernel void @gather4_lz_v2(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.gather4.lz.v4f32.v2f32.v8i32(<2 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i1 0, i1 0, i1 0, i1 0, i1 1) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}gather4_lz: -; GCN: image_gather4_lz {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da -define amdgpu_kernel void @gather4_lz(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.gather4.lz.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i1 0, i1 0, i1 0, i1 0, i1 1) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - - - -; GCN-LABEL: {{^}}gather4_o: -; GCN: image_gather4_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da -define amdgpu_kernel void @gather4_o(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.gather4.o.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i1 0, i1 0, i1 0, i1 0, i1 1) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}gather4_cl_o: -; GCN: image_gather4_cl_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da -define amdgpu_kernel void @gather4_cl_o(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.gather4.cl.o.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i1 0, i1 0, i1 0, i1 0, i1 1) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}gather4_cl_o_v8: -; GCN: image_gather4_cl_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da -define amdgpu_kernel void @gather4_cl_o_v8(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.gather4.cl.o.v4f32.v8f32.v8i32(<8 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i1 0, i1 0, i1 0, i1 0, i1 1) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}gather4_l_o: -; GCN: image_gather4_l_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da -define amdgpu_kernel void @gather4_l_o(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.gather4.l.o.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i1 0, i1 0, i1 0, i1 0, i1 1) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}gather4_l_o_v8: -; GCN: image_gather4_l_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da -define amdgpu_kernel void @gather4_l_o_v8(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.gather4.l.o.v4f32.v8f32.v8i32(<8 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i1 0, i1 0, i1 0, i1 0, i1 1) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}gather4_b_o: -; GCN: image_gather4_b_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da -define amdgpu_kernel void @gather4_b_o(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.gather4.b.o.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i1 0, i1 0, i1 0, i1 0, i1 1) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}gather4_b_o_v8: -; GCN: image_gather4_b_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da -define amdgpu_kernel void @gather4_b_o_v8(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.gather4.b.o.v4f32.v8f32.v8i32(<8 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i1 0, i1 0, i1 0, i1 0, i1 1) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}gather4_b_cl_o: -; GCN: image_gather4_b_cl_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da -define amdgpu_kernel void @gather4_b_cl_o(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.gather4.b.cl.o.v4f32.v8f32.v8i32(<8 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i1 0, i1 0, i1 0, i1 0, i1 1) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}gather4_lz_o: -; GCN: image_gather4_lz_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da -define amdgpu_kernel void @gather4_lz_o(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.gather4.lz.o.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i1 0, i1 0, i1 0, i1 0, i1 1) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - - -; GCN-LABEL: {{^}}gather4_c: -; GCN: image_gather4_c {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da -define amdgpu_kernel void @gather4_c(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.gather4.c.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i1 0, i1 0, i1 0, i1 0, i1 1) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}gather4_c_cl: -; GCN: image_gather4_c_cl {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da -define amdgpu_kernel void @gather4_c_cl(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.gather4.c.cl.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i1 0, i1 0, i1 0, i1 0, i1 1) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}gather4_c_cl_v8: -; GCN: image_gather4_c_cl {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da -define amdgpu_kernel void @gather4_c_cl_v8(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.gather4.c.cl.v4f32.v8f32.v8i32(<8 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i1 0, i1 0, i1 0, i1 0, i1 1) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}gather4_c_l: -; GCN: image_gather4_c_l {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da -define amdgpu_kernel void @gather4_c_l(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.gather4.c.l.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i1 0, i1 0, i1 0, i1 0, i1 1) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}gather4_c_l_v8: -; GCN: image_gather4_c_l {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da -define amdgpu_kernel void @gather4_c_l_v8(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.gather4.c.l.v4f32.v8f32.v8i32(<8 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i1 0, i1 0, i1 0, i1 0, i1 1) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}gather4_c_b: -; GCN: image_gather4_c_b {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da -define amdgpu_kernel void @gather4_c_b(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.gather4.c.b.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i1 0, i1 0, i1 0, i1 0, i1 1) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}gather4_c_b_v8: -; GCN: image_gather4_c_b {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da -define amdgpu_kernel void @gather4_c_b_v8(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.gather4.c.b.v4f32.v8f32.v8i32(<8 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i1 0, i1 0, i1 0, i1 0, i1 1) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}gather4_c_b_cl: -; GCN: image_gather4_c_b_cl {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da -define amdgpu_kernel void @gather4_c_b_cl(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.v4f32.v8f32.v8i32(<8 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i1 0, i1 0, i1 0, i1 0, i1 1) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}gather4_c_lz: -; GCN: image_gather4_c_lz {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da -define amdgpu_kernel void @gather4_c_lz(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.gather4.c.lz.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i1 0, i1 0, i1 0, i1 0, i1 1) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - - -; GCN-LABEL: {{^}}gather4_c_o: -; GCN: image_gather4_c_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da -define amdgpu_kernel void @gather4_c_o(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.gather4.c.o.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i1 0, i1 0, i1 0, i1 0, i1 1) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}gather4_c_o_v8: -; GCN: image_gather4_c_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da -define amdgpu_kernel void @gather4_c_o_v8(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.gather4.c.o.v4f32.v8f32.v8i32(<8 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i1 0, i1 0, i1 0, i1 0, i1 1) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}gather4_c_cl_o: -; GCN: image_gather4_c_cl_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da -define amdgpu_kernel void @gather4_c_cl_o(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.gather4.c.cl.o.v4f32.v8f32.v8i32(<8 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i1 0, i1 0, i1 0, i1 0, i1 1) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}gather4_c_l_o: -; GCN: image_gather4_c_l_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da -define amdgpu_kernel void @gather4_c_l_o(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.gather4.c.l.o.v4f32.v8f32.v8i32(<8 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i1 0, i1 0, i1 0, i1 0, i1 1) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}gather4_c_b_o: -; GCN: image_gather4_c_b_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da -define amdgpu_kernel void @gather4_c_b_o(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.gather4.c.b.o.v4f32.v8f32.v8i32(<8 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i1 0, i1 0, i1 0, i1 0, i1 1) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}gather4_c_b_cl_o: -; GCN: image_gather4_c_b_cl_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da -define amdgpu_kernel void @gather4_c_b_cl_o(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.o.v4f32.v8f32.v8i32(<8 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i1 0, i1 0, i1 0, i1 0, i1 1) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}gather4_c_lz_o: -; GCN: image_gather4_c_lz_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da -define amdgpu_kernel void @gather4_c_lz_o(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.gather4.c.lz.o.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i1 0, i1 0, i1 0, i1 0, i1 1) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}gather4_c_lz_o_v8: -; GCN: image_gather4_c_lz_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da -define amdgpu_kernel void @gather4_c_lz_o_v8(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.gather4.c.lz.o.v4f32.v8f32.v8i32(<8 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i1 0, i1 0, i1 0, i1 0, i1 1) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -declare <4 x float> @llvm.amdgcn.image.gather4.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.gather4.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.gather4.cl.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.gather4.l.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.gather4.b.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.gather4.b.cl.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.gather4.b.cl.v4f32.v8f32.v8i32(<8 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.gather4.lz.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.gather4.lz.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 - -declare <4 x float> @llvm.amdgcn.image.gather4.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.gather4.cl.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.gather4.cl.o.v4f32.v8f32.v8i32(<8 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.gather4.l.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.gather4.l.o.v4f32.v8f32.v8i32(<8 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.gather4.b.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.gather4.b.o.v4f32.v8f32.v8i32(<8 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.gather4.b.cl.o.v4f32.v8f32.v8i32(<8 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.gather4.lz.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 - -declare <4 x float> @llvm.amdgcn.image.gather4.c.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.gather4.c.cl.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.gather4.c.cl.v4f32.v8f32.v8i32(<8 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.gather4.c.l.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.gather4.c.l.v4f32.v8f32.v8i32(<8 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.gather4.c.b.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.gather4.c.b.v4f32.v8f32.v8i32(<8 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.v4f32.v8f32.v8i32(<8 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.gather4.c.lz.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 - -declare <4 x float> @llvm.amdgcn.image.gather4.c.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.gather4.c.o.v4f32.v8f32.v8i32(<8 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.gather4.c.cl.o.v4f32.v8f32.v8i32(<8 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.gather4.c.l.o.v4f32.v8f32.v8i32(<8 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.gather4.c.b.o.v4f32.v8f32.v8i32(<8 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.o.v4f32.v8f32.v8i32(<8 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.gather4.c.lz.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.gather4.c.lz.o.v4f32.v8f32.v8i32(<8 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 - -attributes #0 = { nounwind readnone } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.getlod.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.getlod.ll deleted file mode 100644 index dfe4aff7bc1..00000000000 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.getlod.ll +++ /dev/null @@ -1,53 +0,0 @@ -; RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=GCN %s -; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=GCN %s - -; GCN-LABEL: {{^}}getlod: -; GCN: image_get_lod {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf da -; GCN: s_waitcnt vmcnt(0) -; GCN: store_dwordx4 -define amdgpu_kernel void @getlod(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.getlod.v4f32.f32.v8i32(float undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0, i1 1) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}getlod_v2: -; GCN: image_get_lod {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf da -; GCN: s_waitcnt vmcnt(0) -; GCN: store_dwordx4 -define amdgpu_kernel void @getlod_v2(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.getlod.v4f32.v2f32.v8i32(<2 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0, i1 1) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}getlod_v4: -; GCN: image_get_lod {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf da -; GCN: s_waitcnt vmcnt(0) -; GCN: store_dwordx4 -define amdgpu_kernel void @getlod_v4(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.getlod.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0, i1 1) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}adjust_writemask_getlod_none_enabled: -; GCN-NOT: image -; GCN-NOT: store -define amdgpu_kernel void @adjust_writemask_getlod_none_enabled(float addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.getlod.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 0, i1 false, i1 false, i1 false, i1 false, i1 false) - %elt0 = extractelement <4 x float> %r, i32 0 - store float %elt0, float addrspace(1)* %out - ret void -} - -declare <4 x float> @llvm.amdgcn.image.getlod.v4f32.f32.v8i32(float, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.getlod.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.getlod.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 - - -attributes #0 = { nounwind readnone } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.ll deleted file mode 100644 index 5afe4c31688..00000000000 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.ll +++ /dev/null @@ -1,210 +0,0 @@ -; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VERDE %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI %s - -; GCN-LABEL: {{^}}image_load_v4i32: -; GCN-NOT: s_waitcnt -; GCN: image_load v[0:3], v[0:3], s[0:7] dmask:0xf unorm -; GCN: s_waitcnt vmcnt(0) -define amdgpu_ps <4 x float> @image_load_v4i32(<8 x i32> inreg %rsrc, <4 x i32> %c) #0 { -main_body: - %tex = call <4 x float> @llvm.amdgcn.image.load.v4f32.v4i32.v8i32(<4 x i32> %c, <8 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false) - ret <4 x float> %tex -} - -; GCN-LABEL: {{^}}image_load_v2i32: -; GCN-NOT: s_waitcnt -; GCN: image_load v[0:3], v[0:1], s[0:7] dmask:0xf unorm -; GCN: s_waitcnt vmcnt(0) -define amdgpu_ps <4 x float> @image_load_v2i32(<8 x i32> inreg %rsrc, <2 x i32> %c) #0 { -main_body: - %tex = call <4 x float> @llvm.amdgcn.image.load.v4f32.v2i32.v8i32(<2 x i32> %c, <8 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false) - ret <4 x float> %tex -} - -; GCN-LABEL: {{^}}image_load_i32: -; GCN-NOT: s_waitcnt -; GCN: image_load v[0:3], v0, s[0:7] dmask:0xf unorm -; GCN: s_waitcnt vmcnt(0) -define amdgpu_ps <4 x float> @image_load_i32(<8 x i32> inreg %rsrc, i32 %c) #0 { -main_body: - %tex = call <4 x float> @llvm.amdgcn.image.load.v4f32.i32.v8i32(i32 %c, <8 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false) - ret <4 x float> %tex -} - -; GCN-LABEL: {{^}}image_load_mip: -; GCN-NOT: s_waitcnt -; GCN: image_load_mip v[0:3], v[0:3], s[0:7] dmask:0xf unorm -; GCN: s_waitcnt vmcnt(0) -define amdgpu_ps <4 x float> @image_load_mip(<8 x i32> inreg %rsrc, <4 x i32> %c) #0 { -main_body: - %tex = call <4 x float> @llvm.amdgcn.image.load.mip.v4f32.v4i32.v8i32(<4 x i32> %c, <8 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false) - ret <4 x float> %tex -} - -; GCN-LABEL: {{^}}image_load_1: -; GCN-NOT: s_waitcnt -; GCN: image_load v0, v[0:3], s[0:7] dmask:0x1 unorm -; GCN: s_waitcnt vmcnt(0) -define amdgpu_ps float @image_load_1(<8 x i32> inreg %rsrc, <4 x i32> %c) #0 { -main_body: - %tex = call <4 x float> @llvm.amdgcn.image.load.v4f32.v4i32.v8i32(<4 x i32> %c, <8 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false) - %elt = extractelement <4 x float> %tex, i32 0 - ret float %elt -} - -; GCN-LABEL: {{^}}image_load_f32_v2i32: -; GCN-NOT: s_waitcnt -; GCN: image_load {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 unorm -; GCN: s_waitcnt vmcnt(0) -define amdgpu_ps float @image_load_f32_v2i32(<8 x i32> inreg %rsrc, <2 x i32> %c) #0 { -main_body: - %tex = call float @llvm.amdgcn.image.load.f32.v2i32.v8i32(<2 x i32> %c, <8 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false) - ret float %tex -} - -; GCN-LABEL: {{^}}image_load_v2f32_v4i32: -; GCN-NOT: s_waitcnt -; GCN: image_load {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x3 unorm -; GCN: s_waitcnt vmcnt(0) -define amdgpu_ps <2 x float> @image_load_v2f32_v4i32(<8 x i32> inreg %rsrc, <4 x i32> %c) #0 { -main_body: - %tex = call <2 x float> @llvm.amdgcn.image.load.v2f32.v4i32.v8i32(<4 x i32> %c, <8 x i32> %rsrc, i32 3, i1 false, i1 false, i1 false, i1 false) - ret <2 x float> %tex -} - -; GCN-LABEL: {{^}}image_store_v4i32: -; GCN-NOT: s_waitcnt -; GCN: image_store v[0:3], v[4:7], s[0:7] dmask:0xf unorm -define amdgpu_ps void @image_store_v4i32(<8 x i32> inreg %rsrc, <4 x float> %data, <4 x i32> %coords) #0 { -main_body: - call void @llvm.amdgcn.image.store.v4f32.v4i32.v8i32(<4 x float> %data, <4 x i32> %coords, <8 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false) - ret void -} - -; GCN-LABEL: {{^}}image_store_v2i32: -; GCN-NOT: s_waitcnt -; GCN: image_store v[0:3], v[4:5], s[0:7] dmask:0xf unorm -define amdgpu_ps void @image_store_v2i32(<8 x i32> inreg %rsrc, <4 x float> %data, <2 x i32> %coords) #0 { -main_body: - call void @llvm.amdgcn.image.store.v4f32.v2i32.v8i32(<4 x float> %data, <2 x i32> %coords, <8 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false) - ret void -} - -; GCN-LABEL: {{^}}image_store_i32: -; GCN-NOT: s_waitcnt -; GCN: image_store v[0:3], v4, s[0:7] dmask:0xf unorm -define amdgpu_ps void @image_store_i32(<8 x i32> inreg %rsrc, <4 x float> %data, i32 %coords) #0 { -main_body: - call void @llvm.amdgcn.image.store.v4f32.i32.v8i32(<4 x float> %data, i32 %coords, <8 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false) - ret void -} - -; GCN-LABEL: {{^}}image_store_f32_i32: -; GCN-NOT: s_waitcnt -; GCN: image_store {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 unorm -define amdgpu_ps void @image_store_f32_i32(<8 x i32> inreg %rsrc, float %data, i32 %coords) #0 { -main_body: - call void @llvm.amdgcn.image.store.f32.i32.v8i32(float %data, i32 %coords, <8 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false) - ret void -} - -; GCN-LABEL: {{^}}image_store_v2f32_v4i32: -; GCN-NOT: s_waitcnt -; GCN: image_store {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x3 unorm -define amdgpu_ps void @image_store_v2f32_v4i32(<8 x i32> inreg %rsrc, <2 x float> %data, <4 x i32> %coords) #0 { -main_body: - call void @llvm.amdgcn.image.store.v2f32.v4i32.v8i32(<2 x float> %data, <4 x i32> %coords, <8 x i32> %rsrc, i32 3, i1 false, i1 false, i1 false, i1 false) - ret void -} - -; GCN-LABEL: {{^}}image_store_mip: -; GCN-NOT: s_waitcnt -; GCN: image_store_mip v[0:3], v[4:7], s[0:7] dmask:0xf unorm -define amdgpu_ps void @image_store_mip(<8 x i32> inreg %rsrc, <4 x float> %data, <4 x i32> %coords) #0 { -main_body: - call void @llvm.amdgcn.image.store.mip.v4f32.v4i32.v8i32(<4 x float> %data, <4 x i32> %coords, <8 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false) - ret void -} - -; GCN-LABEL: {{^}}getresinfo: -; GCN-NOT: s_waitcnt -; GCN: image_get_resinfo {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf -; GCN: s_waitcnt vmcnt(0) -; GCN: exp -define amdgpu_ps void @getresinfo() #0 { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.getresinfo.v4f32.i32.v8i32(i32 undef, <8 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false) - %r0 = extractelement <4 x float> %r, i32 0 - %r1 = extractelement <4 x float> %r, i32 1 - %r2 = extractelement <4 x float> %r, i32 2 - %r3 = extractelement <4 x float> %r, i32 3 - call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %r0, float %r1, float %r2, float %r3, i1 true, i1 true) #0 - ret void -} - -; GCN-LABEL: {{^}}getresinfo_dmask0: -; GCN-NOT: image_get_resinfo -define amdgpu_ps void @getresinfo_dmask0() #0 { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.getresinfo.v4f32.i32.v8i32(i32 undef, <8 x i32> undef, i32 0, i1 false, i1 false, i1 false, i1 false) - %r0 = extractelement <4 x float> %r, i32 0 - %r1 = extractelement <4 x float> %r, i32 1 - %r2 = extractelement <4 x float> %r, i32 2 - %r3 = extractelement <4 x float> %r, i32 3 - call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %r0, float %r1, float %r2, float %r3, i1 true, i1 true) #0 - ret void -} - -; Ideally, the register allocator would avoid the wait here -; -; GCN-LABEL: {{^}}image_store_wait: -; GCN: image_store v[0:3], v4, s[0:7] dmask:0xf unorm -; VERDE: s_waitcnt expcnt(0) -; GCN: image_load v[0:3], v4, s[8:15] dmask:0xf unorm -; GCN: s_waitcnt vmcnt(0) -; GCN: image_store v[0:3], v4, s[16:23] dmask:0xf unorm -define amdgpu_ps void @image_store_wait(<8 x i32> inreg %arg, <8 x i32> inreg %arg1, <8 x i32> inreg %arg2, <4 x float> %arg3, i32 %arg4) #0 { -main_body: - call void @llvm.amdgcn.image.store.v4f32.i32.v8i32(<4 x float> %arg3, i32 %arg4, <8 x i32> %arg, i32 15, i1 false, i1 false, i1 false, i1 false) - %data = call <4 x float> @llvm.amdgcn.image.load.v4f32.i32.v8i32(i32 %arg4, <8 x i32> %arg1, i32 15, i1 false, i1 false, i1 false, i1 false) - call void @llvm.amdgcn.image.store.v4f32.i32.v8i32(<4 x float> %data, i32 %arg4, <8 x i32> %arg2, i32 15, i1 false, i1 false, i1 false, i1 false) - ret void -} - -; SI won't merge ds memory operations, because of the signed offset bug, so -; we only have check lines for VI. -; VI-LABEL: image_load_mmo -; VI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0 -; VI: ds_write2_b32 v{{[0-9]+}}, [[ZERO]], [[ZERO]] offset1:4 -define amdgpu_ps void @image_load_mmo(float addrspace(3)* %lds, <2 x i32> %c, <8 x i32> inreg %rsrc) #0 { -bb: - store float 0.000000e+00, float addrspace(3)* %lds - %tex = call float @llvm.amdgcn.image.load.f32.v2i32.v8i32(<2 x i32> %c, <8 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false) - %tmp2 = getelementptr float, float addrspace(3)* %lds, i32 4 - store float 0.000000e+00, float addrspace(3)* %tmp2 - call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %tex, float %tex, float %tex, float %tex, i1 true, i1 true) #0 - ret void -} - -declare float @llvm.amdgcn.image.load.f32.v2i32.v8i32(<2 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #1 -declare <2 x float> @llvm.amdgcn.image.load.v2f32.v4i32.v8i32(<4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #1 -declare void @llvm.amdgcn.image.store.f32.i32.v8i32(float, i32, <8 x i32>, i32, i1, i1, i1, i1) #0 - - -declare void @llvm.amdgcn.image.store.v2f32.v4i32.v8i32(<2 x float>, <4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #0 -declare void @llvm.amdgcn.image.store.v4f32.i32.v8i32(<4 x float>, i32, <8 x i32>, i32, i1, i1, i1, i1) #0 -declare void @llvm.amdgcn.image.store.v4f32.v2i32.v8i32(<4 x float>, <2 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #0 -declare void @llvm.amdgcn.image.store.v4f32.v4i32.v8i32(<4 x float>, <4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #0 -declare void @llvm.amdgcn.image.store.mip.v4f32.v4i32.v8i32(<4 x float>, <4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #0 - -declare <4 x float> @llvm.amdgcn.image.load.v4f32.i32.v8i32(i32, <8 x i32>, i32, i1, i1, i1, i1) #1 -declare <4 x float> @llvm.amdgcn.image.load.v4f32.v2i32.v8i32(<2 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #1 -declare <4 x float> @llvm.amdgcn.image.load.v4f32.v4i32.v8i32(<4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #1 -declare <4 x float> @llvm.amdgcn.image.load.mip.v4f32.v4i32.v8i32(<4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #1 -declare <4 x float> @llvm.amdgcn.image.getresinfo.v4f32.i32.v8i32(i32, <8 x i32>, i32, i1, i1, i1, i1) #2 - -declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0 - -attributes #0 = { nounwind } -attributes #1 = { nounwind readonly } -attributes #2 = { nounwind readnone } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.d16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.d16.ll deleted file mode 100644 index c50b508ed92..00000000000 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.d16.ll +++ /dev/null @@ -1,137 +0,0 @@ -; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck -check-prefix=GCN -check-prefix=UNPACKED %s -; RUN: llc < %s -march=amdgcn -mcpu=gfx810 -verify-machineinstrs | FileCheck -check-prefix=GCN -check-prefix=PACKED -check-prefix=GFX81 %s -; RUN: llc < %s -march=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck -check-prefix=GCN -check-prefix=PACKED -check-prefix=GFX9 %s - - -; GCN-LABEL: {{^}}image_sample_f16: -; GCN: image_sample v[[HALF:[0-9]+]], v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0x1 d16 - -; UNPACKED: flat_store_short v[{{[0-9]+:[0-9]+}}], v[[HALF]] - -; GFX81: flat_store_short v[{{[0-9]+:[0-9]+}}], v[[HALF]] - -; GFX9: global_store_short v[{{[0-9]+:[0-9]+}}], v[[HALF]], off -define amdgpu_kernel void @image_sample_f16(<4 x float> %coords, <8 x i32> inreg %rsrc, <4 x i32> inreg %sample, half addrspace(1)* %out) { -main_body: - %tex = call half @llvm.amdgcn.image.sample.f16.v4f32.v8i32(<4 x float> %coords, <8 x i32> %rsrc, <4 x i32> %sample, i32 1, i1 0, i1 0, i1 0, i1 0, i1 0) - store half %tex, half addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}image_sample_v2f16: -; UNPACKED: image_sample v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0x3 d16 -; UNPACKED: flat_store_short v[{{[0-9]+:[0-9]+}}], v[[HI]] - -; PACKED: image_sample v[[DATA:[0-9]+]], v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0x3 d16 - -; GFX81: v_lshrrev_b32_e32 v[[HI:[0-9]+]], 16, v[[DATA]] -; GFX81: flat_store_short v[{{[0-9]+:[0-9]+}}], v[[HI]] - -; GFX9: global_store_short_d16_hi v[{{[0-9]+:[0-9]+}}], v[[DATA]], off -define amdgpu_kernel void @image_sample_v2f16(<4 x float> %coords, <8 x i32> inreg %rsrc, <4 x i32> inreg %sample, half addrspace(1)* %out) { -main_body: - %tex = call <2 x half> @llvm.amdgcn.image.sample.v2f16.v4f32.v8i32(<4 x float> %coords, <8 x i32> %rsrc, <4 x i32> %sample, i32 3, i1 0, i1 0, i1 0, i1 0, i1 0) - %elt = extractelement <2 x half> %tex, i32 1 - store half %elt, half addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}image_sample_v4f16: -; UNPACKED: image_sample v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf d16 -; UNPACKED: flat_store_short v[{{[0-9]+:[0-9]+}}], v[[HI]] - -; PACKED: image_sample v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf d16 - -; GFX81: v_lshrrev_b32_e32 v[[HALF:[0-9]+]], 16, v[[HI]] -; GFX81: flat_store_short v[{{[0-9]+:[0-9]+}}], v[[HALF]] - -; GFX9: global_store_short_d16_hi v[{{[0-9]+:[0-9]+}}], v[[HI]], off -define amdgpu_kernel void @image_sample_v4f16(<4 x float> %coords, <8 x i32> inreg %rsrc, <4 x i32> inreg %sample, half addrspace(1)* %out) { -main_body: - %tex = call <4 x half> @llvm.amdgcn.image.sample.v4f16.v4f32.v8i32(<4 x float> %coords, <8 x i32> %rsrc, <4 x i32> %sample, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0) - %elt = extractelement <4 x half> %tex, i32 3 - store half %elt, half addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}image_sample_cl_v4f16: -; UNPACKED: image_sample_cl v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf d16 -; UNPACKED: flat_store_short v[{{[0-9]+:[0-9]+}}], v[[HI]] - -; PACKED: image_sample_cl v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf d16 - -; GFX81: v_lshrrev_b32_e32 v[[HALF:[0-9]+]], 16, v[[HI]] -; GFX81: flat_store_short v[{{[0-9]+:[0-9]+}}], v[[HALF]] - -; GFX9: global_store_short_d16_hi v[{{[0-9]+:[0-9]+}}], v[[HI]], off -define amdgpu_kernel void @image_sample_cl_v4f16(<4 x float> %coords, <8 x i32> inreg %rsrc, <4 x i32> inreg %sample, half addrspace(1)* %out) { -main_body: - %tex = call <4 x half> @llvm.amdgcn.image.sample.cl.v4f16.v4f32.v8i32(<4 x float> %coords, <8 x i32> %rsrc, <4 x i32> %sample, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0) - %elt = extractelement <4 x half> %tex, i32 3 - store half %elt, half addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}image_sample_c_v4f16: -; UNPACKED: image_sample_c v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf d16 -; UNPACKED: flat_store_short v[{{[0-9]+:[0-9]+}}], v[[HI]] - -; PACKED: image_sample_c v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf d16 - -; GFX81: v_lshrrev_b32_e32 v[[HALF:[0-9]+]], 16, v[[HI]] -; GFX81: flat_store_short v[{{[0-9]+:[0-9]+}}], v[[HALF]] - -; GFX9: global_store_short_d16_hi v[{{[0-9]+:[0-9]+}}], v[[HI]], off -define amdgpu_kernel void @image_sample_c_v4f16(<4 x float> %coords, <8 x i32> inreg %rsrc, <4 x i32> inreg %sample, half addrspace(1)* %out) { -main_body: - %tex = call <4 x half> @llvm.amdgcn.image.sample.c.v4f16.v4f32.v8i32(<4 x float> %coords, <8 x i32> %rsrc, <4 x i32> %sample, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0) - %elt = extractelement <4 x half> %tex, i32 3 - store half %elt, half addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}image_sample_o_v4f16: -; UNPACKED: image_sample_o v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf d16 -; UNPACKED: flat_store_short v[{{[0-9]+:[0-9]+}}], v[[HI]] - -; PACKED: image_sample_o v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf d16 - -; GFX81: v_lshrrev_b32_e32 v[[HALF:[0-9]+]], 16, v[[HI]] -; GFX81: flat_store_short v[{{[0-9]+:[0-9]+}}], v[[HALF]] - -; GFX9: global_store_short_d16_hi v[{{[0-9]+:[0-9]+}}], v[[HI]], off -define amdgpu_kernel void @image_sample_o_v4f16(<4 x float> %coords, <8 x i32> inreg %rsrc, <4 x i32> inreg %sample, half addrspace(1)* %out) { -main_body: - %tex = call <4 x half> @llvm.amdgcn.image.sample.o.v4f16.v4f32.v8i32(<4 x float> %coords, <8 x i32> %rsrc, <4 x i32> %sample, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0) - %elt = extractelement <4 x half> %tex, i32 3 - store half %elt, half addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}image_sample_c_o_v4f16: -; UNPACKED: image_sample_c_o v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf d16 -; UNPACKED: flat_store_short v[{{[0-9]+:[0-9]+}}], v[[HI]] - -; PACKED: image_sample_c_o v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf d16 - -; GFX81: v_lshrrev_b32_e32 v[[HALF:[0-9]+]], 16, v[[HI]] -; GFX81: flat_store_short v[{{[0-9]+:[0-9]+}}], v[[HALF]] - -; GFX9: global_store_short_d16_hi v[{{[0-9]+:[0-9]+}}], v[[HI]], off -define amdgpu_kernel void @image_sample_c_o_v4f16(<4 x float> %coords, <8 x i32> inreg %rsrc, <4 x i32> inreg %sample, half addrspace(1)* %out) { -main_body: - %tex = call <4 x half> @llvm.amdgcn.image.sample.c.o.v4f16.v4f32.v8i32(<4 x float> %coords, <8 x i32> %rsrc, <4 x i32> %sample, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0) - %elt = extractelement <4 x half> %tex, i32 3 - store half %elt, half addrspace(1)* %out - ret void -} - -declare half @llvm.amdgcn.image.sample.f16.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) -declare <2 x half> @llvm.amdgcn.image.sample.v2f16.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) -declare <4 x half> @llvm.amdgcn.image.sample.v4f16.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) - - -declare <4 x half> @llvm.amdgcn.image.sample.cl.v4f16.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) -declare <4 x half> @llvm.amdgcn.image.sample.c.v4f16.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) -declare <4 x half> @llvm.amdgcn.image.sample.o.v4f16.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) -declare <4 x half> @llvm.amdgcn.image.sample.c.o.v4f16.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.ll deleted file mode 100644 index 4f90b0a25ea..00000000000 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.ll +++ /dev/null @@ -1,435 +0,0 @@ -; RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=GCN %s -; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=GCN %s - -; GCN-LABEL: {{^}}sample: -; GCN: image_sample {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf -define amdgpu_kernel void @sample(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}sample_cl: -; GCN: image_sample_cl {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf -define amdgpu_kernel void @sample_cl(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.cl.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}sample_d: -; GCN: image_sample_d {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf -define amdgpu_kernel void @sample_d(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.d.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}sample_d_cl: -; GCN: image_sample_d_cl {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf -define amdgpu_kernel void @sample_d_cl(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.d.cl.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}sample_l: -; GCN: image_sample_l {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf -define amdgpu_kernel void @sample_l(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.l.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}sample_b: -; GCN: image_sample_b {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf -define amdgpu_kernel void @sample_b(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.b.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}sample_b_cl: -; GCN: image_sample_b_cl {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf -define amdgpu_kernel void @sample_b_cl(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.b.cl.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}sample_lz: -; GCN: image_sample_lz {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf -define amdgpu_kernel void @sample_lz(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.lz.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}sample_cd: -; GCN: image_sample_cd {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf -define amdgpu_kernel void @sample_cd(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.cd.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}sample_cd_cl: -; GCN: image_sample_cd_cl {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf -define amdgpu_kernel void @sample_cd_cl(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}sample_c: -; GCN: image_sample_c {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf -define amdgpu_kernel void @sample_c(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.c.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}sample_c_cl: -; GCN: image_sample_c_cl {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf -define amdgpu_kernel void @sample_c_cl(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.c.cl.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}sample_c_d: -; GCN: image_sample_c_d {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf -define amdgpu_kernel void @sample_c_d(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.c.d.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}sample_c_d_cl: -; GCN: image_sample_c_d_cl {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf -define amdgpu_kernel void @sample_c_d_cl(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}sample_c_l: -; GCN: image_sample_c_l {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf -define amdgpu_kernel void @sample_c_l(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.c.l.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}sample_c_b: -; GCN: image_sample_c_b {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf -define amdgpu_kernel void @sample_c_b(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.c.b.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}sample_c_b_cl: -; GCN: image_sample_c_b_cl {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf -define amdgpu_kernel void @sample_c_b_cl(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}sample_c_lz: -; GCN: image_sample_c_lz {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf -define amdgpu_kernel void @sample_c_lz(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.c.lz.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}sample_c_cd: -; GCN: image_sample_c_cd {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf -define amdgpu_kernel void @sample_c_cd(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.c.cd.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}sample_c_cd_cl: -; GCN: image_sample_c_cd_cl {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf -define amdgpu_kernel void @sample_c_cd_cl(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}sample_f32: -; GCN: image_sample {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 -define amdgpu_kernel void @sample_f32(float addrspace(1)* %out) { -main_body: - %r = call float @llvm.amdgcn.image.sample.f32.v2f32.v8i32(<2 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i1 0, i1 0, i1 0, i1 0, i1 0) - store float %r, float addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}sample_v2f32: -; GCN: image_sample {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x3 -define amdgpu_kernel void @sample_v2f32(<2 x float> addrspace(1)* %out) { -main_body: - %r = call <2 x float> @llvm.amdgcn.image.sample.v2f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 3, i1 0, i1 0, i1 0, i1 0, i1 0) - store <2 x float> %r, <2 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}adjust_writemask_sample_0: -; GCN: image_sample v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} dmask:0x1{{$}} -define amdgpu_kernel void @adjust_writemask_sample_0(float addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0) - %elt0 = extractelement <4 x float> %r, i32 0 - store float %elt0, float addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}adjust_writemask_sample_01: -; GCN: image_sample v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} dmask:0x3{{$}} -define amdgpu_kernel void @adjust_writemask_sample_01(float addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0) - %elt0 = extractelement <4 x float> %r, i32 0 - %elt1 = extractelement <4 x float> %r, i32 1 - store volatile float %elt0, float addrspace(1)* %out - store volatile float %elt1, float addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}adjust_writemask_sample_012: -; GCN: image_sample v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} dmask:0x7{{$}} -define amdgpu_kernel void @adjust_writemask_sample_012(float addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0) - %elt0 = extractelement <4 x float> %r, i32 0 - %elt1 = extractelement <4 x float> %r, i32 1 - %elt2 = extractelement <4 x float> %r, i32 2 - store volatile float %elt0, float addrspace(1)* %out - store volatile float %elt1, float addrspace(1)* %out - store volatile float %elt2, float addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}adjust_writemask_sample_12: -; GCN: image_sample v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} dmask:0x6{{$}} -define amdgpu_kernel void @adjust_writemask_sample_12(float addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0) - %elt1 = extractelement <4 x float> %r, i32 1 - %elt2 = extractelement <4 x float> %r, i32 2 - store volatile float %elt1, float addrspace(1)* %out - store volatile float %elt2, float addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}adjust_writemask_sample_03: -; GCN: image_sample v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} dmask:0x9{{$}} -define amdgpu_kernel void @adjust_writemask_sample_03(float addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0) - %elt0 = extractelement <4 x float> %r, i32 0 - %elt3 = extractelement <4 x float> %r, i32 3 - store volatile float %elt0, float addrspace(1)* %out - store volatile float %elt3, float addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}adjust_writemask_sample_13: -; GCN: image_sample v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} dmask:0xa{{$}} -define amdgpu_kernel void @adjust_writemask_sample_13(float addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0) - %elt1 = extractelement <4 x float> %r, i32 1 - %elt3 = extractelement <4 x float> %r, i32 3 - store volatile float %elt1, float addrspace(1)* %out - store volatile float %elt3, float addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}adjust_writemask_sample_123: -; GCN: image_sample v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} dmask:0xe{{$}} -define amdgpu_kernel void @adjust_writemask_sample_123(float addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0) - %elt1 = extractelement <4 x float> %r, i32 1 - %elt2 = extractelement <4 x float> %r, i32 2 - %elt3 = extractelement <4 x float> %r, i32 3 - store volatile float %elt1, float addrspace(1)* %out - store volatile float %elt2, float addrspace(1)* %out - store volatile float %elt3, float addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}adjust_writemask_sample_variable_dmask_enabled: -; GCN-NOT: image -; GCN-NOT: store -define amdgpu_kernel void @adjust_writemask_sample_variable_dmask_enabled(float addrspace(1)* %out, i32 %dmask) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 %dmask, i1 false, i1 false, i1 false, i1 false, i1 false) - %elt0 = extractelement <4 x float> %r, i32 0 - store float %elt0, float addrspace(1)* %out - ret void -} - - -; GCN-LABEL: {{^}}adjust_writemask_sample_none_enabled: -; GCN-NOT: image -; GCN-NOT: store -define amdgpu_kernel void @adjust_writemask_sample_none_enabled(float addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 0, i1 false, i1 false, i1 false, i1 false, i1 false) - %elt0 = extractelement <4 x float> %r, i32 0 - store float %elt0, float addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}adjust_writemask_sample_cl_none_enabled: -; GCN-NOT: image -; GCN-NOT: store -define amdgpu_kernel void @adjust_writemask_sample_cl_none_enabled(float addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.cl.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 0, i1 false, i1 false, i1 false, i1 false, i1 false) - %elt0 = extractelement <4 x float> %r, i32 0 - store float %elt0, float addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}adjust_writemask_sample_d_none_enabled: -; GCN-NOT: image -; GCN-NOT: store -define amdgpu_kernel void @adjust_writemask_sample_d_none_enabled(float addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.d.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 0, i1 false, i1 false, i1 false, i1 false, i1 false) - %elt0 = extractelement <4 x float> %r, i32 0 - store float %elt0, float addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}adjust_writemask_sample_d_cl_none_enabled: -; GCN-NOT: image -; GCN-NOT: store -define amdgpu_kernel void @adjust_writemask_sample_d_cl_none_enabled(float addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.d.cl.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 0, i1 false, i1 false, i1 false, i1 false, i1 false) - %elt0 = extractelement <4 x float> %r, i32 0 - store float %elt0, float addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}adjust_writemask_sample_l_none_enabled: -; GCN-NOT: image -; GCN-NOT: store -define amdgpu_kernel void @adjust_writemask_sample_l_none_enabled(float addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.l.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 0, i1 false, i1 false, i1 false, i1 false, i1 false) - %elt0 = extractelement <4 x float> %r, i32 0 - store float %elt0, float addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}adjust_writemask_sample_b_none_enabled: -; GCN-NOT: image -; GCN-NOT: store -define amdgpu_kernel void @adjust_writemask_sample_b_none_enabled(float addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.b.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 0, i1 false, i1 false, i1 false, i1 false, i1 false) - %elt0 = extractelement <4 x float> %r, i32 0 - store float %elt0, float addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}adjust_writemask_sample_b_cl_none_enabled: -; GCN-NOT: image -; GCN-NOT: store -define amdgpu_kernel void @adjust_writemask_sample_b_cl_none_enabled(float addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.b.cl.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 0, i1 false, i1 false, i1 false, i1 false, i1 false) - %elt0 = extractelement <4 x float> %r, i32 0 - store float %elt0, float addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}adjust_writemask_sample_lz_none_enabled: -; GCN-NOT: image -; GCN-NOT: store -define amdgpu_kernel void @adjust_writemask_sample_lz_none_enabled(float addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.lz.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 0, i1 false, i1 false, i1 false, i1 false, i1 false) - %elt0 = extractelement <4 x float> %r, i32 0 - store float %elt0, float addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}adjust_writemask_sample_cd_none_enabled: -; GCN-NOT: image -; GCN-NOT: store -define amdgpu_kernel void @adjust_writemask_sample_cd_none_enabled(float addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.cd.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 0, i1 false, i1 false, i1 false, i1 false, i1 false) - %elt0 = extractelement <4 x float> %r, i32 0 - store float %elt0, float addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}adjust_writemask_sample_cd_cl_none_enabled: -; GCN-NOT: image -; GCN-NOT: store -define amdgpu_kernel void @adjust_writemask_sample_cd_cl_none_enabled(float addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 0, i1 false, i1 false, i1 false, i1 false, i1 false) - %elt0 = extractelement <4 x float> %r, i32 0 - store float %elt0, float addrspace(1)* %out - ret void -} - -declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.sample.cl.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.sample.d.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.sample.d.cl.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.sample.l.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.sample.b.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.sample.b.cl.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.sample.lz.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.sample.cd.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 - -declare <4 x float> @llvm.amdgcn.image.sample.c.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.sample.c.cl.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.sample.c.d.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.sample.c.l.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.sample.c.b.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.sample.c.lz.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.sample.c.cd.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 - -declare float @llvm.amdgcn.image.sample.f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <2 x float> @llvm.amdgcn.image.sample.v2f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 - -attributes #0 = { nounwind readnone } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.o.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.o.ll deleted file mode 100644 index 42d7bc0e777..00000000000 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.o.ll +++ /dev/null @@ -1,427 +0,0 @@ -; RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=GCN %s -; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=GCN %s - -; GCN-LABEL: {{^}}sample: -; GCN: image_sample_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf -define amdgpu_kernel void @sample(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.o.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}sample_cl: -; GCN: image_sample_cl_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf -define amdgpu_kernel void @sample_cl(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.cl.o.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}sample_d: -; GCN: image_sample_d_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf -define amdgpu_kernel void @sample_d(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.d.o.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}sample_d_cl: -; GCN: image_sample_d_cl_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf -define amdgpu_kernel void @sample_d_cl(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.d.cl.o.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}sample_l: -; GCN: image_sample_l_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf -define amdgpu_kernel void @sample_l(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.l.o.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}sample_b: -; GCN: image_sample_b_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf -define amdgpu_kernel void @sample_b(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.b.o.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}sample_b_cl: -; GCN: image_sample_b_cl_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf -define amdgpu_kernel void @sample_b_cl(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.b.cl.o.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}sample_lz: -; GCN: image_sample_lz_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf -define amdgpu_kernel void @sample_lz(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.lz.o.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}sample_cd: -; GCN: image_sample_cd_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf -define amdgpu_kernel void @sample_cd(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.cd.o.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}sample_cd_cl: -; GCN: image_sample_cd_cl_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf -define amdgpu_kernel void @sample_cd_cl(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.o.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}sample_c: -; GCN: image_sample_c_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf -define amdgpu_kernel void @sample_c(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.c.o.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}sample_c_cl: -; GCN: image_sample_c_cl_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf -define amdgpu_kernel void @sample_c_cl(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.c.cl.o.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}sample_c_d: -; GCN: image_sample_c_d_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf -define amdgpu_kernel void @sample_c_d(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.c.d.o.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}sample_c_d_cl: -; GCN: image_sample_c_d_cl_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf -define amdgpu_kernel void @sample_c_d_cl(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.o.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}sample_c_l: -; GCN: image_sample_c_l_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf -define amdgpu_kernel void @sample_c_l(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.c.l.o.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}sample_c_b: -; GCN: image_sample_c_b_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf -define amdgpu_kernel void @sample_c_b(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.c.b.o.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}sample_c_b_cl: -; GCN: image_sample_c_b_cl_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf -define amdgpu_kernel void @sample_c_b_cl(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.o.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}sample_c_lz: -; GCN: image_sample_c_lz_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf -define amdgpu_kernel void @sample_c_lz(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.c.lz.o.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}sample_c_cd: -; GCN: image_sample_c_cd_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf -define amdgpu_kernel void @sample_c_cd(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.c.cd.o.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}sample_c_cd_cl: -; GCN: image_sample_c_cd_cl_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf -define amdgpu_kernel void @sample_c_cd_cl(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.o.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}adjust_writemask_sample_o_none_enabled: -; GCN-NOT: image -; GCN-NOT: store -define amdgpu_kernel void @adjust_writemask_sample_o_none_enabled(float addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.o.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 0, i1 false, i1 false, i1 false, i1 false, i1 false) - %elt0 = extractelement <4 x float> %r, i32 0 - store float %elt0, float addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}adjust_writemask_sample_cl_o_none_enabled: -; GCN-NOT: image -; GCN-NOT: store -define amdgpu_kernel void @adjust_writemask_sample_cl_o_none_enabled(float addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.cl.o.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 0, i1 false, i1 false, i1 false, i1 false, i1 false) - %elt0 = extractelement <4 x float> %r, i32 0 - store float %elt0, float addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}adjust_writemask_sample_d_o_none_enabled: -; GCN-NOT: image -; GCN-NOT: store -define amdgpu_kernel void @adjust_writemask_sample_d_o_none_enabled(float addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.d.o.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 0, i1 false, i1 false, i1 false, i1 false, i1 false) - %elt0 = extractelement <4 x float> %r, i32 0 - store float %elt0, float addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}adjust_writemask_sample_d_cl_o_none_enabled: -; GCN-NOT: image -; GCN-NOT: store -define amdgpu_kernel void @adjust_writemask_sample_d_cl_o_none_enabled(float addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.d.cl.o.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 0, i1 false, i1 false, i1 false, i1 false, i1 false) - %elt0 = extractelement <4 x float> %r, i32 0 - store float %elt0, float addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}adjust_writemask_sample_l_o_none_enabled: -; GCN-NOT: image -; GCN-NOT: store -define amdgpu_kernel void @adjust_writemask_sample_l_o_none_enabled(float addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.l.o.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 0, i1 false, i1 false, i1 false, i1 false, i1 false) - %elt0 = extractelement <4 x float> %r, i32 0 - store float %elt0, float addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}adjust_writemask_sample_b_o_none_enabled: -; GCN-NOT: image -; GCN-NOT: store -define amdgpu_kernel void @adjust_writemask_sample_b_o_none_enabled(float addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.b.o.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 0, i1 false, i1 false, i1 false, i1 false, i1 false) - %elt0 = extractelement <4 x float> %r, i32 0 - store float %elt0, float addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}adjust_writemask_sample_b_cl_o_none_enabled: -; GCN-NOT: image -; GCN-NOT: store -define amdgpu_kernel void @adjust_writemask_sample_b_cl_o_none_enabled(float addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.b.cl.o.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 0, i1 false, i1 false, i1 false, i1 false, i1 false) - %elt0 = extractelement <4 x float> %r, i32 0 - store float %elt0, float addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}adjust_writemask_sample_lz_o_none_enabled: -; GCN-NOT: image -; GCN-NOT: store -define amdgpu_kernel void @adjust_writemask_sample_lz_o_none_enabled(float addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.lz.o.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 0, i1 false, i1 false, i1 false, i1 false, i1 false) - %elt0 = extractelement <4 x float> %r, i32 0 - store float %elt0, float addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}adjust_writemask_sample_cd_o_none_enabled: -; GCN-NOT: image -; GCN-NOT: store -define amdgpu_kernel void @adjust_writemask_sample_cd_o_none_enabled(float addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.cd.o.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 0, i1 false, i1 false, i1 false, i1 false, i1 false) - %elt0 = extractelement <4 x float> %r, i32 0 - store float %elt0, float addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}adjust_writemask_sample_cd_cl_o_none_enabled: -; GCN-NOT: image -; GCN-NOT: store -define amdgpu_kernel void @adjust_writemask_sample_cd_cl_o_none_enabled(float addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.o.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 0, i1 false, i1 false, i1 false, i1 false, i1 false) - %elt0 = extractelement <4 x float> %r, i32 0 - store float %elt0, float addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}adjust_writemask_sample_c_o_none_enabled: -; GCN-NOT: image -; GCN-NOT: store -define amdgpu_kernel void @adjust_writemask_sample_c_o_none_enabled(float addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.c.o.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 0, i1 false, i1 false, i1 false, i1 false, i1 false) - %elt0 = extractelement <4 x float> %r, i32 0 - store float %elt0, float addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}adjust_writemask_sample_c_cl_o_none_enabled: -; GCN-NOT: image -; GCN-NOT: store -define amdgpu_kernel void @adjust_writemask_sample_c_cl_o_none_enabled(float addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.c.cl.o.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 0, i1 false, i1 false, i1 false, i1 false, i1 false) - %elt0 = extractelement <4 x float> %r, i32 0 - store float %elt0, float addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}adjust_writemask_sample_c_d_o_none_enabled: -; GCN-NOT: image -; GCN-NOT: store -define amdgpu_kernel void @adjust_writemask_sample_c_d_o_none_enabled(float addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.c.d.o.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 0, i1 false, i1 false, i1 false, i1 false, i1 false) - %elt0 = extractelement <4 x float> %r, i32 0 - store float %elt0, float addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}adjust_writemask_sample_c_d_cl_o_none_enabled: -; GCN-NOT: image -; GCN-NOT: store -define amdgpu_kernel void @adjust_writemask_sample_c_d_cl_o_none_enabled(float addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.o.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 0, i1 false, i1 false, i1 false, i1 false, i1 false) - %elt0 = extractelement <4 x float> %r, i32 0 - store float %elt0, float addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}adjust_writemask_sample_c_l_o_none_enabled: -; GCN-NOT: image -; GCN-NOT: store -define amdgpu_kernel void @adjust_writemask_sample_c_l_o_none_enabled(float addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.c.l.o.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 0, i1 false, i1 false, i1 false, i1 false, i1 false) - %elt0 = extractelement <4 x float> %r, i32 0 - store float %elt0, float addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}adjust_writemask_sample_c_b_o_none_enabled: -; GCN-NOT: image -; GCN-NOT: store -define amdgpu_kernel void @adjust_writemask_sample_c_b_o_none_enabled(float addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.c.b.o.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 0, i1 false, i1 false, i1 false, i1 false, i1 false) - %elt0 = extractelement <4 x float> %r, i32 0 - store float %elt0, float addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}adjust_writemask_sample_c_b_cl_o_none_enabled: -; GCN-NOT: image -; GCN-NOT: store -define amdgpu_kernel void @adjust_writemask_sample_c_b_cl_o_none_enabled(float addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.o.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 0, i1 false, i1 false, i1 false, i1 false, i1 false) - %elt0 = extractelement <4 x float> %r, i32 0 - store float %elt0, float addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}adjust_writemask_sample_c_lz_o_none_enabled: -; GCN-NOT: image -; GCN-NOT: store -define amdgpu_kernel void @adjust_writemask_sample_c_lz_o_none_enabled(float addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.c.lz.o.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 0, i1 false, i1 false, i1 false, i1 false, i1 false) - %elt0 = extractelement <4 x float> %r, i32 0 - store float %elt0, float addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}adjust_writemask_sample_c_cd_o_none_enabled: -; GCN-NOT: image -; GCN-NOT: store -define amdgpu_kernel void @adjust_writemask_sample_c_cd_o_none_enabled(float addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.c.cd.o.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 0, i1 false, i1 false, i1 false, i1 false, i1 false) - %elt0 = extractelement <4 x float> %r, i32 0 - store float %elt0, float addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}adjust_writemask_sample_c_cd_cl_o_none_enabled: -; GCN-NOT: image -; GCN-NOT: store -define amdgpu_kernel void @adjust_writemask_sample_c_cd_cl_o_none_enabled(float addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.o.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 0, i1 false, i1 false, i1 false, i1 false, i1 false) - %elt0 = extractelement <4 x float> %r, i32 0 - store float %elt0, float addrspace(1)* %out - ret void -} - -declare <4 x float> @llvm.amdgcn.image.sample.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.sample.cl.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.sample.d.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.sample.d.cl.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.sample.l.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.sample.b.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.sample.b.cl.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.sample.lz.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.sample.cd.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 - -declare <4 x float> @llvm.amdgcn.image.sample.c.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.sample.c.cl.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.sample.c.d.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.sample.c.l.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.sample.c.b.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.sample.c.lz.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.sample.c.cd.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 - - -attributes #0 = { nounwind readnone } diff --git a/llvm/test/CodeGen/AMDGPU/si-lod-bias.ll b/llvm/test/CodeGen/AMDGPU/si-lod-bias.ll deleted file mode 100644 index 3b09e1b7a31..00000000000 --- a/llvm/test/CodeGen/AMDGPU/si-lod-bias.ll +++ /dev/null @@ -1,59 +0,0 @@ -; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s - -; This shader has the potential to generated illegal VGPR to SGPR copies if -; the wrong register class is used for the REG_SEQUENCE instructions. - -; GCN-LABEL: {{^}}main: -; GCN: image_sample_b v{{\[[0-9]:[0-9]\]}}, v{{\[[0-9]:[0-9]\]}}, s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf -define amdgpu_ps void @main(<4 x i32> addrspace(4)* inreg %arg, <4 x i32> addrspace(4)* inreg %arg1, <8 x i32> addrspace(4)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 { -main_body: - %tmp = getelementptr <4 x i32>, <4 x i32> addrspace(4)* %arg, i32 0 - %tmp20 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp, !tbaa !0 - %tmp21 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 16) - %tmp22 = getelementptr <8 x i32>, <8 x i32> addrspace(4)* %arg2, i32 0 - %tmp23 = load <8 x i32>, <8 x i32> addrspace(4)* %tmp22, !tbaa !0 - %tmp24 = getelementptr <4 x i32>, <4 x i32> addrspace(4)* %arg1, i32 0 - %tmp25 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp24, !tbaa !0 - %i.i = extractelement <2 x i32> %arg5, i32 0 - %j.i = extractelement <2 x i32> %arg5, i32 1 - %i.f.i = bitcast i32 %i.i to float - %j.f.i = bitcast i32 %j.i to float - %p1.i = call float @llvm.amdgcn.interp.p1(float %i.f.i, i32 0, i32 0, i32 %arg3) #0 - %p2.i = call float @llvm.amdgcn.interp.p2(float %p1.i, float %j.f.i, i32 0, i32 0, i32 %arg3) #0 - %i.i1 = extractelement <2 x i32> %arg5, i32 0 - %j.i2 = extractelement <2 x i32> %arg5, i32 1 - %i.f.i3 = bitcast i32 %i.i1 to float - %j.f.i4 = bitcast i32 %j.i2 to float - %p1.i5 = call float @llvm.amdgcn.interp.p1(float %i.f.i3, i32 1, i32 0, i32 %arg3) #0 - %p2.i6 = call float @llvm.amdgcn.interp.p2(float %p1.i5, float %j.f.i4, i32 1, i32 0, i32 %arg3) #0 - %tmp28 = bitcast float %tmp21 to i32 - %tmp29 = bitcast float %p2.i to i32 - %tmp30 = bitcast float %p2.i6 to i32 - %tmp31 = insertelement <4 x i32> undef, i32 %tmp28, i32 0 - %tmp32 = insertelement <4 x i32> %tmp31, i32 %tmp29, i32 1 - %tmp33 = insertelement <4 x i32> %tmp32, i32 %tmp30, i32 2 - %tmp34 = insertelement <4 x i32> %tmp33, i32 undef, i32 3 - %tmp34.bc = bitcast <4 x i32> %tmp34 to <4 x float> - %tmp35 = call <4 x float> @llvm.amdgcn.image.sample.b.v4f32.v4f32.v8i32(<4 x float> %tmp34.bc, <8 x i32> %tmp23, <4 x i32> %tmp25, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) - %tmp36 = extractelement <4 x float> %tmp35, i32 0 - %tmp37 = extractelement <4 x float> %tmp35, i32 1 - %tmp38 = extractelement <4 x float> %tmp35, i32 2 - %tmp39 = extractelement <4 x float> %tmp35, i32 3 - call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %tmp36, float %tmp37, float %tmp38, float %tmp39, i1 true, i1 true) #0 - ret void -} - -declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #1 -declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #1 -declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.sample.b.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #2 -declare float @llvm.SI.load.const.v4i32(<4 x i32>, i32) #1 - -attributes #0 = { nounwind } -attributes #1 = { nounwind readnone } -attributes #2 = { nounwind readonly } - -!0 = !{!1, !1, i64 0, i32 1} -!1 = !{!"const", !2} -!2 = !{!"tbaa root"} |

