diff options
author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2019-03-12 21:02:54 +0000 |
---|---|---|
committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2019-03-12 21:02:54 +0000 |
commit | caf1316f7100e1ba1e7e800eae6a5adf0d48bf90 (patch) | |
tree | 7b79b67ccf3ffe7fe9b56dee66444a586d7714d2 /llvm/test | |
parent | 9df0754b8dbeeb597a22bb0418eb0ee04ce88b56 (diff) | |
download | bcm5719-llvm-caf1316f7100e1ba1e7e800eae6a5adf0d48bf90.tar.gz bcm5719-llvm-caf1316f7100e1ba1e7e800eae6a5adf0d48bf90.zip |
IR: Add immarg attribute
This indicates an intrinsic parameter is required to be a constant,
and should not be replaced with a non-constant value.
Add the attribute to all AMDGPU and generic intrinsics that comments
indicate it should apply to. I scanned other target intrinsics, but I
don't see any obvious comments indicating which arguments are intended
to be only immediates.
This breaks one questionable testcase for the autoupgrade. I'm unclear
on whether the autoupgrade is supposed to really handle declarations
which were never valid. The verifier fails because the attributes now
refer to a parameter past the end of the argument list.
llvm-svn: 355981
Diffstat (limited to 'llvm/test')
28 files changed, 1312 insertions, 471 deletions
diff --git a/llvm/test/Assembler/auto_upgrade_intrinsics.ll b/llvm/test/Assembler/auto_upgrade_intrinsics.ll index bd2a779800a..175bac812a0 100644 --- a/llvm/test/Assembler/auto_upgrade_intrinsics.ll +++ b/llvm/test/Assembler/auto_upgrade_intrinsics.ll @@ -146,5 +146,5 @@ define void @tests.lifetime.start.end() { ; CHECK: declare i32 @llvm.objectsize.i32.p0i8 -; CHECK: declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) -; CHECK: declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) +; CHECK: declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) +; CHECK: declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) diff --git a/llvm/test/Assembler/autoupgrade-invalid-mem-intrinsics.ll b/llvm/test/Assembler/autoupgrade-invalid-mem-intrinsics.ll new file mode 100644 index 00000000000..59308ef27a1 --- /dev/null +++ b/llvm/test/Assembler/autoupgrade-invalid-mem-intrinsics.ll @@ -0,0 +1,15 @@ +; RUN: not llvm-as < %s -o /dev/null 2>&1 | FileCheck %s + +; Check that remangling code doesn't fail on an intrinsic with wrong signature + +; CHECK: Attribute after last parameter! +; CHECK-NEXT: void (i8*, i8, i64)* @llvm.memset.i64 +declare void @llvm.memset.i64(i8* nocapture, i8, i64) nounwind + +; CHECK: Attribute after last parameter! +; CHECK-NEXT: void (i8*, i8, i64)* @llvm.memcpy.i64 +declare void @llvm.memcpy.i64(i8* nocapture, i8, i64) nounwind + +; CHECK: Attribute after last parameter! +; CHECK-NEXT: void (i8*, i8, i64)* @llvm.memmove.i64 +declare void @llvm.memmove.i64(i8* nocapture, i8, i64) nounwind diff --git a/llvm/test/Assembler/immarg-param-attribute.ll b/llvm/test/Assembler/immarg-param-attribute.ll new file mode 100644 index 00000000000..f31443a8a3e --- /dev/null +++ b/llvm/test/Assembler/immarg-param-attribute.ll @@ -0,0 +1,39 @@ +; RUN: llvm-as < %s | llvm-dis | llvm-as | llvm-dis | FileCheck %s + +; CHECK: declare void @llvm.test.immarg.intrinsic.i32(i32 immarg) +declare void @llvm.test.immarg.intrinsic.i32(i32 immarg) + +; CHECK: declare void @llvm.test.immarg.intrinsic.f32(float immarg) +declare void @llvm.test.immarg.intrinsic.f32(float immarg) + +; CHECK-LABEL: @call_llvm.test.immarg.intrinsic.i32( +define void @call_llvm.test.immarg.intrinsic.i32() { + ; CHECK: call void @llvm.test.immarg.intrinsic.i32(i32 0) + call void @llvm.test.immarg.intrinsic.i32(i32 0) + + ; CHECK: call void @llvm.test.immarg.intrinsic.i32(i32 0) + call void @llvm.test.immarg.intrinsic.i32(i32 0) + + ; CHECK call void @llvm.test.immarg.intrinsic.i32(i32 1) + call void @llvm.test.immarg.intrinsic.i32(i32 1) + + ; CHECK: call void @llvm.test.immarg.intrinsic.i32(i32 5) + call void @llvm.test.immarg.intrinsic.i32(i32 add (i32 2, i32 3)) + + ; CHECK: call void @llvm.test.immarg.intrinsic.i32(i32 0) + call void @llvm.test.immarg.intrinsic.i32(i32 ptrtoint (i32* null to i32)) + ret void +} + +; CHECK-LABEL: @call_llvm.test.immarg.intrinsic.f32( +define void @call_llvm.test.immarg.intrinsic.f32() { + ; CHECK: call void @llvm.test.immarg.intrinsic.f32(float 1.000000e+00) + call void @llvm.test.immarg.intrinsic.f32(float 1.0) + ret void +} + +define void @on_callsite_and_declaration() { + ; CHECK: call void @llvm.test.immarg.intrinsic.i32(i32 immarg 0) + call void @llvm.test.immarg.intrinsic.i32(i32 immarg 0) + ret void +} diff --git a/llvm/test/Assembler/invalid-immarg.ll b/llvm/test/Assembler/invalid-immarg.ll new file mode 100644 index 00000000000..65f6ba6c5f4 --- /dev/null +++ b/llvm/test/Assembler/invalid-immarg.ll @@ -0,0 +1,34 @@ +; RUN: not llvm-as < %s -o /dev/null 2>&1 | FileCheck %s + +; CHECK: Attribute 'immarg' is incompatible with other attributes +declare void @llvm.immarg.byval(i32* byval immarg) + +; CHECK: Attribute 'immarg' is incompatible with other attributes +declare void @llvm.immarg.inalloca(i32* inalloca immarg) + +; CHECK: Attribute 'immarg' is incompatible with other attributes +declare void @llvm.immarg.inreg(i32 inreg immarg) + +; CHECK: Attribute 'immarg' is incompatible with other attributes +declare void @llvm.immarg.nest(i32* nest immarg) + +; CHECK: Attribute 'immarg' is incompatible with other attributes +declare void @llvm.immarg.sret(i32* sret immarg) + +; CHECK: Attribute 'immarg' is incompatible with other attributes +declare void @llvm.immarg.zeroext(i32 zeroext immarg) + +; CHECK: Attribute 'immarg' is incompatible with other attributes +declare void @llvm.immarg.signext(i32 signext immarg) + +; CHECK: Attribute 'immarg' is incompatible with other attributes +declare void @llvm.immarg.returned(i32 returned immarg) + +; CHECK: Attribute 'immarg' is incompatible with other attributes +declare void @llvm.immarg.noalias(i32* noalias immarg) + +; CHECK: Attribute 'immarg' is incompatible with other attributes +declare void @llvm.immarg.readnone(i32* readnone immarg) + +; CHECK: Attribute 'immarg' is incompatible with other attributes +declare void @llvm.immarg.readonly(i32* readonly immarg) diff --git a/llvm/test/Assembler/invalid-immarg2.ll b/llvm/test/Assembler/invalid-immarg2.ll new file mode 100644 index 00000000000..f4589108a9f --- /dev/null +++ b/llvm/test/Assembler/invalid-immarg2.ll @@ -0,0 +1,4 @@ +; RUN: not llvm-as < %s -o /dev/null 2>&1 | FileCheck %s + +; CHECK: error: invalid use of parameter-only attribute on a function +declare void @llvm.immarg.func() immarg diff --git a/llvm/test/Assembler/invalid-immarg3.ll b/llvm/test/Assembler/invalid-immarg3.ll new file mode 100644 index 00000000000..c03d7fafdc2 --- /dev/null +++ b/llvm/test/Assembler/invalid-immarg3.ll @@ -0,0 +1,4 @@ +; RUN: not llvm-as < %s -o /dev/null 2>&1 | FileCheck %s + +; CHECK: error: invalid use of parameter-only attribute +declare immarg i32 @llvm.immarg.retattr(i32) diff --git a/llvm/test/Bitcode/compatibility.ll b/llvm/test/Bitcode/compatibility.ll index 3c5e86eee01..95cd49f8156 100644 --- a/llvm/test/Bitcode/compatibility.ll +++ b/llvm/test/Bitcode/compatibility.ll @@ -1681,6 +1681,10 @@ define i8** @constexpr() { ret i8** getelementptr inbounds ({ [4 x i8*], [4 x i8*] }, { [4 x i8*], [4 x i8*] }* null, i32 0, inrange i32 1, i32 2) } +; immarg attribute +declare void @llvm.test.immarg.intrinsic(i32 immarg) +; CHECK: declare void @llvm.test.immarg.intrinsic(i32 immarg) + ; CHECK: attributes #0 = { alignstack=4 } ; CHECK: attributes #1 = { alignstack=8 } ; CHECK: attributes #2 = { alwaysinline } diff --git a/llvm/test/Bitcode/objectsize-upgrade-7.0.ll b/llvm/test/Bitcode/objectsize-upgrade-7.0.ll index a7e6d497676..7ed6f15f683 100644 --- a/llvm/test/Bitcode/objectsize-upgrade-7.0.ll +++ b/llvm/test/Bitcode/objectsize-upgrade-7.0.ll @@ -9,4 +9,4 @@ define void @callit(i8* %ptr) { } declare i64 @llvm.objectsize.i64.p0i8(i8*, i1, i1) -; CHECK: declare i64 @llvm.objectsize.i64.p0i8(i8*, i1, i1, i1) +; CHECK: declare i64 @llvm.objectsize.i64.p0i8(i8*, i1 immarg, i1 immarg, i1 immarg) diff --git a/llvm/test/Bitcode/upgrade-memory-intrinsics.ll b/llvm/test/Bitcode/upgrade-memory-intrinsics.ll index df4cb2bcd36..1bdee928cca 100644 --- a/llvm/test/Bitcode/upgrade-memory-intrinsics.ll +++ b/llvm/test/Bitcode/upgrade-memory-intrinsics.ll @@ -27,9 +27,9 @@ define void @test2(i8* %p1, i8* %p2, i8* %p3) { ret void } -; CHECK: declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1) -; CHECK: declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1) -; CHECK: declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i1) +; CHECK: declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg) +; CHECK: declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1 immarg) +; CHECK: declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i1 immarg) declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i32, i1) declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly , i8* nocapture readonly, i64, i32, i1) declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) diff --git a/llvm/test/CodeGen/AMDGPU/bitcast-vector-extract.ll b/llvm/test/CodeGen/AMDGPU/bitcast-vector-extract.ll index 722099fec33..6644449b14a 100644 --- a/llvm/test/CodeGen/AMDGPU/bitcast-vector-extract.ll +++ b/llvm/test/CodeGen/AMDGPU/bitcast-vector-extract.ll @@ -70,8 +70,8 @@ define amdgpu_kernel void @store_bitcast_constant_v8i32_to_v16i16(<8 x float> ad ; GCN-LABEL: {{^}}store_value_lowered_to_undef_bitcast_source: ; GCN-NOT: store_dword -define amdgpu_kernel void @store_value_lowered_to_undef_bitcast_source(<2 x i32> addrspace(1)* %out, i64 %a, i64 %b, i32 %c) #0 { - %undef = call i64 @llvm.amdgcn.icmp.i64(i64 %a, i64 %b, i32 %c) #1 +define amdgpu_kernel void @store_value_lowered_to_undef_bitcast_source(<2 x i32> addrspace(1)* %out, i64 %a, i64 %b) #0 { + %undef = call i64 @llvm.amdgcn.icmp.i64(i64 %a, i64 %b, i32 999) #1 %bc = bitcast i64 %undef to <2 x i32> store volatile <2 x i32> %bc, <2 x i32> addrspace(1)* %out ret void @@ -79,8 +79,8 @@ define amdgpu_kernel void @store_value_lowered_to_undef_bitcast_source(<2 x i32> ; GCN-LABEL: {{^}}store_value_lowered_to_undef_bitcast_source_extractelt: ; GCN-NOT: store_dword -define amdgpu_kernel void @store_value_lowered_to_undef_bitcast_source_extractelt(i32 addrspace(1)* %out, i64 %a, i64 %b, i32 %c) #0 { - %undef = call i64 @llvm.amdgcn.icmp.i64(i64 %a, i64 %b, i32 %c) #1 +define amdgpu_kernel void @store_value_lowered_to_undef_bitcast_source_extractelt(i32 addrspace(1)* %out, i64 %a, i64 %b) #0 { + %undef = call i64 @llvm.amdgcn.icmp.i64(i64 %a, i64 %b, i32 9999) #1 %bc = bitcast i64 %undef to <2 x i32> %elt1 = extractelement <2 x i32> %bc, i32 1 store volatile i32 %elt1, i32 addrspace(1)* %out diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.dec.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.dec.ll index 2616d84bcd9..c4e631590d5 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.dec.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.dec.ll @@ -12,34 +12,6 @@ declare i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* nocapture, i64, i32, i32, i1) declare i32 @llvm.amdgcn.workitem.id.x() #1 -; Make sure no crash on invalid non-constant -; GCN-LABEL: {{^}}invalid_variable_order_lds_atomic_dec_ret_i32: -; CIVI-DAG: s_mov_b32 m0 -; GFX9-NOT: m0 -define amdgpu_kernel void @invalid_variable_order_lds_atomic_dec_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %order.var) #0 { - %result = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 %order.var, i32 0, i1 false) - store i32 %result, i32 addrspace(1)* %out - ret void -} - -; Make sure no crash on invalid non-constant -; GCN-LABEL: {{^}}invalid_variable_scope_lds_atomic_dec_ret_i32: -; CIVI-DAG: s_mov_b32 m0 -; GFX9-NOT: m0 -define amdgpu_kernel void @invalid_variable_scope_lds_atomic_dec_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %scope.var) #0 { - %result = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 0, i32 %scope.var, i1 false) - store i32 %result, i32 addrspace(1)* %out - ret void -} - -; Make sure no crash on invalid non-constant -; GCN-LABEL: {{^}}invalid_variable_volatile_lds_atomic_dec_ret_i32: -define amdgpu_kernel void @invalid_variable_volatile_lds_atomic_dec_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i1 %volatile.var) #0 { - %result = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 0, i32 0, i1 %volatile.var) - store i32 %result, i32 addrspace(1)* %out - ret void -} - ; GCN-LABEL: {{^}}lds_atomic_dec_ret_i32: ; CIVI-DAG: s_mov_b32 m0 ; GFX9-NOT: m0 diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.div.scale.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.div.scale.ll index e36c02715b7..2a5e81a6dd6 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.div.scale.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.div.scale.ll @@ -395,41 +395,6 @@ define amdgpu_kernel void @test_div_scale_f32_undef_undef_val(float addrspace(1) ret void } -; Undefined selector gets deleted -; SI-LABEL: {{^}}test_div_scale_f32_val_undef_undef: -; SI-NOT: v_div_scale -define amdgpu_kernel void @test_div_scale_f32_val_undef_undef(float addrspace(1)* %out) #0 { - %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float 8.0, float undef, i1 undef) - %result0 = extractvalue { float, i1 } %result, 0 - store float %result0, float addrspace(1)* %out, align 4 - ret void -} - -; SI-LABEL: {{^}}test_div_scale_f32_undef_undef_undef: -; SI-NOT: v_div_scale -define amdgpu_kernel void @test_div_scale_f32_undef_undef_undef(float addrspace(1)* %out) #0 { - %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float undef, float undef, i1 undef) - %result0 = extractvalue { float, i1 } %result, 0 - store float %result0, float addrspace(1)* %out, align 4 - ret void -} - -; SI-LABEL: {{^}}test_div_scale_f32_val_val_undef: -; SI-NOT: v_div_scale -define amdgpu_kernel void @test_div_scale_f32_val_val_undef(float addrspace(1)* %out, float addrspace(1)* %in) #0 { - %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone - %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid - %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 - - %a = load volatile float, float addrspace(1)* %gep.0, align 4 - %b = load volatile float, float addrspace(1)* %gep.1, align 4 - - %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 undef) - %result0 = extractvalue { float, i1 } %result, 0 - store float %result0, float addrspace(1)* %out, align 4 - ret void -} - ; SI-LABEL: {{^}}test_div_scale_f64_val_undef_val: ; SI-DAG: s_mov_b32 s[[K_LO:[0-9]+]], 0{{$}} ; SI-DAG: s_mov_b32 s[[K_HI:[0-9]+]], 0x40200000 diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.ll index a6a402b93bf..6ff66b94fdf 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.ll @@ -8,14 +8,6 @@ declare float @llvm.fabs.f32(float) #0 declare i64 @llvm.amdgcn.fcmp.f16(half, half, i32) #0 declare half @llvm.fabs.f16(half) #0 -; GCN-LABEL: {{^}}v_fcmp_f32_dynamic_cc: -; GCN: s_endpgm -define amdgpu_kernel void @v_fcmp_f32_dynamic_cc(i64 addrspace(1)* %out, float %src0, float %src1, i32 %cc) { - %result = call i64 @llvm.amdgcn.fcmp.f32(float %src0, float %src1, i32 %cc) - store i64 %result, i64 addrspace(1)* %out - ret void -} - ; GCN-LABEL: {{^}}v_fcmp_f32_oeq_with_fabs: ; GCN: v_cmp_eq_f32_e64 {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}}, |{{v[0-9]+}}| define amdgpu_kernel void @v_fcmp_f32_oeq_with_fabs(i64 addrspace(1)* %out, float %src, float %a) { diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.ll index 06ac7da1288..8d5cdf1c270 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.ll @@ -6,15 +6,6 @@ declare i64 @llvm.amdgcn.icmp.i64(i64, i64, i32) #0 declare i64 @llvm.amdgcn.icmp.i16(i16, i16, i32) #0 declare i64 @llvm.amdgcn.icmp.i1(i1, i1, i32) #0 -; No crash on invalid input -; GCN-LABEL: {{^}}v_icmp_i32_dynamic_cc: -; GCN: s_endpgm -define amdgpu_kernel void @v_icmp_i32_dynamic_cc(i64 addrspace(1)* %out, i32 %src, i32 %cc) { - %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 %cc) - store i64 %result, i64 addrspace(1)* %out - ret void -} - ; GCN-LABEL: {{^}}v_icmp_i32_eq: ; GCN: v_cmp_eq_u32_e64 define amdgpu_kernel void @v_icmp_i32_eq(i64 addrspace(1)* %out, i32 %src) { @@ -181,15 +172,6 @@ define amdgpu_kernel void @v_icmp_i64_sle(i64 addrspace(1)* %out, i64 %src) { ret void } -; GCN-LABEL: {{^}}v_icmp_i16_dynamic_cc: -; GCN: s_endpgm -define amdgpu_kernel void @v_icmp_i16_dynamic_cc(i64 addrspace(1)* %out, i16 %src, i32 %cc) { - %result = call i64 @llvm.amdgcn.icmp.i16(i16 %src, i16 100, i32 %cc) - store i64 %result, i64 addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}v_icmp_i16_eq: ; VI: v_cmp_eq_u16_e64 ; SI-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x64 diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.tbuffer.store.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.tbuffer.store.ll index 51f047d9b64..00da71e422f 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.tbuffer.store.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.tbuffer.store.ll @@ -67,9 +67,6 @@ declare void @llvm.amdgcn.raw.tbuffer.store.i32(i32, <4 x i32>, i32, i32, i32, i declare void @llvm.amdgcn.raw.tbuffer.store.v2i32(<2 x i32>, <4 x i32>, i32, i32, i32, i32) #0 declare void @llvm.amdgcn.raw.tbuffer.store.v4i32(<4 x i32>, <4 x i32>, i32, i32, i32, i32) #0 declare void @llvm.amdgcn.raw.tbuffer.store.v4f32(<4 x float>, <4 x i32>, i32, i32, i32, i32) #0 -declare <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32>, i32, i1, i1) #1 attributes #0 = { nounwind } attributes #1 = { nounwind readonly } - - diff --git a/llvm/test/DebugInfo/MIR/X86/kill-after-spill.mir b/llvm/test/DebugInfo/MIR/X86/kill-after-spill.mir index 5110dc349be..88ed269c93d 100644 --- a/llvm/test/DebugInfo/MIR/X86/kill-after-spill.mir +++ b/llvm/test/DebugInfo/MIR/X86/kill-after-spill.mir @@ -124,9 +124,6 @@ declare i8* @memset(i8*, i32, i64) local_unnamed_addr ; Function Attrs: nounwind readnone speculatable - declare i64 @llvm.objectsize.i64.p0i8(i8*, i1, i1) #1 - - ; Function Attrs: nounwind readnone speculatable declare void @llvm.dbg.value(metadata, metadata, metadata) #1 ; Function Attrs: nounwind diff --git a/llvm/test/LTO/X86/remangle_intrinsics.ll b/llvm/test/LTO/X86/remangle_intrinsics.ll index 72ce8d3d812..112cace0403 100644 --- a/llvm/test/LTO/X86/remangle_intrinsics.ll +++ b/llvm/test/LTO/X86/remangle_intrinsics.ll @@ -19,6 +19,3 @@ define void @foo(%struct.rtx_def* %a, i8 %b, i32 %c) { } declare void @llvm.memset.p0struct.rtx_def.i32(%struct.rtx_def*, i8, i32, i1) - -; Check that remangling code doesn't fail on an intrinsic with wrong signature -declare void @llvm.memset.i64(i8* nocapture, i8, i64) nounwind diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/intrinsics.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/intrinsics.ll index 166e90ab7b6..4694a1c5f8d 100644 --- a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/intrinsics.ll +++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/intrinsics.ll @@ -125,15 +125,6 @@ define i64 @volatile_atomicdec_group_to_flat_i64(i64 addrspace(3)* %group.ptr, i ret i64 %ret } -; CHECK-LABEL: @invalid_variable_volatile_atomicinc_group_to_flat_i64( -; CHECK-NEXT: %1 = addrspacecast i64 addrspace(3)* %group.ptr to i64* -; CHECK-NEXT: %ret = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %1, i64 %y, i32 0, i32 0, i1 %volatile.var) -define i64 @invalid_variable_volatile_atomicinc_group_to_flat_i64(i64 addrspace(3)* %group.ptr, i64 %y, i1 %volatile.var) #0 { - %cast = addrspacecast i64 addrspace(3)* %group.ptr to i64* - %ret = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %cast, i64 %y, i32 0, i32 0, i1 %volatile.var) - ret i64 %ret -} - declare i32 @llvm.objectsize.i32.p0i8(i8*, i1, i1, i1) #1 declare i64 @llvm.objectsize.i64.p0i8(i8*, i1, i1, i1) #1 declare i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* nocapture, i32, i32, i32, i1) #2 diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-demanded-vector-elts.ll b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-demanded-vector-elts.ll index 6030d14c8e1..e0b8fab411c 100644 --- a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-demanded-vector-elts.ll +++ b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-demanded-vector-elts.ll @@ -320,229 +320,229 @@ declare <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32>, i32, i32, i ; -------------------------------------------------------------------- ; CHECK-LABEL: @raw_buffer_load_f32( -; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: ret float %data -define amdgpu_ps float @raw_buffer_load_f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps float @raw_buffer_load_f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { + %data = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) ret float %data } ; CHECK-LABEL: @raw_buffer_load_v1f32( -; CHECK-NEXT: %data = call <1 x float> @llvm.amdgcn.raw.buffer.load.v1f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <1 x float> @llvm.amdgcn.raw.buffer.load.v1f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: ret <1 x float> %data -define amdgpu_ps <1 x float> @raw_buffer_load_v1f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <1 x float> @llvm.amdgcn.raw.buffer.load.v1f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps <1 x float> @raw_buffer_load_v1f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { + %data = call <1 x float> @llvm.amdgcn.raw.buffer.load.v1f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) ret <1 x float> %data } ; CHECK-LABEL: @raw_buffer_load_v2f32( -; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: ret <2 x float> %data -define amdgpu_ps <2 x float> @raw_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps <2 x float> @raw_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { + %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) ret <2 x float> %data } ; CHECK-LABEL: @raw_buffer_load_v4f32( -; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: ret <4 x float> %data -define amdgpu_ps <4 x float> @raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps <4 x float> @raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { + %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) ret <4 x float> %data } ; CHECK-LABEL: @extract_elt0_raw_buffer_load_v2f32( -; CHECK: %data = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK: %data = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: ret float %data -define amdgpu_ps float @extract_elt0_raw_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps float @extract_elt0_raw_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { + %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) %elt0 = extractelement <2 x float> %data, i32 0 ret float %elt0 } ; CHECK-LABEL: @extract_elt1_raw_buffer_load_v2f32( -; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: %elt1 = extractelement <2 x float> %data, i32 1 ; CHECK-NEXT: ret float %elt1 -define amdgpu_ps float @extract_elt1_raw_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps float @extract_elt1_raw_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { + %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) %elt1 = extractelement <2 x float> %data, i32 1 ret float %elt1 } ; CHECK-LABEL: @extract_elt0_raw_buffer_load_v4f32( -; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: ret float %data -define amdgpu_ps float @extract_elt0_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps float @extract_elt0_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { + %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } ; CHECK-LABEL: @extract_elt1_raw_buffer_load_v4f32( -; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: %elt1 = extractelement <2 x float> %data, i32 1 ; CHECK-NEXT: ret float %elt1 -define amdgpu_ps float @extract_elt1_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps float @extract_elt1_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { + %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) %elt1 = extractelement <4 x float> %data, i32 1 ret float %elt1 } ; CHECK-LABEL: @extract_elt2_raw_buffer_load_v4f32( -; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: %elt1 = extractelement <4 x float> %data, i32 2 ; CHECK-NEXT: ret float %elt1 -define amdgpu_ps float @extract_elt2_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps float @extract_elt2_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { + %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) %elt1 = extractelement <4 x float> %data, i32 2 ret float %elt1 } ; CHECK-LABEL: @extract_elt3_raw_buffer_load_v4f32( -; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: %elt1 = extractelement <4 x float> %data, i32 3 ; CHECK-NEXT: ret float %elt1 -define amdgpu_ps float @extract_elt3_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps float @extract_elt3_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { + %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) %elt1 = extractelement <4 x float> %data, i32 3 ret float %elt1 } ; CHECK-LABEL: @extract_elt0_elt1_raw_buffer_load_v4f32( -; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: ret <2 x float> -define amdgpu_ps <2 x float> @extract_elt0_elt1_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps <2 x float> @extract_elt0_elt1_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { + %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 0, i32 1> ret <2 x float> %shuf } ; CHECK-LABEL: @extract_elt1_elt2_raw_buffer_load_v4f32( -; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 1, i32 2> ; CHECK-NEXT: ret <2 x float> %shuf -define amdgpu_ps <2 x float> @extract_elt1_elt2_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps <2 x float> @extract_elt1_elt2_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { + %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 1, i32 2> ret <2 x float> %shuf } ; CHECK-LABEL: @extract_elt2_elt3_raw_buffer_load_v4f32( -; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 2, i32 3> ; CHECK-NEXT: ret <2 x float> %shuf -define amdgpu_ps <2 x float> @extract_elt2_elt3_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps <2 x float> @extract_elt2_elt3_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { + %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 2, i32 3> ret <2 x float> %shuf } ; CHECK-LABEL: @extract_elt0_elt1_elt2_raw_buffer_load_v4f32( -; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2> ; CHECK-NEXT: ret <3 x float> %shuf -define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { + %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2> ret <3 x float> %shuf } ; CHECK-LABEL: @extract_elt1_elt2_elt3_raw_buffer_load_v4f32( -; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 1, i32 2, i32 3> ; CHECK-NEXT: ret <3 x float> %shuf -define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { + %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 1, i32 2, i32 3> ret <3 x float> %shuf } ; CHECK-LABEL: @extract_elt0_elt2_elt3_raw_buffer_load_v4f32( -; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 2, i32 3> ; CHECK-NEXT: ret <3 x float> %shuf -define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { + %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 2, i32 3> ret <3 x float> %shuf } ; CHECK-LABEL: @extract_elt0_raw_buffer_load_v3f32( -; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: ret float %data -define amdgpu_ps float @extract_elt0_raw_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps float @extract_elt0_raw_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { + %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) %elt0 = extractelement <3 x float> %data, i32 0 ret float %elt0 } ; CHECK-LABEL: @extract_elt1_raw_buffer_load_v3f32( -; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: %elt1 = extractelement <2 x float> %data, i32 1 ; CHECK-NEXT: ret float %elt1 -define amdgpu_ps float @extract_elt1_raw_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps float @extract_elt1_raw_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { + %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) %elt1 = extractelement <3 x float> %data, i32 1 ret float %elt1 } ; CHECK-LABEL: @extract_elt2_raw_buffer_load_v3f32( -; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: %elt1 = extractelement <3 x float> %data, i32 2 ; CHECK-NEXT: ret float %elt1 -define amdgpu_ps float @extract_elt2_raw_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps float @extract_elt2_raw_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { + %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) %elt1 = extractelement <3 x float> %data, i32 2 ret float %elt1 } ; CHECK-LABEL: @extract_elt0_elt1_raw_buffer_load_v3f32( -; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: ret <2 x float> -define amdgpu_ps <2 x float> @extract_elt0_elt1_raw_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps <2 x float> @extract_elt0_elt1_raw_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { + %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 0, i32 1> ret <2 x float> %shuf } ; CHECK-LABEL: @extract_elt1_elt2_raw_buffer_load_v3f32( -; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 1, i32 2> ; CHECK-NEXT: ret <2 x float> %shuf -define amdgpu_ps <2 x float> @extract_elt1_elt2_raw_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps <2 x float> @extract_elt1_elt2_raw_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { + %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 1, i32 2> ret <2 x float> %shuf } ; CHECK-LABEL: @extract0_bitcast_raw_buffer_load_v4f32( -; CHECK-NEXT: %tmp = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %tmp = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: %tmp2 = bitcast float %tmp to i32 ; CHECK-NEXT: ret i32 %tmp2 -define i32 @extract0_bitcast_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %tmp = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +define i32 @extract0_bitcast_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { + %tmp = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) %tmp1 = bitcast <4 x float> %tmp to <4 x i32> %tmp2 = extractelement <4 x i32> %tmp1, i32 0 ret i32 %tmp2 } ; CHECK-LABEL: @extract0_bitcast_raw_buffer_load_v4i32( -; CHECK-NEXT: %tmp = call i32 @llvm.amdgcn.raw.buffer.load.i32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %tmp = call i32 @llvm.amdgcn.raw.buffer.load.i32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: %tmp2 = bitcast i32 %tmp to float ; CHECK-NEXT: ret float %tmp2 -define float @extract0_bitcast_raw_buffer_load_v4i32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %tmp = call <4 x i32> @llvm.amdgcn.raw.buffer.load.v4i32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +define float @extract0_bitcast_raw_buffer_load_v4i32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { + %tmp = call <4 x i32> @llvm.amdgcn.raw.buffer.load.v4i32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) %tmp1 = bitcast <4 x i32> %tmp to <4 x float> %tmp2 = extractelement <4 x float> %tmp1, i32 0 ret float %tmp2 } ; CHECK-LABEL: @preserve_metadata_extract_elt0_raw_buffer_load_v2f32( -; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent), !fpmath !0 +; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0), !fpmath !0 ; CHECK-NEXT: ret float %data -define amdgpu_ps float @preserve_metadata_extract_elt0_raw_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent), !fpmath !0 +define amdgpu_ps float @preserve_metadata_extract_elt0_raw_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { + %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0), !fpmath !0 %elt0 = extractelement <2 x float> %data, i32 0 ret float %elt0 } @@ -560,229 +560,229 @@ declare <4 x i32> @llvm.amdgcn.raw.buffer.load.v4i32(<4 x i32>, i32, i32, i32) # ; -------------------------------------------------------------------- ; CHECK-LABEL: @raw_buffer_load_format_f32( -; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: ret float %data -define amdgpu_ps float @raw_buffer_load_format_f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps float @raw_buffer_load_format_f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { + %data = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) ret float %data } ; CHECK-LABEL: @raw_buffer_load_format_v1f32( -; CHECK-NEXT: %data = call <1 x float> @llvm.amdgcn.raw.buffer.load.format.v1f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <1 x float> @llvm.amdgcn.raw.buffer.load.format.v1f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: ret <1 x float> %data -define amdgpu_ps <1 x float> @raw_buffer_load_format_v1f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <1 x float> @llvm.amdgcn.raw.buffer.load.format.v1f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps <1 x float> @raw_buffer_load_format_v1f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { + %data = call <1 x float> @llvm.amdgcn.raw.buffer.load.format.v1f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) ret <1 x float> %data } ; CHECK-LABEL: @raw_buffer_load_format_v2f32( -; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: ret <2 x float> %data -define amdgpu_ps <2 x float> @raw_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps <2 x float> @raw_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { + %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) ret <2 x float> %data } ; CHECK-LABEL: @raw_buffer_load_format_v4f32( -; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: ret <4 x float> %data -define amdgpu_ps <4 x float> @raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps <4 x float> @raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { + %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) ret <4 x float> %data } ; CHECK-LABEL: @extract_elt0_raw_buffer_load_format_v2f32( -; CHECK: %data = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK: %data = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: ret float %data -define amdgpu_ps float @extract_elt0_raw_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps float @extract_elt0_raw_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { + %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) %elt0 = extractelement <2 x float> %data, i32 0 ret float %elt0 } ; CHECK-LABEL: @extract_elt1_raw_buffer_load_format_v2f32( -; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: %elt1 = extractelement <2 x float> %data, i32 1 ; CHECK-NEXT: ret float %elt1 -define amdgpu_ps float @extract_elt1_raw_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps float @extract_elt1_raw_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { + %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) %elt1 = extractelement <2 x float> %data, i32 1 ret float %elt1 } ; CHECK-LABEL: @extract_elt0_raw_buffer_load_format_v4f32( -; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: ret float %data -define amdgpu_ps float @extract_elt0_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps float @extract_elt0_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { + %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } ; CHECK-LABEL: @extract_elt1_raw_buffer_load_format_v4f32( -; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: %elt1 = extractelement <2 x float> %data, i32 1 ; CHECK-NEXT: ret float %elt1 -define amdgpu_ps float @extract_elt1_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps float @extract_elt1_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { + %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) %elt1 = extractelement <4 x float> %data, i32 1 ret float %elt1 } ; CHECK-LABEL: @extract_elt2_raw_buffer_load_format_v4f32( -; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: %elt1 = extractelement <4 x float> %data, i32 2 ; CHECK-NEXT: ret float %elt1 -define amdgpu_ps float @extract_elt2_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps float @extract_elt2_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { + %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) %elt1 = extractelement <4 x float> %data, i32 2 ret float %elt1 } ; CHECK-LABEL: @extract_elt3_raw_buffer_load_format_v4f32( -; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: %elt1 = extractelement <4 x float> %data, i32 3 ; CHECK-NEXT: ret float %elt1 -define amdgpu_ps float @extract_elt3_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps float @extract_elt3_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { + %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) %elt1 = extractelement <4 x float> %data, i32 3 ret float %elt1 } ; CHECK-LABEL: @extract_elt0_elt1_raw_buffer_load_format_v4f32( -; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: ret <2 x float> -define amdgpu_ps <2 x float> @extract_elt0_elt1_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps <2 x float> @extract_elt0_elt1_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { + %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 0, i32 1> ret <2 x float> %shuf } ; CHECK-LABEL: @extract_elt1_elt2_raw_buffer_load_format_v4f32( -; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 1, i32 2> ; CHECK-NEXT: ret <2 x float> %shuf -define amdgpu_ps <2 x float> @extract_elt1_elt2_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps <2 x float> @extract_elt1_elt2_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { + %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 1, i32 2> ret <2 x float> %shuf } ; CHECK-LABEL: @extract_elt2_elt3_raw_buffer_load_format_v4f32( -; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 2, i32 3> ; CHECK-NEXT: ret <2 x float> %shuf -define amdgpu_ps <2 x float> @extract_elt2_elt3_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps <2 x float> @extract_elt2_elt3_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { + %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 2, i32 3> ret <2 x float> %shuf } ; CHECK-LABEL: @extract_elt0_elt1_elt2_raw_buffer_load_format_v4f32( -; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2> ; CHECK-NEXT: ret <3 x float> %shuf -define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { + %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2> ret <3 x float> %shuf } ; CHECK-LABEL: @extract_elt1_elt2_elt3_raw_buffer_load_format_v4f32( -; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 1, i32 2, i32 3> ; CHECK-NEXT: ret <3 x float> %shuf -define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { + %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 1, i32 2, i32 3> ret <3 x float> %shuf } ; CHECK-LABEL: @extract_elt0_elt2_elt3_raw_buffer_load_format_v4f32( -; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 2, i32 3> ; CHECK-NEXT: ret <3 x float> %shuf -define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { + %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 2, i32 3> ret <3 x float> %shuf } ; CHECK-LABEL: @extract_elt0_raw_buffer_load_format_v3f32( -; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: ret float %data -define amdgpu_ps float @extract_elt0_raw_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps float @extract_elt0_raw_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { + %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) %elt0 = extractelement <3 x float> %data, i32 0 ret float %elt0 } ; CHECK-LABEL: @extract_elt1_raw_buffer_load_format_v3f32( -; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: %elt1 = extractelement <2 x float> %data, i32 1 ; CHECK-NEXT: ret float %elt1 -define amdgpu_ps float @extract_elt1_raw_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps float @extract_elt1_raw_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { + %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) %elt1 = extractelement <3 x float> %data, i32 1 ret float %elt1 } ; CHECK-LABEL: @extract_elt2_raw_buffer_load_format_v3f32( -; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: %elt1 = extractelement <3 x float> %data, i32 2 ; CHECK-NEXT: ret float %elt1 -define amdgpu_ps float @extract_elt2_raw_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps float @extract_elt2_raw_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { + %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) %elt1 = extractelement <3 x float> %data, i32 2 ret float %elt1 } ; CHECK-LABEL: @extract_elt0_elt1_raw_buffer_load_format_v3f32( -; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: ret <2 x float> -define amdgpu_ps <2 x float> @extract_elt0_elt1_raw_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps <2 x float> @extract_elt0_elt1_raw_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { + %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 0, i32 1> ret <2 x float> %shuf } ; CHECK-LABEL: @extract_elt1_elt2_raw_buffer_load_format_v3f32( -; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 1, i32 2> ; CHECK-NEXT: ret <2 x float> %shuf -define amdgpu_ps <2 x float> @extract_elt1_elt2_raw_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps <2 x float> @extract_elt1_elt2_raw_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { + %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 1, i32 2> ret <2 x float> %shuf } ; CHECK-LABEL: @extract0_bitcast_raw_buffer_load_format_v4f32( -; CHECK-NEXT: %tmp = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %tmp = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: %tmp2 = bitcast float %tmp to i32 ; CHECK-NEXT: ret i32 %tmp2 -define i32 @extract0_bitcast_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %tmp = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +define i32 @extract0_bitcast_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { + %tmp = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) %tmp1 = bitcast <4 x float> %tmp to <4 x i32> %tmp2 = extractelement <4 x i32> %tmp1, i32 0 ret i32 %tmp2 } ; CHECK-LABEL: @extract0_bitcast_raw_buffer_load_format_v4i32( -; CHECK-NEXT: %tmp = call i32 @llvm.amdgcn.raw.buffer.load.format.i32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %tmp = call i32 @llvm.amdgcn.raw.buffer.load.format.i32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: %tmp2 = bitcast i32 %tmp to float ; CHECK-NEXT: ret float %tmp2 -define float @extract0_bitcast_raw_buffer_load_format_v4i32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %tmp = call <4 x i32> @llvm.amdgcn.raw.buffer.load.format.v4i32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +define float @extract0_bitcast_raw_buffer_load_format_v4i32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { + %tmp = call <4 x i32> @llvm.amdgcn.raw.buffer.load.format.v4i32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) %tmp1 = bitcast <4 x i32> %tmp to <4 x float> %tmp2 = extractelement <4 x float> %tmp1, i32 0 ret float %tmp2 } ; CHECK-LABEL: @preserve_metadata_extract_elt0_raw_buffer_load_format_v2f32( -; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent), !fpmath !0 +; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0), !fpmath !0 ; CHECK-NEXT: ret float %data -define amdgpu_ps float @preserve_metadata_extract_elt0_raw_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent), !fpmath !0 +define amdgpu_ps float @preserve_metadata_extract_elt0_raw_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { + %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0), !fpmath !0 %elt0 = extractelement <2 x float> %data, i32 0 ret float %elt0 } @@ -800,229 +800,229 @@ declare <4 x i32> @llvm.amdgcn.raw.buffer.load.format.v4i32(<4 x i32>, i32, i32, ; -------------------------------------------------------------------- ; CHECK-LABEL: @struct_buffer_load_f32( -; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: ret float %data -define amdgpu_ps float @struct_buffer_load_f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps float @struct_buffer_load_f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { + %data = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) ret float %data } ; CHECK-LABEL: @struct_buffer_load_v1f32( -; CHECK-NEXT: %data = call <1 x float> @llvm.amdgcn.struct.buffer.load.v1f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <1 x float> @llvm.amdgcn.struct.buffer.load.v1f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: ret <1 x float> %data -define amdgpu_ps <1 x float> @struct_buffer_load_v1f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <1 x float> @llvm.amdgcn.struct.buffer.load.v1f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps <1 x float> @struct_buffer_load_v1f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { + %data = call <1 x float> @llvm.amdgcn.struct.buffer.load.v1f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) ret <1 x float> %data } ; CHECK-LABEL: @struct_buffer_load_v2f32( -; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: ret <2 x float> %data -define amdgpu_ps <2 x float> @struct_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps <2 x float> @struct_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { + %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) ret <2 x float> %data } ; CHECK-LABEL: @struct_buffer_load_v4f32( -; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: ret <4 x float> %data -define amdgpu_ps <4 x float> @struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps <4 x float> @struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { + %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) ret <4 x float> %data } ; CHECK-LABEL: @extract_elt0_struct_buffer_load_v2f32( -; CHECK: %data = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK: %data = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: ret float %data -define amdgpu_ps float @extract_elt0_struct_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps float @extract_elt0_struct_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { + %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) %elt0 = extractelement <2 x float> %data, i32 0 ret float %elt0 } ; CHECK-LABEL: @extract_elt1_struct_buffer_load_v2f32( -; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: %elt1 = extractelement <2 x float> %data, i32 1 ; CHECK-NEXT: ret float %elt1 -define amdgpu_ps float @extract_elt1_struct_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps float @extract_elt1_struct_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { + %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) %elt1 = extractelement <2 x float> %data, i32 1 ret float %elt1 } ; CHECK-LABEL: @extract_elt0_struct_buffer_load_v4f32( -; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: ret float %data -define amdgpu_ps float @extract_elt0_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps float @extract_elt0_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { + %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } ; CHECK-LABEL: @extract_elt1_struct_buffer_load_v4f32( -; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: %elt1 = extractelement <2 x float> %data, i32 1 ; CHECK-NEXT: ret float %elt1 -define amdgpu_ps float @extract_elt1_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps float @extract_elt1_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { + %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) %elt1 = extractelement <4 x float> %data, i32 1 ret float %elt1 } ; CHECK-LABEL: @extract_elt2_struct_buffer_load_v4f32( -; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: %elt1 = extractelement <4 x float> %data, i32 2 ; CHECK-NEXT: ret float %elt1 -define amdgpu_ps float @extract_elt2_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps float @extract_elt2_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { + %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) %elt1 = extractelement <4 x float> %data, i32 2 ret float %elt1 } ; CHECK-LABEL: @extract_elt3_struct_buffer_load_v4f32( -; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: %elt1 = extractelement <4 x float> %data, i32 3 ; CHECK-NEXT: ret float %elt1 -define amdgpu_ps float @extract_elt3_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps float @extract_elt3_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { + %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) %elt1 = extractelement <4 x float> %data, i32 3 ret float %elt1 } ; CHECK-LABEL: @extract_elt0_elt1_struct_buffer_load_v4f32( -; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: ret <2 x float> -define amdgpu_ps <2 x float> @extract_elt0_elt1_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps <2 x float> @extract_elt0_elt1_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { + %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 0, i32 1> ret <2 x float> %shuf } ; CHECK-LABEL: @extract_elt1_elt2_struct_buffer_load_v4f32( -; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 1, i32 2> ; CHECK-NEXT: ret <2 x float> %shuf -define amdgpu_ps <2 x float> @extract_elt1_elt2_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps <2 x float> @extract_elt1_elt2_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { + %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 1, i32 2> ret <2 x float> %shuf } ; CHECK-LABEL: @extract_elt2_elt3_struct_buffer_load_v4f32( -; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 2, i32 3> ; CHECK-NEXT: ret <2 x float> %shuf -define amdgpu_ps <2 x float> @extract_elt2_elt3_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps <2 x float> @extract_elt2_elt3_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { + %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 2, i32 3> ret <2 x float> %shuf } ; CHECK-LABEL: @extract_elt0_elt1_elt2_struct_buffer_load_v4f32( -; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2> ; CHECK-NEXT: ret <3 x float> %shuf -define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { + %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2> ret <3 x float> %shuf } ; CHECK-LABEL: @extract_elt1_elt2_elt3_struct_buffer_load_v4f32( -; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 1, i32 2, i32 3> ; CHECK-NEXT: ret <3 x float> %shuf -define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { + %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 1, i32 2, i32 3> ret <3 x float> %shuf } ; CHECK-LABEL: @extract_elt0_elt2_elt3_struct_buffer_load_v4f32( -; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 2, i32 3> ; CHECK-NEXT: ret <3 x float> %shuf -define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { + %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 2, i32 3> ret <3 x float> %shuf } ; CHECK-LABEL: @extract_elt0_struct_buffer_load_v3f32( -; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: ret float %data -define amdgpu_ps float @extract_elt0_struct_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps float @extract_elt0_struct_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { + %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) %elt0 = extractelement <3 x float> %data, i32 0 ret float %elt0 } ; CHECK-LABEL: @extract_elt1_struct_buffer_load_v3f32( -; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: %elt1 = extractelement <2 x float> %data, i32 1 ; CHECK-NEXT: ret float %elt1 -define amdgpu_ps float @extract_elt1_struct_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps float @extract_elt1_struct_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { + %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) %elt1 = extractelement <3 x float> %data, i32 1 ret float %elt1 } ; CHECK-LABEL: @extract_elt2_struct_buffer_load_v3f32( -; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: %elt1 = extractelement <3 x float> %data, i32 2 ; CHECK-NEXT: ret float %elt1 -define amdgpu_ps float @extract_elt2_struct_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps float @extract_elt2_struct_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { + %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) %elt1 = extractelement <3 x float> %data, i32 2 ret float %elt1 } ; CHECK-LABEL: @extract_elt0_elt1_struct_buffer_load_v3f32( -; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: ret <2 x float> -define amdgpu_ps <2 x float> @extract_elt0_elt1_struct_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps <2 x float> @extract_elt0_elt1_struct_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { + %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 0, i32 1> ret <2 x float> %shuf } ; CHECK-LABEL: @extract_elt1_elt2_struct_buffer_load_v3f32( -; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 1, i32 2> ; CHECK-NEXT: ret <2 x float> %shuf -define amdgpu_ps <2 x float> @extract_elt1_elt2_struct_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps <2 x float> @extract_elt1_elt2_struct_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { + %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 1, i32 2> ret <2 x float> %shuf } ; CHECK-LABEL: @extract0_bitcast_struct_buffer_load_v4f32( -; CHECK-NEXT: %tmp = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %tmp = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: %tmp2 = bitcast float %tmp to i32 ; CHECK-NEXT: ret i32 %tmp2 -define i32 @extract0_bitcast_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %tmp = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +define i32 @extract0_bitcast_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { + %tmp = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) %tmp1 = bitcast <4 x float> %tmp to <4 x i32> %tmp2 = extractelement <4 x i32> %tmp1, i32 0 ret i32 %tmp2 } ; CHECK-LABEL: @extract0_bitcast_struct_buffer_load_v4i32( -; CHECK-NEXT: %tmp = call i32 @llvm.amdgcn.struct.buffer.load.i32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %tmp = call i32 @llvm.amdgcn.struct.buffer.load.i32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: %tmp2 = bitcast i32 %tmp to float ; CHECK-NEXT: ret float %tmp2 -define float @extract0_bitcast_struct_buffer_load_v4i32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %tmp = call <4 x i32> @llvm.amdgcn.struct.buffer.load.v4i32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +define float @extract0_bitcast_struct_buffer_load_v4i32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { + %tmp = call <4 x i32> @llvm.amdgcn.struct.buffer.load.v4i32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) %tmp1 = bitcast <4 x i32> %tmp to <4 x float> %tmp2 = extractelement <4 x float> %tmp1, i32 0 ret float %tmp2 } ; CHECK-LABEL: @preserve_metadata_extract_elt0_struct_buffer_load_v2f32( -; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent), !fpmath !0 +; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0), !fpmath !0 ; CHECK-NEXT: ret float %data -define amdgpu_ps float @preserve_metadata_extract_elt0_struct_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent), !fpmath !0 +define amdgpu_ps float @preserve_metadata_extract_elt0_struct_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { + %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0), !fpmath !0 %elt0 = extractelement <2 x float> %data, i32 0 ret float %elt0 } @@ -1040,229 +1040,229 @@ declare <4 x i32> @llvm.amdgcn.struct.buffer.load.v4i32(<4 x i32>, i32, i32, i32 ; -------------------------------------------------------------------- ; CHECK-LABEL: @struct_buffer_load_format_f32( -; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: ret float %data -define amdgpu_ps float @struct_buffer_load_format_f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps float @struct_buffer_load_format_f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { + %data = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) ret float %data } ; CHECK-LABEL: @struct_buffer_load_format_v1f32( -; CHECK-NEXT: %data = call <1 x float> @llvm.amdgcn.struct.buffer.load.format.v1f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <1 x float> @llvm.amdgcn.struct.buffer.load.format.v1f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: ret <1 x float> %data -define amdgpu_ps <1 x float> @struct_buffer_load_format_v1f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <1 x float> @llvm.amdgcn.struct.buffer.load.format.v1f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps <1 x float> @struct_buffer_load_format_v1f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { + %data = call <1 x float> @llvm.amdgcn.struct.buffer.load.format.v1f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) ret <1 x float> %data } ; CHECK-LABEL: @struct_buffer_load_format_v2f32( -; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: ret <2 x float> %data -define amdgpu_ps <2 x float> @struct_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps <2 x float> @struct_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { + %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) ret <2 x float> %data } ; CHECK-LABEL: @struct_buffer_load_format_v4f32( -; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: ret <4 x float> %data -define amdgpu_ps <4 x float> @struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps <4 x float> @struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { + %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) ret <4 x float> %data } ; CHECK-LABEL: @extract_elt0_struct_buffer_load_format_v2f32( -; CHECK: %data = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK: %data = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: ret float %data -define amdgpu_ps float @extract_elt0_struct_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps float @extract_elt0_struct_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { + %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) %elt0 = extractelement <2 x float> %data, i32 0 ret float %elt0 } ; CHECK-LABEL: @extract_elt1_struct_buffer_load_format_v2f32( -; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: %elt1 = extractelement <2 x float> %data, i32 1 ; CHECK-NEXT: ret float %elt1 -define amdgpu_ps float @extract_elt1_struct_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps float @extract_elt1_struct_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { + %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) %elt1 = extractelement <2 x float> %data, i32 1 ret float %elt1 } ; CHECK-LABEL: @extract_elt0_struct_buffer_load_format_v4f32( -; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: ret float %data -define amdgpu_ps float @extract_elt0_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps float @extract_elt0_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { + %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } ; CHECK-LABEL: @extract_elt1_struct_buffer_load_format_v4f32( -; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: %elt1 = extractelement <2 x float> %data, i32 1 ; CHECK-NEXT: ret float %elt1 -define amdgpu_ps float @extract_elt1_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps float @extract_elt1_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { + %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) %elt1 = extractelement <4 x float> %data, i32 1 ret float %elt1 } ; CHECK-LABEL: @extract_elt2_struct_buffer_load_format_v4f32( -; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: %elt1 = extractelement <4 x float> %data, i32 2 ; CHECK-NEXT: ret float %elt1 -define amdgpu_ps float @extract_elt2_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps float @extract_elt2_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { + %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) %elt1 = extractelement <4 x float> %data, i32 2 ret float %elt1 } ; CHECK-LABEL: @extract_elt3_struct_buffer_load_format_v4f32( -; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: %elt1 = extractelement <4 x float> %data, i32 3 ; CHECK-NEXT: ret float %elt1 -define amdgpu_ps float @extract_elt3_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps float @extract_elt3_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { + %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) %elt1 = extractelement <4 x float> %data, i32 3 ret float %elt1 } ; CHECK-LABEL: @extract_elt0_elt1_struct_buffer_load_format_v4f32( -; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: ret <2 x float> -define amdgpu_ps <2 x float> @extract_elt0_elt1_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps <2 x float> @extract_elt0_elt1_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { + %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 0, i32 1> ret <2 x float> %shuf } ; CHECK-LABEL: @extract_elt1_elt2_struct_buffer_load_format_v4f32( -; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 1, i32 2> ; CHECK-NEXT: ret <2 x float> %shuf -define amdgpu_ps <2 x float> @extract_elt1_elt2_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps <2 x float> @extract_elt1_elt2_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { + %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 1, i32 2> ret <2 x float> %shuf } ; CHECK-LABEL: @extract_elt2_elt3_struct_buffer_load_format_v4f32( -; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 2, i32 3> ; CHECK-NEXT: ret <2 x float> %shuf -define amdgpu_ps <2 x float> @extract_elt2_elt3_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps <2 x float> @extract_elt2_elt3_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { + %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 2, i32 3> ret <2 x float> %shuf } ; CHECK-LABEL: @extract_elt0_elt1_elt2_struct_buffer_load_format_v4f32( -; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2> ; CHECK-NEXT: ret <3 x float> %shuf -define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { + %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2> ret <3 x float> %shuf } ; CHECK-LABEL: @extract_elt1_elt2_elt3_struct_buffer_load_format_v4f32( -; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 1, i32 2, i32 3> ; CHECK-NEXT: ret <3 x float> %shuf -define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { + %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 1, i32 2, i32 3> ret <3 x float> %shuf } ; CHECK-LABEL: @extract_elt0_elt2_elt3_struct_buffer_load_format_v4f32( -; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 2, i32 3> ; CHECK-NEXT: ret <3 x float> %shuf -define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { + %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 2, i32 3> ret <3 x float> %shuf } ; CHECK-LABEL: @extract_elt0_struct_buffer_load_format_v3f32( -; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: ret float %data -define amdgpu_ps float @extract_elt0_struct_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps float @extract_elt0_struct_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { + %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) %elt0 = extractelement <3 x float> %data, i32 0 ret float %elt0 } ; CHECK-LABEL: @extract_elt1_struct_buffer_load_format_v3f32( -; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: %elt1 = extractelement <2 x float> %data, i32 1 ; CHECK-NEXT: ret float %elt1 -define amdgpu_ps float @extract_elt1_struct_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps float @extract_elt1_struct_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { + %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) %elt1 = extractelement <3 x float> %data, i32 1 ret float %elt1 } ; CHECK-LABEL: @extract_elt2_struct_buffer_load_format_v3f32( -; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: %elt1 = extractelement <3 x float> %data, i32 2 ; CHECK-NEXT: ret float %elt1 -define amdgpu_ps float @extract_elt2_struct_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps float @extract_elt2_struct_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { + %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) %elt1 = extractelement <3 x float> %data, i32 2 ret float %elt1 } ; CHECK-LABEL: @extract_elt0_elt1_struct_buffer_load_format_v3f32( -; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: ret <2 x float> -define amdgpu_ps <2 x float> @extract_elt0_elt1_struct_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps <2 x float> @extract_elt0_elt1_struct_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { + %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 0, i32 1> ret <2 x float> %shuf } ; CHECK-LABEL: @extract_elt1_elt2_struct_buffer_load_format_v3f32( -; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 1, i32 2> ; CHECK-NEXT: ret <2 x float> %shuf -define amdgpu_ps <2 x float> @extract_elt1_elt2_struct_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps <2 x float> @extract_elt1_elt2_struct_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { + %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 1, i32 2> ret <2 x float> %shuf } ; CHECK-LABEL: @extract0_bitcast_struct_buffer_load_format_v4f32( -; CHECK-NEXT: %tmp = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %tmp = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: %tmp2 = bitcast float %tmp to i32 ; CHECK-NEXT: ret i32 %tmp2 -define i32 @extract0_bitcast_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %tmp = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +define i32 @extract0_bitcast_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { + %tmp = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) %tmp1 = bitcast <4 x float> %tmp to <4 x i32> %tmp2 = extractelement <4 x i32> %tmp1, i32 0 ret i32 %tmp2 } ; CHECK-LABEL: @extract0_bitcast_struct_buffer_load_format_v4i32( -; CHECK-NEXT: %tmp = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %tmp = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: %tmp2 = bitcast i32 %tmp to float ; CHECK-NEXT: ret float %tmp2 -define float @extract0_bitcast_struct_buffer_load_format_v4i32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %tmp = call <4 x i32> @llvm.amdgcn.struct.buffer.load.format.v4i32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +define float @extract0_bitcast_struct_buffer_load_format_v4i32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { + %tmp = call <4 x i32> @llvm.amdgcn.struct.buffer.load.format.v4i32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) %tmp1 = bitcast <4 x i32> %tmp to <4 x float> %tmp2 = extractelement <4 x float> %tmp1, i32 0 ret float %tmp2 } ; CHECK-LABEL: @preserve_metadata_extract_elt0_struct_buffer_load_format_v2f32( -; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent), !fpmath !0 +; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0), !fpmath !0 ; CHECK-NEXT: ret float %data -define amdgpu_ps float @preserve_metadata_extract_elt0_struct_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent), !fpmath !0 +define amdgpu_ps float @preserve_metadata_extract_elt0_struct_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { + %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0), !fpmath !0 %elt0 = extractelement <2 x float> %data, i32 0 ret float %elt0 } @@ -1319,16 +1319,6 @@ define amdgpu_ps float @extract_elt0_image_sample_2d_v4f32_f32(float %s, float % ret float %elt0 } -; CHECK-LABEL: @extract_elt0_invalid_dmask_image_sample_1d_v4f32_f32( -; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 %dmask, float %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) -; CHECK-NEXT: %elt0 = extractelement <4 x float> %data, i32 0 -; CHECK-NEXT: ret float %elt0 -define amdgpu_ps float @extract_elt0_invalid_dmask_image_sample_1d_v4f32_f32(float %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc, i32 %dmask) #0 { - %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 %dmask, float %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) - %elt0 = extractelement <4 x float> %data, i32 0 - ret float %elt0 -} - ; CHECK-LABEL: @extract_elt0_dmask_0000_image_sample_3d_v4f32_f32( ; CHECK-NEXT: ret float undef define amdgpu_ps float @extract_elt0_dmask_0000_image_sample_3d_v4f32_f32(float %s, float %t, float %r, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { @@ -2395,14 +2385,7 @@ define amdgpu_ps float @extract_elt0_image_getresinfo_1d_v4f32_i32(i32 %mip, <8 ret float %elt0 } -; Verify that we don't creash on non-constant operand. -define protected <4 x half> @__llvm_amdgcn_image_sample_d_1darray_v4f16_f32_f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1 zeroext, i32, i32) local_unnamed_addr { - %tmp = tail call <4 x half> @llvm.amdgcn.image.sample.d.1darray.v4f16.f32.f32(i32 %0, float %1, float %2, float %3, float %4, <8 x i32> %5, <4 x i32> %6, i1 zeroext %7, i32 %8, i32 %9) #1 - ret <4 x half> %tmp -} - declare <4 x float> @llvm.amdgcn.image.getresinfo.1d.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #1 -declare <4 x half> @llvm.amdgcn.image.sample.d.1darray.v4f16.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) ; -------------------------------------------------------------------- ; TFE / LWE diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll index b8e19e2bc94..a065d10946d 100644 --- a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll +++ b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll @@ -1078,17 +1078,7 @@ define i64 @sbfe_offset_32_width_32_i64(i64 %src) { ; llvm.amdgcn.exp ; -------------------------------------------------------------------- -declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) nounwind inaccessiblememonly - -; Make sure no crashing on invalid variable params -; CHECK-LABEL: @exp_invalid_inputs( -; CHECK: call void @llvm.amdgcn.exp.f32(i32 0, i32 %en, float 1.000000e+00, float 2.000000e+00, float 5.000000e-01, float 4.000000e+00, i1 true, i1 false) -; CHECK: call void @llvm.amdgcn.exp.f32(i32 %tgt, i32 15, float 1.000000e+00, float 2.000000e+00, float 5.000000e-01, float 4.000000e+00, i1 true, i1 false) -define void @exp_invalid_inputs(i32 %tgt, i32 %en) { - call void @llvm.amdgcn.exp.f32(i32 0, i32 %en, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false) - call void @llvm.amdgcn.exp.f32(i32 %tgt, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false) - ret void -} +declare void @llvm.amdgcn.exp.f32(i32 immarg, i32 immarg, float, float, float, float, i1 immarg, i1 immarg) nounwind inaccessiblememonly ; CHECK-LABEL: @exp_disabled_inputs_to_undef( ; CHECK: call void @llvm.amdgcn.exp.f32(i32 0, i32 1, float 1.000000e+00, float undef, float undef, float undef, i1 true, i1 false) @@ -1136,16 +1126,7 @@ define void @exp_disabled_inputs_to_undef(float %x, float %y, float %z, float %w ; llvm.amdgcn.exp.compr ; -------------------------------------------------------------------- -declare void @llvm.amdgcn.exp.compr.v2f16(i32, i32, <2 x half>, <2 x half>, i1, i1) nounwind inaccessiblememonly - -; CHECK-LABEL: @exp_compr_invalid_inputs( -; CHECK: call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 %en, <2 x half> <half 0xH3C00, half 0xH4000>, <2 x half> <half 0xH3800, half 0xH4400>, i1 true, i1 false) -; CHECK: call void @llvm.amdgcn.exp.compr.v2f16(i32 %tgt, i32 5, <2 x half> <half 0xH3C00, half 0xH4000>, <2 x half> <half 0xH3800, half 0xH4400>, i1 true, i1 false) -define void @exp_compr_invalid_inputs(i32 %tgt, i32 %en) { - call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 %en, <2 x half> <half 1.0, half 2.0>, <2 x half> <half 0.5, half 4.0>, i1 true, i1 false) - call void @llvm.amdgcn.exp.compr.v2f16(i32 %tgt, i32 5, <2 x half> <half 1.0, half 2.0>, <2 x half> <half 0.5, half 4.0>, i1 true, i1 false) - ret void -} +declare void @llvm.amdgcn.exp.compr.v2f16(i32 immarg, i32 immarg, <2 x half>, <2 x half>, i1 immarg, i1 immarg) nounwind inaccessiblememonly ; CHECK-LABEL: @exp_compr_disabled_inputs_to_undef( ; CHECK: call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 0, <2 x half> undef, <2 x half> undef, i1 true, i1 false) @@ -1404,17 +1385,9 @@ define float @fmed3_0_1_undef_f32() { ; llvm.amdgcn.icmp ; -------------------------------------------------------------------- -declare i64 @llvm.amdgcn.icmp.i32(i32, i32, i32) nounwind readnone convergent -declare i64 @llvm.amdgcn.icmp.i64(i64, i64, i32) nounwind readnone convergent -declare i64 @llvm.amdgcn.icmp.i1(i1, i1, i32) nounwind readnone convergent - -; Make sure there's no crash for invalid input -; CHECK-LABEL: @invalid_nonconstant_icmp_code( -; CHECK: call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 %c) -define i64 @invalid_nonconstant_icmp_code(i32 %a, i32 %b, i32 %c) { - %result = call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 %c) - ret i64 %result -} +declare i64 @llvm.amdgcn.icmp.i32(i32, i32, i32 immarg) nounwind readnone convergent +declare i64 @llvm.amdgcn.icmp.i64(i64, i64, i32 immarg) nounwind readnone convergent +declare i64 @llvm.amdgcn.icmp.i1(i1, i1, i32 immarg) nounwind readnone convergent ; CHECK-LABEL: @invalid_icmp_code( ; CHECK: %under = call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 31) @@ -2012,15 +1985,7 @@ define i64 @fold_icmp_i1_ne_0_icmp_ult_i16(i16 %a, i16 %b) { ; llvm.amdgcn.fcmp ; -------------------------------------------------------------------- -declare i64 @llvm.amdgcn.fcmp.f32(float, float, i32) nounwind readnone convergent - -; Make sure there's no crash for invalid input -; CHECK-LABEL: @invalid_nonconstant_fcmp_code( -; CHECK: call i64 @llvm.amdgcn.fcmp.f32(float %a, float %b, i32 %c) -define i64 @invalid_nonconstant_fcmp_code(float %a, float %b, i32 %c) { - %result = call i64 @llvm.amdgcn.fcmp.f32(float %a, float %b, i32 %c) - ret i64 %result -} +declare i64 @llvm.amdgcn.fcmp.f32(float, float, i32 immarg) nounwind readnone convergent ; CHECK-LABEL: @invalid_fcmp_code( ; CHECK: %under = call i64 @llvm.amdgcn.fcmp.f32(float %a, float %b, i32 -1) diff --git a/llvm/test/Transforms/LowerExpectIntrinsic/PR33346.ll b/llvm/test/Transforms/LowerExpectIntrinsic/PR33346.ll index ca962fbdc8f..5bcf93408b1 100644 --- a/llvm/test/Transforms/LowerExpectIntrinsic/PR33346.ll +++ b/llvm/test/Transforms/LowerExpectIntrinsic/PR33346.ll @@ -7,12 +7,12 @@ bb: store i64 %arg, i64* %tmp, align 8 %tmp1 = load i64, i64* %tmp, align 8 %tmp2 = load i64, i64* %tmp, align 8 - %tmp3 = call i64 @llvm.expect.i64(i64 %tmp1, i64 %tmp2) + %tmp3 = call i64 @llvm.expect.i64(i64 %tmp1, i64 123) ret i64 %tmp3 } ; Function Attrs: nounwind readnone -declare i64 @llvm.expect.i64(i64, i64) +declare i64 @llvm.expect.i64(i64, i64 immarg) !llvm.module.flags = !{!0} diff --git a/llvm/test/Verifier/AMDGPU/intrinsic-immarg.ll b/llvm/test/Verifier/AMDGPU/intrinsic-immarg.ll new file mode 100644 index 00000000000..5b0e5b3aad7 --- /dev/null +++ b/llvm/test/Verifier/AMDGPU/intrinsic-immarg.ll @@ -0,0 +1,552 @@ +; RUN: not llvm-as %s -o /dev/null 2>&1 | FileCheck %s + +declare float @llvm.amdgcn.buffer.load.f32(<4 x i32>, i32, i32, i1, i1) +define void @buffer_load_f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i1 %bool) { + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i1 %bool + ; CHECK-NEXT: %data0 = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 %bool, i1 false) + %data0 = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 %bool, i1 false) + + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i1 %bool + ; CHECK-NEXT: %data1 = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 %bool) + %data1 = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 %bool) + ret void +} + +declare float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32>, i32, i32, i32) +define void @raw_buffer_load_f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %arg) { + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %arg + ; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %arg) + %data = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %arg) + ret void +} + +declare float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32>, i32, i32, i32) +define void @raw_buffer_load_format_f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %arg) { + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %arg + ; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %arg) + %data = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %arg) + ret void +} + +declare float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32>, i32, i32, i32, i32) +define void @struct_buffer_load_f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %arg) { + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %arg + ; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %arg) + %data = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %arg) + ret void +} + +declare float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32>, i32, i32, i32, i32) +define void @struct_buffer_load_format_f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %arg) { + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %arg + ; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %arg) + %data = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %arg) + ret void +} + +declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) +define void @invalid_image_sample_1d_v4f32_f32(float %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc, i32 %dmask, i1 %bool, i32 %arg) { + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %dmask + ; CHECK-NEXT: %data0 = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 %dmask, float %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data0 = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 %dmask, float %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i1 %bool + ; CHECK-NEXT: %data1 = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 0, float %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i1 %bool, i32 0, i32 0) + %data1 = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 0, float %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i1 %bool, i32 0, i32 0) + + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %arg + ; CHECK-NEXT: %data2 = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 0, float %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 %arg, i32 0) + %data2 = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 0, float %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 %arg, i32 0) + + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %arg + ; CHECK-NEXT: %data3 = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 0, float %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 %arg) + %data3 = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 0, float %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 %arg) + ret void +} + +declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) +define void @exp_invalid_inputs(i32 %tgt, i32 %en, i1 %bool) { + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %en + ; CHECK-NEXT: call void @llvm.amdgcn.exp.f32(i32 0, i32 %en, float 1.000000e+00, float 2.000000e+00, float 5.000000e-01, float 4.000000e+00, i1 true, i1 false) + call void @llvm.amdgcn.exp.f32(i32 0, i32 %en, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false) + + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %tgt + ; CHECK-NEXT: call void @llvm.amdgcn.exp.f32(i32 %tgt, i32 15, float 1.000000e+00, float 2.000000e+00, float 5.000000e-01, float 4.000000e+00, i1 true, i1 false) + call void @llvm.amdgcn.exp.f32(i32 %tgt, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false) + + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i1 %bool + ; CHECK-NEXT: call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float 1.000000e+00, float 2.000000e+00, float 5.000000e-01, float 4.000000e+00, i1 %bool, i1 false) + call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 %bool, i1 false) + + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i1 %bool + ; CHECK-NEXT: call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float 1.000000e+00, float 2.000000e+00, float 5.000000e-01, float 4.000000e+00, i1 false, i1 %bool) + call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 false, i1 %bool) + ret void +} + +declare void @llvm.amdgcn.exp.compr.v2f16(i32, i32, <2 x half>, <2 x half>, i1, i1) + +define void @exp_compr_invalid_inputs(i32 %tgt, i32 %en, i1 %bool) { + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %en + ; CHECK-NEXT: call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 %en, <2 x half> <half 0xH3C00, half 0xH4000>, <2 x half> <half 0xH3800, half 0xH4400>, i1 true, i1 false) + call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 %en, <2 x half> <half 1.0, half 2.0>, <2 x half> <half 0.5, half 4.0>, i1 true, i1 false) + + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %tgt + ; CHECK-NEXT: call void @llvm.amdgcn.exp.compr.v2f16(i32 %tgt, i32 5, <2 x half> <half 0xH3C00, half 0xH4000>, <2 x half> <half 0xH3800, half 0xH4400>, i1 true, i1 false) + call void @llvm.amdgcn.exp.compr.v2f16(i32 %tgt, i32 5, <2 x half> <half 1.0, half 2.0>, <2 x half> <half 0.5, half 4.0>, i1 true, i1 false) + + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i1 %bool + ; CHECK-NEXT: call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 5, <2 x half> <half 0xH3C00, half 0xH4000>, <2 x half> <half 0xH3800, half 0xH4400>, i1 %bool, i1 false) + call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 5, <2 x half> <half 1.0, half 2.0>, <2 x half> <half 0.5, half 4.0>, i1 %bool, i1 false) + + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i1 %bool + ; CHECK-NEXT: call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 5, <2 x half> <half 0xH3C00, half 0xH4000>, <2 x half> <half 0xH3800, half 0xH4400>, i1 false, i1 %bool) + call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 5, <2 x half> <half 1.0, half 2.0>, <2 x half> <half 0.5, half 4.0>, i1 false, i1 %bool) + ret void +} + +declare i64 @llvm.amdgcn.icmp.i32(i32, i32, i32) + +define i64 @invalid_nonconstant_icmp_code(i32 %a, i32 %b, i32 %c) { + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %c + ; CHECK-NEXT: %result = call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 %c) + %result = call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 %c) + ret i64 %result +} + +declare i64 @llvm.amdgcn.fcmp.f32(float, float, i32) +define i64 @invalid_nonconstant_fcmp_code(float %a, float %b, i32 %c) { + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %c + ; CHECK-NEXT: %result = call i64 @llvm.amdgcn.fcmp.f32(float %a, float %b, i32 %c) + %result = call i64 @llvm.amdgcn.fcmp.f32(float %a, float %b, i32 %c) + ret i64 %result +} + +declare i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* nocapture, i32, i32, i32, i1) +define amdgpu_kernel void @invalid_atomic_inc(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %var, i1 %bool) { + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %var + ; CHECK-NEXT: %result0 = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 %var, i32 0, i1 false) + %result0 = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 %var, i32 0, i1 false) + + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %var + ; CHECK-NEXT: %result1 = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 0, i32 %var, i1 false) + %result1 = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 0, i32 %var, i1 false) + + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i1 %bool + ; CHECK-NEXT: %result2 = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 0, i32 0, i1 %bool) + %result2 = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 0, i32 0, i1 %bool) + ret void +} + +declare i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* nocapture, i32, i32, i32, i1) +define amdgpu_kernel void @invalid_atomic_dec(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %var, i1 %bool) { + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %var + ; CHECK-NEXT: %result0 = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 %var, i32 0, i1 false) + %result0 = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 %var, i32 0, i1 false) + + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %var + ; CHECK-NEXT: %result1 = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 0, i32 %var, i1 false) + %result1 = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 0, i32 %var, i1 false) + + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i1 %bool + ; CHECK-NEXT: %result2 = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 0, i32 0, i1 %bool) + %result2 = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 0, i32 0, i1 %bool) + ret void +} + +declare { float, i1 } @llvm.amdgcn.div.scale.f32(float, float, i1) +define amdgpu_kernel void @test_div_scale_f32_val_undef_undef(float addrspace(1)* %out) { + ; CHECK: immarg operand has non-immediate parameter + ; CHECK: i1 undef + ; CHECK: %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float 8.000000e+00, float undef, i1 undef) + %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float 8.0, float undef, i1 undef) + %result0 = extractvalue { float, i1 } %result, 0 + store float %result0, float addrspace(1)* %out, align 4 + ret void +} + +declare void @llvm.amdgcn.init.exec(i64) +define amdgpu_ps void @init_exec(i64 %var) { + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i64 %var + ; CHECK-NEXT: call void @llvm.amdgcn.init.exec(i64 %var) + call void @llvm.amdgcn.init.exec(i64 %var) + ret void +} + +declare i32 @llvm.amdgcn.s.sendmsg(i32, i32) +define void @sendmsg(i32 %arg0, i32 %arg1) { + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %arg0 + ; CHECK-NEXT: %val = call i32 @llvm.amdgcn.s.sendmsg(i32 %arg0, i32 %arg1) + %val = call i32 @llvm.amdgcn.s.sendmsg(i32 %arg0, i32 %arg1) + ret void +} + +declare i32 @llvm.amdgcn.s.sendmsghalt(i32, i32) +define void @sendmsghalt(i32 %arg0, i32 %arg1) { + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %arg0 + ; CHECK-NEXT: %val = call i32 @llvm.amdgcn.s.sendmsghalt(i32 %arg0, i32 %arg1) + %val = call i32 @llvm.amdgcn.s.sendmsghalt(i32 %arg0, i32 %arg1) + ret void +} + +declare i32 @llvm.amdgcn.s.waitcnt(i32) +define void @waitcnt(i32 %arg0) { + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %arg0 + ; CHECK-NEXT: %val = call i32 @llvm.amdgcn.s.waitcnt(i32 %arg0) + %val = call i32 @llvm.amdgcn.s.waitcnt(i32 %arg0) + ret void +} + +declare i32 @llvm.amdgcn.s.getreg(i32) +define void @getreg(i32 %arg0, i32 %arg1) { + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %arg0 + ; CHECK-NEXT: %val = call i32 @llvm.amdgcn.s.getreg(i32 %arg0) + %val = call i32 @llvm.amdgcn.s.getreg(i32 %arg0) + ret void +} + +declare i32 @llvm.amdgcn.s.sleep(i32) +define void @sleep(i32 %arg0, i32 %arg1) { + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %arg0 + ; CHECK-NEXT: %val = call i32 @llvm.amdgcn.s.sleep(i32 %arg0) + %val = call i32 @llvm.amdgcn.s.sleep(i32 %arg0) + ret void +} + +declare i32 @llvm.amdgcn.s.incperflevel(i32) +define void @incperflevel(i32 %arg0, i32 %arg1) { + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %arg0 + ; CHECK-NEXT: %val = call i32 @llvm.amdgcn.s.incperflevel(i32 %arg0) + %val = call i32 @llvm.amdgcn.s.incperflevel(i32 %arg0) + ret void +} + +declare i32 @llvm.amdgcn.s.decperflevel(i32) +define void @decperflevel(i32 %arg0, i32 %arg1) { + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %arg0 + ; CHECK-NEXT: %val = call i32 @llvm.amdgcn.s.decperflevel(i32 %arg0) + %val = call i32 @llvm.amdgcn.s.decperflevel(i32 %arg0) + ret void +} + +declare i32 @llvm.amdgcn.ds.swizzle(i32, i32) +define void @ds_swizzle(i32 %arg0, i32 %arg1) { + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %arg1 + ; CHECK-NEXT: %val = call i32 @llvm.amdgcn.ds.swizzle(i32 %arg0, i32 %arg1) + %val = call i32 @llvm.amdgcn.ds.swizzle(i32 %arg0, i32 %arg1) + ret void +} + +declare i32 @llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* nocapture, i32, i32, i32, i1, i32, i1, i1) +define amdgpu_kernel void @ds_ordered_add(i32 addrspace(2)* %gds, i32 addrspace(1)* %out, i32 %var, i1 %bool) { + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %var + ; CHECK-NEXT: %val0 = call i32 @llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 %var, i32 0, i1 false, i32 1, i1 true, i1 true) + %val0 = call i32 @llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 %var, i32 0, i1 false, i32 1, i1 true, i1 true) + + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %var + ; CHECK-NEXT: %val1 = call i32 @llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 0, i32 %var, i1 false, i32 1, i1 true, i1 true) + %val1 = call i32 @llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 0, i32 %var, i1 false, i32 1, i1 true, i1 true) + + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i1 %bool + ; CHECK-NEXT: %val2 = call i32 @llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 %bool, i32 1, i1 true, i1 true) + %val2 = call i32 @llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 %bool, i32 1, i1 true, i1 true) + + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %var + ; CHECK-NEXT: %val3 = call i32 @llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 %var, i1 true, i1 true) + %val3 = call i32 @llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 %var, i1 true, i1 true) + + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i1 %bool + ; CHECK-NEXT: %val4 = call i32 @llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 1, i1 %bool, i1 true) + %val4 = call i32 @llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 1, i1 %bool, i1 true) + + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i1 %bool + ; CHECK-NEXT: %val5 = call i32 @llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 1, i1 true, i1 %bool) + %val5 = call i32 @llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 1, i1 true, i1 %bool) + ret void +} + +declare i32 @llvm.amdgcn.ds.ordered.swap(i32 addrspace(2)* nocapture, i32, i32, i32, i1, i32, i1, i1) +define amdgpu_kernel void @ds_ordered_swap(i32 addrspace(2)* %gds, i32 addrspace(1)* %out, i32 %var, i1 %bool) { + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %var + ; CHECK-NEXT: %val0 = call i32 @llvm.amdgcn.ds.ordered.swap(i32 addrspace(2)* %gds, i32 31, i32 %var, i32 0, i1 false, i32 1, i1 true, i1 true) + %val0 = call i32 @llvm.amdgcn.ds.ordered.swap(i32 addrspace(2)* %gds, i32 31, i32 %var, i32 0, i1 false, i32 1, i1 true, i1 true) + + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %var + ; CHECK-NEXT: %val1 = call i32 @llvm.amdgcn.ds.ordered.swap(i32 addrspace(2)* %gds, i32 31, i32 0, i32 %var, i1 false, i32 1, i1 true, i1 true) + %val1 = call i32 @llvm.amdgcn.ds.ordered.swap(i32 addrspace(2)* %gds, i32 31, i32 0, i32 %var, i1 false, i32 1, i1 true, i1 true) + + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i1 %bool + ; CHECK-NEXT: %val2 = call i32 @llvm.amdgcn.ds.ordered.swap(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 %bool, i32 1, i1 true, i1 true) + %val2 = call i32 @llvm.amdgcn.ds.ordered.swap(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 %bool, i32 1, i1 true, i1 true) + + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %var + ; CHECK-NEXT: %val3 = call i32 @llvm.amdgcn.ds.ordered.swap(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 %var, i1 true, i1 true) + %val3 = call i32 @llvm.amdgcn.ds.ordered.swap(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 %var, i1 true, i1 true) + + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i1 %bool + ; CHECK-NEXT: %val4 = call i32 @llvm.amdgcn.ds.ordered.swap(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 1, i1 %bool, i1 true) + %val4 = call i32 @llvm.amdgcn.ds.ordered.swap(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 1, i1 %bool, i1 true) + + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i1 %bool + ; CHECK-NEXT: %val5 = call i32 @llvm.amdgcn.ds.ordered.swap(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 1, i1 true, i1 %bool) + %val5 = call i32 @llvm.amdgcn.ds.ordered.swap(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 1, i1 true, i1 %bool) + ret void +} + +declare i32 @llvm.amdgcn.mov.dpp.i32(i32, i32, i32, i32, i1) +define amdgpu_kernel void @mov_dpp_test(i32 addrspace(1)* %out, i32 %in1, i32 %var, i1 %bool) { + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %var + ; CHECK-NEXT: %val0 = call i32 @llvm.amdgcn.mov.dpp.i32(i32 %in1, i32 %var, i32 1, i32 1, i1 true) + %val0 = call i32 @llvm.amdgcn.mov.dpp.i32(i32 %in1, i32 %var, i32 1, i32 1, i1 1) + + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %var + ; CHECK-NEXT: %val1 = call i32 @llvm.amdgcn.mov.dpp.i32(i32 %in1, i32 1, i32 %var, i32 1, i1 true) + %val1 = call i32 @llvm.amdgcn.mov.dpp.i32(i32 %in1, i32 1, i32 %var, i32 1, i1 1) + + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %var + ; CHECK-NEXT: %val2 = call i32 @llvm.amdgcn.mov.dpp.i32(i32 %in1, i32 1, i32 1, i32 %var, i1 true) + %val2 = call i32 @llvm.amdgcn.mov.dpp.i32(i32 %in1, i32 1, i32 1, i32 %var, i1 1) + + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i1 %bool + ; CHECK-NEXT: %val3 = call i32 @llvm.amdgcn.mov.dpp.i32(i32 %in1, i32 1, i32 1, i32 1, i1 %bool) + %val3 = call i32 @llvm.amdgcn.mov.dpp.i32(i32 %in1, i32 1, i32 1, i32 1, i1 %bool) + ret void +} + +declare i32 @llvm.amdgcn.update.dpp.i32(i32, i32, i32, i32, i32, i1) +define amdgpu_kernel void @update_dpp_test(i32 addrspace(1)* %out, i32 %in1, i32 %in2, i32 %var, i1 %bool) { + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %var + ; CHECK-NEXT: %val0 = call i32 @llvm.amdgcn.update.dpp.i32(i32 %in1, i32 %in2, i32 %var, i32 1, i32 1, i1 true) + %val0 = call i32 @llvm.amdgcn.update.dpp.i32(i32 %in1, i32 %in2, i32 %var, i32 1, i32 1, i1 1) + + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %var + ; CHECK-NEXT: %val1 = call i32 @llvm.amdgcn.update.dpp.i32(i32 %in1, i32 %in2, i32 1, i32 %var, i32 1, i1 true) + %val1 = call i32 @llvm.amdgcn.update.dpp.i32(i32 %in1, i32 %in2, i32 1, i32 %var, i32 1, i1 1) + + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %var + ; CHECK-NEXT: %val2 = call i32 @llvm.amdgcn.update.dpp.i32(i32 %in1, i32 %in2, i32 1, i32 1, i32 %var, i1 true) + %val2 = call i32 @llvm.amdgcn.update.dpp.i32(i32 %in1, i32 %in2, i32 1, i32 1, i32 %var, i1 1) + + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i1 %bool + ; CHECK-NEXT: %val3 = call i32 @llvm.amdgcn.update.dpp.i32(i32 %in1, i32 %in2, i32 1, i32 1, i32 1, i1 %bool) + %val3 = call i32 @llvm.amdgcn.update.dpp.i32(i32 %in1, i32 %in2, i32 1, i32 1, i32 1, i1 %bool) + ret void +} + +declare <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32, i32, <8 x i32>, i32, i32) +define amdgpu_ps void @load_1d(<8 x i32> inreg %rsrc, i32 %s, i32 %var) { + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %var + ; CHECK-NEXT: %val0 = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 %var, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) + %val0 = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 %var, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) + + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %var + ; CHECK-NEXT: %val1 = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 %var, i32 0) + %val1 = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 %var, i32 0) + + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %var + ; CHECK-NEXT: %val2 = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 %var) + %val2 = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 %var) + ret void +} + +declare {<4 x float>,i32} @llvm.amdgcn.image.load.1d.v4f32i32.i32(i32, i32, <8 x i32>, i32, i32) +define amdgpu_ps void @load_1d_tfe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %val) { + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %val + ; CHECK-NEXT: %val0 = call { <4 x float>, i32 } @llvm.amdgcn.image.load.1d.sl_v4f32i32s.i32(i32 %val, i32 %s, <8 x i32> %rsrc, i32 1, i32 0) + %val0 = call {<4 x float>, i32} @llvm.amdgcn.image.load.1d.v4f32i32.i32(i32 %val, i32 %s, <8 x i32> %rsrc, i32 1, i32 0) + + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %val + ; CHECK-NEXT: %val1 = call { <4 x float>, i32 } @llvm.amdgcn.image.load.1d.sl_v4f32i32s.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 %val, i32 0) + %val1 = call {<4 x float>, i32} @llvm.amdgcn.image.load.1d.v4f32i32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 %val, i32 0) + + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %val + ; CHECK-NEXT: %val2 = call { <4 x float>, i32 } @llvm.amdgcn.image.load.1d.sl_v4f32i32s.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 1, i32 %val) + %val2 = call {<4 x float>, i32} @llvm.amdgcn.image.load.1d.v4f32i32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 1, i32 %val) + ret void +} + +declare {<4 x float>, i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) +define amdgpu_ps void @sample_1d_tfe(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 addrspace(1)* inreg %out, float %s, i32 %var, i1 %bool) { + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %var + ; CHECK-NEXT: %val0 = call { <4 x float>, i32 } @llvm.amdgcn.image.sample.1d.sl_v4f32i32s.f32(i32 %var, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 1, i32 0) + %val0 = call {<4 x float>, i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 %var, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 1, i32 0) + + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i1 %bool + ; CHECK-NEXT: %val1 = call { <4 x float>, i32 } @llvm.amdgcn.image.sample.1d.sl_v4f32i32s.f32(i32 16, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 %bool, i32 1, i32 0) + %val1 = call {<4 x float>, i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 16, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 %bool, i32 1, i32 0) + + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %var + ; CHECK-NEXT: %val2 = call { <4 x float>, i32 } @llvm.amdgcn.image.sample.1d.sl_v4f32i32s.f32(i32 16, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 %var, i32 0) + %val2 = call {<4 x float>, i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 16, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 %var, i32 0) + + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %var + ; CHECK-NEXT: %val3 = call { <4 x float>, i32 } @llvm.amdgcn.image.sample.1d.sl_v4f32i32s.f32(i32 %var, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 1, i32 %var) + %val3 = call {<4 x float>, i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 %var, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 1, i32 %var) + ret void +} + +declare <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32, i16, <8 x i32>, i32, i32) +define amdgpu_ps void @load_1d_a16(<8 x i32> inreg %rsrc, <2 x i16> %coords, i16 %s, i32 %var) { + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %var + ; CHECK-NEXT: %val0 = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32 %var, i16 %s, <8 x i32> %rsrc, i32 0, i32 0) + %val0 = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32 %var, i16 %s, <8 x i32> %rsrc, i32 0, i32 0) + + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %var + ; CHECK-NEXT: %val1 = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32 15, i16 %s, <8 x i32> %rsrc, i32 %var, i32 0) + %val1 = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32 15, i16 %s, <8 x i32> %rsrc, i32 %var, i32 0) + + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %var + ; CHECK-NEXT: %val2 = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32 15, i16 %s, <8 x i32> %rsrc, i32 0, i32 %var) + %val2 = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32 15, i16 %s, <8 x i32> %rsrc, i32 0, i32 %var) + ret void +} + +declare i32 @llvm.amdgcn.raw.buffer.atomic.swap.i32(i32, <4 x i32>, i32, i32, i32) +define amdgpu_ps void @raw_buffer_atomic_swap(<4 x i32> inreg %rsrc, i32 %data, i32 %var) { + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %var + ; CHECK-NEXT: %val2 = call i32 @llvm.amdgcn.raw.buffer.atomic.swap.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i32 %var) + %val2 = call i32 @llvm.amdgcn.raw.buffer.atomic.swap.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i32 %var) + ret void +} + +declare i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) +define amdgpu_ps void @atomic_swap_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s, i32 %val) { + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %val + ; CHECK-NEXT: %val0 = call i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 %val, i32 0) + %val0 = call i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 %val, i32 0) + + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %val + ; CHECK-NEXT: %val1 = call i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 %val) + %val1 = call i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 %val) + ret void +} + +declare i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32(i32, i32, i32, <8 x i32>, i32, i32) #0 +define amdgpu_ps void @atomic_cmpswap_1d(<8 x i32> inreg %rsrc, i32 %cmp, i32 %swap, i32 %s, i32 %val) { + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %val + ; CHECK-NEXT: %val0 = call i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32(i32 %cmp, i32 %swap, i32 %s, <8 x i32> %rsrc, i32 %val, i32 0) + %val0 = call i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32(i32 %cmp, i32 %swap, i32 %s, <8 x i32> %rsrc, i32 %val, i32 0) + + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %val + ; CHECK-NEXT: %val1 = call i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32(i32 %cmp, i32 %swap, i32 %s, <8 x i32> %rsrc, i32 0, i32 %val) + %val1 = call i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32(i32 %cmp, i32 %swap, i32 %s, <8 x i32> %rsrc, i32 0, i32 %val) + ret void +} + +declare float @llvm.amdgcn.fdot2(<2 x half>, <2 x half>, float, i1) +define float @test_fdot2(<2 x half> %arg0, <2 x half> %arg1, float %arg2, i1 %arg3) { + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i1 %arg3 + ; CHECK-NEXT: %val = call float @llvm.amdgcn.fdot2(<2 x half> %arg0, <2 x half> %arg1, float %arg2, i1 %arg3) + %val = call float @llvm.amdgcn.fdot2(<2 x half> %arg0, <2 x half> %arg1, float %arg2, i1 %arg3) + ret float %val +} + +declare i32 @llvm.amdgcn.sdot2(<2 x i16>, <2 x i16>, i32, i1) +define i32 @test_sdot2(<2 x i16> %arg0, <2 x i16> %arg1, i32 %arg2, i1 %arg3) { + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i1 %arg3 + ; CHECK-NEXT: %val = call i32 @llvm.amdgcn.sdot2(<2 x i16> %arg0, <2 x i16> %arg1, i32 %arg2, i1 %arg3) + %val = call i32 @llvm.amdgcn.sdot2(<2 x i16> %arg0, <2 x i16> %arg1, i32 %arg2, i1 %arg3) + ret i32 %val +} + +declare i32 @llvm.amdgcn.udot2(<2 x i16>, <2 x i16>, i32, i1) +define i32 @test_udot2(<2 x i16> %arg0, <2 x i16> %arg1, i32 %arg2, i1 %arg3) { + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i1 %arg3 + ; CHECK-NEXT: %val = call i32 @llvm.amdgcn.udot2(<2 x i16> %arg0, <2 x i16> %arg1, i32 %arg2, i1 %arg3) + %val = call i32 @llvm.amdgcn.udot2(<2 x i16> %arg0, <2 x i16> %arg1, i32 %arg2, i1 %arg3) + ret i32 %val +} + +declare i32 @llvm.amdgcn.sdot4(i32, i32, i32, i1) +define i32 @test_sdot4(i32 %arg0, i32 %arg1, i32 %arg2, i1 %arg3) { + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i1 %arg3 + ; CHECK-NEXT: %val = call i32 @llvm.amdgcn.sdot4(i32 %arg0, i32 %arg1, i32 %arg2, i1 %arg3) + %val = call i32 @llvm.amdgcn.sdot4(i32 %arg0, i32 %arg1, i32 %arg2, i1 %arg3) + ret i32 %val +} + +declare i32 @llvm.amdgcn.udot4(i32, i32, i32, i1) +define i32 @test_udot4(i32 %arg0, i32 %arg1, i32 %arg2, i1 %arg3) { + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i1 %arg3 + ; CHECK-NEXT: %val = call i32 @llvm.amdgcn.udot4(i32 %arg0, i32 %arg1, i32 %arg2, i1 %arg3) + %val = call i32 @llvm.amdgcn.udot4(i32 %arg0, i32 %arg1, i32 %arg2, i1 %arg3) + ret i32 %val +} diff --git a/llvm/test/Verifier/AMDGPU/lit.local.cfg b/llvm/test/Verifier/AMDGPU/lit.local.cfg new file mode 100644 index 00000000000..2a665f06be7 --- /dev/null +++ b/llvm/test/Verifier/AMDGPU/lit.local.cfg @@ -0,0 +1,2 @@ +if not 'AMDGPU' in config.root.targets: + config.unsupported = True diff --git a/llvm/test/Verifier/cttz-undef-arg.ll b/llvm/test/Verifier/cttz-undef-arg.ll index 66c5396443a..8c982e0924b 100644 --- a/llvm/test/Verifier/cttz-undef-arg.ll +++ b/llvm/test/Verifier/cttz-undef-arg.ll @@ -5,11 +5,13 @@ declare i32 @llvm.cttz.i32(i32, i1) define void @f(i32 %x, i1 %is_not_zero) { entry: -; CHECK: is_zero_undef argument of bit counting intrinsics must be a constant int +; CHECK: immarg operand has non-immediate parameter +; CHECK-NEXT: i1 %is_not_zero ; CHECK-NEXT: @llvm.ctlz.i32 call i32 @llvm.ctlz.i32(i32 %x, i1 %is_not_zero) -; CHECK: is_zero_undef argument of bit counting intrinsics must be a constant int +; CHECK: immarg operand has non-immediate parameter +; CHECK-NEXT: i1 %is_not_zero ; CHECK-NEXT: @llvm.cttz.i32 call i32 @llvm.cttz.i32(i32 %x, i1 %is_not_zero) ret void diff --git a/llvm/test/Verifier/element-wise-atomic-memory-intrinsics.ll b/llvm/test/Verifier/element-wise-atomic-memory-intrinsics.ll index 81c8ba16b97..f22f8eecbcc 100644 --- a/llvm/test/Verifier/element-wise-atomic-memory-intrinsics.ll +++ b/llvm/test/Verifier/element-wise-atomic-memory-intrinsics.ll @@ -1,8 +1,11 @@ ; RUN: not opt -verify < %s 2>&1 | FileCheck %s define void @test_memcpy(i8* %P, i8* %Q, i32 %A, i32 %E) { - ; CHECK: element size of the element-wise unordered atomic memory intrinsic must be a constant int + ; CHECK: immarg operand has non-immediate parameter + ; CHECK: i32 %E + ; CHECK-NEXT: call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %P, i8* align 4 %Q, i32 1, i32 %E) call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %P, i8* align 4 %Q, i32 1, i32 %E) + ; CHECK: element size of the element-wise atomic memory intrinsic must be a power of 2 call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %P, i8* align 4 %Q, i32 1, i32 3) @@ -21,11 +24,15 @@ define void @test_memcpy(i8* %P, i8* %Q, i32 %A, i32 %E) { ret void } + declare void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32) nounwind define void @test_memmove(i8* %P, i8* %Q, i32 %A, i32 %E) { - ; CHECK: element size of the element-wise unordered atomic memory intrinsic must be a constant int + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %E + ; CHECK-NEXT: call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %P, i8* align 4 %Q, i32 1, i32 %E) call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %P, i8* align 4 %Q, i32 1, i32 %E) + ; CHECK: element size of the element-wise atomic memory intrinsic must be a power of 2 call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %P, i8* align 4 %Q, i32 1, i32 3) @@ -44,11 +51,15 @@ define void @test_memmove(i8* %P, i8* %Q, i32 %A, i32 %E) { ret void } + declare void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32) nounwind define void @test_memset(i8* %P, i8 %V, i32 %A, i32 %E) { - ; CHECK: element size of the element-wise unordered atomic memory intrinsic must be a constant int + ; CHECK: immarg operand has non-immediate parameter + ; CHECK: i32 %E + ; CHECK: call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 4 %P, i8 %V, i32 1, i32 %E) call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 4 %P, i8 %V, i32 1, i32 %E) + ; CHECK: element size of the element-wise atomic memory intrinsic must be a power of 2 call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 4 %P, i8 %V, i32 1, i32 3) diff --git a/llvm/test/Verifier/frameescape.ll b/llvm/test/Verifier/frameescape.ll index c03134159db..0850b9495d9 100644 --- a/llvm/test/Verifier/frameescape.ll +++ b/llvm/test/Verifier/frameescape.ll @@ -47,7 +47,10 @@ define internal void @k(i32 %n) { call i8* @llvm.localrecover(i8* bitcast(void()* @f to i8*), i8* null, i32 %n) ret void } -; CHECK: idx argument of llvm.localrecover must be a constant int + +; CHECK: immarg operand has non-immediate parameter +; CHECK-NEXT: i32 %n +; CHECK-NEXT: %1 = call i8* @llvm.localrecover(i8* bitcast (void ()* @f to i8*), i8* null, i32 %n) define internal void @l(i8* %b) { %a = alloca i8 diff --git a/llvm/test/Verifier/immarg-param-attribute-invalid.ll b/llvm/test/Verifier/immarg-param-attribute-invalid.ll new file mode 100644 index 00000000000..5b923c5be52 --- /dev/null +++ b/llvm/test/Verifier/immarg-param-attribute-invalid.ll @@ -0,0 +1,107 @@ +; RUN: not llvm-as < %s -o /dev/null 2>&1 | FileCheck %s + +declare void @llvm.test.immarg.intrinsic.i32(i32 immarg) +declare void @llvm.test.immarg.intrinsic.v2i32(<2 x i32> immarg) +declare void @llvm.test.immarg.intrinsic.f32(float immarg) +declare void @llvm.test.immarg.intrinsic.v2f32(<2 x float> immarg) +declare void @llvm.test.immarg.intrinsic.2ai32([2 x i32] immarg) + +@gv = global i32 undef, align 4 + +define void @call_llvm.test.immarg.intrinsic.i32(i32 %arg) { +; CHECK: immarg operand has non-immediate parameter +; CHECK-NEXT: i32 undef +; CHECK-NEXT: call void @llvm.test.immarg.intrinsic.i32(i32 undef) + call void @llvm.test.immarg.intrinsic.i32(i32 undef) + +; CHECK: immarg operand has non-immediate parameter +; CHECK-NEXT: i32 %arg +; CHECK-NEXT: call void @llvm.test.immarg.intrinsic.i32(i32 %arg) + call void @llvm.test.immarg.intrinsic.i32(i32 %arg) + + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 ptrtoint (i32* @gv to i32) + ; CHECK-NEXT: call void @llvm.test.immarg.intrinsic.i32(i32 ptrtoint (i32* @gv to i32)) + call void @llvm.test.immarg.intrinsic.i32(i32 ptrtoint (i32* @gv to i32)) + ret void +} + +define void @call_llvm.test.immarg.intrinsic.f32() { +; CHECK: immarg operand has non-immediate parameter +; CHECK-NEXT: float undef +; CHECK-NEXT: call void @llvm.test.immarg.intrinsic.f32(float undef) + call void @llvm.test.immarg.intrinsic.f32(float undef) + ret void +} + +define void @call_llvm.test.immarg.intrinsic.v2i32() { +; CHECK: immarg operand has non-immediate parameter +; CHECK-NEXT: <2 x i32> zeroinitializer +; CHECK-NEXT: call void @llvm.test.immarg.intrinsic.v2i32(<2 x i32> zeroinitializer) + call void @llvm.test.immarg.intrinsic.v2i32(<2 x i32> zeroinitializer) + +; CHECK: immarg operand has non-immediate parameter +; CHECK-NEXT: <2 x i32> <i32 1, i32 2> +; CHECK-NEXT: call void @llvm.test.immarg.intrinsic.v2i32(<2 x i32> <i32 1, i32 2>) + call void @llvm.test.immarg.intrinsic.v2i32(<2 x i32> <i32 1, i32 2>) + +; CHECK: immarg operand has non-immediate parameter +; CHECK-NEXT: <2 x i32> undef +; CHECK-NEXT: call void @llvm.test.immarg.intrinsic.v2i32(<2 x i32> undef) + call void @llvm.test.immarg.intrinsic.v2i32(<2 x i32> undef) + ret void +} + +define void @call_llvm.test.immarg.intrinsic.v2f32() { +; CHECK: immarg operand has non-immediate parameter +; CHECK-NEXT: <2 x float> zeroinitializer +; CHECK-NEXT: call void @llvm.test.immarg.intrinsic.v2f32(<2 x float> zeroinitializer) + call void @llvm.test.immarg.intrinsic.v2f32(<2 x float> zeroinitializer) + +; CHECK: immarg operand has non-immediate parameter +; CHECK-NEXT: <2 x float> <float 1.000000e+00, float 2.000000e+00> +; CHECK-NEXT: call void @llvm.test.immarg.intrinsic.v2f32(<2 x float> <float 1.000000e+00, float 2.000000e+00>) + call void @llvm.test.immarg.intrinsic.v2f32(<2 x float> <float 1.0, float 2.0>) + ret void +} + +define void @call_llvm.test.immarg.intrinsic.2ai32() { +; CHECK: immarg operand has non-immediate parameter +; CHECK-NEXT: [2 x i32] zeroinitializer +; CHECK-NEXT: call void @llvm.test.immarg.intrinsic.2ai32([2 x i32] zeroinitializer) + call void @llvm.test.immarg.intrinsic.2ai32([2 x i32] zeroinitializer) + +; CHECK: immarg operand has non-immediate parameter +; CHECK-NEXT: [2 x i32] [i32 1, i32 2] +; CHECK-NEXT: call void @llvm.test.immarg.intrinsic.2ai32([2 x i32] [i32 1, i32 2]) + call void @llvm.test.immarg.intrinsic.2ai32([2 x i32] [i32 1, i32 2]) + ret void +} + +; CHECK: immarg attribute only applies to intrinsics +; CHECK-NEXT: void (i32)* @not_an_intrinsic +declare void @not_an_intrinsic(i32 immarg) + +declare void @llvm.test.intrinsic(i32) +declare void @func(i32) + +define void @only_on_callsite() { +; CHECK: immarg attribute only applies to intrinsics +; CHECK-NEXT: call void @func(i32 immarg 0) +; CHECK-NEXT: immarg may not apply only to call sites +; CHECK-NEXT: i32 0 +; CHECK-NEXT: call void @func(i32 immarg 0) + call void @func(i32 immarg 0) + +; CHECK: immarg may not apply only to call sites +; CHECK-NEXT: i32 0 +; CHECK-NEXT: call void @llvm.test.intrinsic(i32 immarg 0) + call void @llvm.test.intrinsic(i32 immarg 0) + ret void +} + +; CHECK: immarg attribute only applies to intrinsics +; CHECK: void (i32)* @on_function_definition +define void @on_function_definition(i32 immarg %arg) { + ret void +} diff --git a/llvm/test/Verifier/intrinsic-immarg.ll b/llvm/test/Verifier/intrinsic-immarg.ll new file mode 100644 index 00000000000..4a701ddd419 --- /dev/null +++ b/llvm/test/Verifier/intrinsic-immarg.ll @@ -0,0 +1,223 @@ +; RUN: not llvm-as < %s -o /dev/null 2>&1 | FileCheck %s + +declare i8* @llvm.returnaddress(i32) +define void @return_address(i32 %var) { + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %var + ; CHECK-NEXT: %result = call i8* @llvm.returnaddress(i32 %var) + %result = call i8* @llvm.returnaddress(i32 %var) + ret void +} + +declare i8* @llvm.frameaddress(i32) +define void @frame_address(i32 %var) { + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %var + ; CHECK-NEXT: %result = call i8* @llvm.frameaddress(i32 %var) + %result = call i8* @llvm.frameaddress(i32 %var) + ret void +} + +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) +define void @memcpy(i8* %dest, i8* %src, i1 %is.volatile) { + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i1 %is.volatile + ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 8, i1 %is.volatile) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 8, i1 %is.volatile) + ret void +} + +declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) +define void @memmove(i8* %dest, i8* %src, i1 %is.volatile) { + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i1 %is.volatile + ; CHECK-NEXT: call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 8, i1 %is.volatile) + call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 8, i1 %is.volatile) + ret void +} + +declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i1) +define void @memset(i8* %dest, i8 %val, i1 %is.volatile) { + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i1 %is.volatile + ; CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* %dest, i8 %val, i32 8, i1 %is.volatile) + call void @llvm.memset.p0i8.i32(i8* %dest, i8 %val, i32 8, i1 %is.volatile) + ret void +} + + +declare i64 @llvm.objectsize.i64.p0i8(i8*, i1, i1, i1) +define void @objectsize(i8* %ptr, i1 %a, i1 %b, i1 %c) { + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i1 %a + ; CHECK-NEXT: %val0 = call i64 @llvm.objectsize.i64.p0i8(i8* %ptr, i1 %a, i1 false, i1 false) + %val0 = call i64 @llvm.objectsize.i64.p0i8(i8* %ptr, i1 %a, i1 false, i1 false) + + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i1 %b + ; CHECK-NEXT: %val1 = call i64 @llvm.objectsize.i64.p0i8(i8* %ptr, i1 false, i1 %b, i1 false) + %val1 = call i64 @llvm.objectsize.i64.p0i8(i8* %ptr, i1 false, i1 %b, i1 false) + + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i1 %c + ; CHECK-NEXT: %val2 = call i64 @llvm.objectsize.i64.p0i8(i8* %ptr, i1 false, i1 false, i1 %c) + %val2 = call i64 @llvm.objectsize.i64.p0i8(i8* %ptr, i1 false, i1 false, i1 %c) + ret void +} + +declare i8 @llvm.expect.i8(i8, i8) +define i8 @expect(i8 %arg0, i8 %arg1) { + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i8 %arg1 + ; CHECK-NEXT: %ret = call i8 @llvm.expect.i8(i8 %arg0, i8 %arg1) + %ret = call i8 @llvm.expect.i8(i8 %arg0, i8 %arg1) + ret i8 %ret +} + +declare i64 @llvm.smul.fix.i64(i64, i64, i32) +define i64 @smul_fix(i64 %arg0, i64 %arg1, i32 %arg2) { + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %arg2 + ; CHECK-NEXT: %ret = call i64 @llvm.smul.fix.i64(i64 %arg0, i64 %arg1, i32 %arg2) + %ret = call i64 @llvm.smul.fix.i64(i64 %arg0, i64 %arg1, i32 %arg2) + ret i64 %ret +} + +declare i64 @llvm.umul.fix.i64(i64, i64, i32) +define i64 @umul_fix(i64 %arg0, i64 %arg1, i32 %arg2) { + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %arg2 + ; CHECK-NEXT: %ret = call i64 @llvm.umul.fix.i64(i64 %arg0, i64 %arg1, i32 %arg2) + %ret = call i64 @llvm.umul.fix.i64(i64 %arg0, i64 %arg1, i32 %arg2) + ret i64 %ret +} + +declare <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>*, i32, <2 x i1>, <2 x double>) +define <2 x double> @masked_load(<2 x i1> %mask, <2 x double>* %addr, <2 x double> %dst, i32 %align) { + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %align + ; CHECK-NEXT: %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %addr, i32 %align, <2 x i1> %mask, <2 x double> %dst) + %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %addr, i32 %align, <2 x i1> %mask, <2 x double> %dst) + ret <2 x double> %res +} + +declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32, <4 x i1>) +define void @masked_store(<4 x i1> %mask, <4 x i32>* %addr, <4 x i32> %val, i32 %align) { + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %align + ; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %val, <4 x i32>* %addr, i32 %align, <4 x i1> %mask) + call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %val, <4 x i32>* %addr, i32 %align, <4 x i1> %mask) + ret void +} + +declare <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*>, i32, <2 x i1>, <2 x double>) +define <2 x double> @test_gather(<2 x double*> %ptrs, <2 x i1> %mask, <2 x double> %src0, i32 %align) { + ; CHECK: immarg operand has non-immediate parameter + ; CHECK: i32 %align + ; CHECK: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 %align, <2 x i1> %mask, <2 x double> %src0) + %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 %align, <2 x i1> %mask, <2 x double> %src0) + ret <2 x double> %res +} + +declare void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32>, <8 x i32*>, i32, <8 x i1>) +define void @test_scatter_8i32(<8 x i32> %a1, <8 x i32*> %ptr, <8 x i1> %mask, i32 %align) { + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %align + ; CHECK-NEXT: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %a1, <8 x i32*> %ptr, i32 %align, <8 x i1> %mask) + call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %a1, <8 x i32*> %ptr, i32 %align, <8 x i1> %mask) + ret void +} + +declare void @llvm.lifetime.start.p0i8(i64, i8*) +define void @test_lifetime_start(i64 %arg0, i8* %ptr) { + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i64 %arg0 + ; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 %arg0, i8* %ptr) + call void @llvm.lifetime.start.p0i8(i64 %arg0, i8* %ptr) + ret void +} + +declare void @llvm.lifetime.end.p0i8(i64, i8*) +define void @test_lifetime_end(i64 %arg0, i8* %ptr) { + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i64 %arg0 + ; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 %arg0, i8* %ptr) + call void @llvm.lifetime.end.p0i8(i64 %arg0, i8* %ptr) + ret void +} + +declare void @llvm.invariant.start.p0i8(i64, i8*) +define void @test_invariant_start(i64 %arg0, i8* %ptr) { + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i64 %arg0 + ; CHECK-NEXT: call void @llvm.invariant.start.p0i8(i64 %arg0, i8* %ptr) + call void @llvm.invariant.start.p0i8(i64 %arg0, i8* %ptr) + ret void +} + +declare void @llvm.invariant.end.p0i8({}*, i64, i8*) +define void @test_invariant_end({}* %scope, i64 %arg1, i8* %ptr) { + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i64 %arg1 + ; CHECK-NEXT: call void @llvm.invariant.end.p0i8({}* %scope, i64 %arg1, i8* %ptr) + call void @llvm.invariant.end.p0i8({}* %scope, i64 %arg1, i8* %ptr) + ret void +} + +declare void @llvm.prefetch(i8*, i32, i32, i32) +define void @test_prefetch(i8* %ptr, i32 %arg0, i32 %arg1) { + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %arg0 + ; CHECK-NEXT: call void @llvm.prefetch(i8* %ptr, i32 %arg0, i32 0, i32 0) + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %arg1 + call void @llvm.prefetch(i8* %ptr, i32 %arg0, i32 0, i32 0) + call void @llvm.prefetch(i8* %ptr, i32 0, i32 %arg1, i32 0) + ret void +} + +declare void @llvm.localrecover(i8*, i8*, i32) +define void @test_localrecover(i8* %func, i8* %fp, i32 %idx) { + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %idx + ; CHECK-NEXT: call void @llvm.localrecover(i8* %func, i8* %fp, i32 %idx) + call void @llvm.localrecover(i8* %func, i8* %fp, i32 %idx) + ret void +} + +declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...) + +define private void @f() { + ret void +} + +define void @calls_statepoint(i8 addrspace(1)* %arg0, i64 %arg1, i32 %arg2, i32 %arg4, i32 %arg5) gc "statepoint-example" { + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i64 %arg1 + ; CHECK-NEXT: %safepoint0 = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 %arg1, i32 0, void ()* @f, i32 0, i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 10, i32 0, i8 addrspace(1)* %arg0, i64 addrspace(1)* %cast, i8 addrspace(1)* %arg0, i8 addrspace(1)* %arg0) + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %arg2 + ; CHECK-NEXT: %safepoint1 = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 %arg2, void ()* @f, i32 0, i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 10, i32 0, i8 addrspace(1)* %arg0, i64 addrspace(1)* %cast, i8 addrspace(1)* %arg0, i8 addrspace(1)* %arg0) + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %arg4 + ; CHECK-NEXT: %safepoint2 = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @f, i32 %arg4, i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 10, i32 0, i8 addrspace(1)* %arg0, i64 addrspace(1)* %cast, i8 addrspace(1)* %arg0, i8 addrspace(1)* %arg0) + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %arg5 + ; CHECK-NEXT: %safepoint3 = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @f, i32 0, i32 %arg5, i32 0, i32 5, i32 0, i32 0, i32 0, i32 10, i32 0, i8 addrspace(1)* %arg0, i64 addrspace(1)* %cast, i8 addrspace(1)* %arg0, i8 addrspace(1)* %arg0) + %cast = bitcast i8 addrspace(1)* %arg0 to i64 addrspace(1)* + %safepoint0 = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 %arg1, i32 0, void ()* @f, i32 0, i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 10, i32 0, i8 addrspace(1)* %arg0, i64 addrspace(1)* %cast, i8 addrspace(1)* %arg0, i8 addrspace(1)* %arg0) + %safepoint1 = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 %arg2, void ()* @f, i32 0, i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 10, i32 0, i8 addrspace(1)* %arg0, i64 addrspace(1)* %cast, i8 addrspace(1)* %arg0, i8 addrspace(1)* %arg0) + %safepoint2 = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @f, i32 %arg4, i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 10, i32 0, i8 addrspace(1)* %arg0, i64 addrspace(1)* %cast, i8 addrspace(1)* %arg0, i8 addrspace(1)* %arg0) + %safepoint3 = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @f, i32 0, i32 %arg5, i32 0, i32 5, i32 0, i32 0, i32 0, i32 10, i32 0, i8 addrspace(1)* %arg0, i64 addrspace(1)* %cast, i8 addrspace(1)* %arg0, i8 addrspace(1)* %arg0) + ret void +} + +declare void @llvm.hwasan.check.memaccess(i8*, i8*, i32) + +define void @hwasan_check_memaccess(i8* %arg0,i8* %arg1, i32 %arg2) { + ; CHECK: immarg operand has non-immediate parameter + ; CHECK: i32 %arg2 + ; CHECK: call void @llvm.hwasan.check.memaccess(i8* %arg0, i8* %arg1, i32 %arg2) + call void @llvm.hwasan.check.memaccess(i8* %arg0,i8* %arg1, i32 %arg2) + ret void +} |