IR: Add immarg attribute

This indicates an intrinsic parameter is required to be a constant, and should not be replaced with a non-constant value. Add the attribute to all AMDGPU and generic intrinsics that comments indicate it should apply to. I scanned other target intrinsics, but I don't see any obvious comments indicating which arguments are intended to be only immediates. This breaks one questionable testcase for the autoupgrade. I'm unclear on whether the autoupgrade is supposed to really handle declarations which were never valid. The verifier fails because the attributes now refer to a parameter past the end of the argument list. llvm-svn: 355981
author: Matt Arsenault <Matthew.Arsenault@amd.com> 2019-03-12 21:02:54 +0000
committer: Matt Arsenault <Matthew.Arsenault@amd.com> 2019-03-12 21:02:54 +0000
commit: caf1316f7100e1ba1e7e800eae6a5adf0d48bf90 (patch)
tree: 7b79b67ccf3ffe7fe9b56dee66444a586d7714d2 /llvm/test
parent: 9df0754b8dbeeb597a22bb0418eb0ee04ce88b56 (diff)
download: bcm5719-llvm-caf1316f7100e1ba1e7e800eae6a5adf0d48bf90.tar.gz
bcm5719-llvm-caf1316f7100e1ba1e7e800eae6a5adf0d48bf90.zip
28 files changed, 1312 insertions, 471 deletions
diff --git a/llvm/test/Assembler/auto_upgrade_intrinsics.ll b/llvm/test/Assembler/auto_upgrade_intrinsics.ll
index bd2a779800a..175bac812a0 100644
--- a/llvm/test/Assembler/auto_upgrade_intrinsics.ll
+++ b/llvm/test/Assembler/auto_upgrade_intrinsics.ll
@@ -146,5 +146,5 @@ define void @tests.lifetime.start.end() {
 ; CHECK: declare i32 @llvm.objectsize.i32.p0i8
 
 
-; CHECK: declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture)
-; CHECK: declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture)
+; CHECK: declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture)
+; CHECK: declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture)
diff --git a/llvm/test/Assembler/autoupgrade-invalid-mem-intrinsics.ll b/llvm/test/Assembler/autoupgrade-invalid-mem-intrinsics.ll
new file mode 100644
index 00000000000..59308ef27a1
--- /dev/null
+++ b/llvm/test/Assembler/autoupgrade-invalid-mem-intrinsics.ll
@@ -0,0 +1,15 @@
+; RUN: not llvm-as < %s -o /dev/null 2>&1 | FileCheck %s
+
+; Check that remangling code doesn't fail on an intrinsic with wrong signature
+
+; CHECK: Attribute after last parameter!
+; CHECK-NEXT: void (i8*, i8, i64)* @llvm.memset.i64
+declare void @llvm.memset.i64(i8* nocapture, i8, i64) nounwind
+
+; CHECK: Attribute after last parameter!
+; CHECK-NEXT: void (i8*, i8, i64)* @llvm.memcpy.i64
+declare void @llvm.memcpy.i64(i8* nocapture, i8, i64) nounwind
+
+; CHECK: Attribute after last parameter!
+; CHECK-NEXT: void (i8*, i8, i64)* @llvm.memmove.i64
+declare void @llvm.memmove.i64(i8* nocapture, i8, i64) nounwind
diff --git a/llvm/test/Assembler/immarg-param-attribute.ll b/llvm/test/Assembler/immarg-param-attribute.ll
new file mode 100644
index 00000000000..f31443a8a3e
--- /dev/null
+++ b/llvm/test/Assembler/immarg-param-attribute.ll
@@ -0,0 +1,39 @@
+; RUN: llvm-as < %s | llvm-dis | llvm-as | llvm-dis | FileCheck %s
+
+; CHECK: declare void @llvm.test.immarg.intrinsic.i32(i32 immarg)
+declare void @llvm.test.immarg.intrinsic.i32(i32 immarg)
+
+; CHECK: declare void @llvm.test.immarg.intrinsic.f32(float immarg)
+declare void @llvm.test.immarg.intrinsic.f32(float immarg)
+
+; CHECK-LABEL: @call_llvm.test.immarg.intrinsic.i32(
+define void @call_llvm.test.immarg.intrinsic.i32() {
+  ; CHECK: call void @llvm.test.immarg.intrinsic.i32(i32 0)
+  call void @llvm.test.immarg.intrinsic.i32(i32 0)
+
+  ; CHECK: call void @llvm.test.immarg.intrinsic.i32(i32 0)
+  call void @llvm.test.immarg.intrinsic.i32(i32 0)
+
+  ; CHECK call void @llvm.test.immarg.intrinsic.i32(i32 1)
+  call void @llvm.test.immarg.intrinsic.i32(i32 1)
+
+  ; CHECK: call void @llvm.test.immarg.intrinsic.i32(i32 5)
+  call void @llvm.test.immarg.intrinsic.i32(i32 add (i32 2, i32 3))
+
+  ; CHECK: call void @llvm.test.immarg.intrinsic.i32(i32 0)
+  call void @llvm.test.immarg.intrinsic.i32(i32 ptrtoint (i32* null to i32))
+  ret void
+}
+
+; CHECK-LABEL: @call_llvm.test.immarg.intrinsic.f32(
+define void @call_llvm.test.immarg.intrinsic.f32() {
+  ; CHECK: call void @llvm.test.immarg.intrinsic.f32(float 1.000000e+00)
+  call void @llvm.test.immarg.intrinsic.f32(float 1.0)
+  ret void
+}
+
+define void @on_callsite_and_declaration() {
+  ; CHECK: call void @llvm.test.immarg.intrinsic.i32(i32 immarg 0)
+  call void @llvm.test.immarg.intrinsic.i32(i32 immarg 0)
+  ret void
+}
diff --git a/llvm/test/Assembler/invalid-immarg.ll b/llvm/test/Assembler/invalid-immarg.ll
new file mode 100644
index 00000000000..65f6ba6c5f4
--- /dev/null
+++ b/llvm/test/Assembler/invalid-immarg.ll
@@ -0,0 +1,34 @@
+; RUN: not llvm-as < %s -o /dev/null 2>&1 | FileCheck %s
+
+; CHECK: Attribute 'immarg' is incompatible with other attributes
+declare void @llvm.immarg.byval(i32* byval immarg)
+
+; CHECK: Attribute 'immarg' is incompatible with other attributes
+declare void @llvm.immarg.inalloca(i32* inalloca immarg)
+
+; CHECK: Attribute 'immarg' is incompatible with other attributes
+declare void @llvm.immarg.inreg(i32 inreg immarg)
+
+; CHECK: Attribute 'immarg' is incompatible with other attributes
+declare void @llvm.immarg.nest(i32* nest immarg)
+
+; CHECK: Attribute 'immarg' is incompatible with other attributes
+declare void @llvm.immarg.sret(i32* sret immarg)
+
+; CHECK: Attribute 'immarg' is incompatible with other attributes
+declare void @llvm.immarg.zeroext(i32 zeroext immarg)
+
+; CHECK: Attribute 'immarg' is incompatible with other attributes
+declare void @llvm.immarg.signext(i32 signext immarg)
+
+; CHECK: Attribute 'immarg' is incompatible with other attributes
+declare void @llvm.immarg.returned(i32 returned immarg)
+
+; CHECK: Attribute 'immarg' is incompatible with other attributes
+declare void @llvm.immarg.noalias(i32* noalias immarg)
+
+; CHECK: Attribute 'immarg' is incompatible with other attributes
+declare void @llvm.immarg.readnone(i32* readnone immarg)
+
+; CHECK: Attribute 'immarg' is incompatible with other attributes
+declare void @llvm.immarg.readonly(i32* readonly immarg)
diff --git a/llvm/test/Assembler/invalid-immarg2.ll b/llvm/test/Assembler/invalid-immarg2.ll
new file mode 100644
index 00000000000..f4589108a9f
--- /dev/null
+++ b/llvm/test/Assembler/invalid-immarg2.ll
@@ -0,0 +1,4 @@
+; RUN: not llvm-as < %s -o /dev/null 2>&1 | FileCheck %s
+
+; CHECK: error: invalid use of parameter-only attribute on a function
+declare void @llvm.immarg.func() immarg
diff --git a/llvm/test/Assembler/invalid-immarg3.ll b/llvm/test/Assembler/invalid-immarg3.ll
new file mode 100644
index 00000000000..c03d7fafdc2
--- /dev/null
+++ b/llvm/test/Assembler/invalid-immarg3.ll
@@ -0,0 +1,4 @@
+; RUN: not llvm-as < %s -o /dev/null 2>&1 | FileCheck %s
+
+; CHECK: error: invalid use of parameter-only attribute
+declare immarg i32 @llvm.immarg.retattr(i32)
diff --git a/llvm/test/Bitcode/compatibility.ll b/llvm/test/Bitcode/compatibility.ll
index 3c5e86eee01..95cd49f8156 100644
--- a/llvm/test/Bitcode/compatibility.ll
+++ b/llvm/test/Bitcode/compatibility.ll
@@ -1681,6 +1681,10 @@ define i8** @constexpr() {
   ret i8** getelementptr inbounds ({ [4 x i8*], [4 x i8*] }, { [4 x i8*], [4 x i8*] }* null, i32 0, inrange i32 1, i32 2)
 }
 
+; immarg attribute
+declare void @llvm.test.immarg.intrinsic(i32 immarg)
+; CHECK: declare void @llvm.test.immarg.intrinsic(i32 immarg)
+
 ; CHECK: attributes #0 = { alignstack=4 }
 ; CHECK: attributes #1 = { alignstack=8 }
 ; CHECK: attributes #2 = { alwaysinline }
diff --git a/llvm/test/Bitcode/objectsize-upgrade-7.0.ll b/llvm/test/Bitcode/objectsize-upgrade-7.0.ll
index a7e6d497676..7ed6f15f683 100644
--- a/llvm/test/Bitcode/objectsize-upgrade-7.0.ll
+++ b/llvm/test/Bitcode/objectsize-upgrade-7.0.ll
@@ -9,4 +9,4 @@ define void @callit(i8* %ptr) {
 }
 
 declare i64 @llvm.objectsize.i64.p0i8(i8*, i1, i1)
-; CHECK: declare i64 @llvm.objectsize.i64.p0i8(i8*, i1, i1, i1)
+; CHECK: declare i64 @llvm.objectsize.i64.p0i8(i8*, i1 immarg, i1 immarg, i1 immarg)
diff --git a/llvm/test/Bitcode/upgrade-memory-intrinsics.ll b/llvm/test/Bitcode/upgrade-memory-intrinsics.ll
index df4cb2bcd36..1bdee928cca 100644
--- a/llvm/test/Bitcode/upgrade-memory-intrinsics.ll
+++ b/llvm/test/Bitcode/upgrade-memory-intrinsics.ll
@@ -27,9 +27,9 @@ define void @test2(i8* %p1, i8* %p2, i8* %p3) {
   ret void
 }
 
-; CHECK: declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1)
-; CHECK: declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1)
-; CHECK: declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i1)
+; CHECK: declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg)
+; CHECK: declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1 immarg)
+; CHECK: declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i1 immarg)
 declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i32, i1)
 declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly , i8* nocapture readonly, i64, i32, i1)
 declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1)
diff --git a/llvm/test/CodeGen/AMDGPU/bitcast-vector-extract.ll b/llvm/test/CodeGen/AMDGPU/bitcast-vector-extract.ll
index 722099fec33..6644449b14a 100644
--- a/llvm/test/CodeGen/AMDGPU/bitcast-vector-extract.ll
+++ b/llvm/test/CodeGen/AMDGPU/bitcast-vector-extract.ll
@@ -70,8 +70,8 @@ define amdgpu_kernel void @store_bitcast_constant_v8i32_to_v16i16(<8 x float> ad
 
 ; GCN-LABEL: {{^}}store_value_lowered_to_undef_bitcast_source:
 ; GCN-NOT: store_dword
-define amdgpu_kernel void @store_value_lowered_to_undef_bitcast_source(<2 x i32> addrspace(1)* %out, i64 %a, i64 %b, i32 %c) #0 {
-  %undef = call i64 @llvm.amdgcn.icmp.i64(i64 %a, i64 %b, i32 %c) #1
+define amdgpu_kernel void @store_value_lowered_to_undef_bitcast_source(<2 x i32> addrspace(1)* %out, i64 %a, i64 %b) #0 {
+  %undef = call i64 @llvm.amdgcn.icmp.i64(i64 %a, i64 %b, i32 999) #1
   %bc = bitcast i64 %undef to <2 x i32>
   store volatile <2 x i32> %bc, <2 x i32> addrspace(1)* %out
   ret void
@@ -79,8 +79,8 @@ define amdgpu_kernel void @store_value_lowered_to_undef_bitcast_source(<2 x i32>
 
 ; GCN-LABEL: {{^}}store_value_lowered_to_undef_bitcast_source_extractelt:
 ; GCN-NOT: store_dword
-define amdgpu_kernel void @store_value_lowered_to_undef_bitcast_source_extractelt(i32 addrspace(1)* %out, i64 %a, i64 %b, i32 %c) #0 {
-  %undef = call i64 @llvm.amdgcn.icmp.i64(i64 %a, i64 %b, i32 %c) #1
+define amdgpu_kernel void @store_value_lowered_to_undef_bitcast_source_extractelt(i32 addrspace(1)* %out, i64 %a, i64 %b) #0 {
+  %undef = call i64 @llvm.amdgcn.icmp.i64(i64 %a, i64 %b, i32 9999) #1
   %bc = bitcast i64 %undef to <2 x i32>
   %elt1 = extractelement <2 x i32> %bc, i32 1
   store volatile i32 %elt1, i32 addrspace(1)* %out
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.dec.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.dec.ll
index 2616d84bcd9..c4e631590d5 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.dec.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.dec.ll
@@ -12,34 +12,6 @@ declare i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* nocapture, i64, i32, i32, i1)
 
 declare i32 @llvm.amdgcn.workitem.id.x() #1
 
-; Make sure no crash on invalid non-constant
-; GCN-LABEL: {{^}}invalid_variable_order_lds_atomic_dec_ret_i32:
-; CIVI-DAG: s_mov_b32 m0
-; GFX9-NOT: m0
-define amdgpu_kernel void @invalid_variable_order_lds_atomic_dec_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %order.var) #0 {
-  %result = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 %order.var, i32 0, i1 false)
-  store i32 %result, i32 addrspace(1)* %out
-  ret void
-}
-
-; Make sure no crash on invalid non-constant
-; GCN-LABEL: {{^}}invalid_variable_scope_lds_atomic_dec_ret_i32:
-; CIVI-DAG: s_mov_b32 m0
-; GFX9-NOT: m0
-define amdgpu_kernel void @invalid_variable_scope_lds_atomic_dec_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %scope.var) #0 {
-  %result = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 0, i32 %scope.var, i1 false)
-  store i32 %result, i32 addrspace(1)* %out
-  ret void
-}
-
-; Make sure no crash on invalid non-constant
-; GCN-LABEL: {{^}}invalid_variable_volatile_lds_atomic_dec_ret_i32:
-define amdgpu_kernel void @invalid_variable_volatile_lds_atomic_dec_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i1 %volatile.var) #0 {
-  %result = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 0, i32 0, i1 %volatile.var)
-  store i32 %result, i32 addrspace(1)* %out
-  ret void
-}
-
 ; GCN-LABEL: {{^}}lds_atomic_dec_ret_i32:
 ; CIVI-DAG: s_mov_b32 m0
 ; GFX9-NOT: m0
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.div.scale.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.div.scale.ll
index e36c02715b7..2a5e81a6dd6 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.div.scale.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.div.scale.ll
@@ -395,41 +395,6 @@ define amdgpu_kernel void @test_div_scale_f32_undef_undef_val(float addrspace(1)
   ret void
 }
 
-; Undefined selector gets deleted
-; SI-LABEL: {{^}}test_div_scale_f32_val_undef_undef:
-; SI-NOT: v_div_scale
-define amdgpu_kernel void @test_div_scale_f32_val_undef_undef(float addrspace(1)* %out) #0 {
-  %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float 8.0, float undef, i1 undef)
-  %result0 = extractvalue { float, i1 } %result, 0
-  store float %result0, float addrspace(1)* %out, align 4
-  ret void
-}
-
-; SI-LABEL: {{^}}test_div_scale_f32_undef_undef_undef:
-; SI-NOT: v_div_scale
-define amdgpu_kernel void @test_div_scale_f32_undef_undef_undef(float addrspace(1)* %out) #0 {
-  %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float undef, float undef, i1 undef)
-  %result0 = extractvalue { float, i1 } %result, 0
-  store float %result0, float addrspace(1)* %out, align 4
-  ret void
-}
-
-; SI-LABEL: {{^}}test_div_scale_f32_val_val_undef:
-; SI-NOT: v_div_scale
-define amdgpu_kernel void @test_div_scale_f32_val_val_undef(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
-  %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
-  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
-  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
-
-  %a = load volatile float, float addrspace(1)* %gep.0, align 4
-  %b = load volatile float, float addrspace(1)* %gep.1, align 4
-
-  %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 undef)
-  %result0 = extractvalue { float, i1 } %result, 0
-  store float %result0, float addrspace(1)* %out, align 4
-  ret void
-}
-
 ; SI-LABEL: {{^}}test_div_scale_f64_val_undef_val:
 ; SI-DAG: s_mov_b32 s[[K_LO:[0-9]+]], 0{{$}}
 ; SI-DAG: s_mov_b32 s[[K_HI:[0-9]+]], 0x40200000
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.ll
index a6a402b93bf..6ff66b94fdf 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.ll
@@ -8,14 +8,6 @@ declare float @llvm.fabs.f32(float) #0
 declare i64 @llvm.amdgcn.fcmp.f16(half, half, i32) #0
 declare half @llvm.fabs.f16(half) #0
 
-; GCN-LABEL: {{^}}v_fcmp_f32_dynamic_cc:
-; GCN: s_endpgm
-define amdgpu_kernel void @v_fcmp_f32_dynamic_cc(i64 addrspace(1)* %out, float %src0, float %src1, i32 %cc) {
-  %result = call i64 @llvm.amdgcn.fcmp.f32(float %src0, float %src1, i32 %cc)
-  store i64 %result, i64 addrspace(1)* %out
-  ret void
-}
-
 ; GCN-LABEL: {{^}}v_fcmp_f32_oeq_with_fabs:
 ; GCN: v_cmp_eq_f32_e64 {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}}, |{{v[0-9]+}}|
 define amdgpu_kernel void @v_fcmp_f32_oeq_with_fabs(i64 addrspace(1)* %out, float %src, float %a) {
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.ll
index 06ac7da1288..8d5cdf1c270 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.ll
@@ -6,15 +6,6 @@ declare i64 @llvm.amdgcn.icmp.i64(i64, i64, i32) #0
 declare i64 @llvm.amdgcn.icmp.i16(i16, i16, i32) #0
 declare i64 @llvm.amdgcn.icmp.i1(i1, i1, i32) #0
 
-; No crash on invalid input
-; GCN-LABEL: {{^}}v_icmp_i32_dynamic_cc:
-; GCN: s_endpgm
-define amdgpu_kernel void @v_icmp_i32_dynamic_cc(i64 addrspace(1)* %out, i32 %src, i32 %cc) {
-  %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 %cc)
-  store i64 %result, i64 addrspace(1)* %out
-  ret void
-}
-
 ; GCN-LABEL: {{^}}v_icmp_i32_eq:
 ; GCN: v_cmp_eq_u32_e64
 define amdgpu_kernel void @v_icmp_i32_eq(i64 addrspace(1)* %out, i32 %src) {
@@ -181,15 +172,6 @@ define amdgpu_kernel void @v_icmp_i64_sle(i64 addrspace(1)* %out, i64 %src) {
   ret void
 }
 
-; GCN-LABEL: {{^}}v_icmp_i16_dynamic_cc:
-; GCN: s_endpgm
-define amdgpu_kernel void @v_icmp_i16_dynamic_cc(i64 addrspace(1)* %out, i16 %src, i32 %cc) {
-  %result = call i64 @llvm.amdgcn.icmp.i16(i16 %src, i16 100, i32 %cc)
-  store i64 %result, i64 addrspace(1)* %out
-  ret void
-}
-
-; GCN-LABEL: {{^}}v_icmp_i16_eq:
 ; VI: v_cmp_eq_u16_e64
 
 ; SI-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x64
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.tbuffer.store.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.tbuffer.store.ll
index 51f047d9b64..00da71e422f 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.tbuffer.store.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.tbuffer.store.ll
@@ -67,9 +67,6 @@ declare void @llvm.amdgcn.raw.tbuffer.store.i32(i32, <4 x i32>, i32, i32, i32, i
 declare void @llvm.amdgcn.raw.tbuffer.store.v2i32(<2 x i32>, <4 x i32>, i32, i32, i32, i32) #0
 declare void @llvm.amdgcn.raw.tbuffer.store.v4i32(<4 x i32>, <4 x i32>, i32, i32, i32, i32) #0
 declare void @llvm.amdgcn.raw.tbuffer.store.v4f32(<4 x float>, <4 x i32>, i32, i32, i32, i32) #0
-declare <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32>, i32, i1, i1) #1
 
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readonly }
-
-
diff --git a/llvm/test/DebugInfo/MIR/X86/kill-after-spill.mir b/llvm/test/DebugInfo/MIR/X86/kill-after-spill.mir
index 5110dc349be..88ed269c93d 100644
--- a/llvm/test/DebugInfo/MIR/X86/kill-after-spill.mir
+++ b/llvm/test/DebugInfo/MIR/X86/kill-after-spill.mir
@@ -124,9 +124,6 @@
   declare i8* @memset(i8*, i32, i64) local_unnamed_addr
 
   ; Function Attrs: nounwind readnone speculatable
-  declare i64 @llvm.objectsize.i64.p0i8(i8*, i1, i1) #1
-
-  ; Function Attrs: nounwind readnone speculatable
   declare void @llvm.dbg.value(metadata, metadata, metadata) #1
 
   ; Function Attrs: nounwind
diff --git a/llvm/test/LTO/X86/remangle_intrinsics.ll b/llvm/test/LTO/X86/remangle_intrinsics.ll
index 72ce8d3d812..112cace0403 100644
--- a/llvm/test/LTO/X86/remangle_intrinsics.ll
+++ b/llvm/test/LTO/X86/remangle_intrinsics.ll
@@ -19,6 +19,3 @@ define void @foo(%struct.rtx_def* %a, i8 %b, i32 %c) {
 }
 
 declare void @llvm.memset.p0struct.rtx_def.i32(%struct.rtx_def*, i8, i32, i1)
-
-; Check that remangling code doesn't fail on an intrinsic with wrong signature
-declare void @llvm.memset.i64(i8* nocapture, i8, i64) nounwind
diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/intrinsics.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/intrinsics.ll
index 166e90ab7b6..4694a1c5f8d 100644
--- a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/intrinsics.ll
+++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/intrinsics.ll
@@ -125,15 +125,6 @@ define i64 @volatile_atomicdec_group_to_flat_i64(i64 addrspace(3)* %group.ptr, i
   ret i64 %ret
 }
 
-; CHECK-LABEL: @invalid_variable_volatile_atomicinc_group_to_flat_i64(
-; CHECK-NEXT: %1 = addrspacecast i64 addrspace(3)* %group.ptr to i64*
-; CHECK-NEXT: %ret = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %1, i64 %y, i32 0, i32 0, i1 %volatile.var)
-define i64 @invalid_variable_volatile_atomicinc_group_to_flat_i64(i64 addrspace(3)* %group.ptr, i64 %y, i1 %volatile.var) #0 {
-  %cast = addrspacecast i64 addrspace(3)* %group.ptr to i64*
-  %ret = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %cast, i64 %y, i32 0, i32 0, i1 %volatile.var)
-  ret i64 %ret
-}
-
 declare i32 @llvm.objectsize.i32.p0i8(i8*, i1, i1, i1) #1
 declare i64 @llvm.objectsize.i64.p0i8(i8*, i1, i1, i1) #1
 declare i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* nocapture, i32, i32, i32, i1) #2
diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-demanded-vector-elts.ll b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-demanded-vector-elts.ll
index 6030d14c8e1..e0b8fab411c 100644
--- a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-demanded-vector-elts.ll
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-demanded-vector-elts.ll
@@ -320,229 +320,229 @@ declare <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32>, i32, i32, i
 ; --------------------------------------------------------------------
 
 ; CHECK-LABEL: @raw_buffer_load_f32(
-; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
 ; CHECK-NEXT: ret float %data
-define amdgpu_ps float @raw_buffer_load_f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps float @raw_buffer_load_f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+  %data = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
   ret float %data
 }
 
 ; CHECK-LABEL: @raw_buffer_load_v1f32(
-; CHECK-NEXT: %data = call <1 x float> @llvm.amdgcn.raw.buffer.load.v1f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <1 x float> @llvm.amdgcn.raw.buffer.load.v1f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
 ; CHECK-NEXT: ret <1 x float> %data
-define amdgpu_ps <1 x float> @raw_buffer_load_v1f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <1 x float> @llvm.amdgcn.raw.buffer.load.v1f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps <1 x float> @raw_buffer_load_v1f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+  %data = call <1 x float> @llvm.amdgcn.raw.buffer.load.v1f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
   ret <1 x float> %data
 }
 
 ; CHECK-LABEL: @raw_buffer_load_v2f32(
-; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
 ; CHECK-NEXT: ret <2 x float> %data
-define amdgpu_ps <2 x float> @raw_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps <2 x float> @raw_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+  %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
   ret <2 x float> %data
 }
 
 ; CHECK-LABEL: @raw_buffer_load_v4f32(
-; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
 ; CHECK-NEXT: ret <4 x float> %data
-define amdgpu_ps <4 x float> @raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps <4 x float> @raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
   ret <4 x float> %data
 }
 
 ; CHECK-LABEL: @extract_elt0_raw_buffer_load_v2f32(
-; CHECK: %data = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK: %data = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
 ; CHECK-NEXT: ret float %data
-define amdgpu_ps float @extract_elt0_raw_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps float @extract_elt0_raw_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+  %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
   %elt0 = extractelement <2 x float> %data, i32 0
   ret float %elt0
 }
 
 ; CHECK-LABEL: @extract_elt1_raw_buffer_load_v2f32(
-; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
 ; CHECK-NEXT: %elt1 = extractelement <2 x float> %data, i32 1
 ; CHECK-NEXT: ret float %elt1
-define amdgpu_ps float @extract_elt1_raw_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps float @extract_elt1_raw_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+  %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
   %elt1 = extractelement <2 x float> %data, i32 1
   ret float %elt1
 }
 
 ; CHECK-LABEL: @extract_elt0_raw_buffer_load_v4f32(
-; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
 ; CHECK-NEXT: ret float %data
-define amdgpu_ps float @extract_elt0_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps float @extract_elt0_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
   %elt0 = extractelement <4 x float> %data, i32 0
   ret float %elt0
 }
 
 ; CHECK-LABEL: @extract_elt1_raw_buffer_load_v4f32(
-; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
 ; CHECK-NEXT: %elt1 = extractelement <2 x float> %data, i32 1
 ; CHECK-NEXT: ret float %elt1
-define amdgpu_ps float @extract_elt1_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps float @extract_elt1_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
   %elt1 = extractelement <4 x float> %data, i32 1
   ret float %elt1
 }
 
 ; CHECK-LABEL: @extract_elt2_raw_buffer_load_v4f32(
-; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
 ; CHECK-NEXT: %elt1 = extractelement <4 x float> %data, i32 2
 ; CHECK-NEXT: ret float %elt1
-define amdgpu_ps float @extract_elt2_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps float @extract_elt2_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
   %elt1 = extractelement <4 x float> %data, i32 2
   ret float %elt1
 }
 
 ; CHECK-LABEL: @extract_elt3_raw_buffer_load_v4f32(
-; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
 ; CHECK-NEXT: %elt1 = extractelement <4 x float> %data, i32 3
 ; CHECK-NEXT: ret float %elt1
-define amdgpu_ps float @extract_elt3_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps float @extract_elt3_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
   %elt1 = extractelement <4 x float> %data, i32 3
   ret float %elt1
 }
 
 ; CHECK-LABEL: @extract_elt0_elt1_raw_buffer_load_v4f32(
-; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
 ; CHECK-NEXT: ret <2 x float>
-define amdgpu_ps <2 x float> @extract_elt0_elt1_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps <2 x float> @extract_elt0_elt1_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
   %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 0, i32 1>
   ret <2 x float> %shuf
 }
 
 ; CHECK-LABEL: @extract_elt1_elt2_raw_buffer_load_v4f32(
-; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
 ; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 1, i32 2>
 ; CHECK-NEXT: ret <2 x float> %shuf
-define amdgpu_ps <2 x float> @extract_elt1_elt2_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps <2 x float> @extract_elt1_elt2_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
   %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 1, i32 2>
   ret <2 x float> %shuf
 }
 
 ; CHECK-LABEL: @extract_elt2_elt3_raw_buffer_load_v4f32(
-; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
 ; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 2, i32 3>
 ; CHECK-NEXT: ret <2 x float> %shuf
-define amdgpu_ps <2 x float> @extract_elt2_elt3_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps <2 x float> @extract_elt2_elt3_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
   %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 2, i32 3>
   ret <2 x float> %shuf
 }
 
 ; CHECK-LABEL: @extract_elt0_elt1_elt2_raw_buffer_load_v4f32(
-; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
 ; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
 ; CHECK-NEXT: ret <3 x float> %shuf
-define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
   %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
   ret <3 x float> %shuf
 }
 
 ; CHECK-LABEL: @extract_elt1_elt2_elt3_raw_buffer_load_v4f32(
-; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
 ; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 1, i32 2, i32 3>
 ; CHECK-NEXT: ret <3 x float> %shuf
-define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
   %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 1, i32 2, i32 3>
   ret <3 x float> %shuf
 }
 
 ; CHECK-LABEL: @extract_elt0_elt2_elt3_raw_buffer_load_v4f32(
-; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
 ; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 2, i32 3>
 ; CHECK-NEXT: ret <3 x float> %shuf
-define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
   %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 2, i32 3>
   ret <3 x float> %shuf
 }
 
 ; CHECK-LABEL: @extract_elt0_raw_buffer_load_v3f32(
-; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
 ; CHECK-NEXT: ret float %data
-define amdgpu_ps float @extract_elt0_raw_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps float @extract_elt0_raw_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+  %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
   %elt0 = extractelement <3 x float> %data, i32 0
   ret float %elt0
 }
 
 ; CHECK-LABEL: @extract_elt1_raw_buffer_load_v3f32(
-; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
 ; CHECK-NEXT: %elt1 = extractelement <2 x float> %data, i32 1
 ; CHECK-NEXT: ret float %elt1
-define amdgpu_ps float @extract_elt1_raw_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps float @extract_elt1_raw_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+  %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
   %elt1 = extractelement <3 x float> %data, i32 1
   ret float %elt1
 }
 
 ; CHECK-LABEL: @extract_elt2_raw_buffer_load_v3f32(
-; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
 ; CHECK-NEXT: %elt1 = extractelement <3 x float> %data, i32 2
 ; CHECK-NEXT: ret float %elt1
-define amdgpu_ps float @extract_elt2_raw_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps float @extract_elt2_raw_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+  %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
   %elt1 = extractelement <3 x float> %data, i32 2
   ret float %elt1
 }
 
 ; CHECK-LABEL: @extract_elt0_elt1_raw_buffer_load_v3f32(
-; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
 ; CHECK-NEXT: ret <2 x float>
-define amdgpu_ps <2 x float> @extract_elt0_elt1_raw_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps <2 x float> @extract_elt0_elt1_raw_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+  %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
   %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 0, i32 1>
   ret <2 x float> %shuf
 }
 
 ; CHECK-LABEL: @extract_elt1_elt2_raw_buffer_load_v3f32(
-; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
 ; CHECK-NEXT: %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 1, i32 2>
 ; CHECK-NEXT: ret <2 x float> %shuf
-define amdgpu_ps <2 x float> @extract_elt1_elt2_raw_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps <2 x float> @extract_elt1_elt2_raw_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+  %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
   %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 1, i32 2>
   ret <2 x float> %shuf
 }
 
 ; CHECK-LABEL: @extract0_bitcast_raw_buffer_load_v4f32(
-; CHECK-NEXT: %tmp = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %tmp = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
 ; CHECK-NEXT: %tmp2 = bitcast float %tmp to i32
 ; CHECK-NEXT: ret i32 %tmp2
-define i32 @extract0_bitcast_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %tmp = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+define i32 @extract0_bitcast_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+  %tmp = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
   %tmp1 = bitcast <4 x float> %tmp to <4 x i32>
   %tmp2 = extractelement <4 x i32> %tmp1, i32 0
   ret i32 %tmp2
 }
 
 ; CHECK-LABEL: @extract0_bitcast_raw_buffer_load_v4i32(
-; CHECK-NEXT: %tmp = call i32 @llvm.amdgcn.raw.buffer.load.i32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %tmp = call i32 @llvm.amdgcn.raw.buffer.load.i32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
 ; CHECK-NEXT: %tmp2 = bitcast i32 %tmp to float
 ; CHECK-NEXT: ret float %tmp2
-define float @extract0_bitcast_raw_buffer_load_v4i32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %tmp = call <4 x i32> @llvm.amdgcn.raw.buffer.load.v4i32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+define float @extract0_bitcast_raw_buffer_load_v4i32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+  %tmp = call <4 x i32> @llvm.amdgcn.raw.buffer.load.v4i32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
   %tmp1 = bitcast <4 x i32> %tmp to <4 x float>
   %tmp2 = extractelement <4 x float> %tmp1, i32 0
   ret float %tmp2
 }
 
 ; CHECK-LABEL: @preserve_metadata_extract_elt0_raw_buffer_load_v2f32(
-; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent), !fpmath !0
+; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0), !fpmath !0
 ; CHECK-NEXT: ret float %data
-define amdgpu_ps float @preserve_metadata_extract_elt0_raw_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent), !fpmath !0
+define amdgpu_ps float @preserve_metadata_extract_elt0_raw_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+  %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0), !fpmath !0
   %elt0 = extractelement <2 x float> %data, i32 0
   ret float %elt0
 }
@@ -560,229 +560,229 @@ declare <4 x i32> @llvm.amdgcn.raw.buffer.load.v4i32(<4 x i32>, i32, i32, i32) #
 ; --------------------------------------------------------------------
 
 ; CHECK-LABEL: @raw_buffer_load_format_f32(
-; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
 ; CHECK-NEXT: ret float %data
-define amdgpu_ps float @raw_buffer_load_format_f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps float @raw_buffer_load_format_f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+  %data = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
   ret float %data
 }
 
 ; CHECK-LABEL: @raw_buffer_load_format_v1f32(
-; CHECK-NEXT: %data = call <1 x float> @llvm.amdgcn.raw.buffer.load.format.v1f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <1 x float> @llvm.amdgcn.raw.buffer.load.format.v1f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
 ; CHECK-NEXT: ret <1 x float> %data
-define amdgpu_ps <1 x float> @raw_buffer_load_format_v1f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <1 x float> @llvm.amdgcn.raw.buffer.load.format.v1f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps <1 x float> @raw_buffer_load_format_v1f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+  %data = call <1 x float> @llvm.amdgcn.raw.buffer.load.format.v1f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
   ret <1 x float> %data
 }
 
 ; CHECK-LABEL: @raw_buffer_load_format_v2f32(
-; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
 ; CHECK-NEXT: ret <2 x float> %data
-define amdgpu_ps <2 x float> @raw_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps <2 x float> @raw_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+  %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
   ret <2 x float> %data
 }
 
 ; CHECK-LABEL: @raw_buffer_load_format_v4f32(
-; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
 ; CHECK-NEXT: ret <4 x float> %data
-define amdgpu_ps <4 x float> @raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps <4 x float> @raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
   ret <4 x float> %data
 }
 
 ; CHECK-LABEL: @extract_elt0_raw_buffer_load_format_v2f32(
-; CHECK: %data = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK: %data = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
 ; CHECK-NEXT: ret float %data
-define amdgpu_ps float @extract_elt0_raw_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps float @extract_elt0_raw_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+  %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
   %elt0 = extractelement <2 x float> %data, i32 0
   ret float %elt0
 }
 
 ; CHECK-LABEL: @extract_elt1_raw_buffer_load_format_v2f32(
-; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
 ; CHECK-NEXT: %elt1 = extractelement <2 x float> %data, i32 1
 ; CHECK-NEXT: ret float %elt1
-define amdgpu_ps float @extract_elt1_raw_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps float @extract_elt1_raw_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+  %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
   %elt1 = extractelement <2 x float> %data, i32 1
   ret float %elt1
 }
 
 ; CHECK-LABEL: @extract_elt0_raw_buffer_load_format_v4f32(
-; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
 ; CHECK-NEXT: ret float %data
-define amdgpu_ps float @extract_elt0_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps float @extract_elt0_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
   %elt0 = extractelement <4 x float> %data, i32 0
   ret float %elt0
 }
 
 ; CHECK-LABEL: @extract_elt1_raw_buffer_load_format_v4f32(
-; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
 ; CHECK-NEXT: %elt1 = extractelement <2 x float> %data, i32 1
 ; CHECK-NEXT: ret float %elt1
-define amdgpu_ps float @extract_elt1_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps float @extract_elt1_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
   %elt1 = extractelement <4 x float> %data, i32 1
   ret float %elt1
 }
 
 ; CHECK-LABEL: @extract_elt2_raw_buffer_load_format_v4f32(
-; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
 ; CHECK-NEXT: %elt1 = extractelement <4 x float> %data, i32 2
 ; CHECK-NEXT: ret float %elt1
-define amdgpu_ps float @extract_elt2_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps float @extract_elt2_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
   %elt1 = extractelement <4 x float> %data, i32 2
   ret float %elt1
 }
 
 ; CHECK-LABEL: @extract_elt3_raw_buffer_load_format_v4f32(
-; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
 ; CHECK-NEXT: %elt1 = extractelement <4 x float> %data, i32 3
 ; CHECK-NEXT: ret float %elt1
-define amdgpu_ps float @extract_elt3_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps float @extract_elt3_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
   %elt1 = extractelement <4 x float> %data, i32 3
   ret float %elt1
 }
 
 ; CHECK-LABEL: @extract_elt0_elt1_raw_buffer_load_format_v4f32(
-; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
 ; CHECK-NEXT: ret <2 x float>
-define amdgpu_ps <2 x float> @extract_elt0_elt1_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps <2 x float> @extract_elt0_elt1_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
   %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 0, i32 1>
   ret <2 x float> %shuf
 }
 
 ; CHECK-LABEL: @extract_elt1_elt2_raw_buffer_load_format_v4f32(
-; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
 ; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 1, i32 2>
 ; CHECK-NEXT: ret <2 x float> %shuf
-define amdgpu_ps <2 x float> @extract_elt1_elt2_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps <2 x float> @extract_elt1_elt2_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
   %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 1, i32 2>
   ret <2 x float> %shuf
 }
 
 ; CHECK-LABEL: @extract_elt2_elt3_raw_buffer_load_format_v4f32(
-; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
 ; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 2, i32 3>
 ; CHECK-NEXT: ret <2 x float> %shuf
-define amdgpu_ps <2 x float> @extract_elt2_elt3_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps <2 x float> @extract_elt2_elt3_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
   %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 2, i32 3>
   ret <2 x float> %shuf
 }
 
 ; CHECK-LABEL: @extract_elt0_elt1_elt2_raw_buffer_load_format_v4f32(
-; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
 ; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
 ; CHECK-NEXT: ret <3 x float> %shuf
-define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
   %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
   ret <3 x float> %shuf
 }
 
 ; CHECK-LABEL: @extract_elt1_elt2_elt3_raw_buffer_load_format_v4f32(
-; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
 ; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 1, i32 2, i32 3>
 ; CHECK-NEXT: ret <3 x float> %shuf
-define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
   %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 1, i32 2, i32 3>
   ret <3 x float> %shuf
 }
 
 ; CHECK-LABEL: @extract_elt0_elt2_elt3_raw_buffer_load_format_v4f32(
-; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
 ; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 2, i32 3>
 ; CHECK-NEXT: ret <3 x float> %shuf
-define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
   %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 2, i32 3>
   ret <3 x float> %shuf
 }
 
 ; CHECK-LABEL: @extract_elt0_raw_buffer_load_format_v3f32(
-; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
 ; CHECK-NEXT: ret float %data
-define amdgpu_ps float @extract_elt0_raw_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps float @extract_elt0_raw_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+  %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
   %elt0 = extractelement <3 x float> %data, i32 0
   ret float %elt0
 }
 
 ; CHECK-LABEL: @extract_elt1_raw_buffer_load_format_v3f32(
-; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
 ; CHECK-NEXT: %elt1 = extractelement <2 x float> %data, i32 1
 ; CHECK-NEXT: ret float %elt1
-define amdgpu_ps float @extract_elt1_raw_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps float @extract_elt1_raw_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+  %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
   %elt1 = extractelement <3 x float> %data, i32 1
   ret float %elt1
 }
 
 ; CHECK-LABEL: @extract_elt2_raw_buffer_load_format_v3f32(
-; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
 ; CHECK-NEXT: %elt1 = extractelement <3 x float> %data, i32 2
 ; CHECK-NEXT: ret float %elt1
-define amdgpu_ps float @extract_elt2_raw_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps float @extract_elt2_raw_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+  %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
   %elt1 = extractelement <3 x float> %data, i32 2
   ret float %elt1
 }
 
 ; CHECK-LABEL: @extract_elt0_elt1_raw_buffer_load_format_v3f32(
-; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
 ; CHECK-NEXT: ret <2 x float>
-define amdgpu_ps <2 x float> @extract_elt0_elt1_raw_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps <2 x float> @extract_elt0_elt1_raw_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+  %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
   %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 0, i32 1>
   ret <2 x float> %shuf
 }
 
 ; CHECK-LABEL: @extract_elt1_elt2_raw_buffer_load_format_v3f32(
-; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
 ; CHECK-NEXT: %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 1, i32 2>
 ; CHECK-NEXT: ret <2 x float> %shuf
-define amdgpu_ps <2 x float> @extract_elt1_elt2_raw_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps <2 x float> @extract_elt1_elt2_raw_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+  %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
   %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 1, i32 2>
   ret <2 x float> %shuf
 }
 
 ; CHECK-LABEL: @extract0_bitcast_raw_buffer_load_format_v4f32(
-; CHECK-NEXT: %tmp = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %tmp = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
 ; CHECK-NEXT: %tmp2 = bitcast float %tmp to i32
 ; CHECK-NEXT: ret i32 %tmp2
-define i32 @extract0_bitcast_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %tmp = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+define i32 @extract0_bitcast_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+  %tmp = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
   %tmp1 = bitcast <4 x float> %tmp to <4 x i32>
   %tmp2 = extractelement <4 x i32> %tmp1, i32 0
   ret i32 %tmp2
 }
 
 ; CHECK-LABEL: @extract0_bitcast_raw_buffer_load_format_v4i32(
-; CHECK-NEXT: %tmp = call i32 @llvm.amdgcn.raw.buffer.load.format.i32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %tmp = call i32 @llvm.amdgcn.raw.buffer.load.format.i32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
 ; CHECK-NEXT: %tmp2 = bitcast i32 %tmp to float
 ; CHECK-NEXT: ret float %tmp2
-define float @extract0_bitcast_raw_buffer_load_format_v4i32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %tmp = call <4 x i32> @llvm.amdgcn.raw.buffer.load.format.v4i32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+define float @extract0_bitcast_raw_buffer_load_format_v4i32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+  %tmp = call <4 x i32> @llvm.amdgcn.raw.buffer.load.format.v4i32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
   %tmp1 = bitcast <4 x i32> %tmp to <4 x float>
   %tmp2 = extractelement <4 x float> %tmp1, i32 0
   ret float %tmp2
 }
 
 ; CHECK-LABEL: @preserve_metadata_extract_elt0_raw_buffer_load_format_v2f32(
-; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent), !fpmath !0
+; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0), !fpmath !0
 ; CHECK-NEXT: ret float %data
-define amdgpu_ps float @preserve_metadata_extract_elt0_raw_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent), !fpmath !0
+define amdgpu_ps float @preserve_metadata_extract_elt0_raw_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+  %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0), !fpmath !0
   %elt0 = extractelement <2 x float> %data, i32 0
   ret float %elt0
 }
@@ -800,229 +800,229 @@ declare <4 x i32> @llvm.amdgcn.raw.buffer.load.format.v4i32(<4 x i32>, i32, i32,
 ; --------------------------------------------------------------------
 
 ; CHECK-LABEL: @struct_buffer_load_f32(
-; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
 ; CHECK-NEXT: ret float %data
-define amdgpu_ps float @struct_buffer_load_f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps float @struct_buffer_load_f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+  %data = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
   ret float %data
 }
 
 ; CHECK-LABEL: @struct_buffer_load_v1f32(
-; CHECK-NEXT: %data = call <1 x float> @llvm.amdgcn.struct.buffer.load.v1f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <1 x float> @llvm.amdgcn.struct.buffer.load.v1f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
 ; CHECK-NEXT: ret <1 x float> %data
-define amdgpu_ps <1 x float> @struct_buffer_load_v1f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <1 x float> @llvm.amdgcn.struct.buffer.load.v1f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps <1 x float> @struct_buffer_load_v1f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+  %data = call <1 x float> @llvm.amdgcn.struct.buffer.load.v1f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
   ret <1 x float> %data
 }
 
 ; CHECK-LABEL: @struct_buffer_load_v2f32(
-; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
 ; CHECK-NEXT: ret <2 x float> %data
-define amdgpu_ps <2 x float> @struct_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps <2 x float> @struct_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+  %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
   ret <2 x float> %data
 }
 
 ; CHECK-LABEL: @struct_buffer_load_v4f32(
-; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
 ; CHECK-NEXT: ret <4 x float> %data
-define amdgpu_ps <4 x float> @struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps <4 x float> @struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
   ret <4 x float> %data
 }
 
 ; CHECK-LABEL: @extract_elt0_struct_buffer_load_v2f32(
-; CHECK: %data = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK: %data = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
 ; CHECK-NEXT: ret float %data
-define amdgpu_ps float @extract_elt0_struct_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps float @extract_elt0_struct_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+  %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
   %elt0 = extractelement <2 x float> %data, i32 0
   ret float %elt0
 }
 
 ; CHECK-LABEL: @extract_elt1_struct_buffer_load_v2f32(
-; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
 ; CHECK-NEXT: %elt1 = extractelement <2 x float> %data, i32 1
 ; CHECK-NEXT: ret float %elt1
-define amdgpu_ps float @extract_elt1_struct_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps float @extract_elt1_struct_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+  %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
   %elt1 = extractelement <2 x float> %data, i32 1
   ret float %elt1
 }
 
 ; CHECK-LABEL: @extract_elt0_struct_buffer_load_v4f32(
-; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
 ; CHECK-NEXT: ret float %data
-define amdgpu_ps float @extract_elt0_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps float @extract_elt0_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
   %elt0 = extractelement <4 x float> %data, i32 0
   ret float %elt0
 }
 
 ; CHECK-LABEL: @extract_elt1_struct_buffer_load_v4f32(
-; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
 ; CHECK-NEXT: %elt1 = extractelement <2 x float> %data, i32 1
 ; CHECK-NEXT: ret float %elt1
-define amdgpu_ps float @extract_elt1_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps float @extract_elt1_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
   %elt1 = extractelement <4 x float> %data, i32 1
   ret float %elt1
 }
 
 ; CHECK-LABEL: @extract_elt2_struct_buffer_load_v4f32(
-; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
 ; CHECK-NEXT: %elt1 = extractelement <4 x float> %data, i32 2
 ; CHECK-NEXT: ret float %elt1
-define amdgpu_ps float @extract_elt2_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps float @extract_elt2_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
   %elt1 = extractelement <4 x float> %data, i32 2
   ret float %elt1
 }
 
 ; CHECK-LABEL: @extract_elt3_struct_buffer_load_v4f32(
-; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
 ; CHECK-NEXT: %elt1 = extractelement <4 x float> %data, i32 3
 ; CHECK-NEXT: ret float %elt1
-define amdgpu_ps float @extract_elt3_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps float @extract_elt3_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
   %elt1 = extractelement <4 x float> %data, i32 3
   ret float %elt1
 }
 
 ; CHECK-LABEL: @extract_elt0_elt1_struct_buffer_load_v4f32(
-; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
 ; CHECK-NEXT: ret <2 x float>
-define amdgpu_ps <2 x float> @extract_elt0_elt1_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps <2 x float> @extract_elt0_elt1_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
   %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 0, i32 1>
   ret <2 x float> %shuf
 }
 
 ; CHECK-LABEL: @extract_elt1_elt2_struct_buffer_load_v4f32(
-; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
 ; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 1, i32 2>
 ; CHECK-NEXT: ret <2 x float> %shuf
-define amdgpu_ps <2 x float> @extract_elt1_elt2_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps <2 x float> @extract_elt1_elt2_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
   %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 1, i32 2>
   ret <2 x float> %shuf
 }
 
 ; CHECK-LABEL: @extract_elt2_elt3_struct_buffer_load_v4f32(
-; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
 ; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 2, i32 3>
 ; CHECK-NEXT: ret <2 x float> %shuf
-define amdgpu_ps <2 x float> @extract_elt2_elt3_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps <2 x float> @extract_elt2_elt3_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
   %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 2, i32 3>
   ret <2 x float> %shuf
 }
 
 ; CHECK-LABEL: @extract_elt0_elt1_elt2_struct_buffer_load_v4f32(
-; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
 ; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
 ; CHECK-NEXT: ret <3 x float> %shuf
-define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
   %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
   ret <3 x float> %shuf
 }
 
 ; CHECK-LABEL: @extract_elt1_elt2_elt3_struct_buffer_load_v4f32(
-; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
 ; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 1, i32 2, i32 3>
 ; CHECK-NEXT: ret <3 x float> %shuf
-define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
   %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 1, i32 2, i32 3>
   ret <3 x float> %shuf
 }
 
 ; CHECK-LABEL: @extract_elt0_elt2_elt3_struct_buffer_load_v4f32(
-; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
 ; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 2, i32 3>
 ; CHECK-NEXT: ret <3 x float> %shuf
-define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
   %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 2, i32 3>
   ret <3 x float> %shuf
 }
 
 ; CHECK-LABEL: @extract_elt0_struct_buffer_load_v3f32(
-; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
 ; CHECK-NEXT: ret float %data
-define amdgpu_ps float @extract_elt0_struct_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps float @extract_elt0_struct_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+  %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
   %elt0 = extractelement <3 x float> %data, i32 0
   ret float %elt0
 }
 
 ; CHECK-LABEL: @extract_elt1_struct_buffer_load_v3f32(
-; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
 ; CHECK-NEXT: %elt1 = extractelement <2 x float> %data, i32 1
 ; CHECK-NEXT: ret float %elt1
-define amdgpu_ps float @extract_elt1_struct_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps float @extract_elt1_struct_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+  %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
   %elt1 = extractelement <3 x float> %data, i32 1
   ret float %elt1
 }
 
 ; CHECK-LABEL: @extract_elt2_struct_buffer_load_v3f32(
-; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
 ; CHECK-NEXT: %elt1 = extractelement <3 x float> %data, i32 2
 ; CHECK-NEXT: ret float %elt1
-define amdgpu_ps float @extract_elt2_struct_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps float @extract_elt2_struct_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+  %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
   %elt1 = extractelement <3 x float> %data, i32 2
   ret float %elt1
 }
 
 ; CHECK-LABEL: @extract_elt0_elt1_struct_buffer_load_v3f32(
-; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
 ; CHECK-NEXT: ret <2 x float>
-define amdgpu_ps <2 x float> @extract_elt0_elt1_struct_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps <2 x float> @extract_elt0_elt1_struct_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+  %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
   %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 0, i32 1>
   ret <2 x float> %shuf
 }
 
 ; CHECK-LABEL: @extract_elt1_elt2_struct_buffer_load_v3f32(
-; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
 ; CHECK-NEXT: %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 1, i32 2>
 ; CHECK-NEXT: ret <2 x float> %shuf
-define amdgpu_ps <2 x float> @extract_elt1_elt2_struct_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps <2 x float> @extract_elt1_elt2_struct_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+  %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
   %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 1, i32 2>
   ret <2 x float> %shuf
 }
 
 ; CHECK-LABEL: @extract0_bitcast_struct_buffer_load_v4f32(
-; CHECK-NEXT: %tmp = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %tmp = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
 ; CHECK-NEXT: %tmp2 = bitcast float %tmp to i32
 ; CHECK-NEXT: ret i32 %tmp2
-define i32 @extract0_bitcast_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %tmp = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+define i32 @extract0_bitcast_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+  %tmp = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
   %tmp1 = bitcast <4 x float> %tmp to <4 x i32>
   %tmp2 = extractelement <4 x i32> %tmp1, i32 0
   ret i32 %tmp2
 }
 
 ; CHECK-LABEL: @extract0_bitcast_struct_buffer_load_v4i32(
-; CHECK-NEXT: %tmp = call i32 @llvm.amdgcn.struct.buffer.load.i32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %tmp = call i32 @llvm.amdgcn.struct.buffer.load.i32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
 ; CHECK-NEXT: %tmp2 = bitcast i32 %tmp to float
 ; CHECK-NEXT: ret float %tmp2
-define float @extract0_bitcast_struct_buffer_load_v4i32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %tmp = call <4 x i32> @llvm.amdgcn.struct.buffer.load.v4i32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+define float @extract0_bitcast_struct_buffer_load_v4i32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+  %tmp = call <4 x i32> @llvm.amdgcn.struct.buffer.load.v4i32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
   %tmp1 = bitcast <4 x i32> %tmp to <4 x float>
   %tmp2 = extractelement <4 x float> %tmp1, i32 0
   ret float %tmp2
 }
 
 ; CHECK-LABEL: @preserve_metadata_extract_elt0_struct_buffer_load_v2f32(
-; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent), !fpmath !0
+; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0), !fpmath !0
 ; CHECK-NEXT: ret float %data
-define amdgpu_ps float @preserve_metadata_extract_elt0_struct_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent), !fpmath !0
+define amdgpu_ps float @preserve_metadata_extract_elt0_struct_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+  %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0), !fpmath !0
   %elt0 = extractelement <2 x float> %data, i32 0
   ret float %elt0
 }
@@ -1040,229 +1040,229 @@ declare <4 x i32> @llvm.amdgcn.struct.buffer.load.v4i32(<4 x i32>, i32, i32, i32
 ; --------------------------------------------------------------------
 
 ; CHECK-LABEL: @struct_buffer_load_format_f32(
-; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
 ; CHECK-NEXT: ret float %data
-define amdgpu_ps float @struct_buffer_load_format_f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps float @struct_buffer_load_format_f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+  %data = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
   ret float %data
 }
 
 ; CHECK-LABEL: @struct_buffer_load_format_v1f32(
-; CHECK-NEXT: %data = call <1 x float> @llvm.amdgcn.struct.buffer.load.format.v1f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <1 x float> @llvm.amdgcn.struct.buffer.load.format.v1f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
 ; CHECK-NEXT: ret <1 x float> %data
-define amdgpu_ps <1 x float> @struct_buffer_load_format_v1f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <1 x float> @llvm.amdgcn.struct.buffer.load.format.v1f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps <1 x float> @struct_buffer_load_format_v1f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+  %data = call <1 x float> @llvm.amdgcn.struct.buffer.load.format.v1f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
   ret <1 x float> %data
 }
 
 ; CHECK-LABEL: @struct_buffer_load_format_v2f32(
-; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
 ; CHECK-NEXT: ret <2 x float> %data
-define amdgpu_ps <2 x float> @struct_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps <2 x float> @struct_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+  %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
   ret <2 x float> %data
 }
 
 ; CHECK-LABEL: @struct_buffer_load_format_v4f32(
-; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
 ; CHECK-NEXT: ret <4 x float> %data
-define amdgpu_ps <4 x float> @struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps <4 x float> @struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
   ret <4 x float> %data
 }
 
 ; CHECK-LABEL: @extract_elt0_struct_buffer_load_format_v2f32(
-; CHECK: %data = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK: %data = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
 ; CHECK-NEXT: ret float %data
-define amdgpu_ps float @extract_elt0_struct_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps float @extract_elt0_struct_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+  %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
   %elt0 = extractelement <2 x float> %data, i32 0
   ret float %elt0
 }
 
 ; CHECK-LABEL: @extract_elt1_struct_buffer_load_format_v2f32(
-; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
 ; CHECK-NEXT: %elt1 = extractelement <2 x float> %data, i32 1
 ; CHECK-NEXT: ret float %elt1
-define amdgpu_ps float @extract_elt1_struct_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps float @extract_elt1_struct_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+  %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
   %elt1 = extractelement <2 x float> %data, i32 1
   ret float %elt1
 }
 
 ; CHECK-LABEL: @extract_elt0_struct_buffer_load_format_v4f32(
-; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
 ; CHECK-NEXT: ret float %data
-define amdgpu_ps float @extract_elt0_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps float @extract_elt0_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
   %elt0 = extractelement <4 x float> %data, i32 0
   ret float %elt0
 }
 
 ; CHECK-LABEL: @extract_elt1_struct_buffer_load_format_v4f32(
-; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
 ; CHECK-NEXT: %elt1 = extractelement <2 x float> %data, i32 1
 ; CHECK-NEXT: ret float %elt1
-define amdgpu_ps float @extract_elt1_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps float @extract_elt1_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
   %elt1 = extractelement <4 x float> %data, i32 1
   ret float %elt1
 }
 
 ; CHECK-LABEL: @extract_elt2_struct_buffer_load_format_v4f32(
-; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
 ; CHECK-NEXT: %elt1 = extractelement <4 x float> %data, i32 2
 ; CHECK-NEXT: ret float %elt1
-define amdgpu_ps float @extract_elt2_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps float @extract_elt2_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
   %elt1 = extractelement <4 x float> %data, i32 2
   ret float %elt1
 }
 
 ; CHECK-LABEL: @extract_elt3_struct_buffer_load_format_v4f32(
-; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
 ; CHECK-NEXT: %elt1 = extractelement <4 x float> %data, i32 3
 ; CHECK-NEXT: ret float %elt1
-define amdgpu_ps float @extract_elt3_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps float @extract_elt3_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
   %elt1 = extractelement <4 x float> %data, i32 3
   ret float %elt1
 }
 
 ; CHECK-LABEL: @extract_elt0_elt1_struct_buffer_load_format_v4f32(
-; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
 ; CHECK-NEXT: ret <2 x float>
-define amdgpu_ps <2 x float> @extract_elt0_elt1_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps <2 x float> @extract_elt0_elt1_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
   %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 0, i32 1>
   ret <2 x float> %shuf
 }
 
 ; CHECK-LABEL: @extract_elt1_elt2_struct_buffer_load_format_v4f32(
-; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
 ; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 1, i32 2>
 ; CHECK-NEXT: ret <2 x float> %shuf
-define amdgpu_ps <2 x float> @extract_elt1_elt2_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps <2 x float> @extract_elt1_elt2_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
   %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 1, i32 2>
   ret <2 x float> %shuf
 }
 
 ; CHECK-LABEL: @extract_elt2_elt3_struct_buffer_load_format_v4f32(
-; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
 ; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 2, i32 3>
 ; CHECK-NEXT: ret <2 x float> %shuf
-define amdgpu_ps <2 x float> @extract_elt2_elt3_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps <2 x float> @extract_elt2_elt3_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
   %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 2, i32 3>
   ret <2 x float> %shuf
 }
 
 ; CHECK-LABEL: @extract_elt0_elt1_elt2_struct_buffer_load_format_v4f32(
-; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
 ; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
 ; CHECK-NEXT: ret <3 x float> %shuf
-define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
   %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
   ret <3 x float> %shuf
 }
 
 ; CHECK-LABEL: @extract_elt1_elt2_elt3_struct_buffer_load_format_v4f32(
-; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
 ; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 1, i32 2, i32 3>
 ; CHECK-NEXT: ret <3 x float> %shuf
-define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
   %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 1, i32 2, i32 3>
   ret <3 x float> %shuf
 }
 
 ; CHECK-LABEL: @extract_elt0_elt2_elt3_struct_buffer_load_format_v4f32(
-; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
 ; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 2, i32 3>
 ; CHECK-NEXT: ret <3 x float> %shuf
-define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
   %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 2, i32 3>
   ret <3 x float> %shuf
 }
 
 ; CHECK-LABEL: @extract_elt0_struct_buffer_load_format_v3f32(
-; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
 ; CHECK-NEXT: ret float %data
-define amdgpu_ps float @extract_elt0_struct_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps float @extract_elt0_struct_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+  %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
   %elt0 = extractelement <3 x float> %data, i32 0
   ret float %elt0
 }
 
 ; CHECK-LABEL: @extract_elt1_struct_buffer_load_format_v3f32(
-; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
 ; CHECK-NEXT: %elt1 = extractelement <2 x float> %data, i32 1
 ; CHECK-NEXT: ret float %elt1
-define amdgpu_ps float @extract_elt1_struct_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps float @extract_elt1_struct_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+  %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
   %elt1 = extractelement <3 x float> %data, i32 1
   ret float %elt1
 }
 
 ; CHECK-LABEL: @extract_elt2_struct_buffer_load_format_v3f32(
-; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
 ; CHECK-NEXT: %elt1 = extractelement <3 x float> %data, i32 2
 ; CHECK-NEXT: ret float %elt1
-define amdgpu_ps float @extract_elt2_struct_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps float @extract_elt2_struct_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+  %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
   %elt1 = extractelement <3 x float> %data, i32 2
   ret float %elt1
 }
 
 ; CHECK-LABEL: @extract_elt0_elt1_struct_buffer_load_format_v3f32(
-; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
 ; CHECK-NEXT: ret <2 x float>
-define amdgpu_ps <2 x float> @extract_elt0_elt1_struct_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps <2 x float> @extract_elt0_elt1_struct_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+  %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
   %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 0, i32 1>
   ret <2 x float> %shuf
 }
 
 ; CHECK-LABEL: @extract_elt1_elt2_struct_buffer_load_format_v3f32(
-; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
 ; CHECK-NEXT: %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 1, i32 2>
 ; CHECK-NEXT: ret <2 x float> %shuf
-define amdgpu_ps <2 x float> @extract_elt1_elt2_struct_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps <2 x float> @extract_elt1_elt2_struct_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+  %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
   %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 1, i32 2>
   ret <2 x float> %shuf
 }
 
 ; CHECK-LABEL: @extract0_bitcast_struct_buffer_load_format_v4f32(
-; CHECK-NEXT: %tmp = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %tmp = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
 ; CHECK-NEXT: %tmp2 = bitcast float %tmp to i32
 ; CHECK-NEXT: ret i32 %tmp2
-define i32 @extract0_bitcast_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %tmp = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+define i32 @extract0_bitcast_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+  %tmp = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
   %tmp1 = bitcast <4 x float> %tmp to <4 x i32>
   %tmp2 = extractelement <4 x i32> %tmp1, i32 0
   ret i32 %tmp2
 }
 
 ; CHECK-LABEL: @extract0_bitcast_struct_buffer_load_format_v4i32(
-; CHECK-NEXT: %tmp = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %tmp = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
 ; CHECK-NEXT: %tmp2 = bitcast i32 %tmp to float
 ; CHECK-NEXT: ret float %tmp2
-define float @extract0_bitcast_struct_buffer_load_format_v4i32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %tmp = call <4 x i32> @llvm.amdgcn.struct.buffer.load.format.v4i32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+define float @extract0_bitcast_struct_buffer_load_format_v4i32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+  %tmp = call <4 x i32> @llvm.amdgcn.struct.buffer.load.format.v4i32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
   %tmp1 = bitcast <4 x i32> %tmp to <4 x float>
   %tmp2 = extractelement <4 x float> %tmp1, i32 0
   ret float %tmp2
 }
 
 ; CHECK-LABEL: @preserve_metadata_extract_elt0_struct_buffer_load_format_v2f32(
-; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent), !fpmath !0
+; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0), !fpmath !0
 ; CHECK-NEXT: ret float %data
-define amdgpu_ps float @preserve_metadata_extract_elt0_struct_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent), !fpmath !0
+define amdgpu_ps float @preserve_metadata_extract_elt0_struct_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+  %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0), !fpmath !0
   %elt0 = extractelement <2 x float> %data, i32 0
   ret float %elt0
 }
@@ -1319,16 +1319,6 @@ define amdgpu_ps float @extract_elt0_image_sample_2d_v4f32_f32(float %s, float %
   ret float %elt0
 }
 
-; CHECK-LABEL: @extract_elt0_invalid_dmask_image_sample_1d_v4f32_f32(
-; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 %dmask, float %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
-; CHECK-NEXT: %elt0 = extractelement <4 x float> %data, i32 0
-; CHECK-NEXT: ret float %elt0
-define amdgpu_ps float @extract_elt0_invalid_dmask_image_sample_1d_v4f32_f32(float %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc, i32 %dmask) #0 {
-  %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 %dmask, float %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
-  %elt0 = extractelement <4 x float> %data, i32 0
-  ret float %elt0
-}
-
 ; CHECK-LABEL: @extract_elt0_dmask_0000_image_sample_3d_v4f32_f32(
 ; CHECK-NEXT: ret float undef
 define amdgpu_ps float @extract_elt0_dmask_0000_image_sample_3d_v4f32_f32(float %s, float %t, float %r, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
@@ -2395,14 +2385,7 @@ define amdgpu_ps float @extract_elt0_image_getresinfo_1d_v4f32_i32(i32 %mip, <8
   ret float %elt0
 }
 
-; Verify that we don't creash on non-constant operand.
-define protected <4 x half> @__llvm_amdgcn_image_sample_d_1darray_v4f16_f32_f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1 zeroext, i32, i32) local_unnamed_addr {
-  %tmp = tail call <4 x half> @llvm.amdgcn.image.sample.d.1darray.v4f16.f32.f32(i32 %0, float %1, float %2, float %3, float %4, <8 x i32> %5, <4 x i32> %6, i1 zeroext %7, i32 %8, i32 %9) #1
-  ret <4 x half> %tmp
-}
-
 declare <4 x float> @llvm.amdgcn.image.getresinfo.1d.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #1
-declare <4 x half> @llvm.amdgcn.image.sample.d.1darray.v4f16.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32)
 
 ; --------------------------------------------------------------------
 ; TFE / LWE
diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
index b8e19e2bc94..a065d10946d 100644
--- a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
@@ -1078,17 +1078,7 @@ define i64 @sbfe_offset_32_width_32_i64(i64 %src) {
 ; llvm.amdgcn.exp
 ; --------------------------------------------------------------------
 
-declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) nounwind inaccessiblememonly
-
-; Make sure no crashing on invalid variable params
-; CHECK-LABEL: @exp_invalid_inputs(
-; CHECK: call void @llvm.amdgcn.exp.f32(i32 0, i32 %en, float 1.000000e+00, float 2.000000e+00, float 5.000000e-01, float 4.000000e+00, i1 true, i1 false)
-; CHECK: call void @llvm.amdgcn.exp.f32(i32 %tgt, i32 15, float 1.000000e+00, float 2.000000e+00, float 5.000000e-01, float 4.000000e+00, i1 true, i1 false)
-define void @exp_invalid_inputs(i32 %tgt, i32 %en) {
-  call void @llvm.amdgcn.exp.f32(i32 0, i32 %en, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
-  call void @llvm.amdgcn.exp.f32(i32 %tgt, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
-  ret void
-}
+declare void @llvm.amdgcn.exp.f32(i32 immarg, i32 immarg, float, float, float, float, i1 immarg, i1 immarg) nounwind inaccessiblememonly
 
 ; CHECK-LABEL: @exp_disabled_inputs_to_undef(
 ; CHECK: call void @llvm.amdgcn.exp.f32(i32 0, i32 1, float 1.000000e+00, float undef, float undef, float undef, i1 true, i1 false)
@@ -1136,16 +1126,7 @@ define void @exp_disabled_inputs_to_undef(float %x, float %y, float %z, float %w
 ; llvm.amdgcn.exp.compr
 ; --------------------------------------------------------------------
 
-declare void @llvm.amdgcn.exp.compr.v2f16(i32, i32, <2 x half>, <2 x half>, i1, i1) nounwind inaccessiblememonly
-
-; CHECK-LABEL: @exp_compr_invalid_inputs(
-; CHECK: call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 %en, <2 x half> <half 0xH3C00, half 0xH4000>, <2 x half> <half 0xH3800, half 0xH4400>, i1 true, i1 false)
-; CHECK: call void @llvm.amdgcn.exp.compr.v2f16(i32 %tgt, i32 5, <2 x half> <half 0xH3C00, half 0xH4000>, <2 x half> <half 0xH3800, half 0xH4400>, i1 true, i1 false)
-define void @exp_compr_invalid_inputs(i32 %tgt, i32 %en) {
-  call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 %en, <2 x half> <half 1.0, half 2.0>, <2 x half> <half 0.5, half 4.0>, i1 true, i1 false)
-  call void @llvm.amdgcn.exp.compr.v2f16(i32 %tgt, i32 5, <2 x half> <half 1.0, half 2.0>, <2 x half> <half 0.5, half 4.0>, i1 true, i1 false)
-  ret void
-}
+declare void @llvm.amdgcn.exp.compr.v2f16(i32 immarg, i32 immarg, <2 x half>, <2 x half>, i1 immarg, i1 immarg) nounwind inaccessiblememonly
 
 ; CHECK-LABEL: @exp_compr_disabled_inputs_to_undef(
 ; CHECK: call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 0, <2 x half> undef, <2 x half> undef, i1 true, i1 false)
@@ -1404,17 +1385,9 @@ define float @fmed3_0_1_undef_f32() {
 ; llvm.amdgcn.icmp
 ; --------------------------------------------------------------------
 
-declare i64 @llvm.amdgcn.icmp.i32(i32, i32, i32) nounwind readnone convergent
-declare i64 @llvm.amdgcn.icmp.i64(i64, i64, i32) nounwind readnone convergent
-declare i64 @llvm.amdgcn.icmp.i1(i1, i1, i32) nounwind readnone convergent
-
-; Make sure there's no crash for invalid input
-; CHECK-LABEL: @invalid_nonconstant_icmp_code(
-; CHECK: call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 %c)
-define i64 @invalid_nonconstant_icmp_code(i32 %a, i32 %b, i32 %c) {
-  %result = call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 %c)
-  ret i64 %result
-}
+declare i64 @llvm.amdgcn.icmp.i32(i32, i32, i32 immarg) nounwind readnone convergent
+declare i64 @llvm.amdgcn.icmp.i64(i64, i64, i32 immarg) nounwind readnone convergent
+declare i64 @llvm.amdgcn.icmp.i1(i1, i1, i32 immarg) nounwind readnone convergent
 
 ; CHECK-LABEL: @invalid_icmp_code(
 ; CHECK: %under = call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 31)
@@ -2012,15 +1985,7 @@ define i64 @fold_icmp_i1_ne_0_icmp_ult_i16(i16 %a, i16 %b) {
 ; llvm.amdgcn.fcmp
 ; --------------------------------------------------------------------
 
-declare i64 @llvm.amdgcn.fcmp.f32(float, float, i32) nounwind readnone convergent
-
-; Make sure there's no crash for invalid input
-; CHECK-LABEL: @invalid_nonconstant_fcmp_code(
-; CHECK: call i64 @llvm.amdgcn.fcmp.f32(float %a, float %b, i32 %c)
-define i64 @invalid_nonconstant_fcmp_code(float %a, float %b, i32 %c) {
-  %result = call i64 @llvm.amdgcn.fcmp.f32(float %a, float %b, i32 %c)
-  ret i64 %result
-}
+declare i64 @llvm.amdgcn.fcmp.f32(float, float, i32 immarg) nounwind readnone convergent
 
 ; CHECK-LABEL: @invalid_fcmp_code(
 ; CHECK: %under = call i64 @llvm.amdgcn.fcmp.f32(float %a, float %b, i32 -1)
diff --git a/llvm/test/Transforms/LowerExpectIntrinsic/PR33346.ll b/llvm/test/Transforms/LowerExpectIntrinsic/PR33346.ll
index ca962fbdc8f..5bcf93408b1 100644
--- a/llvm/test/Transforms/LowerExpectIntrinsic/PR33346.ll
+++ b/llvm/test/Transforms/LowerExpectIntrinsic/PR33346.ll
@@ -7,12 +7,12 @@ bb:
   store i64 %arg, i64* %tmp, align 8
   %tmp1 = load i64, i64* %tmp, align 8
   %tmp2 = load i64, i64* %tmp, align 8
-  %tmp3 = call i64 @llvm.expect.i64(i64 %tmp1, i64 %tmp2)
+  %tmp3 = call i64 @llvm.expect.i64(i64 %tmp1, i64 123)
   ret i64 %tmp3
 }
 
 ; Function Attrs: nounwind readnone
-declare i64 @llvm.expect.i64(i64, i64)
+declare i64 @llvm.expect.i64(i64, i64 immarg)
 
 
 !llvm.module.flags = !{!0}
diff --git a/llvm/test/Verifier/AMDGPU/intrinsic-immarg.ll b/llvm/test/Verifier/AMDGPU/intrinsic-immarg.ll
new file mode 100644
index 00000000000..5b0e5b3aad7
--- /dev/null
+++ b/llvm/test/Verifier/AMDGPU/intrinsic-immarg.ll
@@ -0,0 +1,552 @@
+; RUN: not llvm-as %s -o /dev/null 2>&1 | FileCheck %s
+
+declare float @llvm.amdgcn.buffer.load.f32(<4 x i32>, i32, i32, i1, i1)
+define void @buffer_load_f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i1 %bool) {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i1 %bool
+  ; CHECK-NEXT: %data0 = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 %bool, i1 false)
+  %data0 = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 %bool, i1 false)
+
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i1 %bool
+  ; CHECK-NEXT: %data1 = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 %bool)
+  %data1 = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 %bool)
+  ret void
+}
+
+declare float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32>, i32, i32, i32)
+define void @raw_buffer_load_f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %arg) {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %arg
+  ; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %arg)
+  %data = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %arg)
+  ret void
+}
+
+declare float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32>, i32, i32, i32)
+define void @raw_buffer_load_format_f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %arg) {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %arg
+  ; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %arg)
+  %data = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %arg)
+  ret void
+}
+
+declare float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32>, i32, i32, i32, i32)
+define void @struct_buffer_load_f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %arg) {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %arg
+  ; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %arg)
+  %data = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %arg)
+  ret void
+}
+
+declare float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32>, i32, i32, i32, i32)
+define void @struct_buffer_load_format_f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %arg) {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %arg
+  ; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %arg)
+  %data = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %arg)
+  ret void
+}
+
+declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32)
+define void @invalid_image_sample_1d_v4f32_f32(float %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc, i32 %dmask, i1 %bool, i32 %arg) {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %dmask
+  ; CHECK-NEXT: %data0 = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 %dmask, float %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+  %data0 = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 %dmask, float %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i1 %bool
+  ; CHECK-NEXT: %data1 = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 0, float %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i1 %bool, i32 0, i32 0)
+  %data1 = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 0, float %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i1 %bool, i32 0, i32 0)
+
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %arg
+  ; CHECK-NEXT:   %data2 = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 0, float %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 %arg, i32 0)
+  %data2 = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 0, float %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 %arg, i32 0)
+
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %arg
+  ; CHECK-NEXT:   %data3 = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 0, float %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 %arg)
+  %data3 = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 0, float %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 %arg)
+  ret void
+}
+
+declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1)
+define void @exp_invalid_inputs(i32 %tgt, i32 %en, i1 %bool) {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %en
+  ; CHECK-NEXT: call void @llvm.amdgcn.exp.f32(i32 0, i32 %en, float 1.000000e+00, float 2.000000e+00, float 5.000000e-01, float 4.000000e+00, i1 true, i1 false)
+  call void @llvm.amdgcn.exp.f32(i32 0, i32 %en, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
+
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %tgt
+  ; CHECK-NEXT: call void @llvm.amdgcn.exp.f32(i32 %tgt, i32 15, float 1.000000e+00, float 2.000000e+00, float 5.000000e-01, float 4.000000e+00, i1 true, i1 false)
+  call void @llvm.amdgcn.exp.f32(i32 %tgt, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
+
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i1 %bool
+  ; CHECK-NEXT: call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float 1.000000e+00, float 2.000000e+00, float 5.000000e-01, float 4.000000e+00, i1 %bool, i1 false)
+  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 %bool, i1 false)
+
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i1 %bool
+  ; CHECK-NEXT: call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float 1.000000e+00, float 2.000000e+00, float 5.000000e-01, float 4.000000e+00, i1 false, i1 %bool)
+  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 false, i1 %bool)
+  ret void
+}
+
+declare void @llvm.amdgcn.exp.compr.v2f16(i32, i32, <2 x half>, <2 x half>, i1, i1)
+
+define void @exp_compr_invalid_inputs(i32 %tgt, i32 %en, i1 %bool) {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %en
+  ; CHECK-NEXT: call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 %en, <2 x half> <half 0xH3C00, half 0xH4000>, <2 x half> <half 0xH3800, half 0xH4400>, i1 true, i1 false)
+  call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 %en, <2 x half> <half 1.0, half 2.0>, <2 x half> <half 0.5, half 4.0>, i1 true, i1 false)
+
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %tgt
+  ; CHECK-NEXT: call void @llvm.amdgcn.exp.compr.v2f16(i32 %tgt, i32 5, <2 x half> <half 0xH3C00, half 0xH4000>, <2 x half> <half 0xH3800, half 0xH4400>, i1 true, i1 false)
+  call void @llvm.amdgcn.exp.compr.v2f16(i32 %tgt, i32 5, <2 x half> <half 1.0, half 2.0>, <2 x half> <half 0.5, half 4.0>, i1 true, i1 false)
+
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i1 %bool
+  ; CHECK-NEXT: call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 5, <2 x half> <half 0xH3C00, half 0xH4000>, <2 x half> <half 0xH3800, half 0xH4400>, i1 %bool, i1 false)
+  call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 5, <2 x half> <half 1.0, half 2.0>, <2 x half> <half 0.5, half 4.0>, i1 %bool, i1 false)
+
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i1 %bool
+  ; CHECK-NEXT: call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 5, <2 x half> <half 0xH3C00, half 0xH4000>, <2 x half> <half 0xH3800, half 0xH4400>, i1 false, i1 %bool)
+  call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 5, <2 x half> <half 1.0, half 2.0>, <2 x half> <half 0.5, half 4.0>, i1 false, i1 %bool)
+  ret void
+}
+
+declare i64 @llvm.amdgcn.icmp.i32(i32, i32, i32)
+
+define i64 @invalid_nonconstant_icmp_code(i32 %a, i32 %b, i32 %c) {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %c
+  ; CHECK-NEXT: %result = call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 %c)
+  %result = call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 %c)
+  ret i64 %result
+}
+
+declare i64 @llvm.amdgcn.fcmp.f32(float, float, i32)
+define i64 @invalid_nonconstant_fcmp_code(float %a, float %b, i32 %c) {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %c
+  ; CHECK-NEXT: %result = call i64 @llvm.amdgcn.fcmp.f32(float %a, float %b, i32 %c)
+  %result = call i64 @llvm.amdgcn.fcmp.f32(float %a, float %b, i32 %c)
+  ret i64 %result
+}
+
+declare i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* nocapture, i32, i32, i32, i1)
+define amdgpu_kernel void @invalid_atomic_inc(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %var, i1 %bool) {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %var
+  ; CHECK-NEXT: %result0 = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 %var, i32 0, i1 false)
+  %result0 = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 %var, i32 0, i1 false)
+
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %var
+  ; CHECK-NEXT: %result1 = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 0, i32 %var, i1 false)
+  %result1 = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 0, i32 %var, i1 false)
+
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i1 %bool
+  ; CHECK-NEXT: %result2 = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 0, i32 0, i1 %bool)
+  %result2 = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 0, i32 0, i1 %bool)
+  ret void
+}
+
+declare i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* nocapture, i32, i32, i32, i1)
+define amdgpu_kernel void @invalid_atomic_dec(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %var, i1 %bool) {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %var
+  ; CHECK-NEXT: %result0 = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 %var, i32 0, i1 false)
+  %result0 = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 %var, i32 0, i1 false)
+
+   ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %var
+  ; CHECK-NEXT: %result1 = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 0, i32 %var, i1 false)
+  %result1 = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 0, i32 %var, i1 false)
+
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i1 %bool
+  ; CHECK-NEXT: %result2 = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 0, i32 0, i1 %bool)
+  %result2 = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 0, i32 0, i1 %bool)
+  ret void
+}
+
+declare { float, i1 } @llvm.amdgcn.div.scale.f32(float, float, i1)
+define amdgpu_kernel void @test_div_scale_f32_val_undef_undef(float addrspace(1)* %out) {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK: i1 undef
+  ; CHECK: %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float 8.000000e+00, float undef, i1 undef)
+  %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float 8.0, float undef, i1 undef)
+  %result0 = extractvalue { float, i1 } %result, 0
+  store float %result0, float addrspace(1)* %out, align 4
+  ret void
+}
+
+declare void @llvm.amdgcn.init.exec(i64)
+define amdgpu_ps void @init_exec(i64 %var) {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i64 %var
+  ; CHECK-NEXT: call void @llvm.amdgcn.init.exec(i64 %var)
+  call void @llvm.amdgcn.init.exec(i64 %var)
+  ret void
+}
+
+declare i32 @llvm.amdgcn.s.sendmsg(i32, i32)
+define void @sendmsg(i32 %arg0, i32 %arg1) {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %arg0
+  ; CHECK-NEXT: %val = call i32 @llvm.amdgcn.s.sendmsg(i32 %arg0, i32 %arg1)
+  %val = call i32 @llvm.amdgcn.s.sendmsg(i32 %arg0, i32 %arg1)
+  ret void
+}
+
+declare i32 @llvm.amdgcn.s.sendmsghalt(i32, i32)
+define void @sendmsghalt(i32 %arg0, i32 %arg1) {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %arg0
+  ; CHECK-NEXT: %val = call i32 @llvm.amdgcn.s.sendmsghalt(i32 %arg0, i32 %arg1)
+  %val = call i32 @llvm.amdgcn.s.sendmsghalt(i32 %arg0, i32 %arg1)
+  ret void
+}
+
+declare i32 @llvm.amdgcn.s.waitcnt(i32)
+define void @waitcnt(i32 %arg0) {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %arg0
+  ; CHECK-NEXT: %val = call i32 @llvm.amdgcn.s.waitcnt(i32 %arg0)
+  %val = call i32 @llvm.amdgcn.s.waitcnt(i32 %arg0)
+  ret void
+}
+
+declare i32 @llvm.amdgcn.s.getreg(i32)
+define void @getreg(i32 %arg0, i32 %arg1) {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %arg0
+  ; CHECK-NEXT: %val = call i32 @llvm.amdgcn.s.getreg(i32 %arg0)
+  %val = call i32 @llvm.amdgcn.s.getreg(i32 %arg0)
+  ret void
+}
+
+declare i32 @llvm.amdgcn.s.sleep(i32)
+define void @sleep(i32 %arg0, i32 %arg1) {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %arg0
+  ; CHECK-NEXT: %val = call i32 @llvm.amdgcn.s.sleep(i32 %arg0)
+  %val = call i32 @llvm.amdgcn.s.sleep(i32 %arg0)
+  ret void
+}
+
+declare i32 @llvm.amdgcn.s.incperflevel(i32)
+define void @incperflevel(i32 %arg0, i32 %arg1) {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %arg0
+  ; CHECK-NEXT: %val = call i32 @llvm.amdgcn.s.incperflevel(i32 %arg0)
+  %val = call i32 @llvm.amdgcn.s.incperflevel(i32 %arg0)
+  ret void
+}
+
+declare i32 @llvm.amdgcn.s.decperflevel(i32)
+define void @decperflevel(i32 %arg0, i32 %arg1) {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %arg0
+  ; CHECK-NEXT: %val = call i32 @llvm.amdgcn.s.decperflevel(i32 %arg0)
+  %val = call i32 @llvm.amdgcn.s.decperflevel(i32 %arg0)
+  ret void
+}
+
+declare i32 @llvm.amdgcn.ds.swizzle(i32, i32)
+define void @ds_swizzle(i32 %arg0, i32 %arg1) {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %arg1
+  ; CHECK-NEXT: %val = call i32 @llvm.amdgcn.ds.swizzle(i32 %arg0, i32 %arg1)
+  %val = call i32 @llvm.amdgcn.ds.swizzle(i32 %arg0, i32 %arg1)
+  ret void
+}
+
+declare i32 @llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* nocapture, i32, i32, i32, i1, i32, i1, i1)
+define amdgpu_kernel void @ds_ordered_add(i32 addrspace(2)* %gds, i32 addrspace(1)* %out, i32 %var, i1 %bool) {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %var
+  ; CHECK-NEXT: %val0 = call i32 @llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 %var, i32 0, i1 false, i32 1, i1 true, i1 true)
+  %val0 = call i32 @llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 %var, i32 0, i1 false, i32 1, i1 true, i1 true)
+
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %var
+  ; CHECK-NEXT: %val1 = call i32 @llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 0, i32 %var, i1 false, i32 1, i1 true, i1 true)
+  %val1 = call i32 @llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 0, i32 %var, i1 false, i32 1, i1 true, i1 true)
+
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i1 %bool
+  ; CHECK-NEXT: %val2 = call i32 @llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 %bool, i32 1, i1 true, i1 true)
+  %val2 = call i32 @llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 %bool, i32 1, i1 true, i1 true)
+
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %var
+  ; CHECK-NEXT: %val3 = call i32 @llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 %var, i1 true, i1 true)
+  %val3 = call i32 @llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 %var, i1 true, i1 true)
+
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i1 %bool
+  ; CHECK-NEXT: %val4 = call i32 @llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 1, i1 %bool, i1 true)
+  %val4 = call i32 @llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 1, i1 %bool, i1 true)
+
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i1 %bool
+  ; CHECK-NEXT: %val5 = call i32 @llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 1, i1 true, i1 %bool)
+  %val5 = call i32 @llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 1, i1 true, i1 %bool)
+  ret void
+}
+
+declare i32 @llvm.amdgcn.ds.ordered.swap(i32 addrspace(2)* nocapture, i32, i32, i32, i1, i32, i1, i1)
+define amdgpu_kernel void @ds_ordered_swap(i32 addrspace(2)* %gds, i32 addrspace(1)* %out, i32 %var, i1 %bool) {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %var
+  ; CHECK-NEXT: %val0 = call i32 @llvm.amdgcn.ds.ordered.swap(i32 addrspace(2)* %gds, i32 31, i32 %var, i32 0, i1 false, i32 1, i1 true, i1 true)
+  %val0 = call i32 @llvm.amdgcn.ds.ordered.swap(i32 addrspace(2)* %gds, i32 31, i32 %var, i32 0, i1 false, i32 1, i1 true, i1 true)
+
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %var
+  ; CHECK-NEXT: %val1 = call i32 @llvm.amdgcn.ds.ordered.swap(i32 addrspace(2)* %gds, i32 31, i32 0, i32 %var, i1 false, i32 1, i1 true, i1 true)
+  %val1 = call i32 @llvm.amdgcn.ds.ordered.swap(i32 addrspace(2)* %gds, i32 31, i32 0, i32 %var, i1 false, i32 1, i1 true, i1 true)
+
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i1 %bool
+  ; CHECK-NEXT: %val2 = call i32 @llvm.amdgcn.ds.ordered.swap(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 %bool, i32 1, i1 true, i1 true)
+  %val2 = call i32 @llvm.amdgcn.ds.ordered.swap(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 %bool, i32 1, i1 true, i1 true)
+
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %var
+  ; CHECK-NEXT: %val3 = call i32 @llvm.amdgcn.ds.ordered.swap(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 %var, i1 true, i1 true)
+  %val3 = call i32 @llvm.amdgcn.ds.ordered.swap(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 %var, i1 true, i1 true)
+
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i1 %bool
+  ; CHECK-NEXT: %val4 = call i32 @llvm.amdgcn.ds.ordered.swap(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 1, i1 %bool, i1 true)
+  %val4 = call i32 @llvm.amdgcn.ds.ordered.swap(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 1, i1 %bool, i1 true)
+
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i1 %bool
+  ; CHECK-NEXT: %val5 = call i32 @llvm.amdgcn.ds.ordered.swap(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 1, i1 true, i1 %bool)
+  %val5 = call i32 @llvm.amdgcn.ds.ordered.swap(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 1, i1 true, i1 %bool)
+  ret void
+}
+
+declare i32 @llvm.amdgcn.mov.dpp.i32(i32, i32, i32, i32, i1)
+define amdgpu_kernel void @mov_dpp_test(i32 addrspace(1)* %out, i32 %in1, i32 %var, i1 %bool) {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %var
+  ; CHECK-NEXT: %val0 = call i32 @llvm.amdgcn.mov.dpp.i32(i32 %in1, i32 %var, i32 1, i32 1, i1 true)
+  %val0 = call i32 @llvm.amdgcn.mov.dpp.i32(i32 %in1, i32 %var, i32 1, i32 1, i1 1)
+
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %var
+  ; CHECK-NEXT: %val1 = call i32 @llvm.amdgcn.mov.dpp.i32(i32 %in1, i32 1, i32 %var, i32 1, i1 true)
+  %val1 = call i32 @llvm.amdgcn.mov.dpp.i32(i32 %in1, i32 1, i32 %var, i32 1, i1 1)
+
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %var
+  ; CHECK-NEXT: %val2 = call i32 @llvm.amdgcn.mov.dpp.i32(i32 %in1, i32 1, i32 1, i32 %var, i1 true)
+  %val2 = call i32 @llvm.amdgcn.mov.dpp.i32(i32 %in1, i32 1, i32 1, i32 %var, i1 1)
+
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i1 %bool
+  ; CHECK-NEXT: %val3 = call i32 @llvm.amdgcn.mov.dpp.i32(i32 %in1, i32 1, i32 1, i32 1, i1 %bool)
+  %val3 = call i32 @llvm.amdgcn.mov.dpp.i32(i32 %in1, i32 1, i32 1, i32 1, i1 %bool)
+  ret void
+}
+
+declare i32 @llvm.amdgcn.update.dpp.i32(i32, i32, i32, i32, i32, i1)
+define amdgpu_kernel void @update_dpp_test(i32 addrspace(1)* %out, i32 %in1, i32 %in2, i32 %var, i1 %bool) {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %var
+  ; CHECK-NEXT: %val0 = call i32 @llvm.amdgcn.update.dpp.i32(i32 %in1, i32 %in2, i32 %var, i32 1, i32 1, i1 true)
+  %val0 = call i32 @llvm.amdgcn.update.dpp.i32(i32 %in1, i32 %in2, i32 %var, i32 1, i32 1, i1 1)
+
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %var
+  ; CHECK-NEXT: %val1 = call i32 @llvm.amdgcn.update.dpp.i32(i32 %in1, i32 %in2, i32 1, i32 %var, i32 1, i1 true)
+  %val1 = call i32 @llvm.amdgcn.update.dpp.i32(i32 %in1, i32 %in2, i32 1, i32 %var, i32 1, i1 1)
+
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %var
+  ; CHECK-NEXT: %val2 = call i32 @llvm.amdgcn.update.dpp.i32(i32 %in1, i32 %in2, i32 1, i32 1, i32 %var, i1 true)
+  %val2 = call i32 @llvm.amdgcn.update.dpp.i32(i32 %in1, i32 %in2, i32 1, i32 1, i32 %var, i1 1)
+
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i1 %bool
+  ; CHECK-NEXT: %val3 = call i32 @llvm.amdgcn.update.dpp.i32(i32 %in1, i32 %in2, i32 1, i32 1, i32 1, i1 %bool)
+  %val3 = call i32 @llvm.amdgcn.update.dpp.i32(i32 %in1, i32 %in2, i32 1, i32 1, i32 1, i1 %bool)
+  ret void
+}
+
+declare <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32, i32, <8 x i32>, i32, i32)
+define amdgpu_ps void @load_1d(<8 x i32> inreg %rsrc, i32 %s, i32 %var) {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %var
+  ; CHECK-NEXT: %val0 = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 %var, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+  %val0 = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 %var, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %var
+  ; CHECK-NEXT: %val1 = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 %var, i32 0)
+  %val1 = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 %var, i32 0)
+
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %var
+  ; CHECK-NEXT: %val2 = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 %var)
+  %val2 = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 %var)
+  ret void
+}
+
+declare {<4 x float>,i32} @llvm.amdgcn.image.load.1d.v4f32i32.i32(i32, i32, <8 x i32>, i32, i32)
+define amdgpu_ps void @load_1d_tfe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %val) {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %val
+  ; CHECK-NEXT: %val0 = call { <4 x float>, i32 } @llvm.amdgcn.image.load.1d.sl_v4f32i32s.i32(i32 %val, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
+  %val0 = call {<4 x float>, i32} @llvm.amdgcn.image.load.1d.v4f32i32.i32(i32 %val, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
+
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %val
+  ; CHECK-NEXT: %val1 = call { <4 x float>, i32 } @llvm.amdgcn.image.load.1d.sl_v4f32i32s.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 %val, i32 0)
+  %val1 = call {<4 x float>, i32} @llvm.amdgcn.image.load.1d.v4f32i32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 %val, i32 0)
+
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %val
+  ; CHECK-NEXT: %val2 = call { <4 x float>, i32 } @llvm.amdgcn.image.load.1d.sl_v4f32i32s.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 1, i32 %val)
+  %val2 = call {<4 x float>, i32} @llvm.amdgcn.image.load.1d.v4f32i32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 1, i32 %val)
+  ret void
+}
+
+declare {<4 x float>, i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32)
+define amdgpu_ps void @sample_1d_tfe(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 addrspace(1)* inreg %out, float %s, i32 %var, i1 %bool) {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %var
+  ; CHECK-NEXT: %val0 = call { <4 x float>, i32 } @llvm.amdgcn.image.sample.1d.sl_v4f32i32s.f32(i32 %var, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 1, i32 0)
+  %val0 = call {<4 x float>, i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 %var, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 1, i32 0)
+
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i1 %bool
+  ; CHECK-NEXT: %val1 = call { <4 x float>, i32 } @llvm.amdgcn.image.sample.1d.sl_v4f32i32s.f32(i32 16, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 %bool, i32 1, i32 0)
+  %val1 = call {<4 x float>, i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 16, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 %bool, i32 1, i32 0)
+
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %var
+  ; CHECK-NEXT: %val2 = call { <4 x float>, i32 } @llvm.amdgcn.image.sample.1d.sl_v4f32i32s.f32(i32 16, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 %var, i32 0)
+  %val2 = call {<4 x float>, i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 16, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 %var, i32 0)
+
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %var
+  ; CHECK-NEXT: %val3 = call { <4 x float>, i32 } @llvm.amdgcn.image.sample.1d.sl_v4f32i32s.f32(i32 %var, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 1, i32 %var)
+  %val3 = call {<4 x float>, i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 %var, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 1, i32 %var)
+  ret void
+}
+
+declare <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32, i16, <8 x i32>, i32, i32)
+define amdgpu_ps void @load_1d_a16(<8 x i32> inreg %rsrc, <2 x i16> %coords, i16 %s, i32 %var) {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %var
+  ; CHECK-NEXT: %val0 = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32 %var, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
+  %val0 = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32 %var, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
+
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %var
+  ; CHECK-NEXT: %val1 = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32 15, i16 %s, <8 x i32> %rsrc, i32 %var, i32 0)
+  %val1 = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32 15, i16 %s, <8 x i32> %rsrc, i32 %var, i32 0)
+
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %var
+  ; CHECK-NEXT: %val2 = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32 15, i16 %s, <8 x i32> %rsrc, i32 0, i32 %var)
+  %val2 = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32 15, i16 %s, <8 x i32> %rsrc, i32 0, i32 %var)
+  ret void
+}
+
+declare i32 @llvm.amdgcn.raw.buffer.atomic.swap.i32(i32, <4 x i32>, i32, i32, i32)
+define amdgpu_ps void @raw_buffer_atomic_swap(<4 x i32> inreg %rsrc, i32 %data, i32 %var) {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %var
+  ; CHECK-NEXT: %val2 = call i32 @llvm.amdgcn.raw.buffer.atomic.swap.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i32 %var)
+  %val2 = call i32 @llvm.amdgcn.raw.buffer.atomic.swap.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i32 %var)
+  ret void
+}
+
+declare i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32(i32, i32, <8 x i32>, i32, i32)
+define amdgpu_ps void @atomic_swap_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s, i32 %val) {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %val
+  ; CHECK-NEXT: %val0 = call i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 %val, i32 0)
+  %val0 = call i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 %val, i32 0)
+
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %val
+  ; CHECK-NEXT: %val1 = call i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 %val)
+  %val1 = call i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 %val)
+  ret void
+}
+
+declare i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32(i32, i32, i32, <8 x i32>, i32, i32) #0
+define amdgpu_ps void @atomic_cmpswap_1d(<8 x i32> inreg %rsrc, i32 %cmp, i32 %swap, i32 %s, i32 %val) {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %val
+  ; CHECK-NEXT: %val0 = call i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32(i32 %cmp, i32 %swap, i32 %s, <8 x i32> %rsrc, i32 %val, i32 0)
+  %val0 = call i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32(i32 %cmp, i32 %swap, i32 %s, <8 x i32> %rsrc, i32 %val, i32 0)
+
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %val
+  ; CHECK-NEXT: %val1 = call i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32(i32 %cmp, i32 %swap, i32 %s, <8 x i32> %rsrc, i32 0, i32 %val)
+  %val1 = call i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32(i32 %cmp, i32 %swap, i32 %s, <8 x i32> %rsrc, i32 0, i32 %val)
+  ret void
+}
+
+declare float @llvm.amdgcn.fdot2(<2 x half>, <2 x half>, float, i1)
+define float @test_fdot2(<2 x half> %arg0, <2 x half> %arg1, float %arg2, i1 %arg3) {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i1 %arg3
+  ; CHECK-NEXT: %val = call float @llvm.amdgcn.fdot2(<2 x half> %arg0, <2 x half> %arg1, float %arg2, i1 %arg3)
+  %val = call float @llvm.amdgcn.fdot2(<2 x half> %arg0, <2 x half> %arg1, float %arg2, i1 %arg3)
+  ret float %val
+}
+
+declare i32 @llvm.amdgcn.sdot2(<2 x i16>, <2 x i16>, i32, i1)
+define i32 @test_sdot2(<2 x i16> %arg0, <2 x i16> %arg1, i32 %arg2, i1 %arg3) {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i1 %arg3
+  ; CHECK-NEXT: %val = call i32 @llvm.amdgcn.sdot2(<2 x i16> %arg0, <2 x i16> %arg1, i32 %arg2, i1 %arg3)
+  %val = call i32 @llvm.amdgcn.sdot2(<2 x i16> %arg0, <2 x i16> %arg1, i32 %arg2, i1 %arg3)
+  ret i32 %val
+}
+
+declare i32 @llvm.amdgcn.udot2(<2 x i16>, <2 x i16>, i32, i1)
+define i32 @test_udot2(<2 x i16> %arg0, <2 x i16> %arg1, i32 %arg2, i1 %arg3) {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i1 %arg3
+  ; CHECK-NEXT: %val = call i32 @llvm.amdgcn.udot2(<2 x i16> %arg0, <2 x i16> %arg1, i32 %arg2, i1 %arg3)
+  %val = call i32 @llvm.amdgcn.udot2(<2 x i16> %arg0, <2 x i16> %arg1, i32 %arg2, i1 %arg3)
+  ret i32 %val
+}
+
+declare i32 @llvm.amdgcn.sdot4(i32, i32, i32, i1)
+define i32 @test_sdot4(i32 %arg0, i32 %arg1, i32 %arg2, i1 %arg3) {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i1 %arg3
+  ; CHECK-NEXT: %val = call i32 @llvm.amdgcn.sdot4(i32 %arg0, i32 %arg1, i32 %arg2, i1 %arg3)
+  %val = call i32 @llvm.amdgcn.sdot4(i32 %arg0, i32 %arg1, i32 %arg2, i1 %arg3)
+  ret i32 %val
+}
+
+declare i32 @llvm.amdgcn.udot4(i32, i32, i32, i1)
+define i32 @test_udot4(i32 %arg0, i32 %arg1, i32 %arg2, i1 %arg3) {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i1 %arg3
+  ; CHECK-NEXT: %val = call i32 @llvm.amdgcn.udot4(i32 %arg0, i32 %arg1, i32 %arg2, i1 %arg3)
+  %val = call i32 @llvm.amdgcn.udot4(i32 %arg0, i32 %arg1, i32 %arg2, i1 %arg3)
+  ret i32 %val
+}
diff --git a/llvm/test/Verifier/AMDGPU/lit.local.cfg b/llvm/test/Verifier/AMDGPU/lit.local.cfg
new file mode 100644
index 00000000000..2a665f06be7
--- /dev/null
+++ b/llvm/test/Verifier/AMDGPU/lit.local.cfg
@@ -0,0 +1,2 @@
+if not 'AMDGPU' in config.root.targets:
+    config.unsupported = True
diff --git a/llvm/test/Verifier/cttz-undef-arg.ll b/llvm/test/Verifier/cttz-undef-arg.ll
index 66c5396443a..8c982e0924b 100644
--- a/llvm/test/Verifier/cttz-undef-arg.ll
+++ b/llvm/test/Verifier/cttz-undef-arg.ll
@@ -5,11 +5,13 @@ declare i32 @llvm.cttz.i32(i32, i1)
 
 define void @f(i32 %x, i1 %is_not_zero) {
 entry:
-; CHECK: is_zero_undef argument of bit counting intrinsics must be a constant int
+; CHECK: immarg operand has non-immediate parameter
+; CHECK-NEXT: i1 %is_not_zero
 ; CHECK-NEXT: @llvm.ctlz.i32
   call i32 @llvm.ctlz.i32(i32 %x, i1 %is_not_zero)
 
-; CHECK: is_zero_undef argument of bit counting intrinsics must be a constant int
+; CHECK: immarg operand has non-immediate parameter
+; CHECK-NEXT: i1 %is_not_zero
 ; CHECK-NEXT: @llvm.cttz.i32
   call i32 @llvm.cttz.i32(i32 %x, i1 %is_not_zero)
   ret void
diff --git a/llvm/test/Verifier/element-wise-atomic-memory-intrinsics.ll b/llvm/test/Verifier/element-wise-atomic-memory-intrinsics.ll
index 81c8ba16b97..f22f8eecbcc 100644
--- a/llvm/test/Verifier/element-wise-atomic-memory-intrinsics.ll
+++ b/llvm/test/Verifier/element-wise-atomic-memory-intrinsics.ll
@@ -1,8 +1,11 @@
 ; RUN: not opt -verify < %s 2>&1 | FileCheck %s
 
 define void @test_memcpy(i8* %P, i8* %Q, i32 %A, i32 %E) {
-  ; CHECK: element size of the element-wise unordered atomic memory intrinsic must be a constant int
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK: i32 %E
+  ; CHECK-NEXT: call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %P, i8* align 4 %Q, i32 1, i32 %E)
   call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %P, i8* align 4 %Q, i32 1, i32 %E)
+
   ; CHECK: element size of the element-wise atomic memory intrinsic must be a power of 2
   call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %P, i8* align 4 %Q, i32 1, i32 3)
 
@@ -21,11 +24,15 @@ define void @test_memcpy(i8* %P, i8* %Q, i32 %A, i32 %E) {
 
   ret void
 }
+
 declare void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32) nounwind
 
 define void @test_memmove(i8* %P, i8* %Q, i32 %A, i32 %E) {
-  ; CHECK: element size of the element-wise unordered atomic memory intrinsic must be a constant int
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %E
+  ; CHECK-NEXT: call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %P, i8* align 4 %Q, i32 1, i32 %E)
   call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %P, i8* align 4 %Q, i32 1, i32 %E)
+
   ; CHECK: element size of the element-wise atomic memory intrinsic must be a power of 2
   call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %P, i8* align 4 %Q, i32 1, i32 3)
 
@@ -44,11 +51,15 @@ define void @test_memmove(i8* %P, i8* %Q, i32 %A, i32 %E) {
 
   ret void
 }
+
 declare void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32) nounwind
 
 define void @test_memset(i8* %P, i8 %V, i32 %A, i32 %E) {
-  ; CHECK: element size of the element-wise unordered atomic memory intrinsic must be a constant int
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK: i32 %E
+  ; CHECK: call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 4 %P, i8 %V, i32 1, i32 %E)
   call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 4 %P, i8 %V, i32 1, i32 %E)
+
   ; CHECK: element size of the element-wise atomic memory intrinsic must be a power of 2
   call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 4 %P, i8 %V, i32 1, i32 3)
 
diff --git a/llvm/test/Verifier/frameescape.ll b/llvm/test/Verifier/frameescape.ll
index c03134159db..0850b9495d9 100644
--- a/llvm/test/Verifier/frameescape.ll
+++ b/llvm/test/Verifier/frameescape.ll
@@ -47,7 +47,10 @@ define internal void @k(i32 %n) {
   call i8* @llvm.localrecover(i8* bitcast(void()* @f to i8*), i8* null, i32 %n)
   ret void
 }
-; CHECK: idx argument of llvm.localrecover must be a constant int
+
+; CHECK: immarg operand has non-immediate parameter
+; CHECK-NEXT: i32 %n
+; CHECK-NEXT: %1 = call i8* @llvm.localrecover(i8* bitcast (void ()* @f to i8*), i8* null, i32 %n)
 
 define internal void @l(i8* %b) {
   %a = alloca i8
diff --git a/llvm/test/Verifier/immarg-param-attribute-invalid.ll b/llvm/test/Verifier/immarg-param-attribute-invalid.ll
new file mode 100644
index 00000000000..5b923c5be52
--- /dev/null
+++ b/llvm/test/Verifier/immarg-param-attribute-invalid.ll
@@ -0,0 +1,107 @@
+; RUN: not llvm-as < %s -o /dev/null 2>&1 | FileCheck %s
+
+declare void @llvm.test.immarg.intrinsic.i32(i32 immarg)
+declare void @llvm.test.immarg.intrinsic.v2i32(<2 x i32> immarg)
+declare void @llvm.test.immarg.intrinsic.f32(float immarg)
+declare void @llvm.test.immarg.intrinsic.v2f32(<2 x float> immarg)
+declare void @llvm.test.immarg.intrinsic.2ai32([2 x i32] immarg)
+
+@gv = global i32 undef, align 4
+
+define void @call_llvm.test.immarg.intrinsic.i32(i32 %arg) {
+; CHECK: immarg operand has non-immediate parameter
+; CHECK-NEXT: i32 undef
+; CHECK-NEXT: call void @llvm.test.immarg.intrinsic.i32(i32 undef)
+  call void @llvm.test.immarg.intrinsic.i32(i32 undef)
+
+; CHECK: immarg operand has non-immediate parameter
+; CHECK-NEXT: i32 %arg
+; CHECK-NEXT: call void @llvm.test.immarg.intrinsic.i32(i32 %arg)
+  call void @llvm.test.immarg.intrinsic.i32(i32 %arg)
+
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 ptrtoint (i32* @gv to i32)
+  ; CHECK-NEXT: call void @llvm.test.immarg.intrinsic.i32(i32 ptrtoint (i32* @gv to i32))
+  call void @llvm.test.immarg.intrinsic.i32(i32 ptrtoint (i32* @gv to i32))
+  ret void
+}
+
+define void @call_llvm.test.immarg.intrinsic.f32() {
+; CHECK: immarg operand has non-immediate parameter
+; CHECK-NEXT: float undef
+; CHECK-NEXT: call void @llvm.test.immarg.intrinsic.f32(float undef)
+  call void @llvm.test.immarg.intrinsic.f32(float undef)
+  ret void
+}
+
+define void @call_llvm.test.immarg.intrinsic.v2i32() {
+; CHECK: immarg operand has non-immediate parameter
+; CHECK-NEXT: <2 x i32> zeroinitializer
+; CHECK-NEXT: call void @llvm.test.immarg.intrinsic.v2i32(<2 x i32> zeroinitializer)
+  call void @llvm.test.immarg.intrinsic.v2i32(<2 x i32> zeroinitializer)
+
+; CHECK: immarg operand has non-immediate parameter
+; CHECK-NEXT: <2 x i32> <i32 1, i32 2>
+; CHECK-NEXT: call void @llvm.test.immarg.intrinsic.v2i32(<2 x i32> <i32 1, i32 2>)
+  call void @llvm.test.immarg.intrinsic.v2i32(<2 x i32> <i32 1, i32 2>)
+
+; CHECK: immarg operand has non-immediate parameter
+; CHECK-NEXT: <2 x i32> undef
+; CHECK-NEXT: call void @llvm.test.immarg.intrinsic.v2i32(<2 x i32> undef)
+  call void @llvm.test.immarg.intrinsic.v2i32(<2 x i32> undef)
+  ret void
+}
+
+define void @call_llvm.test.immarg.intrinsic.v2f32() {
+; CHECK: immarg operand has non-immediate parameter
+; CHECK-NEXT: <2 x float> zeroinitializer
+; CHECK-NEXT: call void @llvm.test.immarg.intrinsic.v2f32(<2 x float> zeroinitializer)
+  call void @llvm.test.immarg.intrinsic.v2f32(<2 x float> zeroinitializer)
+
+; CHECK: immarg operand has non-immediate parameter
+; CHECK-NEXT: <2 x float> <float 1.000000e+00, float 2.000000e+00>
+; CHECK-NEXT: call void @llvm.test.immarg.intrinsic.v2f32(<2 x float> <float 1.000000e+00, float 2.000000e+00>)
+  call void @llvm.test.immarg.intrinsic.v2f32(<2 x float> <float 1.0, float 2.0>)
+  ret void
+}
+
+define void @call_llvm.test.immarg.intrinsic.2ai32() {
+; CHECK: immarg operand has non-immediate parameter
+; CHECK-NEXT: [2 x i32] zeroinitializer
+; CHECK-NEXT: call void @llvm.test.immarg.intrinsic.2ai32([2 x i32] zeroinitializer)
+  call void @llvm.test.immarg.intrinsic.2ai32([2 x i32] zeroinitializer)
+
+; CHECK: immarg operand has non-immediate parameter
+; CHECK-NEXT: [2 x i32] [i32 1, i32 2]
+; CHECK-NEXT: call void @llvm.test.immarg.intrinsic.2ai32([2 x i32] [i32 1, i32 2])
+  call void @llvm.test.immarg.intrinsic.2ai32([2 x i32] [i32 1, i32 2])
+  ret void
+}
+
+; CHECK: immarg attribute only applies to intrinsics
+; CHECK-NEXT: void (i32)* @not_an_intrinsic
+declare void @not_an_intrinsic(i32 immarg)
+
+declare void @llvm.test.intrinsic(i32)
+declare void @func(i32)
+
+define void @only_on_callsite() {
+; CHECK: immarg attribute only applies to intrinsics
+; CHECK-NEXT: call void @func(i32 immarg 0)
+; CHECK-NEXT: immarg may not apply only to call sites
+; CHECK-NEXT: i32 0
+; CHECK-NEXT: call void @func(i32 immarg 0)
+  call void @func(i32 immarg 0)
+
+; CHECK: immarg may not apply only to call sites
+; CHECK-NEXT: i32 0
+; CHECK-NEXT: call void @llvm.test.intrinsic(i32 immarg 0)
+  call void @llvm.test.intrinsic(i32 immarg 0)
+  ret void
+}
+
+; CHECK: immarg attribute only applies to intrinsics
+; CHECK: void (i32)* @on_function_definition
+define void @on_function_definition(i32 immarg %arg) {
+  ret void
+}
diff --git a/llvm/test/Verifier/intrinsic-immarg.ll b/llvm/test/Verifier/intrinsic-immarg.ll
new file mode 100644
index 00000000000..4a701ddd419
--- /dev/null
+++ b/llvm/test/Verifier/intrinsic-immarg.ll
@@ -0,0 +1,223 @@
+; RUN: not llvm-as < %s -o /dev/null 2>&1 | FileCheck %s
+
+declare i8* @llvm.returnaddress(i32)
+define void @return_address(i32 %var) {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %var
+  ; CHECK-NEXT: %result = call i8* @llvm.returnaddress(i32 %var)
+  %result = call i8* @llvm.returnaddress(i32 %var)
+  ret void
+}
+
+declare i8* @llvm.frameaddress(i32)
+define void @frame_address(i32 %var) {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %var
+  ; CHECK-NEXT: %result = call i8* @llvm.frameaddress(i32 %var)
+  %result = call i8* @llvm.frameaddress(i32 %var)
+  ret void
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1)
+define void @memcpy(i8* %dest, i8* %src, i1 %is.volatile) {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i1 %is.volatile
+  ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 8, i1 %is.volatile)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 8, i1 %is.volatile)
+  ret void
+}
+
+declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1)
+define void @memmove(i8* %dest, i8* %src, i1 %is.volatile) {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i1 %is.volatile
+  ; CHECK-NEXT: call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 8, i1 %is.volatile)
+  call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 8, i1 %is.volatile)
+  ret void
+}
+
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i1)
+define void @memset(i8* %dest, i8 %val, i1 %is.volatile) {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i1 %is.volatile
+  ; CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* %dest, i8 %val, i32 8, i1 %is.volatile)
+  call void @llvm.memset.p0i8.i32(i8* %dest, i8 %val, i32 8, i1 %is.volatile)
+  ret void
+}
+
+
+declare i64 @llvm.objectsize.i64.p0i8(i8*, i1, i1, i1)
+define void @objectsize(i8* %ptr, i1 %a, i1 %b, i1 %c) {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i1 %a
+  ; CHECK-NEXT: %val0 = call i64 @llvm.objectsize.i64.p0i8(i8* %ptr, i1 %a, i1 false, i1 false)
+  %val0 = call i64 @llvm.objectsize.i64.p0i8(i8* %ptr, i1 %a, i1 false, i1 false)
+
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i1 %b
+  ; CHECK-NEXT: %val1 = call i64 @llvm.objectsize.i64.p0i8(i8* %ptr, i1 false, i1 %b, i1 false)
+  %val1 = call i64 @llvm.objectsize.i64.p0i8(i8* %ptr, i1 false, i1 %b, i1 false)
+
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i1 %c
+  ; CHECK-NEXT: %val2 = call i64 @llvm.objectsize.i64.p0i8(i8* %ptr, i1 false, i1 false, i1 %c)
+  %val2 = call i64 @llvm.objectsize.i64.p0i8(i8* %ptr, i1 false, i1 false, i1 %c)
+  ret void
+}
+
+declare i8 @llvm.expect.i8(i8, i8)
+define i8 @expect(i8 %arg0, i8 %arg1) {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i8 %arg1
+  ; CHECK-NEXT: %ret = call i8 @llvm.expect.i8(i8 %arg0, i8 %arg1)
+  %ret = call i8 @llvm.expect.i8(i8 %arg0, i8 %arg1)
+  ret i8 %ret
+}
+
+declare i64 @llvm.smul.fix.i64(i64, i64, i32)
+define i64 @smul_fix(i64 %arg0, i64 %arg1, i32 %arg2) {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %arg2
+  ; CHECK-NEXT: %ret = call i64 @llvm.smul.fix.i64(i64 %arg0, i64 %arg1, i32 %arg2)
+  %ret = call i64 @llvm.smul.fix.i64(i64 %arg0, i64 %arg1, i32 %arg2)
+  ret i64 %ret
+}
+
+declare i64 @llvm.umul.fix.i64(i64, i64, i32)
+define i64 @umul_fix(i64 %arg0, i64 %arg1, i32 %arg2) {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %arg2
+  ; CHECK-NEXT: %ret = call i64 @llvm.umul.fix.i64(i64 %arg0, i64 %arg1, i32 %arg2)
+  %ret = call i64 @llvm.umul.fix.i64(i64 %arg0, i64 %arg1, i32 %arg2)
+  ret i64 %ret
+}
+
+declare <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>*, i32, <2 x i1>, <2 x double>)
+define <2 x double> @masked_load(<2 x i1> %mask, <2 x double>* %addr, <2 x double> %dst, i32 %align) {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %align
+  ; CHECK-NEXT: %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %addr, i32 %align, <2 x i1> %mask, <2 x double> %dst)
+  %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %addr, i32 %align, <2 x i1> %mask, <2 x double> %dst)
+  ret <2 x double> %res
+}
+
+declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32, <4 x i1>)
+define void @masked_store(<4 x i1> %mask, <4 x i32>* %addr, <4 x i32> %val, i32 %align) {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %align
+  ; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %val, <4 x i32>* %addr, i32 %align, <4 x i1> %mask)
+  call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %val, <4 x i32>* %addr, i32 %align, <4 x i1> %mask)
+  ret void
+}
+
+declare <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*>, i32, <2 x i1>, <2 x double>)
+define <2 x double> @test_gather(<2 x double*> %ptrs, <2 x i1> %mask, <2 x double> %src0, i32 %align)  {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK: i32 %align
+  ; CHECK: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 %align, <2 x i1> %mask, <2 x double> %src0)
+  %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 %align, <2 x i1> %mask, <2 x double> %src0)
+  ret <2 x double> %res
+}
+
+declare void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32>, <8 x i32*>, i32, <8 x i1>)
+define void @test_scatter_8i32(<8 x i32> %a1, <8 x i32*> %ptr, <8 x i1> %mask, i32 %align) {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %align
+  ; CHECK-NEXT: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %a1, <8 x i32*> %ptr, i32 %align, <8 x i1> %mask)
+  call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %a1, <8 x i32*> %ptr, i32 %align, <8 x i1> %mask)
+  ret void
+}
+
+declare void @llvm.lifetime.start.p0i8(i64, i8*)
+define void @test_lifetime_start(i64 %arg0, i8* %ptr) {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i64 %arg0
+  ; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 %arg0, i8* %ptr)
+  call void @llvm.lifetime.start.p0i8(i64 %arg0, i8* %ptr)
+  ret void
+}
+
+declare void @llvm.lifetime.end.p0i8(i64, i8*)
+define void @test_lifetime_end(i64 %arg0, i8* %ptr) {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i64 %arg0
+  ; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 %arg0, i8* %ptr)
+  call void @llvm.lifetime.end.p0i8(i64 %arg0, i8* %ptr)
+  ret void
+}
+
+declare void @llvm.invariant.start.p0i8(i64, i8*)
+define void @test_invariant_start(i64 %arg0, i8* %ptr) {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i64 %arg0
+  ; CHECK-NEXT: call void @llvm.invariant.start.p0i8(i64 %arg0, i8* %ptr)
+  call void @llvm.invariant.start.p0i8(i64 %arg0, i8* %ptr)
+  ret void
+}
+
+declare void @llvm.invariant.end.p0i8({}*, i64, i8*)
+define void @test_invariant_end({}* %scope, i64 %arg1, i8* %ptr) {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i64 %arg1
+  ; CHECK-NEXT: call void @llvm.invariant.end.p0i8({}* %scope, i64 %arg1, i8* %ptr)
+  call void @llvm.invariant.end.p0i8({}* %scope, i64 %arg1, i8* %ptr)
+  ret void
+}
+
+declare void @llvm.prefetch(i8*, i32, i32, i32)
+define void @test_prefetch(i8* %ptr, i32 %arg0, i32 %arg1) {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %arg0
+  ; CHECK-NEXT: call void @llvm.prefetch(i8* %ptr, i32 %arg0, i32 0, i32 0)
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT:  i32 %arg1
+  call void @llvm.prefetch(i8* %ptr, i32 %arg0, i32 0, i32 0)
+  call void @llvm.prefetch(i8* %ptr, i32 0, i32 %arg1, i32 0)
+  ret void
+}
+
+declare void @llvm.localrecover(i8*, i8*, i32)
+define void @test_localrecover(i8* %func, i8* %fp, i32 %idx) {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %idx
+  ; CHECK-NEXT: call void @llvm.localrecover(i8* %func, i8* %fp, i32 %idx)
+  call void @llvm.localrecover(i8* %func, i8* %fp, i32 %idx)
+  ret void
+}
+
+declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
+
+define private void @f() {
+  ret void
+}
+
+define void @calls_statepoint(i8 addrspace(1)* %arg0, i64 %arg1, i32 %arg2, i32 %arg4, i32 %arg5) gc "statepoint-example" {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i64 %arg1
+  ; CHECK-NEXT: %safepoint0 = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 %arg1, i32 0, void ()* @f, i32 0, i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 10, i32 0, i8 addrspace(1)* %arg0, i64 addrspace(1)* %cast, i8 addrspace(1)* %arg0, i8 addrspace(1)* %arg0)
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %arg2
+  ; CHECK-NEXT: %safepoint1 = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 %arg2, void ()* @f, i32 0, i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 10, i32 0, i8 addrspace(1)* %arg0, i64 addrspace(1)* %cast, i8 addrspace(1)* %arg0, i8 addrspace(1)* %arg0)
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %arg4
+  ; CHECK-NEXT: %safepoint2 = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @f, i32 %arg4, i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 10, i32 0, i8 addrspace(1)* %arg0, i64 addrspace(1)* %cast, i8 addrspace(1)* %arg0, i8 addrspace(1)* %arg0)
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %arg5
+  ; CHECK-NEXT: %safepoint3 = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @f, i32 0, i32 %arg5, i32 0, i32 5, i32 0, i32 0, i32 0, i32 10, i32 0, i8 addrspace(1)* %arg0, i64 addrspace(1)* %cast, i8 addrspace(1)* %arg0, i8 addrspace(1)* %arg0)
+  %cast = bitcast i8 addrspace(1)* %arg0 to i64 addrspace(1)*
+  %safepoint0 = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 %arg1, i32 0, void ()* @f, i32 0, i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 10, i32 0, i8 addrspace(1)* %arg0, i64 addrspace(1)* %cast, i8 addrspace(1)* %arg0, i8 addrspace(1)* %arg0)
+  %safepoint1 = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 %arg2, void ()* @f, i32 0, i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 10, i32 0, i8 addrspace(1)* %arg0, i64 addrspace(1)* %cast, i8 addrspace(1)* %arg0, i8 addrspace(1)* %arg0)
+  %safepoint2 = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @f, i32 %arg4, i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 10, i32 0, i8 addrspace(1)* %arg0, i64 addrspace(1)* %cast, i8 addrspace(1)* %arg0, i8 addrspace(1)* %arg0)
+  %safepoint3 = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @f, i32 0, i32 %arg5, i32 0, i32 5, i32 0, i32 0, i32 0, i32 10, i32 0, i8 addrspace(1)* %arg0, i64 addrspace(1)* %cast, i8 addrspace(1)* %arg0, i8 addrspace(1)* %arg0)
+  ret void
+}
+
+declare void @llvm.hwasan.check.memaccess(i8*, i8*, i32)
+
+define void @hwasan_check_memaccess(i8* %arg0,i8* %arg1, i32 %arg2) {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK: i32 %arg2
+  ; CHECK: call void @llvm.hwasan.check.memaccess(i8* %arg0, i8* %arg1, i32 %arg2)
+  call void @llvm.hwasan.check.memaccess(i8* %arg0,i8* %arg1, i32 %arg2)
+  ret void
+}
author	Matt Arsenault <Matthew.Arsenault@amd.com>	2019-03-12 21:02:54 +0000
committer	Matt Arsenault <Matthew.Arsenault@amd.com>	2019-03-12 21:02:54 +0000
commit	caf1316f7100e1ba1e7e800eae6a5adf0d48bf90 (patch)
tree	7b79b67ccf3ffe7fe9b56dee66444a586d7714d2 /llvm/test
parent	9df0754b8dbeeb597a22bb0418eb0ee04ce88b56 (diff)
download	bcm5719-llvm-caf1316f7100e1ba1e7e800eae6a5adf0d48bf90.tar.gz bcm5719-llvm-caf1316f7100e1ba1e7e800eae6a5adf0d48bf90.zip