diff options
| author | Tom Stellard <thomas.stellard@amd.com> | 2016-10-12 16:35:29 +0000 | 
|---|---|---|
| committer | Tom Stellard <thomas.stellard@amd.com> | 2016-10-12 16:35:29 +0000 | 
| commit | fac248cb5fb78968b8d90c14e53fcbbbba7fef66 (patch) | |
| tree | 84dfc0396fb4f76367dd5d63fbdeafb9e7bd22ac | |
| parent | fa53c86dc1cac3b9434a6b4a3d653e326128fbfb (diff) | |
| download | bcm5719-llvm-fac248cb5fb78968b8d90c14e53fcbbbba7fef66.tar.gz bcm5719-llvm-fac248cb5fb78968b8d90c14e53fcbbbba7fef66.zip | |
AMDGPU/SI: Change mimg intrinsic signatures
This makes more fields overridable and removes redundant bits.
Patch by: Changpeng Fang
llvm-svn: 284024
| -rw-r--r-- | llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 21 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/MIMGInstructions.td | 41 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.ll | 58 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.waitcnt.ll | 12 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/wqm.ll | 10 | 
5 files changed, 83 insertions, 59 deletions
| diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index bb62299dafb..cc4fd4ce5f4 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -229,29 +229,30 @@ def int_amdgcn_atomic_dec : Intrinsic<[llvm_anyint_ty],  >;  class AMDGPUImageLoad : Intrinsic < -  [llvm_v4f32_ty],    // vdata(VGPR) +  [llvm_anyfloat_ty], // vdata(VGPR)    [llvm_anyint_ty,    // vaddr(VGPR) -   llvm_v8i32_ty,     // rsrc(SGPR) +   llvm_anyint_ty,    // rsrc(SGPR)     llvm_i32_ty,       // dmask(imm) -   llvm_i1_ty,        // r128(imm) -   llvm_i1_ty,        // da(imm)     llvm_i1_ty,        // glc(imm) -   llvm_i1_ty],       // slc(imm) +   llvm_i1_ty,        // slc(imm) +   llvm_i1_ty,        // lwe(imm) +   llvm_i1_ty],       // da(imm)    [IntrReadMem]>;  def int_amdgcn_image_load : AMDGPUImageLoad;  def int_amdgcn_image_load_mip : AMDGPUImageLoad; +def int_amdgcn_image_getresinfo : AMDGPUImageLoad;  class AMDGPUImageStore : Intrinsic <    [], -  [llvm_v4f32_ty,     // vdata(VGPR) +  [llvm_anyfloat_ty,  // vdata(VGPR)     llvm_anyint_ty,    // vaddr(VGPR) -   llvm_v8i32_ty,     // rsrc(SGPR) +   llvm_anyint_ty,    // rsrc(SGPR)     llvm_i32_ty,       // dmask(imm) -   llvm_i1_ty,        // r128(imm) -   llvm_i1_ty,        // da(imm)     llvm_i1_ty,        // glc(imm) -   llvm_i1_ty],       // slc(imm) +   llvm_i1_ty,        // slc(imm) +   llvm_i1_ty,        // lwe(imm) +   llvm_i1_ty],       // da(imm)    []>;  def int_amdgcn_image_store : AMDGPUImageStore; diff --git a/llvm/lib/Target/AMDGPU/MIMGInstructions.td b/llvm/lib/Target/AMDGPU/MIMGInstructions.td index 629643c022c..795260e9c05 100644 --- a/llvm/lib/Target/AMDGPU/MIMGInstructions.td +++ b/llvm/lib/Target/AMDGPU/MIMGInstructions.td @@ -401,32 +401,36 @@ multiclass ImagePatterns<SDPatternOperator name, string opcode> {    def : ImagePattern<name, !cast<MIMG>(opcode # _V4_V4), v4i32>;  } -class ImageLoadPattern<SDPatternOperator name, MIMG opcode, ValueType vt> : Pat < -  (name vt:$addr, v8i32:$rsrc, imm:$dmask, imm:$r128, imm:$da, imm:$glc, -        imm:$slc), -  (opcode $addr, $rsrc, +multiclass ImageLoadPattern<SDPatternOperator name, MIMG opcode, ValueType vt> { +  def : Pat < +    (v4f32 (name vt:$addr, v8i32:$rsrc, i32:$dmask, i1:$glc, i1:$slc, i1:$lwe, +                i1:$da)), +    (opcode $addr, $rsrc,            (as_i32imm $dmask), 1, (as_i1imm $glc), (as_i1imm $slc), -          (as_i1imm $r128), 0, 0, (as_i1imm $da)) ->; +          0, 0, (as_i1imm $lwe), (as_i1imm $da)) +  >; +}  multiclass ImageLoadPatterns<SDPatternOperator name, string opcode> { -  def : ImageLoadPattern<name, !cast<MIMG>(opcode # _V4_V1), i32>; -  def : ImageLoadPattern<name, !cast<MIMG>(opcode # _V4_V2), v2i32>; -  def : ImageLoadPattern<name, !cast<MIMG>(opcode # _V4_V4), v4i32>; +  defm : ImageLoadPattern<name, !cast<MIMG>(opcode # _V4_V1), i32>; +  defm : ImageLoadPattern<name, !cast<MIMG>(opcode # _V4_V2), v2i32>; +  defm : ImageLoadPattern<name, !cast<MIMG>(opcode # _V4_V4), v4i32>;  } -class ImageStorePattern<SDPatternOperator name, MIMG opcode, ValueType vt> : Pat < -  (name v4f32:$data, vt:$addr, v8i32:$rsrc, i32:$dmask, imm:$r128, imm:$da, -        imm:$glc, imm:$slc), -  (opcode $data, $addr, $rsrc, +multiclass ImageStorePattern<SDPatternOperator name, MIMG opcode, ValueType vt> { +  def : Pat < +    (name v4f32:$data, vt:$addr, v8i32:$rsrc, i32:$dmask, i1:$glc, i1:$slc, +          i1:$lwe, i1:$da), +    (opcode $data, $addr, $rsrc,            (as_i32imm $dmask), 1, (as_i1imm $glc), (as_i1imm $slc), -          (as_i1imm $r128), 0, 0, (as_i1imm $da)) ->; +          0, 0, (as_i1imm $lwe), (as_i1imm $da)) +  >; +}  multiclass ImageStorePatterns<SDPatternOperator name, string opcode> { -  def : ImageStorePattern<name, !cast<MIMG>(opcode # _V4_V1), i32>; -  def : ImageStorePattern<name, !cast<MIMG>(opcode # _V4_V2), v2i32>; -  def : ImageStorePattern<name, !cast<MIMG>(opcode # _V4_V4), v4i32>; +  defm : ImageStorePattern<name, !cast<MIMG>(opcode # _V4_V1), i32>; +  defm : ImageStorePattern<name, !cast<MIMG>(opcode # _V4_V2), v2i32>; +  defm : ImageStorePattern<name, !cast<MIMG>(opcode # _V4_V4), v4i32>;  }  class ImageAtomicPattern<SDPatternOperator name, MIMG opcode, ValueType vt> : Pat < @@ -554,6 +558,7 @@ def : SampleRawPattern<int_SI_getlod, IMAGE_GET_LOD_V4_V4, v4i32>;  // Image load  defm : ImageLoadPatterns<int_amdgcn_image_load, "IMAGE_LOAD">;  defm : ImageLoadPatterns<int_amdgcn_image_load_mip, "IMAGE_LOAD_MIP">; +defm : ImageLoadPattern<int_amdgcn_image_getresinfo, IMAGE_GET_RESINFO_V4_V1, i32>;  // Image store  defm : ImageStorePatterns<int_amdgcn_image_store, "IMAGE_STORE">; diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.ll index f0d23b93119..5fe03f09176 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.ll @@ -6,7 +6,7 @@  ;CHECK: s_waitcnt vmcnt(0)  define amdgpu_ps <4 x float> @image_load_v4i32(<8 x i32> inreg %rsrc, <4 x i32> %c) {  main_body: -  %tex = call <4 x float> @llvm.amdgcn.image.load.v4i32(<4 x i32> %c, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0) +  %tex = call <4 x float> @llvm.amdgcn.image.load.v4f32.v4i32.v8i32(<4 x i32> %c, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0)    ret <4 x float> %tex  } @@ -15,7 +15,7 @@ main_body:  ;CHECK: s_waitcnt vmcnt(0)  define amdgpu_ps <4 x float> @image_load_v2i32(<8 x i32> inreg %rsrc, <2 x i32> %c) {  main_body: -  %tex = call <4 x float> @llvm.amdgcn.image.load.v2i32(<2 x i32> %c, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0) +  %tex = call <4 x float> @llvm.amdgcn.image.load.v4f32.v2i32.v8i32(<2 x i32> %c, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0)    ret <4 x float> %tex  } @@ -24,7 +24,7 @@ main_body:  ;CHECK: s_waitcnt vmcnt(0)  define amdgpu_ps <4 x float> @image_load_i32(<8 x i32> inreg %rsrc, i32 %c) {  main_body: -  %tex = call <4 x float> @llvm.amdgcn.image.load.i32(i32 %c, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0) +  %tex = call <4 x float> @llvm.amdgcn.image.load.v4f32.i32.v8i32(i32 %c, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0)    ret <4 x float> %tex  } @@ -33,7 +33,7 @@ main_body:  ;CHECK: s_waitcnt vmcnt(0)  define amdgpu_ps <4 x float> @image_load_mip(<8 x i32> inreg %rsrc, <4 x i32> %c) {  main_body: -  %tex = call <4 x float> @llvm.amdgcn.image.load.mip.v4i32(<4 x i32> %c, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0) +  %tex = call <4 x float> @llvm.amdgcn.image.load.mip.v4f32.v4i32.v8i32(<4 x i32> %c, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0)    ret <4 x float> %tex  } @@ -42,7 +42,7 @@ main_body:  ;CHECK: s_waitcnt vmcnt(0)  define amdgpu_ps float @image_load_1(<8 x i32> inreg %rsrc, <4 x i32> %c) {  main_body: -  %tex = call <4 x float> @llvm.amdgcn.image.load.v4i32(<4 x i32> %c, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0) +  %tex = call <4 x float> @llvm.amdgcn.image.load.v4f32.v4i32.v8i32(<4 x i32> %c, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0)    %elt = extractelement <4 x float> %tex, i32 0  ; Only first component used, test that dmask etc. is changed accordingly    ret float %elt @@ -52,7 +52,7 @@ main_body:  ;CHECK: image_store v[0:3], v[4:7], s[0:7] dmask:0xf unorm  define amdgpu_ps void @image_store_v4i32(<8 x i32> inreg %rsrc, <4 x float> %data, <4 x i32> %coords) {  main_body: -  call void @llvm.amdgcn.image.store.v4i32(<4 x float> %data, <4 x i32> %coords, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0) +  call void @llvm.amdgcn.image.store.v4f32.v4i32.v8i32(<4 x float> %data, <4 x i32> %coords, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0)    ret void  } @@ -60,7 +60,7 @@ main_body:  ;CHECK: image_store v[0:3], v[4:5], s[0:7] dmask:0xf unorm  define amdgpu_ps void @image_store_v2i32(<8 x i32> inreg %rsrc, <4 x float> %data, <2 x i32> %coords) {  main_body: -  call void @llvm.amdgcn.image.store.v2i32(<4 x float> %data, <2 x i32> %coords, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0) +  call void @llvm.amdgcn.image.store.v4f32.v2i32.v8i32(<4 x float> %data, <2 x i32> %coords, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0)    ret void  } @@ -68,7 +68,7 @@ main_body:  ;CHECK: image_store v[0:3], v4, s[0:7] dmask:0xf unorm  define amdgpu_ps void @image_store_i32(<8 x i32> inreg %rsrc, <4 x float> %data, i32 %coords) {  main_body: -  call void @llvm.amdgcn.image.store.i32(<4 x float> %data, i32 %coords, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0) +  call void @llvm.amdgcn.image.store.v4f32.i32.v8i32(<4 x float> %data, i32 %coords, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0)    ret void  } @@ -76,10 +76,24 @@ main_body:  ;CHECK: image_store_mip v[0:3], v[4:7], s[0:7] dmask:0xf unorm  define amdgpu_ps void @image_store_mip(<8 x i32> inreg %rsrc, <4 x float> %data, <4 x i32> %coords) {  main_body: -  call void @llvm.amdgcn.image.store.mip.v4i32(<4 x float> %data, <4 x i32> %coords, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0) +  call void @llvm.amdgcn.image.store.mip.v4f32.v4i32.v8i32(<4 x float> %data, <4 x i32> %coords, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0)    ret void  } +;CHECK-LABEL: {{^}}getresinfo: +;CHECK: image_get_resinfo {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf +define amdgpu_ps void @getresinfo() { +main_body: +  %r = call <4 x float> @llvm.amdgcn.image.getresinfo.v4f32.i32.v8i32(i32 undef, <8 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0) +  %r0 = extractelement <4 x float> %r, i32 0 +  %r1 = extractelement <4 x float> %r, i32 1 +  %r2 = extractelement <4 x float> %r, i32 2 +  %r3 = extractelement <4 x float> %r, i32 3 +  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) +  ret void +} + +  ; Ideally, the register allocator would avoid the wait here  ;  ;CHECK-LABEL: {{^}}image_store_wait: @@ -90,21 +104,25 @@ main_body:  ;CHECK: image_store v[0:3], v4, s[16:23] dmask:0xf unorm  define amdgpu_ps void @image_store_wait(<8 x i32> inreg, <8 x i32> inreg, <8 x i32> inreg, <4 x float>, i32) {  main_body: -  call void @llvm.amdgcn.image.store.i32(<4 x float> %3, i32 %4, <8 x i32> %0, i32 15, i1 0, i1 0, i1 0, i1 0) -  %data = call <4 x float> @llvm.amdgcn.image.load.i32(i32 %4, <8 x i32> %1, i32 15, i1 0, i1 0, i1 0, i1 0) -  call void @llvm.amdgcn.image.store.i32(<4 x float> %data, i32 %4, <8 x i32> %2, i32 15, i1 0, i1 0, i1 0, i1 0) +  call void @llvm.amdgcn.image.store.v4f32.i32.v8i32(<4 x float> %3, i32 %4, <8 x i32> %0, i32 15, i1 0, i1 0, i1 0, i1 0) +  %data = call <4 x float> @llvm.amdgcn.image.load.v4f32.i32.v8i32(i32 %4, <8 x i32> %1, i32 15, i1 0, i1 0, i1 0, i1 0) +  call void @llvm.amdgcn.image.store.v4f32.i32.v8i32(<4 x float> %data, i32 %4, <8 x i32> %2, i32 15, i1 0, i1 0, i1 0, i1 0)    ret void  } -declare void @llvm.amdgcn.image.store.i32(<4 x float>, i32, <8 x i32>, i32, i1, i1, i1, i1) #0 -declare void @llvm.amdgcn.image.store.v2i32(<4 x float>, <2 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #0 -declare void @llvm.amdgcn.image.store.v4i32(<4 x float>, <4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #0 -declare void @llvm.amdgcn.image.store.mip.v4i32(<4 x float>, <4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #0 +declare void @llvm.amdgcn.image.store.v4f32.i32.v8i32(<4 x float>, i32, <8 x i32>, i32, i1, i1, i1, i1) #0 +declare void @llvm.amdgcn.image.store.v4f32.v2i32.v8i32(<4 x float>, <2 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #0 +declare void @llvm.amdgcn.image.store.v4f32.v4i32.v8i32(<4 x float>, <4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #0 +declare void @llvm.amdgcn.image.store.mip.v4f32.v4i32.v8i32(<4 x float>, <4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #0 + +declare <4 x float> @llvm.amdgcn.image.load.v4f32.i32.v8i32(i32, <8 x i32>, i32, i1, i1, i1, i1) #1 +declare <4 x float> @llvm.amdgcn.image.load.v4f32.v2i32.v8i32(<2 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #1 +declare <4 x float> @llvm.amdgcn.image.load.v4f32.v4i32.v8i32(<4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #1 +declare <4 x float> @llvm.amdgcn.image.load.mip.v4f32.v4i32.v8i32(<4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #1 + +declare <4 x float> @llvm.amdgcn.image.getresinfo.v4f32.i32.v8i32(i32, <8 x i32>, i32, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.load.i32(i32, <8 x i32>, i32, i1, i1, i1, i1) #1 -declare <4 x float> @llvm.amdgcn.image.load.v2i32(<2 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #1 -declare <4 x float> @llvm.amdgcn.image.load.v4i32(<4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #1 -declare <4 x float> @llvm.amdgcn.image.load.mip.v4i32(<4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #1 +declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)  attributes #0 = { nounwind }  attributes #1 = { nounwind readonly } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.waitcnt.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.waitcnt.ll index c2d48f99aac..d73986ac575 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.waitcnt.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.waitcnt.ll @@ -7,9 +7,9 @@  ; CHECK-NEXT: image_store  ; CHECK-NEXT: s_endpgm  define amdgpu_ps void @test1(<8 x i32> inreg %rsrc, <4 x float> %d0, <4 x float> %d1, i32 %c0, i32 %c1) { -  call void @llvm.amdgcn.image.store.i32(<4 x float> %d0, i32 %c0, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 1, i1 0) +  call void @llvm.amdgcn.image.store.v4f32.i32.v8i32(<4 x float> %d0, i32 %c0, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 1, i1 0)    call void @llvm.amdgcn.s.waitcnt(i32 3840) ; 0xf00 -  call void @llvm.amdgcn.image.store.i32(<4 x float> %d1, i32 %c1, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 1, i1 0) +  call void @llvm.amdgcn.image.store.v4f32.i32.v8i32(<4 x float> %d1, i32 %c1, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 1, i1 0)    ret void  } @@ -22,17 +22,17 @@ define amdgpu_ps void @test1(<8 x i32> inreg %rsrc, <4 x float> %d0, <4 x float>  ; CHECK: s_waitcnt  ; CHECK-NEXT: image_store  define amdgpu_ps void @test2(<8 x i32> inreg %rsrc, i32 %c) { -  %t = call <4 x float> @llvm.amdgcn.image.load.i32(i32 %c, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0) +  %t = call <4 x float> @llvm.amdgcn.image.load.v4f32.i32.v8i32(i32 %c, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0)    call void @llvm.amdgcn.s.waitcnt(i32 3840) ; 0xf00    %c.1 = mul i32 %c, 2 -  call void @llvm.amdgcn.image.store.i32(<4 x float> %t, i32 %c.1, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0) +  call void @llvm.amdgcn.image.store.v4f32.i32.v8i32(<4 x float> %t, i32 %c.1, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0)    ret void  }  declare void @llvm.amdgcn.s.waitcnt(i32) #0 -declare <4 x float> @llvm.amdgcn.image.load.i32(i32, <8 x i32>, i32, i1, i1, i1, i1) #1 -declare void @llvm.amdgcn.image.store.i32(<4 x float>, i32, <8 x i32>, i32, i1, i1, i1, i1) #0 +declare <4 x float> @llvm.amdgcn.image.load.v4f32.i32.v8i32(i32, <8 x i32>, i32, i1, i1, i1, i1) #1 +declare void @llvm.amdgcn.image.store.v4f32.i32.v8i32(<4 x float>, i32, <8 x i32>, i32, i1, i1, i1, i1) #0  attributes #0 = { nounwind }  attributes #1 = { nounwind readonly } diff --git a/llvm/test/CodeGen/AMDGPU/wqm.ll b/llvm/test/CodeGen/AMDGPU/wqm.ll index c068c7c7b6e..1e7fd8f699a 100644 --- a/llvm/test/CodeGen/AMDGPU/wqm.ll +++ b/llvm/test/CodeGen/AMDGPU/wqm.ll @@ -7,8 +7,8 @@  ;CHECK-NOT: s_wqm  define amdgpu_ps <4 x float> @test1(<8 x i32> inreg %rsrc, <4 x i32> %c) {  main_body: -  %tex = call <4 x float> @llvm.amdgcn.image.load.v4i32(<4 x i32> %c, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0) -  call void @llvm.amdgcn.image.store.v4i32(<4 x float> %tex, <4 x i32> %c, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0) +  %tex = call <4 x float> @llvm.amdgcn.image.load.v4f32.v4i32.v8i32(<4 x i32> %c, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0) +  call void @llvm.amdgcn.image.store.v4f32.v4i32.v8i32(<4 x float> %tex, <4 x i32> %c, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0)    ret <4 x float> %tex  } @@ -366,7 +366,7 @@ main_body:  ; CHECK: ; return  define amdgpu_ps <4 x float> @test_loop_vcc(<4 x float> %in) nounwind {  entry: -  call void @llvm.amdgcn.image.store.v4i32(<4 x float> %in, <4 x i32> undef, <8 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0) +  call void @llvm.amdgcn.image.store.v4f32.v4i32.v8i32(<4 x float> %in, <4 x i32> undef, <8 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0)    br label %loop  loop: @@ -502,11 +502,11 @@ end:  } -declare void @llvm.amdgcn.image.store.v4i32(<4 x float>, <4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #1 +declare void @llvm.amdgcn.image.store.v4f32.v4i32.v8i32(<4 x float>, <4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #1  declare void @llvm.amdgcn.buffer.store.f32(float, <4 x i32>, i32, i32, i1, i1) #1  declare void @llvm.amdgcn.buffer.store.v4f32(<4 x float>, <4 x i32>, i32, i32, i1, i1) #1 -declare <4 x float> @llvm.amdgcn.image.load.v4i32(<4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #2 +declare <4 x float> @llvm.amdgcn.image.load.v4f32.v4i32.v8i32(<4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #2  declare float @llvm.amdgcn.buffer.load.f32(<4 x i32>, i32, i32, i1, i1) #2  declare <4 x float> @llvm.SI.image.sample.i32(i32, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #3 | 

