diff options
Diffstat (limited to 'llvm')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/MIMGInstructions.td | 10 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.ll | 24 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.waitcnt.ll | 3 |
3 files changed, 27 insertions, 10 deletions
diff --git a/llvm/lib/Target/AMDGPU/MIMGInstructions.td b/llvm/lib/Target/AMDGPU/MIMGInstructions.td index 30a2df51038..651265fc54d 100644 --- a/llvm/lib/Target/AMDGPU/MIMGInstructions.td +++ b/llvm/lib/Target/AMDGPU/MIMGInstructions.td @@ -71,9 +71,9 @@ class MIMG_Store_Helper <bits<7> op, string asm, r128:$r128, tfe:$tfe, lwe:$lwe, da:$da), asm#" $vdata, $vaddr, $srsrc$dmask$unorm$glc$slc$r128$tfe$lwe$da", dns>, MIMGe<op> { let ssamp = 0; - let mayLoad = 1; // TableGen requires this for matching with the intrinsics + let mayLoad = 0; let mayStore = 1; - let hasSideEffects = 1; + let hasSideEffects = 0; let hasPostISelHook = 0; let DisableWQM = 1; } @@ -103,10 +103,10 @@ class MIMG_Atomic_Helper <string asm, RegisterClass data_rc, (ins data_rc:$vdata, addr_rc:$vaddr, SReg_256:$srsrc, dmask:$dmask, unorm:$unorm, GLC:$glc, slc:$slc, r128:$r128, tfe:$tfe, lwe:$lwe, da:$da), - asm#" $vdst, $vaddr, $srsrc$dmask$unorm$glc$slc$r128$tfe$lwe$da" - > { + asm#" $vdst, $vaddr, $srsrc$dmask$unorm$glc$slc$r128$tfe$lwe$da"> { + let mayLoad = 1; let mayStore = 1; - let hasSideEffects = 1; + let hasSideEffects = 1; // FIXME: Remove this let hasPostISelHook = 0; let DisableWQM = 1; let Constraints = "$vdst = $vdata"; diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.ll index d9be4a4d019..fac0d665bf0 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.ll @@ -157,12 +157,13 @@ main_body: ; Ideally, the register allocator would avoid the wait here ; +; XXX - Is this really allowed? Are the resource descriptors allowed to alias? ; GCN-LABEL: {{^}}image_store_wait: +; GCN: image_load v[5:8], v4, s[8:15] dmask:0xf unorm ; GCN: image_store v[0:3], v4, s[0:7] dmask:0xf unorm -; GCN: s_waitcnt expcnt(0) -; GCN: image_load v[0:3], v4, s[8:15] dmask:0xf unorm -; GCN: s_waitcnt vmcnt(0) -; GCN: image_store v[0:3], v4, s[16:23] dmask:0xf unorm +; GCN: s_waitcnt vmcnt(1) +; GCN: image_store v[5:8], v4, s[16:23] dmask:0xf unorm +; GCN-NEXT: s_endpgm define amdgpu_ps void @image_store_wait(<8 x i32> inreg %arg, <8 x i32> inreg %arg1, <8 x i32> inreg %arg2, <4 x float> %arg3, i32 %arg4) #0 { main_body: call void @llvm.amdgcn.image.store.v4f32.i32.v8i32(<4 x float> %arg3, i32 %arg4, <8 x i32> %arg, i32 15, i1 false, i1 false, i1 false, i1 false) @@ -171,6 +172,21 @@ main_body: ret void } +; The same image resource is used so reordering is not OK. +; GCN-LABEL: {{^}}image_store_wait_same_resource: +; GCN: image_store v[0:3], v4, s[0:7] dmask:0xf unorm +; GCN: s_waitcnt expcnt(0) +; GCN: image_load v[0:3], v4, s[0:7] dmask:0xf unorm +; GCN: s_waitcnt vmcnt(0) +; GCN: image_store v[0:3], v4, s[0:7] dmask:0xf unorm +define amdgpu_ps void @image_store_wait_same_resource(<8 x i32> inreg %rsrc, <4 x float> %arg3, i32 %arg4) #0 { +main_body: + call void @llvm.amdgcn.image.store.v4f32.i32.v8i32(<4 x float> %arg3, i32 %arg4, <8 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false) + %data = call <4 x float> @llvm.amdgcn.image.load.v4f32.i32.v8i32(i32 %arg4, <8 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false) + call void @llvm.amdgcn.image.store.v4f32.i32.v8i32(<4 x float> %data, i32 %arg4, <8 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false) + ret void +} + ; SI won't merge ds memory operations, because of the signed offset bug, so ; we only have check lines for VI. ; VI-LABEL: image_load_mmo diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.waitcnt.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.waitcnt.ll index f6c2cb44c99..61c287a896f 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.waitcnt.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.waitcnt.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck %s ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s ; CHECK-LABEL: {{^}}test1: @@ -20,6 +20,7 @@ define amdgpu_ps void @test1(<8 x i32> inreg %rsrc, <4 x float> %d0, <4 x float> ; CHECK-LABEL: {{^}}test2: ; CHECK-NOT: s_waitcnt ; CHECK: image_load +; CHECK-NEXT: v_lshlrev_b32 ; CHECK-NEXT: s_waitcnt ; CHECK: s_waitcnt vmcnt(0){{$}} ; CHECK-NEXT: image_store |

