summaryrefslogtreecommitdiffstats
path: root/llvm
diff options
context:
space:
mode:
Diffstat (limited to 'llvm')
-rw-r--r--llvm/lib/Target/AMDGPU/MIMGInstructions.td10
-rw-r--r--llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.ll24
-rw-r--r--llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.waitcnt.ll3
3 files changed, 27 insertions, 10 deletions
diff --git a/llvm/lib/Target/AMDGPU/MIMGInstructions.td b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
index 30a2df51038..651265fc54d 100644
--- a/llvm/lib/Target/AMDGPU/MIMGInstructions.td
+++ b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
@@ -71,9 +71,9 @@ class MIMG_Store_Helper <bits<7> op, string asm,
r128:$r128, tfe:$tfe, lwe:$lwe, da:$da),
asm#" $vdata, $vaddr, $srsrc$dmask$unorm$glc$slc$r128$tfe$lwe$da", dns>, MIMGe<op> {
let ssamp = 0;
- let mayLoad = 1; // TableGen requires this for matching with the intrinsics
+ let mayLoad = 0;
let mayStore = 1;
- let hasSideEffects = 1;
+ let hasSideEffects = 0;
let hasPostISelHook = 0;
let DisableWQM = 1;
}
@@ -103,10 +103,10 @@ class MIMG_Atomic_Helper <string asm, RegisterClass data_rc,
(ins data_rc:$vdata, addr_rc:$vaddr, SReg_256:$srsrc,
dmask:$dmask, unorm:$unorm, GLC:$glc, slc:$slc,
r128:$r128, tfe:$tfe, lwe:$lwe, da:$da),
- asm#" $vdst, $vaddr, $srsrc$dmask$unorm$glc$slc$r128$tfe$lwe$da"
- > {
+ asm#" $vdst, $vaddr, $srsrc$dmask$unorm$glc$slc$r128$tfe$lwe$da"> {
+ let mayLoad = 1;
let mayStore = 1;
- let hasSideEffects = 1;
+ let hasSideEffects = 1; // FIXME: Remove this
let hasPostISelHook = 0;
let DisableWQM = 1;
let Constraints = "$vdst = $vdata";
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.ll
index d9be4a4d019..fac0d665bf0 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.ll
@@ -157,12 +157,13 @@ main_body:
; Ideally, the register allocator would avoid the wait here
;
+; XXX - Is this really allowed? Are the resource descriptors allowed to alias?
; GCN-LABEL: {{^}}image_store_wait:
+; GCN: image_load v[5:8], v4, s[8:15] dmask:0xf unorm
; GCN: image_store v[0:3], v4, s[0:7] dmask:0xf unorm
-; GCN: s_waitcnt expcnt(0)
-; GCN: image_load v[0:3], v4, s[8:15] dmask:0xf unorm
-; GCN: s_waitcnt vmcnt(0)
-; GCN: image_store v[0:3], v4, s[16:23] dmask:0xf unorm
+; GCN: s_waitcnt vmcnt(1)
+; GCN: image_store v[5:8], v4, s[16:23] dmask:0xf unorm
+; GCN-NEXT: s_endpgm
define amdgpu_ps void @image_store_wait(<8 x i32> inreg %arg, <8 x i32> inreg %arg1, <8 x i32> inreg %arg2, <4 x float> %arg3, i32 %arg4) #0 {
main_body:
call void @llvm.amdgcn.image.store.v4f32.i32.v8i32(<4 x float> %arg3, i32 %arg4, <8 x i32> %arg, i32 15, i1 false, i1 false, i1 false, i1 false)
@@ -171,6 +172,21 @@ main_body:
ret void
}
+; The same image resource is used so reordering is not OK.
+; GCN-LABEL: {{^}}image_store_wait_same_resource:
+; GCN: image_store v[0:3], v4, s[0:7] dmask:0xf unorm
+; GCN: s_waitcnt expcnt(0)
+; GCN: image_load v[0:3], v4, s[0:7] dmask:0xf unorm
+; GCN: s_waitcnt vmcnt(0)
+; GCN: image_store v[0:3], v4, s[0:7] dmask:0xf unorm
+define amdgpu_ps void @image_store_wait_same_resource(<8 x i32> inreg %rsrc, <4 x float> %arg3, i32 %arg4) #0 {
+main_body:
+ call void @llvm.amdgcn.image.store.v4f32.i32.v8i32(<4 x float> %arg3, i32 %arg4, <8 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false)
+ %data = call <4 x float> @llvm.amdgcn.image.load.v4f32.i32.v8i32(i32 %arg4, <8 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false)
+ call void @llvm.amdgcn.image.store.v4f32.i32.v8i32(<4 x float> %data, i32 %arg4, <8 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false)
+ ret void
+}
+
; SI won't merge ds memory operations, because of the signed offset bug, so
; we only have check lines for VI.
; VI-LABEL: image_load_mmo
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.waitcnt.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.waitcnt.ll
index f6c2cb44c99..61c287a896f 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.waitcnt.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.waitcnt.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s
; CHECK-LABEL: {{^}}test1:
@@ -20,6 +20,7 @@ define amdgpu_ps void @test1(<8 x i32> inreg %rsrc, <4 x float> %d0, <4 x float>
; CHECK-LABEL: {{^}}test2:
; CHECK-NOT: s_waitcnt
; CHECK: image_load
+; CHECK-NEXT: v_lshlrev_b32
; CHECK-NEXT: s_waitcnt
; CHECK: s_waitcnt vmcnt(0){{$}}
; CHECK-NEXT: image_store
OpenPOWER on IntegriCloud