summaryrefslogtreecommitdiffstats
path: root/llvm/test/CodeGen/AMDGPU
diff options
context:
space:
mode:
authorMark Searles <m.c.searles@gmail.com>2018-04-19 15:42:30 +0000
committerMark Searles <m.c.searles@gmail.com>2018-04-19 15:42:30 +0000
commit1bc6e71f32ebadefc319c65b3f650e08e39d6aa7 (patch)
treebbb954896e398ff8029b87d948ef765e7c5a9eba /llvm/test/CodeGen/AMDGPU
parent9b20c245ca36998cb2ac6e81543f87b781a9d06b (diff)
downloadbcm5719-llvm-1bc6e71f32ebadefc319c65b3f650e08e39d6aa7.tar.gz
bcm5719-llvm-1bc6e71f32ebadefc319c65b3f650e08e39d6aa7.zip
[AMDGPU] Do not only rely on BB number when finding bottom loop
We should also check that the "bottom" basic block of a loop is a successor of the "header" basic block; otherwise we don't propagate the information correctly when the CFG is complex. This fixes an important rendering problem with Wolfenstein 2, which was caused by a missing vector-memory wait. Differential Revision: https://reviews.llvm.org/D43831 llvm-svn: 330337
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU')
-rw-r--r--llvm/test/CodeGen/AMDGPU/waitcnt-back-edge-loop.mir59
1 files changed, 59 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/waitcnt-back-edge-loop.mir b/llvm/test/CodeGen/AMDGPU/waitcnt-back-edge-loop.mir
new file mode 100644
index 00000000000..2d9ec03132b
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/waitcnt-back-edge-loop.mir
@@ -0,0 +1,59 @@
+# RUN: llc -o - %s -march=amdgcn -mcpu=fiji -run-pass=si-insert-waitcnts -verify-machineinstrs | FileCheck -check-prefix=GCN %s
+
+# GCN-LABEL: waitcnt-back-edge-loop
+# GCN: bb.2
+# GCN: S_WAITCNT 112
+# GCN: $vgpr5 = V_CVT_I32_F32_e32 killed $vgpr5, implicit $exec
+
+---
+name: waitcnt-back-edge-loop
+body: |
+ bb.0:
+ successors: %bb.1
+
+ $vgpr1 = V_MOV_B32_e32 0, implicit $exec, implicit-def $vgpr1_vgpr2
+ $vgpr2 = V_MOV_B32_e32 0, implicit $exec, implicit-def $vgpr1_vgpr2
+ $vgpr4 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load 4 from `float addrspace(1)* null`, addrspace 1)
+ $vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load 4 from `float addrspace(1)* null`, addrspace 1)
+ $sgpr0_sgpr1 = V_CMP_EQ_U32_e64 3, killed $sgpr4, implicit $exec
+ $vgpr3 = V_CNDMASK_B32_e64 -1082130432, 1065353216, killed $sgpr0_sgpr1, implicit $exec
+ $vgpr5 = V_MOV_B32_e32 $vgpr0, implicit $exec, implicit $exec
+ S_BRANCH %bb.1
+
+ bb.3:
+ successors: %bb.1
+
+ $vgpr5 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load 4 from `float addrspace(1)* null`, addrspace 1)
+
+ bb.1:
+ successors: %bb.5, %bb.2
+
+ $vgpr5 = V_CVT_I32_F32_e32 killed $vgpr5, implicit $exec
+ V_CMP_NE_U32_e32 0, $vgpr5, implicit-def $vcc, implicit $exec
+ $vcc = S_AND_B64 $exec, killed $vcc, implicit-def dead $scc
+ S_CBRANCH_VCCZ %bb.5, implicit killed $vcc
+
+ bb.2:
+ successors: %bb.4, %bb.3
+
+ V_CMP_EQ_U32_e32 9, killed $vgpr5, implicit-def $vcc, implicit $exec
+ $vcc = S_AND_B64 $exec, killed $vcc, implicit-def dead $scc
+ S_CBRANCH_VCCZ %bb.3, implicit killed $vcc
+
+ bb.4:
+ successors: %bb.3, %bb.1
+
+ $vgpr5 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load 4 from `float addrspace(1)* null`, addrspace 1)
+ $vgpr4 = V_CVT_I32_F32_e32 $vgpr5, implicit $exec
+ V_CMP_EQ_U32_e32 2, killed $vgpr4, implicit-def $vcc, implicit $exec
+ $vcc = S_AND_B64 $exec, killed $vcc, implicit-def dead $scc
+ $vgpr4 = V_MOV_B32_e32 $vgpr5, implicit $exec, implicit $exec
+ S_CBRANCH_VCCZ %bb.1, implicit killed $vcc
+ S_BRANCH %bb.3
+
+ bb.5:
+
+ $vgpr4 = V_MAC_F32_e32 killed $vgpr0, killed $vgpr3, killed $vgpr4, implicit $exec
+ EXP_DONE 12, killed $vgpr4, undef $vgpr0, undef $vgpr0, undef $vgpr0, 0, 0, 15, implicit $exec
+ S_ENDPGM
+...
OpenPOWER on IntegriCloud