diff options
author | Guozhi Wei <carrot@google.com> | 2019-06-14 23:08:59 +0000 |
---|---|---|
committer | Guozhi Wei <carrot@google.com> | 2019-06-14 23:08:59 +0000 |
commit | d2210af3322d8494e0518d113efa25e13a67987c (patch) | |
tree | 5a1b5a17c1fc7b85e204456e271d7469d42b914e /llvm/test/CodeGen/AMDGPU | |
parent | db88fc56b967da83a5fc1cb995cc765331a3e6cb (diff) | |
download | bcm5719-llvm-d2210af3322d8494e0518d113efa25e13a67987c.tar.gz bcm5719-llvm-d2210af3322d8494e0518d113efa25e13a67987c.zip |
[MBP] Move a latch block with conditional exit and multi predecessors to top of loop
Current findBestLoopTop can find and move one kind of block to top, a latch block has one successor. Another common case is:
* a latch block
* it has two successors, one is loop header, another is exit
* it has more than one predecessors
If it is below one of its predecessors P, only P can fall through to it, all other predecessors need a jump to it, and another conditional jump to loop header. If it is moved before loop header, all its predecessors jump to it, then fall through to loop header. So all its predecessors except P can reduce one taken branch.
Differential Revision: https://reviews.llvm.org/D43256
llvm-svn: 363471
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU')
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/collapse-endcf.ll | 9 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll | 41 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/global_smrd_cfg.ll | 21 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/hoist-cond.ll | 2 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/i1-copy-from-loop.ll | 12 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll | 12 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/loop_break.ll | 2 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/loop_exit_with_xor.ll | 4 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/madmk.ll | 2 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/multilevel-break.ll | 56 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/optimize-negated-cond.ll | 8 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/si-annotate-cf.ll | 12 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/valu-i1.ll | 2 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/wqm.ll | 11 |
14 files changed, 100 insertions, 94 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll b/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll index a425bcf2bdf..6a8456d99bc 100644 --- a/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll +++ b/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll @@ -230,6 +230,11 @@ bb.end: ; preds = %bb.then, %bb ; Make sure scc liveness is updated if sor_b64 is removed ; ALL-LABEL: {{^}}scc_liveness: +; GCN: %bb10 +; GCN: s_or_b64 exec, exec, s{{\[[0-9]+:[0-9]+\]}} +; GCN: s_andn2_b64 +; GCN-NEXT: s_cbranch_execz + ; GCN: [[BB1_LOOP:BB[0-9]+_[0-9]+]]: ; GCN: s_andn2_b64 exec, exec, ; GCN-NEXT: s_cbranch_execnz [[BB1_LOOP]] @@ -240,10 +245,6 @@ bb.end: ; preds = %bb.then, %bb ; GCN-NOT: s_or_b64 exec, exec ; GCN: s_or_b64 exec, exec, s{{\[[0-9]+:[0-9]+\]}} -; GCN: s_andn2_b64 -; GCN-NEXT: s_cbranch_execnz - -; GCN: s_or_b64 exec, exec, s{{\[[0-9]+:[0-9]+\]}} ; GCN: buffer_store_dword ; GCN: buffer_store_dword ; GCN: buffer_store_dword diff --git a/llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll b/llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll index 8d21050ebee..01cd11a32d8 100644 --- a/llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll +++ b/llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll @@ -19,38 +19,39 @@ define amdgpu_ps void @main(i32, float) { ; CHECK-NEXT: v_mov_b32_e32 v1, 0 ; CHECK-NEXT: ; implicit-def: $sgpr2_sgpr3 ; CHECK-NEXT: ; implicit-def: $sgpr6_sgpr7 -; CHECK-NEXT: BB0_1: ; %loop +; CHECK-NEXT: s_branch BB0_3 +; CHECK-NEXT: BB0_1: ; in Loop: Header=BB0_3 Depth=1 +; CHECK-NEXT: ; implicit-def: $vgpr1 +; CHECK-NEXT: BB0_2: ; %Flow +; CHECK-NEXT: ; in Loop: Header=BB0_3 Depth=1 +; CHECK-NEXT: s_and_b64 s[8:9], exec, s[6:7] +; CHECK-NEXT: s_or_b64 s[8:9], s[8:9], s[4:5] +; CHECK-NEXT: s_mov_b64 s[4:5], s[8:9] +; CHECK-NEXT: s_andn2_b64 exec, exec, s[8:9] +; CHECK-NEXT: s_cbranch_execz BB0_7 +; CHECK-NEXT: BB0_3: ; %loop ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: v_cmp_gt_u32_e32 vcc, 32, v1 ; CHECK-NEXT: s_and_b64 vcc, exec, vcc ; CHECK-NEXT: s_or_b64 s[6:7], s[6:7], exec ; CHECK-NEXT: s_or_b64 s[2:3], s[2:3], exec -; CHECK-NEXT: s_cbranch_vccz BB0_5 -; CHECK-NEXT: ; %bb.2: ; %endif1 -; CHECK-NEXT: ; in Loop: Header=BB0_1 Depth=1 +; CHECK-NEXT: s_cbranch_vccz BB0_1 +; CHECK-NEXT: ; %bb.4: ; %endif1 +; CHECK-NEXT: ; in Loop: Header=BB0_3 Depth=1 ; CHECK-NEXT: s_mov_b64 s[6:7], -1 ; CHECK-NEXT: s_and_saveexec_b64 s[8:9], s[0:1] ; CHECK-NEXT: s_xor_b64 s[8:9], exec, s[8:9] -; CHECK-NEXT: ; mask branch BB0_4 -; CHECK-NEXT: BB0_3: ; %endif2 -; CHECK-NEXT: ; in Loop: Header=BB0_1 Depth=1 +; CHECK-NEXT: ; mask branch BB0_6 +; CHECK-NEXT: BB0_5: ; %endif2 +; CHECK-NEXT: ; in Loop: Header=BB0_3 Depth=1 ; CHECK-NEXT: v_add_u32_e32 v1, 1, v1 ; CHECK-NEXT: s_xor_b64 s[6:7], exec, -1 -; CHECK-NEXT: BB0_4: ; %Flow1 -; CHECK-NEXT: ; in Loop: Header=BB0_1 Depth=1 +; CHECK-NEXT: BB0_6: ; %Flow1 +; CHECK-NEXT: ; in Loop: Header=BB0_3 Depth=1 ; CHECK-NEXT: s_or_b64 exec, exec, s[8:9] ; CHECK-NEXT: s_andn2_b64 s[2:3], s[2:3], exec -; CHECK-NEXT: s_branch BB0_6 -; CHECK-NEXT: BB0_5: ; in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: ; implicit-def: $vgpr1 -; CHECK-NEXT: BB0_6: ; %Flow -; CHECK-NEXT: ; in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: s_and_b64 s[8:9], exec, s[6:7] -; CHECK-NEXT: s_or_b64 s[8:9], s[8:9], s[4:5] -; CHECK-NEXT: s_mov_b64 s[4:5], s[8:9] -; CHECK-NEXT: s_andn2_b64 exec, exec, s[8:9] -; CHECK-NEXT: s_cbranch_execnz BB0_1 -; CHECK-NEXT: ; %bb.7: ; %Flow2 +; CHECK-NEXT: s_branch BB0_2 +; CHECK-NEXT: BB0_7: ; %Flow2 ; CHECK-NEXT: s_or_b64 exec, exec, s[8:9] ; CHECK-NEXT: v_mov_b32_e32 v1, 0 ; this is the divergent branch with the condition not marked as divergent diff --git a/llvm/test/CodeGen/AMDGPU/global_smrd_cfg.ll b/llvm/test/CodeGen/AMDGPU/global_smrd_cfg.ll index be6e3fd05ae..1a675ce57bc 100644 --- a/llvm/test/CodeGen/AMDGPU/global_smrd_cfg.ll +++ b/llvm/test/CodeGen/AMDGPU/global_smrd_cfg.ll @@ -1,27 +1,28 @@ ; RUN: llc -mtriple amdgcn--amdhsa -mcpu=fiji -amdgpu-scalarize-global-loads=true -verify-machineinstrs < %s | FileCheck %s -; CHECK-LABEL: %bb11 +; CHECK-LABEL: %bb22 -; Load from %arg in a Loop body has alias store +; Load from %arg has alias store in Loop ; CHECK: flat_load_dword -; CHECK-LABEL: %bb20 -; CHECK: flat_store_dword +; ##################################################################### + +; Load from %arg1 has no-alias store in Loop - arg1[i+1] never alias arg1[i] + +; CHECK: s_load_dword ; ##################################################################### -; CHECK-LABEL: %bb22 +; CHECK-LABEL: %bb11 -; Load from %arg has alias store in Loop +; Load from %arg in a Loop body has alias store ; CHECK: flat_load_dword -; ##################################################################### - -; Load from %arg1 has no-alias store in Loop - arg1[i+1] never alias arg1[i] +; CHECK-LABEL: %bb20 -; CHECK: s_load_dword +; CHECK: flat_store_dword define amdgpu_kernel void @cfg(i32 addrspace(1)* nocapture readonly %arg, i32 addrspace(1)* nocapture %arg1, i32 %arg2) #0 { bb: diff --git a/llvm/test/CodeGen/AMDGPU/hoist-cond.ll b/llvm/test/CodeGen/AMDGPU/hoist-cond.ll index 6fe53e718f6..76a26882987 100644 --- a/llvm/test/CodeGen/AMDGPU/hoist-cond.ll +++ b/llvm/test/CodeGen/AMDGPU/hoist-cond.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -march=amdgcn -verify-machineinstrs -disable-block-placement < %s | FileCheck %s ; Check that invariant compare is hoisted out of the loop. ; At the same time condition shall not be serialized into a VGPR and deserialized later diff --git a/llvm/test/CodeGen/AMDGPU/i1-copy-from-loop.ll b/llvm/test/CodeGen/AMDGPU/i1-copy-from-loop.ll index ae78a1ecf32..a050bfe29bf 100644 --- a/llvm/test/CodeGen/AMDGPU/i1-copy-from-loop.ll +++ b/llvm/test/CodeGen/AMDGPU/i1-copy-from-loop.ll @@ -3,20 +3,20 @@ ; SI-LABEL: {{^}}i1_copy_from_loop: ; +; SI: ; %Flow +; SI-DAG: s_andn2_b64 [[LCSSA_ACCUM:s\[[0-9]+:[0-9]+\]]], [[LCSSA_ACCUM]], exec +; SI-DAG: s_and_b64 [[CC_MASK2:s\[[0-9]+:[0-9]+\]]], [[CC_ACCUM:s\[[0-9]+:[0-9]+\]]], exec +; SI: s_or_b64 [[LCSSA_ACCUM]], [[LCSSA_ACCUM]], [[CC_MASK2]] + ; SI: ; %for.body ; SI: v_cmp_gt_u32_e64 [[CC_SREG:s\[[0-9]+:[0-9]+\]]], 4, -; SI-DAG: s_andn2_b64 [[CC_ACCUM:s\[[0-9]+:[0-9]+\]]], [[CC_ACCUM]], exec +; SI-DAG: s_andn2_b64 [[CC_ACCUM]], [[CC_ACCUM]], exec ; SI-DAG: s_and_b64 [[CC_MASK:s\[[0-9]+:[0-9]+\]]], [[CC_SREG]], exec ; SI: s_or_b64 [[CC_ACCUM]], [[CC_ACCUM]], [[CC_MASK]] ; SI: ; %Flow1 ; SI: s_or_b64 [[CC_ACCUM]], [[CC_ACCUM]], exec -; SI: ; %Flow -; SI-DAG: s_andn2_b64 [[LCSSA_ACCUM:s\[[0-9]+:[0-9]+\]]], [[LCSSA_ACCUM]], exec -; SI-DAG: s_and_b64 [[CC_MASK2:s\[[0-9]+:[0-9]+\]]], [[CC_ACCUM]], exec -; SI: s_or_b64 [[LCSSA_ACCUM]], [[LCSSA_ACCUM]], [[CC_MASK2]] - ; SI: ; %for.end ; SI: s_and_saveexec_b64 {{s\[[0-9]+:[0-9]+\]}}, [[LCSSA_ACCUM]] diff --git a/llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll b/llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll index dc7f495c42f..bde1cd5c435 100644 --- a/llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll +++ b/llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll @@ -630,12 +630,7 @@ define amdgpu_kernel void @insertelement_v16f32_or_index(<16 x float> addrspace( ; GCN-LABEL: {{^}}broken_phi_bb: ; GCN: v_mov_b32_e32 [[PHIREG:v[0-9]+]], 8 -; GCN: s_branch [[BB2:BB[0-9]+_[0-9]+]] - -; GCN: {{^BB[0-9]+_[0-9]+}}: -; GCN: s_mov_b64 exec, - -; GCN: [[BB2]]: +; GCN: [[BB2:BB[0-9]+_[0-9]+]]: ; GCN: v_cmp_le_i32_e32 vcc, s{{[0-9]+}}, [[PHIREG]] ; GCN: buffer_load_dword @@ -647,6 +642,11 @@ define amdgpu_kernel void @insertelement_v16f32_or_index(<16 x float> addrspace( ; IDXMODE: s_set_gpr_idx_off ; GCN: s_cbranch_execnz [[REGLOOP]] + +; GCN: {{^; %bb.[0-9]}}: +; GCN: s_mov_b64 exec, +; GCN: s_branch [[BB2]] + define amdgpu_kernel void @broken_phi_bb(i32 %arg, i32 %arg1) #0 { bb: br label %bb2 diff --git a/llvm/test/CodeGen/AMDGPU/loop_break.ll b/llvm/test/CodeGen/AMDGPU/loop_break.ll index b4436fc42fa..fd634689fcd 100644 --- a/llvm/test/CodeGen/AMDGPU/loop_break.ll +++ b/llvm/test/CodeGen/AMDGPU/loop_break.ll @@ -1,5 +1,5 @@ ; RUN: opt -mtriple=amdgcn-- -S -structurizecfg -si-annotate-control-flow %s | FileCheck -check-prefix=OPT %s -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -verify-machineinstrs -disable-block-placement < %s | FileCheck -check-prefix=GCN %s ; Uses llvm.amdgcn.break diff --git a/llvm/test/CodeGen/AMDGPU/loop_exit_with_xor.ll b/llvm/test/CodeGen/AMDGPU/loop_exit_with_xor.ll index 486364acdb7..f374276aa60 100644 --- a/llvm/test/CodeGen/AMDGPU/loop_exit_with_xor.ll +++ b/llvm/test/CodeGen/AMDGPU/loop_exit_with_xor.ll @@ -61,9 +61,9 @@ loopexit: ; GCN-LABEL: {{^}}break_cond_is_arg: ; GCN: s_xor_b64 [[REG1:[^ ,]*]], {{[^ ,]*, -1$}} +; GCN: s_andn2_b64 exec, exec, [[REG3:[^ ,]*]] ; GCN: s_and_b64 [[REG2:[^ ,]*]], exec, [[REG1]] -; GCN: s_or_b64 [[REG3:[^ ,]*]], [[REG2]], -; GCN: s_andn2_b64 exec, exec, [[REG3]] +; GCN: s_or_b64 [[REG3]], [[REG2]], define void @break_cond_is_arg(i32 %arg, i1 %breakcond) { entry: diff --git a/llvm/test/CodeGen/AMDGPU/madmk.ll b/llvm/test/CodeGen/AMDGPU/madmk.ll index e06c5e8f485..28df8c8f3db 100644 --- a/llvm/test/CodeGen/AMDGPU/madmk.ll +++ b/llvm/test/CodeGen/AMDGPU/madmk.ll @@ -188,9 +188,9 @@ define amdgpu_kernel void @madmk_add_inline_imm_f32(float addrspace(1)* noalias } ; SI-LABEL: {{^}}kill_madmk_verifier_error: +; SI: s_or_b64 ; SI: s_xor_b64 ; SI: v_mac_f32_e32 {{v[0-9]+}}, 0x472aee8c, {{v[0-9]+}} -; SI: s_or_b64 define amdgpu_kernel void @kill_madmk_verifier_error() nounwind { bb: br label %bb2 diff --git a/llvm/test/CodeGen/AMDGPU/multilevel-break.ll b/llvm/test/CodeGen/AMDGPU/multilevel-break.ll index d243233119b..0ae28c6ef79 100644 --- a/llvm/test/CodeGen/AMDGPU/multilevel-break.ll +++ b/llvm/test/CodeGen/AMDGPU/multilevel-break.ll @@ -24,13 +24,29 @@ ; GCN: ; %main_body ; GCN: s_mov_b64 [[LEFT_OUTER:s\[[0-9]+:[0-9]+\]]], 0{{$}} +; GCN: [[FLOW2:BB[0-9]+_[0-9]+]]: ; %Flow2 +; GCN: s_or_b64 exec, exec, [[TMP0:s\[[0-9]+:[0-9]+\]]] +; GCN: s_and_b64 [[TMP1:s\[[0-9]+:[0-9]+\]]], exec, [[BREAK_OUTER:s\[[0-9]+:[0-9]+\]]] +; GCN: s_or_b64 [[TMP1]], [[TMP1]], [[LEFT_OUTER]] +; GCN: s_mov_b64 [[LEFT_OUTER]], [[TMP1]] +; GCN: s_andn2_b64 exec, exec, [[TMP1]] +; GCN: s_cbranch_execz [[IF_BLOCK:BB[0-9]+_[0-9]+]] + ; GCN: [[OUTER_LOOP:BB[0-9]+_[0-9]+]]: ; %LOOP.outer{{$}} ; GCN: s_mov_b64 [[LEFT_INNER:s\[[0-9]+:[0-9]+\]]], 0{{$}} +; GCN: ; %Flow +; GCN: s_or_b64 exec, exec, [[SAVE_EXEC:s\[[0-9]+:[0-9]+\]]] +; GCN: s_and_b64 [[TMP0]], exec, [[BREAK_INNER:s\[[0-9]+:[0-9]+\]]] +; GCN: s_or_b64 [[TMP0]], [[TMP0]], [[LEFT_INNER]] +; GCN: s_mov_b64 [[LEFT_INNER]], [[TMP0]] +; GCN: s_andn2_b64 exec, exec, [[TMP0]] +; GCN: s_cbranch_execz [[FLOW2]] + ; GCN: [[INNER_LOOP:BB[0-9]+_[0-9]+]]: ; %LOOP{{$}} -; GCN: s_or_b64 [[BREAK_OUTER:s\[[0-9]+:[0-9]+\]]], [[BREAK_OUTER]], exec -; GCN: s_or_b64 [[BREAK_INNER:s\[[0-9]+:[0-9]+\]]], [[BREAK_INNER]], exec -; GCN: s_and_saveexec_b64 [[SAVE_EXEC:s\[[0-9]+:[0-9]+\]]], vcc +; GCN: s_or_b64 [[BREAK_OUTER]], [[BREAK_OUTER]], exec +; GCN: s_or_b64 [[BREAK_INNER]], [[BREAK_INNER]], exec +; GCN: s_and_saveexec_b64 [[SAVE_EXEC]], vcc ; FIXME: duplicate comparison ; GCN: ; %ENDIF @@ -43,23 +59,7 @@ ; GCN-DAG: s_or_b64 [[BREAK_OUTER]], [[BREAK_OUTER]], [[TMP_EQ]] ; GCN-DAG: s_or_b64 [[BREAK_INNER]], [[BREAK_INNER]], [[TMP_NE]] -; GCN: ; %Flow -; GCN: s_or_b64 exec, exec, [[SAVE_EXEC]] -; GCN: s_and_b64 [[TMP0:s\[[0-9]+:[0-9]+\]]], exec, [[BREAK_INNER]] -; GCN: s_or_b64 [[TMP0]], [[TMP0]], [[LEFT_INNER]] -; GCN: s_mov_b64 [[LEFT_INNER]], [[TMP0]] -; GCN: s_andn2_b64 exec, exec, [[TMP0]] -; GCN: s_cbranch_execnz [[INNER_LOOP]] - -; GCN: ; %Flow2 -; GCN: s_or_b64 exec, exec, [[TMP0]] -; GCN: s_and_b64 [[TMP1:s\[[0-9]+:[0-9]+\]]], exec, [[BREAK_OUTER]] -; GCN: s_or_b64 [[TMP1]], [[TMP1]], [[LEFT_OUTER]] -; GCN: s_mov_b64 [[LEFT_OUTER]], [[TMP1]] -; GCN: s_andn2_b64 exec, exec, [[TMP1]] -; GCN: s_cbranch_execnz [[OUTER_LOOP]] - -; GCN: ; %IF +; GCN: [[IF_BLOCK]]: ; %IF ; GCN-NEXT: s_endpgm define amdgpu_vs void @multi_else_break(<4 x float> %vec, i32 %ub, i32 %cont) { main_body: @@ -92,12 +92,18 @@ ENDIF: ; preds = %LOOP ; GCN-LABEL: {{^}}multi_if_break_loop: ; GCN: s_mov_b64 [[LEFT:s\[[0-9]+:[0-9]+\]]], 0{{$}} +; GCN: ; %Flow4 +; GCN: s_and_b64 [[BREAK:s\[[0-9]+:[0-9]+\]]], exec, [[BREAK]] +; GCN: s_or_b64 [[LEFT]], [[BREAK]], [[OLD_LEFT:s\[[0-9]+:[0-9]+\]]] +; GCN: s_andn2_b64 exec, exec, [[LEFT]] +; GCN-NEXT: s_cbranch_execz + ; GCN: [[LOOP:BB[0-9]+_[0-9]+]]: ; %bb1{{$}} -; GCN: s_mov_b64 [[OLD_LEFT:s\[[0-9]+:[0-9]+\]]], [[LEFT]] +; GCN: s_mov_b64 [[OLD_LEFT]], [[LEFT]] ; GCN: ; %LeafBlock1 ; GCN: s_mov_b64 -; GCN: s_mov_b64 [[BREAK:s\[[0-9]+:[0-9]+\]]], -1{{$}} +; GCN: s_mov_b64 [[BREAK]], -1{{$}} ; GCN: ; %case1 ; GCN: buffer_load_dword [[LOAD2:v[0-9]+]], @@ -118,12 +124,6 @@ ENDIF: ; preds = %LOOP ; GCN-DAG: s_and_b64 [[TMP:s\[[0-9]+:[0-9]+\]]], vcc, exec ; GCN: s_or_b64 [[BREAK]], [[BREAK]], [[TMP]] -; GCN: ; %Flow4 -; GCN: s_and_b64 [[BREAK]], exec, [[BREAK]] -; GCN: s_or_b64 [[LEFT]], [[BREAK]], [[OLD_LEFT]] -; GCN: s_andn2_b64 exec, exec, [[LEFT]] -; GCN-NEXT: s_cbranch_execnz - define amdgpu_kernel void @multi_if_break_loop(i32 %arg) #0 { bb: %id = call i32 @llvm.amdgcn.workitem.id.x() diff --git a/llvm/test/CodeGen/AMDGPU/optimize-negated-cond.ll b/llvm/test/CodeGen/AMDGPU/optimize-negated-cond.ll index be5d8d47205..2be99267c4e 100644 --- a/llvm/test/CodeGen/AMDGPU/optimize-negated-cond.ll +++ b/llvm/test/CodeGen/AMDGPU/optimize-negated-cond.ll @@ -3,11 +3,11 @@ ; GCN-LABEL: {{^}}negated_cond: ; GCN: BB0_1: ; GCN: v_cmp_eq_u32_e64 [[CC:[^,]+]], -; GCN: BB0_2: +; GCN: BB0_3: ; GCN-NOT: v_cndmask_b32 ; GCN-NOT: v_cmp ; GCN: s_andn2_b64 vcc, exec, [[CC]] -; GCN: s_cbranch_vccnz BB0_4 +; GCN: s_cbranch_vccnz BB0_2 define amdgpu_kernel void @negated_cond(i32 addrspace(1)* %arg1) { bb: br label %bb1 @@ -36,11 +36,11 @@ bb4: ; GCN-LABEL: {{^}}negated_cond_dominated_blocks: ; GCN: v_cmp_eq_u32_e64 [[CC:[^,]+]], -; GCN: BB1_1: +; GCN: %bb4 ; GCN-NOT: v_cndmask_b32 ; GCN-NOT: v_cmp ; GCN: s_andn2_b64 vcc, exec, [[CC]] -; GCN: s_cbranch_vccz BB1_3 +; GCN: s_cbranch_vccnz BB1_1 define amdgpu_kernel void @negated_cond_dominated_blocks(i32 addrspace(1)* %arg1) { bb: br label %bb2 diff --git a/llvm/test/CodeGen/AMDGPU/si-annotate-cf.ll b/llvm/test/CodeGen/AMDGPU/si-annotate-cf.ll index ad68d300de0..14928749e9f 100644 --- a/llvm/test/CodeGen/AMDGPU/si-annotate-cf.ll +++ b/llvm/test/CodeGen/AMDGPU/si-annotate-cf.ll @@ -96,20 +96,20 @@ declare float @llvm.fabs.f32(float) nounwind readnone ; FUNC-LABEL: {{^}}loop_land_info_assert: ; SI: v_cmp_lt_i32_e64 [[CMP4:s\[[0-9:]+\]]], s{{[0-9]+}}, 4{{$}} ; SI: s_and_b64 [[CMP4M:s\[[0-9]+:[0-9]+\]]], exec, [[CMP4]] -; SI: s_branch [[INFLOOP:BB[0-9]+_[0-9]+]] + +; SI: [[WHILELOOP:BB[0-9]+_[0-9]+]]: ; %while.cond +; SI: s_cbranch_vccz [[FOR_COND_PH:BB[0-9]+_[0-9]+]] ; SI: [[CONVEX_EXIT:BB[0-9_]+]] ; SI: s_mov_b64 vcc, ; SI-NEXT: s_cbranch_vccnz [[ENDPGM:BB[0-9]+_[0-9]+]] -; SI: s_cbranch_vccnz [[INFLOOP]] + +; SI: s_cbranch_vccnz [[WHILELOOP]] ; SI: ; %if.else ; SI: buffer_store_dword -; SI: [[INFLOOP]]: -; SI: s_cbranch_vccnz [[CONVEX_EXIT]] - -; SI: ; %for.cond.preheader +; SI: [[FOR_COND_PH]]: ; %for.cond.preheader ; SI: s_cbranch_vccz [[ENDPGM]] ; SI: [[ENDPGM]]: diff --git a/llvm/test/CodeGen/AMDGPU/valu-i1.ll b/llvm/test/CodeGen/AMDGPU/valu-i1.ll index 3a9970e78e3..718b1b5238f 100644 --- a/llvm/test/CodeGen/AMDGPU/valu-i1.ll +++ b/llvm/test/CodeGen/AMDGPU/valu-i1.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs -enable-misched -asm-verbose < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -verify-machineinstrs -enable-misched -asm-verbose -disable-block-placement < %s | FileCheck -check-prefix=SI %s declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone diff --git a/llvm/test/CodeGen/AMDGPU/wqm.ll b/llvm/test/CodeGen/AMDGPU/wqm.ll index 41ff30b4fdd..b827668950b 100644 --- a/llvm/test/CodeGen/AMDGPU/wqm.ll +++ b/llvm/test/CodeGen/AMDGPU/wqm.ll @@ -650,12 +650,15 @@ main_body: ; CHECK-DAG: v_mov_b32_e32 [[CTR:v[0-9]+]], 0 ; CHECK-DAG: s_mov_b32 [[SEVEN:s[0-9]+]], 0x40e00000 -; CHECK: [[LOOPHDR:BB[0-9]+_[0-9]+]]: ; %body -; CHECK: v_add_f32_e32 [[CTR]], 2.0, [[CTR]] +; CHECK: [[LOOPHDR:BB[0-9]+_[0-9]+]]: ; %loop ; CHECK: v_cmp_lt_f32_e32 vcc, [[SEVEN]], [[CTR]] -; CHECK: s_cbranch_vccz [[LOOPHDR]] -; CHECK: ; %break +; CHECK: s_cbranch_vccnz +; CHECK: ; %body +; CHECK: v_add_f32_e32 [[CTR]], 2.0, [[CTR]] +; CHECK: s_branch [[LOOPHDR]] + +; CHECK: ; %break ; CHECK: ; return define amdgpu_ps <4 x float> @test_loop_vcc(<4 x float> %in) nounwind { entry: |