author     Nicolai Haehnle <nhaehnle@gmail.com>  2018-10-31 13:26:48 +0000
committer  Nicolai Haehnle <nhaehnle@gmail.com>  2018-10-31 13:26:48 +0000
commit     28212cc6891559855d41066d68e64a84097bb749 (patch)
tree       58796a00d00b99291d0b62e166d4dce0242cb6a6 /llvm/test/CodeGen/AMDGPU
parent     2efccd2cf2804e2143c67c01bdfa44c5e3d887ac (diff)
AMDGPU: Remove PHI loop condition optimization
Summary:
The optimization to break out of loops early when all threads are dead was
never fully implemented. The PHI node analysis is actually causing a number
of problems, though, so remove all the extra code for it.

(This does actually regress code quality in a few places because it ends up
relying more heavily on phis of i1, which we don't do a great job with.
However, since it fixes real bugs in the wild, we should take this change.
I have some prototype changes to improve i1 lowering in general, not just
for control flow, which should help recover the code quality; I just need
to make those changes fit for general consumption. -- Nicolai)

Change-Id: I6fc6c6c8961857ac6009fcfb9f7e5e48dc23fbb1
Patch-by: Christian König <christian.koenig@amd.com>

Reviewers: arsenm, rampitec, tpr

Subscribers: kzhuravl, jvesely, wdng, yaxunl, dstuttard, t-tye, llvm-commits

Differential Revision: https://reviews.llvm.org/D53359

llvm-svn: 345718
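The structural effect on intrinsic placement is easiest to see in the
@true_phi_cond_break_loop checks below: instead of calling
llvm.amdgcn.if.break in each predecessor and merging the resulting i64 mask
values through a phi, the break condition itself is merged through an i1 phi
and a single if.break call in the flow block updates the mask. A minimal
hand-written sketch of the resulting shape, patterned on those checks (the
function and value names here are illustrative, not taken from the patch):

declare i64 @llvm.amdgcn.if.break(i1, i64)
declare i1 @llvm.amdgcn.loop(i64)
declare void @llvm.amdgcn.end.cf(i64)

define amdgpu_kernel void @break_shape_sketch(i32 %bound) {
bb:
  br label %bb1

bb1:                                      ; loop header
  ; The only mask phi left is the running break mask.
  %phi.broken = phi i64 [ %mask, %Flow ], [ 0, %bb ]
  %iv = phi i32 [ 0, %bb ], [ %iv.next, %Flow ]
  %iv.next = add i32 %iv, 1
  %cmp0 = icmp slt i32 %iv.next, %bound
  br i1 %cmp0, label %bb4, label %Flow

bb4:                                      ; loop body
  ; No if.break call here any more; only the i1 condition is produced.
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp sge i32 %iv, %load
  br label %Flow

Flow:
  ; An i1 phi merges the break condition from both predecessors, and a
  ; single if.break folds it into the i64 exec mask.
  %brk = phi i1 [ %cmp1, %bb4 ], [ true, %bb1 ]
  %mask = call i64 @llvm.amdgcn.if.break(i1 %brk, i64 %phi.broken)
  %done = call i1 @llvm.amdgcn.loop(i64 %mask)
  br i1 %done, label %bb9, label %bb1

bb9:                                      ; loop exit
  call void @llvm.amdgcn.end.cf(i64 %mask)
  ret void
}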
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU')
-rw-r--r--  llvm/test/CodeGen/AMDGPU/loop_break.ll              70
-rw-r--r--  llvm/test/CodeGen/AMDGPU/multilevel-break.ll        29
-rw-r--r--  llvm/test/CodeGen/AMDGPU/nested-loop-conditions.ll  44
-rw-r--r--  llvm/test/CodeGen/AMDGPU/valu-i1.ll                 10
4 files changed, 74 insertions, 79 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/loop_break.ll b/llvm/test/CodeGen/AMDGPU/loop_break.ll
index b2641cd4d2e..576950188d3 100644
--- a/llvm/test/CodeGen/AMDGPU/loop_break.ll
+++ b/llvm/test/CodeGen/AMDGPU/loop_break.ll
@@ -5,16 +5,17 @@
; OPT-LABEL: @break_loop(
; OPT: bb1:
-; OPT: call i64 @llvm.amdgcn.break(i64
+; OPT: icmp slt i32
; OPT-NEXT: br i1 %cmp0, label %bb4, label %Flow
; OPT: bb4:
; OPT: load volatile
+; OPT: icmp slt i32
; OPT: xor i1 %cmp1
-; OPT: call i64 @llvm.amdgcn.if.break(
; OPT: br label %Flow
; OPT: Flow:
+; OPT: call i64 @llvm.amdgcn.if.break(
; OPT: call i1 @llvm.amdgcn.loop(i64
; OPT: br i1 %{{[0-9]+}}, label %bb9, label %bb1
@@ -23,21 +24,19 @@
; TODO: Can remove exec fixes in return block
; GCN-LABEL: {{^}}break_loop:
-; GCN: s_mov_b64 [[INITMASK:s\[[0-9]+:[0-9]+\]]], 0{{$}}
+; GCN: s_mov_b64 [[MASK:s\[[0-9]+:[0-9]+\]]], 0{{$}}
; GCN: [[LOOP_ENTRY:BB[0-9]+_[0-9]+]]: ; %bb1
-; GCN: s_or_b64 [[MASK:s\[[0-9]+:[0-9]+\]]], exec, [[INITMASK]]
; GCN: v_cmp_lt_i32_e32 vcc, -1
; GCN: s_and_b64 vcc, exec, vcc
-; GCN-NEXT: s_cbranch_vccnz [[FLOW:BB[0-9]+_[0-9]+]]
+; GCN: s_cbranch_vccnz [[FLOW:BB[0-9]+_[0-9]+]]
; GCN: ; %bb.2: ; %bb4
; GCN: buffer_load_dword
; GCN: v_cmp_ge_i32_e32 vcc,
-; GCN: s_or_b64 [[MASK]], vcc, [[INITMASK]]
; GCN: [[FLOW]]:
-; GCN: s_mov_b64 [[INITMASK]], [[MASK]]
+; GCN: s_or_b64 [[MASK]], vcc, [[MASK]]
; GCN: s_andn2_b64 exec, exec, [[MASK]]
; GCN-NEXT: s_cbranch_execnz [[LOOP_ENTRY]]
@@ -66,25 +65,26 @@ bb9:
; OPT-LABEL: @undef_phi_cond_break_loop(
; OPT: bb1:
-; OPT-NEXT: %phi.broken = phi i64 [ %loop.phi, %Flow ], [ 0, %bb ]
+; OPT-NEXT: %phi.broken = phi i64 [ %0, %Flow ], [ 0, %bb ]
; OPT-NEXT: %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
-; OPT: %0 = call i64 @llvm.amdgcn.if.break(i1 undef, i64 %phi.broken)
+; OPT-NEXT: %lsr.iv.next = add i32 %lsr.iv, 1
+; OPT-NEXT: %cmp0 = icmp slt i32 %lsr.iv.next, 0
; OPT-NEXT: br i1 %cmp0, label %bb4, label %Flow
; OPT: bb4:
; OPT-NEXT: %load = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT: %cmp1 = icmp sge i32 %tmp, %load
-; OPT-NEXT: %1 = call i64 @llvm.amdgcn.if.break(i1 %cmp1, i64 %phi.broken)
; OPT-NEXT: br label %Flow
; OPT: Flow:
-; OPT-NEXT: %loop.phi = phi i64 [ %1, %bb4 ], [ %0, %bb1 ]
; OPT-NEXT: %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
-; OPT-NEXT: %2 = call i1 @llvm.amdgcn.loop(i64 %loop.phi)
-; OPT-NEXT: br i1 %2, label %bb9, label %bb1
+; OPT-NEXT: %tmp3 = phi i1 [ %cmp1, %bb4 ], [ undef, %bb1 ]
+; OPT-NEXT: %0 = call i64 @llvm.amdgcn.if.break(i1 %tmp3, i64 %phi.broken)
+; OPT-NEXT: %1 = call i1 @llvm.amdgcn.loop(i64 %0)
+; OPT-NEXT: br i1 %1, label %bb9, label %bb1
; OPT: bb9: ; preds = %Flow
-; OPT-NEXT: call void @llvm.amdgcn.end.cf(i64 %loop.phi)
+; OPT-NEXT: call void @llvm.amdgcn.end.cf(i64 %0)
; OPT-NEXT: store volatile i32 7
; OPT-NEXT: ret void
define amdgpu_kernel void @undef_phi_cond_break_loop(i32 %arg) #0 {
@@ -119,25 +119,26 @@ bb9: ; preds = %Flow
; OPT-LABEL: @constexpr_phi_cond_break_loop(
; OPT: bb1:
-; OPT-NEXT: %phi.broken = phi i64 [ %loop.phi, %Flow ], [ 0, %bb ]
+; OPT-NEXT: %phi.broken = phi i64 [ %0, %Flow ], [ 0, %bb ]
; OPT-NEXT: %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
-; OPT: %0 = call i64 @llvm.amdgcn.if.break(i1 icmp ne (i32 addrspace(3)* inttoptr (i32 4 to i32 addrspace(3)*), i32 addrspace(3)* @lds), i64 %phi.broken)
+; OPT-NEXT: %lsr.iv.next = add i32 %lsr.iv, 1
+; OPT-NEXT: %cmp0 = icmp slt i32 %lsr.iv.next, 0
; OPT-NEXT: br i1 %cmp0, label %bb4, label %Flow
; OPT: bb4:
; OPT-NEXT: %load = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT: %cmp1 = icmp sge i32 %tmp, %load
-; OPT-NEXT: %1 = call i64 @llvm.amdgcn.if.break(i1 %cmp1, i64 %phi.broken)
; OPT-NEXT: br label %Flow
; OPT: Flow:
-; OPT-NEXT: %loop.phi = phi i64 [ %1, %bb4 ], [ %0, %bb1 ]
; OPT-NEXT: %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
-; OPT-NEXT: %2 = call i1 @llvm.amdgcn.loop(i64 %loop.phi)
-; OPT-NEXT: br i1 %2, label %bb9, label %bb1
+; OPT-NEXT: %tmp3 = phi i1 [ %cmp1, %bb4 ], [ icmp ne (i32 addrspace(3)* inttoptr (i32 4 to i32 addrspace(3)*), i32 addrspace(3)* @lds), %bb1 ]
+; OPT-NEXT: %0 = call i64 @llvm.amdgcn.if.break(i1 %tmp3, i64 %phi.broken)
+; OPT-NEXT: %1 = call i1 @llvm.amdgcn.loop(i64 %0)
+; OPT-NEXT: br i1 %1, label %bb9, label %bb1
; OPT: bb9: ; preds = %Flow
-; OPT-NEXT: call void @llvm.amdgcn.end.cf(i64 %loop.phi)
+; OPT-NEXT: call void @llvm.amdgcn.end.cf(i64 %0)
; OPT-NEXT: store volatile i32 7
; OPT-NEXT: ret void
define amdgpu_kernel void @constexpr_phi_cond_break_loop(i32 %arg) #0 {
@@ -169,25 +170,26 @@ bb9: ; preds = %Flow
; OPT-LABEL: @true_phi_cond_break_loop(
; OPT: bb1:
-; OPT-NEXT: %phi.broken = phi i64 [ %loop.phi, %Flow ], [ 0, %bb ]
+; OPT-NEXT: %phi.broken = phi i64 [ %0, %Flow ], [ 0, %bb ]
; OPT-NEXT: %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
-; OPT: %0 = call i64 @llvm.amdgcn.break(i64 %phi.broken)
-; OPT: br i1 %cmp0, label %bb4, label %Flow
+; OPT-NEXT: %lsr.iv.next = add i32 %lsr.iv, 1
+; OPT-NEXT: %cmp0 = icmp slt i32 %lsr.iv.next, 0
+; OPT-NEXT: br i1 %cmp0, label %bb4, label %Flow
; OPT: bb4:
; OPT-NEXT: %load = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT: %cmp1 = icmp sge i32 %tmp, %load
-; OPT-NEXT: %1 = call i64 @llvm.amdgcn.if.break(i1 %cmp1, i64 %phi.broken)
; OPT-NEXT: br label %Flow
; OPT: Flow:
-; OPT-NEXT: %loop.phi = phi i64 [ %1, %bb4 ], [ %0, %bb1 ]
; OPT-NEXT: %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
-; OPT-NEXT: %2 = call i1 @llvm.amdgcn.loop(i64 %loop.phi)
-; OPT-NEXT: br i1 %2, label %bb9, label %bb1
+; OPT-NEXT: %tmp3 = phi i1 [ %cmp1, %bb4 ], [ true, %bb1 ]
+; OPT-NEXT: %0 = call i64 @llvm.amdgcn.if.break(i1 %tmp3, i64 %phi.broken)
+; OPT-NEXT: %1 = call i1 @llvm.amdgcn.loop(i64 %0)
+; OPT-NEXT: br i1 %1, label %bb9, label %bb1
; OPT: bb9: ; preds = %Flow
-; OPT-NEXT: call void @llvm.amdgcn.end.cf(i64 %loop.phi)
+; OPT-NEXT: call void @llvm.amdgcn.end.cf(i64 %0)
; OPT-NEXT: store volatile i32 7
; OPT-NEXT: ret void
define amdgpu_kernel void @true_phi_cond_break_loop(i32 %arg) #0 {
@@ -219,7 +221,7 @@ bb9: ; preds = %Flow
; OPT-LABEL: @false_phi_cond_break_loop(
; OPT: bb1:
-; OPT-NEXT: %phi.broken = phi i64 [ %loop.phi, %Flow ], [ 0, %bb ]
+; OPT-NEXT: %phi.broken = phi i64 [ %0, %Flow ], [ 0, %bb ]
; OPT-NEXT: %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
; OPT-NOT: call
; OPT: br i1 %cmp0, label %bb4, label %Flow
@@ -227,17 +229,17 @@ bb9: ; preds = %Flow
; OPT: bb4:
; OPT-NEXT: %load = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT: %cmp1 = icmp sge i32 %tmp, %load
-; OPT-NEXT: %0 = call i64 @llvm.amdgcn.if.break(i1 %cmp1, i64 %phi.broken)
; OPT-NEXT: br label %Flow
; OPT: Flow:
-; OPT-NEXT: %loop.phi = phi i64 [ %0, %bb4 ], [ %phi.broken, %bb1 ]
; OPT-NEXT: %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
-; OPT-NEXT: %1 = call i1 @llvm.amdgcn.loop(i64 %loop.phi)
+; OPT-NEXT: %tmp3 = phi i1 [ %cmp1, %bb4 ], [ false, %bb1 ]
+; OPT-NEXT: %0 = call i64 @llvm.amdgcn.if.break(i1 %tmp3, i64 %phi.broken)
+; OPT-NEXT: %1 = call i1 @llvm.amdgcn.loop(i64 %0)
; OPT-NEXT: br i1 %1, label %bb9, label %bb1
; OPT: bb9: ; preds = %Flow
-; OPT-NEXT: call void @llvm.amdgcn.end.cf(i64 %loop.phi)
+; OPT-NEXT: call void @llvm.amdgcn.end.cf(i64 %0)
; OPT-NEXT: store volatile i32 7
; OPT-NEXT: ret void
define amdgpu_kernel void @false_phi_cond_break_loop(i32 %arg) #0 {
diff --git a/llvm/test/CodeGen/AMDGPU/multilevel-break.ll b/llvm/test/CodeGen/AMDGPU/multilevel-break.ll
index 216ca1973b5..c4e2f1e3487 100644
--- a/llvm/test/CodeGen/AMDGPU/multilevel-break.ll
+++ b/llvm/test/CodeGen/AMDGPU/multilevel-break.ll
@@ -10,11 +10,12 @@
;
; OPT: Flow:
;
-; Ensure two else.break calls, for both the inner and outer loops
+; Ensure two if.break calls, for both the inner and outer loops
-; OPT: call i64 @llvm.amdgcn.else.break(i64 [[if_exec]],
-; OPT-NEXT: call i64 @llvm.amdgcn.else.break(i64 [[if_exec]],
-; OPT-NEXT: call void @llvm.amdgcn.end.cf
+; OPT: call void @llvm.amdgcn.end.cf
+; OPT-NEXT: call i64 @llvm.amdgcn.if.break(i1
+; OPT-NEXT: call i1 @llvm.amdgcn.loop(i64
+; OPT-NEXT: call i64 @llvm.amdgcn.if.break(i1
;
; OPT: Flow1:
@@ -30,10 +31,9 @@
; Ensure extra or eliminated
; GCN-NEXT: s_or_b64 exec, exec, [[SAVE_BREAK]]
-; GCN-NEXT: s_mov_b64
-; GCN-NEXT: s_and_b64 [[MASKED_SAVE_BREAK:s\[[0-9]+:[0-9]+\]]], exec, [[SAVE_BREAK]]
-; GCN-NEXT: s_or_b64 [[OR_BREAK:s\[[0-9]+:[0-9]+\]]], [[MASKED_SAVE_BREAK]], s{{\[[0-9]+:[0-9]+\]}}
-; TODO: get rid of redundant loop counter moves
+; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 0, v{{[0-9]+}}
+; GCN-NEXT: s_or_b64 [[OR_BREAK:s\[[0-9]+:[0-9]+\]]], vcc, s{{\[[0-9]+:[0-9]+\]}}
+; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 0, v{{[0-9]+}}
; GCN-NEXT: v_mov_b32_e32
; GCN-NEXT: s_andn2_b64 exec, exec, [[OR_BREAK]]
; GCN-NEXT: s_cbranch_execnz [[INNER_LOOP]]
@@ -43,8 +43,9 @@
; Ensure copy is eliminated
; GCN-NEXT: s_or_b64 exec, exec, [[OR_BREAK]]
-; GCN-NEXT: s_and_b64 [[MASKED2_SAVE_BREAK:s\[[0-9]+:[0-9]+\]]], exec, [[SAVE_BREAK]]
+; GCN-NEXT: s_and_b64 [[MASKED2_SAVE_BREAK:s\[[0-9]+:[0-9]+\]]], exec, vcc
; GCN-NEXT: s_or_b64 [[OUTER_OR_BREAK:s\[[0-9]+:[0-9]+\]]], [[MASKED2_SAVE_BREAK]], s{{\[[0-9]+:[0-9]+\]}}
+; GCN-NEXT: s_mov_b64
; GCN-NEXT: v_mov_b32_e32
; GCN-NEXT: s_andn2_b64 exec, exec, [[OUTER_OR_BREAK]]
; GCN-NEXT: s_cbranch_execnz [[OUTER_LOOP]]
@@ -71,9 +72,8 @@ ENDIF: ; preds = %LOOP
}
; OPT-LABEL: define amdgpu_kernel void @multi_if_break_loop(
-; OPT: llvm.amdgcn.break
-; OPT: llvm.amdgcn.loop
; OPT: llvm.amdgcn.if.break
+; OPT: llvm.amdgcn.loop
; OPT: llvm.amdgcn.if.break
; OPT: llvm.amdgcn.end.cf
@@ -82,9 +82,10 @@ ENDIF: ; preds = %LOOP
; GCN: [[LOOP:BB[0-9]+_[0-9]+]]: ; %bb1{{$}}
-; Uses a copy intsead of an or
-; GCN: s_mov_b64 [[COPY:s\[[0-9]+:[0-9]+\]]], [[BREAK_REG]]
-; GCN: s_or_b64 [[BREAK_REG]], exec, [[BREAK_REG]]
+; GCN: s_or_b64 [[BREAK_REG]], vcc, [[BREAK_REG]]
+; GCN: s_andn2_b64 exec, exec, [[BREAK_REG]]
+; GCN-NEXT: s_cbranch_execnz
+
define amdgpu_kernel void @multi_if_break_loop(i32 %arg) #0 {
bb:
%id = call i32 @llvm.amdgcn.workitem.id.x()
diff --git a/llvm/test/CodeGen/AMDGPU/nested-loop-conditions.ll b/llvm/test/CodeGen/AMDGPU/nested-loop-conditions.ll
index 8489a785310..a007c965f94 100644
--- a/llvm/test/CodeGen/AMDGPU/nested-loop-conditions.ll
+++ b/llvm/test/CodeGen/AMDGPU/nested-loop-conditions.ll
@@ -10,7 +10,7 @@
; IR-LABEL: @reduced_nested_loop_conditions(
; IR: bb5:
-; IR-NEXT: %phi.broken = phi i64 [ %loop.phi, %bb10 ], [ 0, %bb ]
+; IR-NEXT: %phi.broken = phi i64 [ %3, %bb10 ], [ 0, %bb ]
; IR-NEXT: %tmp6 = phi i32 [ 0, %bb ], [ %tmp11, %bb10 ]
; IR-NEXT: %tmp7 = icmp eq i32 %tmp6, 1
; IR-NEXT: %0 = call { i1, i64 } @llvm.amdgcn.if(i1 %tmp7)
@@ -19,25 +19,23 @@
; IR-NEXT: br i1 %1, label %bb8, label %Flow
; IR: bb8:
-; IR-NEXT: %3 = call i64 @llvm.amdgcn.break(i64 %phi.broken)
; IR-NEXT: br label %bb13
; IR: bb10:
-; IR-NEXT: %loop.phi = phi i64 [ %6, %Flow ]
-; IR-NEXT: %tmp11 = phi i32 [ %5, %Flow ]
-; IR-NEXT: %4 = call i1 @llvm.amdgcn.loop(i64 %loop.phi)
+; IR-NEXT: %tmp11 = phi i32 [ %6, %Flow ]
+; IR-NEXT: %tmp12 = phi i1 [ %5, %Flow ]
+; IR-NEXT: %3 = call i64 @llvm.amdgcn.if.break(i1 %tmp12, i64 %phi.broken)
+; IR-NEXT: %4 = call i1 @llvm.amdgcn.loop(i64 %3)
; IR-NEXT: br i1 %4, label %bb23, label %bb5
; IR: Flow:
-; IR-NEXT: %loop.phi1 = phi i64 [ %loop.phi2, %bb4 ], [ %phi.broken, %bb5 ]
-; IR-NEXT: %5 = phi i32 [ %tmp21, %bb4 ], [ undef, %bb5 ]
-; IR-NEXT: %6 = call i64 @llvm.amdgcn.else.break(i64 %2, i64 %loop.phi1)
+; IR-NEXT: %5 = phi i1 [ %tmp22, %bb4 ], [ true, %bb5 ]
+; IR-NEXT: %6 = phi i32 [ %tmp21, %bb4 ], [ undef, %bb5 ]
; IR-NEXT: call void @llvm.amdgcn.end.cf(i64 %2)
; IR-NEXT: br label %bb10
; IR: bb13:
-; IR-NEXT: %loop.phi3 = phi i64 [ %loop.phi4, %bb3 ], [ %3, %bb8 ]
-; IR-NEXT: %tmp14 = phi i1 [ false, %bb3 ], [ true, %bb8 ]
+; IR-NEXT: %tmp14 = phi i1 [ %tmp22, %bb3 ], [ true, %bb8 ]
; IR-NEXT: %tmp15 = bitcast i64 %tmp2 to <2 x i32>
; IR-NEXT: br i1 %tmp14, label %bb16, label %bb20
@@ -48,13 +46,12 @@
; IR-NEXT: br label %bb20
; IR: bb20:
-; IR-NEXT: %loop.phi4 = phi i64 [ %phi.broken, %bb16 ], [ %phi.broken, %bb13 ]
-; IR-NEXT: %loop.phi2 = phi i64 [ %phi.broken, %bb16 ], [ %loop.phi3, %bb13 ]
; IR-NEXT: %tmp21 = phi i32 [ %tmp19, %bb16 ], [ 0, %bb13 ]
+; IR-NEXT: %tmp22 = phi i1 [ false, %bb16 ], [ %tmp14, %bb13 ]
; IR-NEXT: br label %bb9
; IR: bb23:
-; IR-NEXT: call void @llvm.amdgcn.end.cf(i64 %loop.phi)
+; IR-NEXT: call void @llvm.amdgcn.end.cf(i64 %3)
; IR-NEXT: ret void
; GCN-LABEL: {{^}}reduced_nested_loop_conditions:
@@ -125,7 +122,7 @@ bb23: ; preds = %bb10
; IR: Flow3:
; IR-NEXT: call void @llvm.amdgcn.end.cf(i64 %21)
-; IR-NEXT: %0 = call { i1, i64 } @llvm.amdgcn.if(i1 %13)
+; IR-NEXT: %0 = call { i1, i64 } @llvm.amdgcn.if(i1 %14)
; IR-NEXT: %1 = extractvalue { i1, i64 } %0, 0
; IR-NEXT: %2 = extractvalue { i1, i64 } %0, 1
; IR-NEXT: br i1 %1, label %bb4.bb13_crit_edge, label %Flow4
@@ -147,25 +144,24 @@ bb23: ; preds = %bb10
; IR-NEXT: %8 = call { i1, i64 } @llvm.amdgcn.if(i1 %tmp15)
; IR: Flow1:
-; IR-NEXT: %loop.phi = phi i64 [ %18, %bb21 ], [ %phi.broken, %bb14 ]
; IR-NEXT: %11 = phi <4 x i32> [ %tmp9, %bb21 ], [ undef, %bb14 ]
; IR-NEXT: %12 = phi i32 [ %tmp10, %bb21 ], [ undef, %bb14 ]
-; IR-NEXT: %13 = phi i1 [ %17, %bb21 ], [ false, %bb14 ]
-; IR-NEXT: %14 = phi i1 [ false, %bb21 ], [ true, %bb14 ]
-; IR-NEXT: %15 = call i64 @llvm.amdgcn.else.break(i64 %10, i64 %loop.phi)
+; IR-NEXT: %13 = phi i1 [ %18, %bb21 ], [ true, %bb14 ]
+; IR-NEXT: %14 = phi i1 [ %18, %bb21 ], [ false, %bb14 ]
+; IR-NEXT: %15 = phi i1 [ false, %bb21 ], [ true, %bb14 ]
; IR-NEXT: call void @llvm.amdgcn.end.cf(i64 %10)
-; IR-NEXT: %16 = call i1 @llvm.amdgcn.loop(i64 %15)
-; IR-NEXT: br i1 %16, label %Flow2, label %bb14
+; IR-NEXT: %16 = call i64 @llvm.amdgcn.if.break(i1 %13, i64 %phi.broken)
+; IR-NEXT: %17 = call i1 @llvm.amdgcn.loop(i64 %16)
+; IR-NEXT: br i1 %17, label %Flow2, label %bb14
; IR: bb21:
; IR: %tmp12 = icmp slt i32 %tmp11, 9
-; IR-NEXT: %17 = xor i1 %tmp12, true
-; IR-NEXT: %18 = call i64 @llvm.amdgcn.if.break(i1 %17, i64 %phi.broken)
+; IR-NEXT: %18 = xor i1 %tmp12, true
; IR-NEXT: br label %Flow1
; IR: Flow2:
-; IR-NEXT: call void @llvm.amdgcn.end.cf(i64 %15)
-; IR-NEXT: %19 = call { i1, i64 } @llvm.amdgcn.if(i1 %14)
+; IR-NEXT: call void @llvm.amdgcn.end.cf(i64 %16)
+; IR-NEXT: %19 = call { i1, i64 } @llvm.amdgcn.if(i1 %15)
; IR-NEXT: %20 = extractvalue { i1, i64 } %19, 0
; IR-NEXT: %21 = extractvalue { i1, i64 } %19, 1
; IR-NEXT: br i1 %20, label %bb31.loopexit, label %Flow3
diff --git a/llvm/test/CodeGen/AMDGPU/valu-i1.ll b/llvm/test/CodeGen/AMDGPU/valu-i1.ll
index 58bd9a0cdef..3d980b749a9 100644
--- a/llvm/test/CodeGen/AMDGPU/valu-i1.ll
+++ b/llvm/test/CodeGen/AMDGPU/valu-i1.ll
@@ -212,20 +212,16 @@ exit:
; SI-DAG: v_cmp_ne_u32_e32 [[NEG1_CHECK_1:vcc]], -1, [[B]]
; SI: s_and_b64 [[ORNEG1:s\[[0-9]+:[0-9]+\]]], [[NEG1_CHECK_1]], [[NEG1_CHECK_0]]
; SI: s_and_saveexec_b64 [[ORNEG2:s\[[0-9]+:[0-9]+\]]], [[ORNEG1]]
-; SI: s_xor_b64 [[ORNEG3:s\[[0-9]+:[0-9]+\]]], exec, [[ORNEG2]]
; SI: s_cbranch_execz [[LABEL_FLOW:BB[0-9]+_[0-9]+]]
; SI: BB{{[0-9]+_[0-9]+}}: ; %bb20
; SI: buffer_store_dword
-; SI: v_cmp_ge_i64_e{{32|64}} [[CMP:s\[[0-9]+:[0-9]+\]|vcc]]
-; SI: s_or_b64 [[TMP:s\[[0-9]+:[0-9]+\]]], [[CMP]], [[COND_STATE]]
; SI: [[LABEL_FLOW]]:
; SI-NEXT: ; in Loop: Header=[[LABEL_LOOP]]
-; SI-NEXT: s_or_b64 exec, exec, [[ORNEG3]]
-; SI-NEXT: s_mov_b64 [[MOVED_TMP:s\[[0-9]+:[0-9]+\]]], [[TMP]]
-; SI-NEXT: s_and_b64 [[MASKED_ORNEG3:s\[[0-9]+:[0-9]+\]]], exec, [[ORNEG3]]
-; SI-NEXT: s_or_b64 [[COND_STATE]], [[MASKED_ORNEG3]], [[MOVED_TMP]]
+; SI-NEXT: s_or_b64 exec, exec, [[ORNEG2]]
+; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v10
+; SI-NEXT: s_or_b64 [[COND_STATE]], vcc, [[COND_STATE]]
; SI-NEXT: s_andn2_b64 exec, exec, [[COND_STATE]]
; SI-NEXT: s_cbranch_execnz [[LABEL_LOOP]]