diff options
author | Chandler Carruth <chandlerc@gmail.com> | 2017-11-17 19:58:36 +0000 |
---|---|---|
committer | Chandler Carruth <chandlerc@gmail.com> | 2017-11-17 19:58:36 +0000 |
commit | 693eedb13833245e2670308fa0e6fe47324ce553 (patch) | |
tree | e408fadaf8485061d9a799510d803634ead8a5b5 /llvm/test/Transforms/SimpleLoopUnswitch | |
parent | 5e80bdebd25f59be7aca4eb34dba8e69b31f8bfb (diff) | |
download | bcm5719-llvm-693eedb13833245e2670308fa0e6fe47324ce553.tar.gz bcm5719-llvm-693eedb13833245e2670308fa0e6fe47324ce553.zip |
[PM/Unswitch] Teach SimpleLoopUnswitch to do non-trivial unswitching,
making it no longer even remotely simple.
The pass will now be more of a "full loop unswitching" pass rather than
anything substantively simpler than any other approach. I plan to rename
it accordingly once the dust settles.
The key ideas of the new loop unswitcher are carried over for
non-trivial unswitching:
1) Fully unswitch a branch or switch instruction from inside of a loop to
outside of it.
2) Update the CFG and IR. This avoids needing to "remember" the
unswitched branches as well as avoiding excessively cloning and
reliance on complex parts of simplify-cfg to cleanup the cfg.
3) Update the analyses (where we can) rather than just blowing them away
or relying on something else updating them.
Sadly, #3 is somewhat compromised here as the dominator tree updates
were too complex for me to want to reason about. I will need to make
another attempt to do this now that we have a nice dynamic update API
for dominators. However, we do adhere to #3 w.r.t. LoopInfo.
This approach also adds an important principle specific to non-trivial
unswitching: not *all* of the loop will be duplicated when unswitching.
This fact allows us to compute the cost in terms of how much *duplicate*
code is inserted rather than just on raw size. Unswitching conditions
which essentially partition loops will work regardless of the total loop
size.
Some remaining issues that I will be addressing in subsequent commits:
- Handling unstructured control flow.
- Unswitching 'switch' cases instead of just branches.
- Moving to the dynamic update API for dominators.
Some high-level, interesting limitations that folks might want to push
on as follow-ups but that I don't have any immediate plans around:
- We could be much more clever about not cloning things that will be
deleted. In fact, we should be able to delete *nothing* and do
a minimal number of clones.
- There are many more interesting selection criteria for which branch to
unswitch that we might want to look at. One that I'm interested in
particularly is a set of conditions which all exit the loop and which
can be merged into a single unswitched test of them.
Differential revision: https://reviews.llvm.org/D34200
llvm-svn: 318549
Diffstat (limited to 'llvm/test/Transforms/SimpleLoopUnswitch')
3 files changed, 2878 insertions, 19 deletions
diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/2006-06-27-DeadSwitchCase.ll b/llvm/test/Transforms/SimpleLoopUnswitch/2006-06-27-DeadSwitchCase.ll index 04067eb05c8..61825b6c1d8 100644 --- a/llvm/test/Transforms/SimpleLoopUnswitch/2006-06-27-DeadSwitchCase.ll +++ b/llvm/test/Transforms/SimpleLoopUnswitch/2006-06-27-DeadSwitchCase.ll @@ -2,24 +2,30 @@ define void @init_caller_save() { entry: - br label %cond_true78 -cond_next20: ; preds = %cond_true64 - br label %bb31 -bb31: ; preds = %cond_true64, %cond_true64, %cond_next20 - %iftmp.29.1 = phi i32 [ 0, %cond_next20 ], [ 0, %cond_true64 ], [ 0, %cond_true64 ] ; <i32> [#uses=0] - br label %bb54 -bb54: ; preds = %cond_true78, %bb31 - br i1 false, label %bb75, label %cond_true64 -cond_true64: ; preds = %bb54 - switch i32 %i.0.0, label %cond_next20 [ - i32 17, label %bb31 - i32 18, label %bb31 - ] -bb75: ; preds = %bb54 - %tmp74.0 = add i32 %i.0.0, 1 ; <i32> [#uses=1] - br label %cond_true78 -cond_true78: ; preds = %bb75, %entry - %i.0.0 = phi i32 [ 0, %entry ], [ %tmp74.0, %bb75 ] ; <i32> [#uses=2] - br label %bb54 + br label %cond_true78 + +cond_true78: ; preds = %bb75, %entry + %i.0.0 = phi i32 [ 0, %entry ], [ %tmp74.0, %bb75 ] ; <i32> [#uses=2] + br label %bb54 + +bb54: ; preds = %cond_true78, %bb31 + br i1 false, label %bb75, label %cond_true64 + +cond_true64: ; preds = %bb54 + switch i32 %i.0.0, label %cond_next20 [ + i32 17, label %bb31 + i32 18, label %bb31 + ] + +cond_next20: ; preds = %cond_true64 + br label %bb31 + +bb31: ; preds = %cond_true64, %cond_true64, %cond_next20 + %iftmp.29.1 = phi i32 [ 0, %cond_next20 ], [ 0, %cond_true64 ], [ 0, %cond_true64 ] ; <i32> [#uses=0] + br label %bb54 + +bb75: ; preds = %bb54 + %tmp74.0 = add i32 %i.0.0, 1 ; <i32> [#uses=1] + br label %cond_true78 } diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-cost.ll b/llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-cost.ll new file mode 100644 index 00000000000..2d98f148d46 --- /dev/null 
+++ b/llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-cost.ll @@ -0,0 +1,501 @@ +; Specifically exercise the cost modeling for non-trivial loop unswitching. +; +; RUN: opt -passes='loop(unswitch),verify<loops>' -enable-nontrivial-unswitch -unswitch-threshold=5 -S < %s | FileCheck %s +; RUN: opt -simple-loop-unswitch -enable-nontrivial-unswitch -unswitch-threshold=5 -S < %s | FileCheck %s + +declare void @a() +declare void @b() +declare void @x() + +; First establish enough code size in the duplicated 'loop_begin' block to +; suppress unswitching. +define void @test_no_unswitch(i1* %ptr, i1 %cond) { +; CHECK-LABEL: @test_no_unswitch( +entry: + br label %loop_begin +; CHECK-NEXT: entry: +; CHECK-NEXT: br label %loop_begin +; +; We shouldn't have unswitched into any other block either. +; CHECK-NOT: br i1 %cond + +loop_begin: + call void @x() + call void @x() + call void @x() + call void @x() + br i1 %cond, label %loop_a, label %loop_b +; CHECK: loop_begin: +; CHECK-NEXT: call void @x() +; CHECK-NEXT: call void @x() +; CHECK-NEXT: call void @x() +; CHECK-NEXT: call void @x() +; CHECK-NEXT: br i1 %cond, label %loop_a, label %loop_b + +loop_a: + call void @a() + br label %loop_latch + +loop_b: + call void @b() + br label %loop_latch + +loop_latch: + %v = load i1, i1* %ptr + br i1 %v, label %loop_begin, label %loop_exit + +loop_exit: + ret void +} + +; Now check that the smaller formulation of 'loop_begin' does in fact unswitch +; with our low threshold. +define void @test_unswitch(i1* %ptr, i1 %cond) { +; CHECK-LABEL: @test_unswitch( +entry: + br label %loop_begin +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 %cond, label %entry.split.us, label %entry.split + +loop_begin: + call void @x() + br i1 %cond, label %loop_a, label %loop_b + +loop_a: + call void @a() + br label %loop_latch +; The 'loop_a' unswitched loop. 
+; +; CHECK: entry.split.us: +; CHECK-NEXT: br label %loop_begin.us +; +; CHECK: loop_begin.us: +; CHECK-NEXT: call void @x() +; CHECK-NEXT: br label %loop_a.us +; +; CHECK: loop_a.us: +; CHECK-NEXT: call void @a() +; CHECK-NEXT: br label %loop_latch.us +; +; CHECK: loop_latch.us: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %loop_begin.us, label %loop_exit.split.us +; +; CHECK: loop_exit.split.us: +; CHECK-NEXT: br label %loop_exit + +loop_b: + call void @b() + br label %loop_latch +; The 'loop_b' unswitched loop. +; +; CHECK: entry.split: +; CHECK-NEXT: br label %loop_begin +; +; CHECK: loop_begin: +; CHECK-NEXT: call void @x() +; CHECK-NEXT: br label %loop_b +; +; CHECK: loop_b: +; CHECK-NEXT: call void @b() +; CHECK-NEXT: br label %loop_latch +; +; CHECK: loop_latch: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %loop_begin, label %loop_exit.split +; +; CHECK: loop_exit.split: +; CHECK-NEXT: br label %loop_exit + +loop_latch: + %v = load i1, i1* %ptr + br i1 %v, label %loop_begin, label %loop_exit + +loop_exit: + ret void +; CHECK: loop_exit: +; CHECK-NEXT: ret void +} + +; Check that even with large amounts of code on either side of the unswitched +; branch, if that code would be kept in only one of the unswitched clones it +; doesn't contribute to the cost. +define void @test_unswitch_non_dup_code(i1* %ptr, i1 %cond) { +; CHECK-LABEL: @test_unswitch_non_dup_code( +entry: + br label %loop_begin +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 %cond, label %entry.split.us, label %entry.split + +loop_begin: + call void @x() + br i1 %cond, label %loop_a, label %loop_b + +loop_a: + call void @a() + call void @a() + call void @a() + call void @a() + br label %loop_latch +; The 'loop_a' unswitched loop. 
+; +; CHECK: entry.split.us: +; CHECK-NEXT: br label %loop_begin.us +; +; CHECK: loop_begin.us: +; CHECK-NEXT: call void @x() +; CHECK-NEXT: br label %loop_a.us +; +; CHECK: loop_a.us: +; CHECK-NEXT: call void @a() +; CHECK-NEXT: call void @a() +; CHECK-NEXT: call void @a() +; CHECK-NEXT: call void @a() +; CHECK-NEXT: br label %loop_latch.us +; +; CHECK: loop_latch.us: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %loop_begin.us, label %loop_exit.split.us +; +; CHECK: loop_exit.split.us: +; CHECK-NEXT: br label %loop_exit + +loop_b: + call void @b() + call void @b() + call void @b() + call void @b() + br label %loop_latch +; The 'loop_b' unswitched loop. +; +; CHECK: entry.split: +; CHECK-NEXT: br label %loop_begin +; +; CHECK: loop_begin: +; CHECK-NEXT: call void @x() +; CHECK-NEXT: br label %loop_b +; +; CHECK: loop_b: +; CHECK-NEXT: call void @b() +; CHECK-NEXT: call void @b() +; CHECK-NEXT: call void @b() +; CHECK-NEXT: call void @b() +; CHECK-NEXT: br label %loop_latch +; +; CHECK: loop_latch: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %loop_begin, label %loop_exit.split +; +; CHECK: loop_exit.split: +; CHECK-NEXT: br label %loop_exit + +loop_latch: + %v = load i1, i1* %ptr + br i1 %v, label %loop_begin, label %loop_exit + +loop_exit: + ret void +; CHECK: loop_exit: +; CHECK-NEXT: ret void +} + +; Much like with non-duplicated code directly in the successor, we also won't +; duplicate even interesting CFGs. 
+define void @test_unswitch_non_dup_code_in_cfg(i1* %ptr, i1 %cond) { +; CHECK-LABEL: @test_unswitch_non_dup_code_in_cfg( +entry: + br label %loop_begin +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 %cond, label %entry.split.us, label %entry.split + +loop_begin: + call void @x() + br i1 %cond, label %loop_a, label %loop_b + +loop_a: + %v1 = load i1, i1* %ptr + br i1 %v1, label %loop_a_a, label %loop_a_b + +loop_a_a: + call void @a() + br label %loop_latch + +loop_a_b: + call void @a() + br label %loop_latch +; The 'loop_a' unswitched loop. +; +; CHECK: entry.split.us: +; CHECK-NEXT: br label %loop_begin.us +; +; CHECK: loop_begin.us: +; CHECK-NEXT: call void @x() +; CHECK-NEXT: br label %loop_a.us +; +; CHECK: loop_a.us: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %loop_a_a.us, label %loop_a_b.us +; +; CHECK: loop_a_b.us: +; CHECK-NEXT: call void @a() +; CHECK-NEXT: br label %loop_latch.us +; +; CHECK: loop_a_a.us: +; CHECK-NEXT: call void @a() +; CHECK-NEXT: br label %loop_latch.us +; +; CHECK: loop_latch.us: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %loop_begin.us, label %loop_exit.split.us +; +; CHECK: loop_exit.split.us: +; CHECK-NEXT: br label %loop_exit + +loop_b: + %v2 = load i1, i1* %ptr + br i1 %v2, label %loop_b_a, label %loop_b_b + +loop_b_a: + call void @b() + br label %loop_latch + +loop_b_b: + call void @b() + br label %loop_latch +; The 'loop_b' unswitched loop. 
+; +; CHECK: entry.split: +; CHECK-NEXT: br label %loop_begin +; +; CHECK: loop_begin: +; CHECK-NEXT: call void @x() +; CHECK-NEXT: br label %loop_b +; +; CHECK: loop_b: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %loop_b_a, label %loop_b_b +; +; CHECK: loop_b_a: +; CHECK-NEXT: call void @b() +; CHECK-NEXT: br label %loop_latch +; +; CHECK: loop_b_b: +; CHECK-NEXT: call void @b() +; CHECK-NEXT: br label %loop_latch +; +; CHECK: loop_latch: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %loop_begin, label %loop_exit.split +; +; CHECK: loop_exit.split: +; CHECK-NEXT: br label %loop_exit + +loop_latch: + %v3 = load i1, i1* %ptr + br i1 %v3, label %loop_begin, label %loop_exit + +loop_exit: + ret void +; CHECK: loop_exit: +; CHECK-NEXT: ret void +} + +; Check that even if there is *some* non-duplicated code on one side of an +; unswitch, we don't count any other code in the loop that will in fact have to +; be duplicated. +define void @test_no_unswitch_non_dup_code(i1* %ptr, i1 %cond) { +; CHECK-LABEL: @test_no_unswitch_non_dup_code( +entry: + br label %loop_begin +; CHECK-NEXT: entry: +; CHECK-NEXT: br label %loop_begin +; +; We shouldn't have unswitched into any other block either. 
+; CHECK-NOT: br i1 %cond + +loop_begin: + call void @x() + br i1 %cond, label %loop_a, label %loop_b +; CHECK: loop_begin: +; CHECK-NEXT: call void @x() +; CHECK-NEXT: br i1 %cond, label %loop_a, label %loop_b + +loop_a: + %v1 = load i1, i1* %ptr + br i1 %v1, label %loop_a_a, label %loop_a_b + +loop_a_a: + call void @a() + br label %loop_latch + +loop_a_b: + call void @a() + br label %loop_latch + +loop_b: + %v2 = load i1, i1* %ptr + br i1 %v2, label %loop_b_a, label %loop_b_b + +loop_b_a: + call void @b() + br label %loop_latch + +loop_b_b: + call void @b() + br label %loop_latch + +loop_latch: + call void @x() + call void @x() + %v = load i1, i1* %ptr + br i1 %v, label %loop_begin, label %loop_exit + +loop_exit: + ret void +} + +; Check that we still unswitch when the exit block contains lots of code, even +; though we do clone the exit block as part of unswitching. This should work +; because we should split the exit block before anything inside it. +define void @test_unswitch_large_exit(i1* %ptr, i1 %cond) { +; CHECK-LABEL: @test_unswitch_large_exit( +entry: + br label %loop_begin +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 %cond, label %entry.split.us, label %entry.split + +loop_begin: + call void @x() + br i1 %cond, label %loop_a, label %loop_b + +loop_a: + call void @a() + br label %loop_latch +; The 'loop_a' unswitched loop. +; +; CHECK: entry.split.us: +; CHECK-NEXT: br label %loop_begin.us +; +; CHECK: loop_begin.us: +; CHECK-NEXT: call void @x() +; CHECK-NEXT: br label %loop_a.us +; +; CHECK: loop_a.us: +; CHECK-NEXT: call void @a() +; CHECK-NEXT: br label %loop_latch.us +; +; CHECK: loop_latch.us: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %loop_begin.us, label %loop_exit.split.us +; +; CHECK: loop_exit.split.us: +; CHECK-NEXT: br label %loop_exit + +loop_b: + call void @b() + br label %loop_latch +; The 'loop_b' unswitched loop. 
+; +; CHECK: entry.split: +; CHECK-NEXT: br label %loop_begin +; +; CHECK: loop_begin: +; CHECK-NEXT: call void @x() +; CHECK-NEXT: br label %loop_b +; +; CHECK: loop_b: +; CHECK-NEXT: call void @b() +; CHECK-NEXT: br label %loop_latch +; +; CHECK: loop_latch: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %loop_begin, label %loop_exit.split +; +; CHECK: loop_exit.split: +; CHECK-NEXT: br label %loop_exit + +loop_latch: + %v = load i1, i1* %ptr + br i1 %v, label %loop_begin, label %loop_exit + +loop_exit: + call void @x() + call void @x() + call void @x() + call void @x() + ret void +; CHECK: loop_exit: +; CHECK-NEXT: call void @x() +; CHECK-NEXT: call void @x() +; CHECK-NEXT: call void @x() +; CHECK-NEXT: call void @x() +; CHECK-NEXT: ret void +} + +; Check that we handle a dedicated exit edge unswitch which is still +; non-trivial and has lots of code in the exit. +define void @test_unswitch_dedicated_exiting(i1* %ptr, i1 %cond) { +; CHECK-LABEL: @test_unswitch_dedicated_exiting( +entry: + br label %loop_begin +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 %cond, label %entry.split.us, label %entry.split + +loop_begin: + call void @x() + br i1 %cond, label %loop_a, label %loop_b_exit + +loop_a: + call void @a() + br label %loop_latch +; The 'loop_a' unswitched loop. +; +; CHECK: entry.split.us: +; CHECK-NEXT: br label %loop_begin.us +; +; CHECK: loop_begin.us: +; CHECK-NEXT: call void @x() +; CHECK-NEXT: br label %loop_a.us +; +; CHECK: loop_a.us: +; CHECK-NEXT: call void @a() +; CHECK-NEXT: br label %loop_latch.us +; +; CHECK: loop_latch.us: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %loop_begin.us, label %loop_exit.split.us +; +; CHECK: loop_exit.split.us: +; CHECK-NEXT: br label %loop_exit + +loop_b_exit: + call void @b() + call void @b() + call void @b() + call void @b() + ret void +; The 'loop_b_exit' unswitched exit path. 
+; +; CHECK: entry.split: +; CHECK-NEXT: br label %loop_begin +; +; CHECK: loop_begin: +; CHECK-NEXT: call void @x() +; CHECK-NEXT: br label %loop_b_exit +; +; CHECK: loop_b_exit: +; CHECK-NEXT: call void @b() +; CHECK-NEXT: call void @b() +; CHECK-NEXT: call void @b() +; CHECK-NEXT: call void @b() +; CHECK-NEXT: ret void + +loop_latch: + %v = load i1, i1* %ptr + br i1 %v, label %loop_begin, label %loop_exit + +loop_exit: + ret void +; CHECK: loop_exit: +; CHECK-NEXT: ret void +} diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch.ll b/llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch.ll new file mode 100644 index 00000000000..51cdebe70e0 --- /dev/null +++ b/llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch.ll @@ -0,0 +1,2352 @@ +; RUN: opt -passes='loop(unswitch),verify<loops>' -enable-nontrivial-unswitch -S < %s | FileCheck %s +; RUN: opt -simple-loop-unswitch -enable-nontrivial-unswitch -S < %s | FileCheck %s + +declare void @a() +declare void @b() +declare void @c() +declare void @d() + +declare void @sink1(i32) +declare void @sink2(i32) + +; Negative test: we cannot unswitch convergent calls. +define void @test_no_unswitch_convergent(i1* %ptr, i1 %cond) { +; CHECK-LABEL: @test_no_unswitch_convergent( +entry: + br label %loop_begin +; CHECK-NEXT: entry: +; CHECK-NEXT: br label %loop_begin +; +; We shouldn't have unswitched into any other block either. +; CHECK-NOT: br i1 %cond + +loop_begin: + br i1 %cond, label %loop_a, label %loop_b +; CHECK: loop_begin: +; CHECK-NEXT: br i1 %cond, label %loop_a, label %loop_b + +loop_a: + call void @a() convergent + br label %loop_latch + +loop_b: + call void @b() + br label %loop_latch + +loop_latch: + %v = load i1, i1* %ptr + br i1 %v, label %loop_begin, label %loop_exit + +loop_exit: + ret void +} + +; Negative test: we cannot unswitch noduplicate calls. 
+define void @test_no_unswitch_noduplicate(i1* %ptr, i1 %cond) { +; CHECK-LABEL: @test_no_unswitch_noduplicate( +entry: + br label %loop_begin +; CHECK-NEXT: entry: +; CHECK-NEXT: br label %loop_begin +; +; We shouldn't have unswitched into any other block either. +; CHECK-NOT: br i1 %cond + +loop_begin: + br i1 %cond, label %loop_a, label %loop_b +; CHECK: loop_begin: +; CHECK-NEXT: br i1 %cond, label %loop_a, label %loop_b + +loop_a: + call void @a() noduplicate + br label %loop_latch + +loop_b: + call void @b() + br label %loop_latch + +loop_latch: + %v = load i1, i1* %ptr + br i1 %v, label %loop_begin, label %loop_exit + +loop_exit: + ret void +} + +declare i32 @__CxxFrameHandler3(...) + +; Negative test: we cannot unswitch when tokens are used across blocks as we +; might introduce PHIs. +define void @test_no_unswitch_cross_block_token(i1* %ptr, i1 %cond) nounwind personality i32 (...)* @__CxxFrameHandler3 { +; CHECK-LABEL: @test_no_unswitch_cross_block_token( +entry: + br label %loop_begin +; CHECK-NEXT: entry: +; CHECK-NEXT: br label %loop_begin +; +; We shouldn't have unswitched into any other block either. +; CHECK-NOT: br i1 %cond + +loop_begin: + br i1 %cond, label %loop_a, label %loop_b +; CHECK: loop_begin: +; CHECK-NEXT: br i1 %cond, label %loop_a, label %loop_b + +loop_a: + call void @a() + br label %loop_cont + +loop_b: + call void @b() + br label %loop_cont + +loop_cont: + invoke void @a() + to label %loop_latch unwind label %loop_catch + +loop_latch: + br label %loop_begin + +loop_catch: + %catch = catchswitch within none [label %loop_catch_latch, label %loop_exit] unwind to caller + +loop_catch_latch: + %catchpad_latch = catchpad within %catch [] + catchret from %catchpad_latch to label %loop_begin + +loop_exit: + %catchpad_exit = catchpad within %catch [] + catchret from %catchpad_exit to label %exit + +exit: + ret void +} + + +; Non-trivial loop unswitching where there are two distinct trivial conditions +; to unswitch within the loop. 
+define i32 @test1(i1* %ptr, i1 %cond1, i1 %cond2) { +; CHECK-LABEL: @test1( +entry: + br label %loop_begin +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 %cond1, label %entry.split.us, label %entry.split + +loop_begin: + br i1 %cond1, label %loop_a, label %loop_b + +loop_a: + call void @a() + br label %latch +; The 'loop_a' unswitched loop. +; +; CHECK: entry.split.us: +; CHECK-NEXT: br label %loop_begin.us +; +; CHECK: loop_begin.us: +; CHECK-NEXT: br label %loop_a.us +; +; CHECK: loop_a.us: +; CHECK-NEXT: call void @a() +; CHECK-NEXT: br label %latch.us +; +; CHECK: latch.us: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %loop_begin.us, label %loop_exit.split.us +; +; CHECK: loop_exit.split.us: +; CHECK-NEXT: br label %loop_exit + +loop_b: + br i1 %cond2, label %loop_b_a, label %loop_b_b +; The second unswitched condition. +; +; CHECK: entry.split: +; CHECK-NEXT: br i1 %cond2, label %entry.split.split.us, label %entry.split.split + +loop_b_a: + call void @b() + br label %latch +; The 'loop_b_a' unswitched loop. +; +; CHECK: entry.split.split.us: +; CHECK-NEXT: br label %loop_begin.us1 +; +; CHECK: loop_begin.us1: +; CHECK-NEXT: br label %loop_b.us +; +; CHECK: loop_b.us: +; CHECK-NEXT: br label %loop_b_a.us +; +; CHECK: loop_b_a.us: +; CHECK-NEXT: call void @b() +; CHECK-NEXT: br label %latch.us2 +; +; CHECK: latch.us2: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %loop_begin.us1, label %loop_exit.split.split.us +; +; CHECK: loop_exit.split.split.us: +; CHECK-NEXT: br label %loop_exit.split + +loop_b_b: + call void @c() + br label %latch +; The 'loop_b_b' unswitched loop. 
+; +; CHECK: entry.split.split: +; CHECK-NEXT: br label %loop_begin +; +; CHECK: loop_begin: +; CHECK-NEXT: br label %loop_b +; +; CHECK: loop_b: +; CHECK-NEXT: br label %loop_b_b +; +; CHECK: loop_b_b: +; CHECK-NEXT: call void @c() +; CHECK-NEXT: br label %latch +; +; CHECK: latch: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %loop_begin, label %loop_exit.split.split +; +; CHECK: loop_exit.split.split: +; CHECK-NEXT: br label %loop_exit.split + +latch: + %v = load i1, i1* %ptr + br i1 %v, label %loop_begin, label %loop_exit + +loop_exit: + ret i32 0 +; CHECK: loop_exit.split: +; CHECK-NEXT: br label %loop_exit +; +; CHECK: loop_exit: +; CHECK-NEXT: ret +} + +define i32 @test2(i1* %ptr, i1 %cond1, i32* %a.ptr, i32* %b.ptr, i32* %c.ptr) { +; CHECK-LABEL: @test2( +entry: + br label %loop_begin +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 %cond1, label %entry.split.us, label %entry.split + +loop_begin: + %v = load i1, i1* %ptr + br i1 %cond1, label %loop_a, label %loop_b + +loop_a: + %a = load i32, i32* %a.ptr + %ac = load i32, i32* %c.ptr + br i1 %v, label %loop_begin, label %loop_exit +; The 'loop_a' unswitched loop. +; +; CHECK: entry.split.us: +; CHECK-NEXT: br label %loop_begin.us +; +; CHECK: loop_begin.us: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br label %loop_a.us +; +; CHECK: loop_a.us: +; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr +; CHECK-NEXT: %[[AC:.*]] = load i32, i32* %c.ptr +; CHECK-NEXT: br i1 %[[V]], label %loop_begin.backedge.us, label %loop_exit.split.us +; +; CHECK: loop_exit.split.us: +; CHECK-NEXT: %[[A_LCSSA:.*]] = phi i32 [ %[[A]], %loop_a.us ] +; CHECK-NEXT: %[[AC_LCSSA:.*]] = phi i32 [ %[[AC]], %loop_a.us ] +; CHECK-NEXT: br label %loop_exit + +loop_b: + %b = load i32, i32* %b.ptr + %bc = load i32, i32* %c.ptr + br i1 %v, label %loop_begin, label %loop_exit +; The 'loop_b' unswitched loop. 
+; +; CHECK: entry.split: +; CHECK-NEXT: br label %loop_begin +; +; CHECK: loop_begin: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br label %loop_b +; +; CHECK: loop_b: +; CHECK-NEXT: %[[B:.*]] = load i32, i32* %b.ptr +; CHECK-NEXT: %[[BC:.*]] = load i32, i32* %c.ptr +; CHECK-NEXT: br i1 %[[V]], label %loop_begin.backedge, label %loop_exit.split +; +; CHECK: loop_exit.split: +; CHECK-NEXT: %[[B_LCSSA:.*]] = phi i32 [ %[[B]], %loop_b ] +; CHECK-NEXT: %[[BC_LCSSA:.*]] = phi i32 [ %[[BC]], %loop_b ] +; CHECK-NEXT: br label %loop_exit + +loop_exit: + %ab.phi = phi i32 [ %a, %loop_a ], [ %b, %loop_b ] + %c.phi = phi i32 [ %ac, %loop_a ], [ %bc, %loop_b ] + %result = add i32 %ab.phi, %c.phi + ret i32 %result +; CHECK: loop_exit: +; CHECK-NEXT: %[[AB_PHI:.*]] = phi i32 [ %[[B_LCSSA]], %loop_exit.split ], [ %[[A_LCSSA]], %loop_exit.split.us ] +; CHECK-NEXT: %[[C_PHI:.*]] = phi i32 [ %[[BC_LCSSA]], %loop_exit.split ], [ %[[AC_LCSSA]], %loop_exit.split.us ] +; CHECK-NEXT: %[[RESULT:.*]] = add i32 %[[AB_PHI]], %[[C_PHI]] +; CHECK-NEXT: ret i32 %[[RESULT]] +} + +; Test a non-trivial unswitch of an exiting edge to an exit block with other +; in-loop predecessors. +define i32 @test3a(i1* %ptr, i1 %cond1, i32* %a.ptr, i32* %b.ptr) { +; CHECK-LABEL: @test3a( +entry: + br label %loop_begin +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 %cond1, label %entry.split.us, label %entry.split + +loop_begin: + %v = load i1, i1* %ptr + %a = load i32, i32* %a.ptr + br i1 %cond1, label %loop_exit, label %loop_b +; The 'loop_exit' clone. 
+; +; CHECK: entry.split.us: +; CHECK-NEXT: br label %loop_begin.us +; +; CHECK: loop_begin.us: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr +; CHECK-NEXT: br label %loop_exit.split.us +; +; CHECK: loop_exit.split.us: +; CHECK-NEXT: %[[A_LCSSA:.*]] = phi i32 [ %[[A]], %loop_begin.us ] +; CHECK-NEXT: br label %loop_exit + +loop_b: + %b = load i32, i32* %b.ptr + br i1 %v, label %loop_begin, label %loop_exit +; The 'loop_b' unswitched loop. +; +; CHECK: entry.split: +; CHECK-NEXT: br label %loop_begin +; +; CHECK: loop_begin: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr +; CHECK-NEXT: br label %loop_b +; +; CHECK: loop_b: +; CHECK-NEXT: %[[B:.*]] = load i32, i32* %b.ptr +; CHECK-NEXT: br i1 %[[V]], label %loop_begin, label %loop_exit.split +; +; CHECK: loop_exit.split: +; CHECK-NEXT: %[[B_LCSSA:.*]] = phi i32 [ %[[B]], %loop_b ] +; CHECK-NEXT: br label %loop_exit + +loop_exit: + %ab.phi = phi i32 [ %a, %loop_begin ], [ %b, %loop_b ] + ret i32 %ab.phi +; CHECK: loop_exit: +; CHECK-NEXT: %[[AB_PHI:.*]] = phi i32 [ %[[B_LCSSA]], %loop_exit.split ], [ %[[A_LCSSA]], %loop_exit.split.us ] +; CHECK-NEXT: ret i32 %[[AB_PHI]] +} + +; Test a non-trivial unswitch of an exiting edge to an exit block with other +; in-loop predecessors. This is the same as @test3a but with the reversed order +; of successors so that the exiting edge is *not* the cloned edge. +define i32 @test3b(i1* %ptr, i1 %cond1, i32* %a.ptr, i32* %b.ptr) { +; CHECK-LABEL: @test3b( +entry: + br label %loop_begin +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 %cond1, label %entry.split.us, label %entry.split + +loop_begin: + %v = load i1, i1* %ptr + %a = load i32, i32* %a.ptr + br i1 %cond1, label %loop_b, label %loop_exit +; The 'loop_b' unswitched loop. 
+; +; CHECK: entry.split.us: +; CHECK-NEXT: br label %loop_begin.us +; +; CHECK: loop_begin.us: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr +; CHECK-NEXT: br label %loop_b.us +; +; CHECK: loop_b.us: +; CHECK-NEXT: %[[B:.*]] = load i32, i32* %b.ptr +; CHECK-NEXT: br i1 %[[V]], label %loop_begin.us, label %loop_exit.split.us +; +; CHECK: loop_exit.split.us: +; CHECK-NEXT: %[[B_LCSSA:.*]] = phi i32 [ %[[B]], %loop_b.us ] +; CHECK-NEXT: br label %loop_exit + +loop_b: + %b = load i32, i32* %b.ptr + br i1 %v, label %loop_begin, label %loop_exit +; The 'loop_b' unswitched loop. +; +; CHECK: entry.split: +; CHECK-NEXT: br label %loop_begin +; +; CHECK: loop_begin: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr +; CHECK-NEXT: br label %loop_exit.split +; +; CHECK: loop_exit.split: +; CHECK-NEXT: %[[A_LCSSA:.*]] = phi i32 [ %[[A]], %loop_begin ] +; CHECK-NEXT: br label %loop_exit + +loop_exit: + %ab.phi = phi i32 [ %b, %loop_b ], [ %a, %loop_begin ] + ret i32 %ab.phi +; CHECK: loop_exit: +; CHECK-NEXT: %[[AB_PHI:.*]] = phi i32 [ %[[A_LCSSA]], %loop_exit.split ], [ %[[B_LCSSA]], %loop_exit.split.us ] +; CHECK-NEXT: ret i32 %[[AB_PHI]] +} + +; Test a non-trivial unswitch of an exiting edge to an exit block with no other +; in-loop predecessors. +define void @test4a(i1* %ptr, i1 %cond1, i32* %a.ptr, i32* %b.ptr) { +; CHECK-LABEL: @test4a( +entry: + br label %loop_begin +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 %cond1, label %entry.split.us, label %entry.split + +loop_begin: + %v = load i1, i1* %ptr + %a = load i32, i32* %a.ptr + br i1 %cond1, label %loop_exit1, label %loop_b +; The 'loop_exit' clone. 
+; +; CHECK: entry.split.us: +; CHECK-NEXT: br label %loop_begin.us +; +; CHECK: loop_begin.us: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr +; CHECK-NEXT: br label %loop_exit1.split.us +; +; CHECK: loop_exit1.split.us: +; CHECK-NEXT: %[[A_LCSSA:.*]] = phi i32 [ %[[A]], %loop_begin.us ] +; CHECK-NEXT: br label %loop_exit1 + +loop_b: + %b = load i32, i32* %b.ptr + br i1 %v, label %loop_begin, label %loop_exit2 +; The 'loop_b' unswitched loop. +; +; CHECK: entry.split: +; CHECK-NEXT: br label %loop_begin +; +; CHECK: loop_begin: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr +; CHECK-NEXT: br label %loop_b +; +; CHECK: loop_b: +; CHECK-NEXT: %[[B:.*]] = load i32, i32* %b.ptr +; CHECK-NEXT: br i1 %[[V]], label %loop_begin, label %loop_exit2 + +loop_exit1: + %a.phi = phi i32 [ %a, %loop_begin ] + call void @sink1(i32 %a.phi) + ret void +; CHECK: loop_exit1: +; CHECK-NEXT: %[[A_PHI:.*]] = phi i32 [ %[[A_LCSSA]], %loop_exit1.split.us ] +; CHECK-NEXT: call void @sink1(i32 %[[A_PHI]]) +; CHECK-NEXT: ret void + +loop_exit2: + %b.phi = phi i32 [ %b, %loop_b ] + call void @sink2(i32 %b.phi) + ret void +; CHECK: loop_exit2: +; CHECK-NEXT: %[[B_PHI:.*]] = phi i32 [ %[[B]], %loop_b ] +; CHECK-NEXT: call void @sink2(i32 %[[B_PHI]]) +; CHECK-NEXT: ret void +} + +; Test a non-trivial unswitch of an exiting edge to an exit block with no other +; in-loop predecessors. This is the same as @test4a but with the edges reversed +; so that the exiting edge is *not* the cloned edge. +define void @test4b(i1* %ptr, i1 %cond1, i32* %a.ptr, i32* %b.ptr) { +; CHECK-LABEL: @test4b( +entry: + br label %loop_begin +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 %cond1, label %entry.split.us, label %entry.split + +loop_begin: + %v = load i1, i1* %ptr + %a = load i32, i32* %a.ptr + br i1 %cond1, label %loop_b, label %loop_exit1 +; The 'loop_b' clone. 
+; +; CHECK: entry.split.us: +; CHECK-NEXT: br label %loop_begin.us +; +; CHECK: loop_begin.us: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr +; CHECK-NEXT: br label %loop_b.us +; +; CHECK: loop_b.us: +; CHECK-NEXT: %[[B:.*]] = load i32, i32* %b.ptr +; CHECK-NEXT: br i1 %[[V]], label %loop_begin.us, label %loop_exit2.split.us +; +; CHECK: loop_exit2.split.us: +; CHECK-NEXT: %[[B_LCSSA:.*]] = phi i32 [ %[[B]], %loop_b.us ] +; CHECK-NEXT: br label %loop_exit2 + +loop_b: + %b = load i32, i32* %b.ptr + br i1 %v, label %loop_begin, label %loop_exit2 +; The 'loop_exit' unswitched path. +; +; CHECK: entry.split: +; CHECK-NEXT: br label %loop_begin +; +; CHECK: loop_begin: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr +; CHECK-NEXT: br label %loop_exit1 + +loop_exit1: + %a.phi = phi i32 [ %a, %loop_begin ] + call void @sink1(i32 %a.phi) + ret void +; CHECK: loop_exit1: +; CHECK-NEXT: %[[A_PHI:.*]] = phi i32 [ %[[A]], %loop_begin ] +; CHECK-NEXT: call void @sink1(i32 %[[A_PHI]]) +; CHECK-NEXT: ret void + +loop_exit2: + %b.phi = phi i32 [ %b, %loop_b ] + call void @sink2(i32 %b.phi) + ret void +; CHECK: loop_exit2: +; CHECK-NEXT: %[[B_PHI:.*]] = phi i32 [ %[[B_LCSSA]], %loop_exit2.split.us ] +; CHECK-NEXT: call void @sink2(i32 %[[B_PHI]]) +; CHECK-NEXT: ret void +} + +; Test a non-trivial unswitch of an exiting edge to an exit block with no other +; in-loop predecessors. This is the same as @test4a but with a common merge +; block after the independent loop exits. This requires a different structural +; update to the dominator tree. 
+define void @test4c(i1* %ptr, i1 %cond1, i32* %a.ptr, i32* %b.ptr) { +; CHECK-LABEL: @test4c( +entry: + br label %loop_begin +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 %cond1, label %entry.split.us, label %entry.split + +loop_begin: + %v = load i1, i1* %ptr + %a = load i32, i32* %a.ptr + br i1 %cond1, label %loop_exit1, label %loop_b +; The 'loop_exit' clone. +; +; CHECK: entry.split.us: +; CHECK-NEXT: br label %loop_begin.us +; +; CHECK: loop_begin.us: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr +; CHECK-NEXT: br label %loop_exit1.split.us +; +; CHECK: loop_exit1.split.us: +; CHECK-NEXT: %[[A_LCSSA:.*]] = phi i32 [ %[[A]], %loop_begin.us ] +; CHECK-NEXT: br label %loop_exit1 + +loop_b: + %b = load i32, i32* %b.ptr + br i1 %v, label %loop_begin, label %loop_exit2 +; The 'loop_b' unswitched loop. +; +; CHECK: entry.split: +; CHECK-NEXT: br label %loop_begin +; +; CHECK: loop_begin: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr +; CHECK-NEXT: br label %loop_b +; +; CHECK: loop_b: +; CHECK-NEXT: %[[B:.*]] = load i32, i32* %b.ptr +; CHECK-NEXT: br i1 %[[V]], label %loop_begin, label %loop_exit2 + +loop_exit1: + %a.phi = phi i32 [ %a, %loop_begin ] + call void @sink1(i32 %a.phi) + br label %exit +; CHECK: loop_exit1: +; CHECK-NEXT: %[[A_PHI:.*]] = phi i32 [ %[[A_LCSSA]], %loop_exit1.split.us ] +; CHECK-NEXT: call void @sink1(i32 %[[A_PHI]]) +; CHECK-NEXT: br label %exit + +loop_exit2: + %b.phi = phi i32 [ %b, %loop_b ] + call void @sink2(i32 %b.phi) + br label %exit +; CHECK: loop_exit2: +; CHECK-NEXT: %[[B_PHI:.*]] = phi i32 [ %[[B]], %loop_b ] +; CHECK-NEXT: call void @sink2(i32 %[[B_PHI]]) +; CHECK-NEXT: br label %exit + +exit: + ret void +; CHECK: exit: +; CHECK-NEXT: ret void +} + +; Test that we can unswitch a condition out of multiple layers of a loop nest. 
+define i32 @test5(i1* %ptr, i1 %cond1, i32* %a.ptr, i32* %b.ptr) { +; CHECK-LABEL: @test5( +entry: + br label %loop_begin +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 %cond1, label %loop_begin.split.us, label %entry.split +; +; CHECK: entry.split: +; CHECK-NEXT: br label %loop_begin +; +; CHECK: loop_begin: +; CHECK-NEXT: br label %loop_begin.split + +loop_begin: + br label %inner_loop_begin + +inner_loop_begin: + %v = load i1, i1* %ptr + %a = load i32, i32* %a.ptr + br i1 %cond1, label %loop_exit, label %inner_loop_b +; The 'loop_exit' clone. +; +; CHECK: loop_begin.split.us: +; CHECK-NEXT: br label %inner_loop_begin.us +; +; CHECK: inner_loop_begin.us: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr +; CHECK-NEXT: br label %loop_exit.loopexit.split.us +; +; CHECK: loop_exit.loopexit.split.us: +; CHECK-NEXT: %[[A_LCSSA:.*]] = phi i32 [ %[[A]], %inner_loop_begin.us ] +; CHECK-NEXT: br label %loop_exit + +inner_loop_b: + %b = load i32, i32* %b.ptr + br i1 %v, label %inner_loop_begin, label %loop_latch +; The 'inner_loop_b' unswitched loop. 
+; +; CHECK: loop_begin.split: +; CHECK-NEXT: br label %inner_loop_begin +; +; CHECK: inner_loop_begin: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr +; CHECK-NEXT: br label %inner_loop_b +; +; CHECK: inner_loop_b: +; CHECK-NEXT: %[[B:.*]] = load i32, i32* %b.ptr +; CHECK-NEXT: br i1 %[[V]], label %inner_loop_begin, label %loop_latch + +loop_latch: + %b.phi = phi i32 [ %b, %inner_loop_b ] + %v2 = load i1, i1* %ptr + br i1 %v2, label %loop_begin, label %loop_exit +; CHECK: loop_latch: +; CHECK-NEXT: %[[B_LCSSA:.*]] = phi i32 [ %[[B]], %inner_loop_b ] +; CHECK-NEXT: %[[V2:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V2]], label %loop_begin, label %loop_exit.loopexit1 + +loop_exit: + %ab.phi = phi i32 [ %a, %inner_loop_begin ], [ %b.phi, %loop_latch ] + ret i32 %ab.phi +; CHECK: loop_exit.loopexit: +; CHECK-NEXT: %[[A_PHI:.*]] = phi i32 [ %[[A_LCSSA]], %loop_exit.loopexit.split.us ] +; CHECK-NEXT: br label %loop_exit +; +; CHECK: loop_exit.loopexit1: +; CHECK-NEXT: %[[B_PHI:.*]] = phi i32 [ %[[B_LCSSA]], %loop_latch ] +; CHECK-NEXT: br label %loop_exit +; +; CHECK: loop_exit: +; CHECK-NEXT: %[[AB_PHI:.*]] = phi i32 [ %[[A_PHI]], %loop_exit.loopexit ], [ %[[B_PHI]], %loop_exit.loopexit1 ] +; CHECK-NEXT: ret i32 %[[AB_PHI]] +} + +; Test that we can unswitch a condition where we end up only cloning some of +; the nested loops and needing to delete some of the nested loops. 
+define i32 @test6(i1* %ptr, i1 %cond1, i32* %a.ptr, i32* %b.ptr) { +; CHECK-LABEL: @test6( +entry: + br label %loop_begin +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 %cond1, label %entry.split.us, label %entry.split + +loop_begin: + %v = load i1, i1* %ptr + br i1 %cond1, label %loop_a, label %loop_b + +loop_a: + br label %loop_a_inner + +loop_a_inner: + %va = load i1, i1* %ptr + %a = load i32, i32* %a.ptr + br i1 %va, label %loop_a_inner, label %loop_a_inner_exit + +loop_a_inner_exit: + %a.lcssa = phi i32 [ %a, %loop_a_inner ] + br label %latch +; The 'loop_a' cloned loop. +; +; CHECK: entry.split.us: +; CHECK-NEXT: br label %loop_begin.us +; +; CHECK: loop_begin.us: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br label %loop_a.us +; +; CHECK: loop_a.us: +; CHECK-NEXT: br label %loop_a_inner.us +; +; CHECK: loop_a_inner.us: +; CHECK-NEXT: %[[VA:.*]] = load i1, i1* %ptr +; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr +; CHECK-NEXT: br i1 %[[VA]], label %loop_a_inner.us, label %loop_a_inner_exit.us +; +; CHECK: loop_a_inner_exit.us: +; CHECK-NEXT: %[[A_INNER_LCSSA:.*]] = phi i32 [ %[[A]], %loop_a_inner.us ] +; CHECK-NEXT: br label %latch.us +; +; CHECK: latch.us: +; CHECK-NEXT: %[[A_PHI:.*]] = phi i32 [ %[[A_INNER_LCSSA]], %loop_a_inner_exit.us ] +; CHECK-NEXT: br i1 %[[V]], label %loop_begin.us, label %loop_exit.split.us +; +; CHECK: loop_exit.split.us: +; CHECK-NEXT: %[[A_LCSSA:.*]] = phi i32 [ %[[A_PHI]], %latch.us ] +; CHECK-NEXT: br label %loop_exit + +loop_b: + br label %loop_b_inner + +loop_b_inner: + %vb = load i1, i1* %ptr + %b = load i32, i32* %b.ptr + br i1 %vb, label %loop_b_inner, label %loop_b_inner_exit + +loop_b_inner_exit: + %b.lcssa = phi i32 [ %b, %loop_b_inner ] + br label %latch + +latch: + %ab.phi = phi i32 [ %a.lcssa, %loop_a_inner_exit ], [ %b.lcssa, %loop_b_inner_exit ] + br i1 %v, label %loop_begin, label %loop_exit +; The 'loop_b' unswitched loop. 
+; +; CHECK: entry.split: +; CHECK-NEXT: br label %loop_begin +; +; CHECK: loop_begin: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br label %loop_b +; +; CHECK: loop_b: +; CHECK-NEXT: br label %loop_b_inner +; +; CHECK: loop_b_inner: +; CHECK-NEXT: %[[VB:.*]] = load i1, i1* %ptr +; CHECK-NEXT: %[[B:.*]] = load i32, i32* %b.ptr +; CHECK-NEXT: br i1 %[[VB]], label %loop_b_inner, label %loop_b_inner_exit +; +; CHECK: loop_b_inner_exit: +; CHECK-NEXT: %[[B_INNER_LCSSA:.*]] = phi i32 [ %[[B]], %loop_b_inner ] +; CHECK-NEXT: br label %latch +; +; CHECK: latch: +; CHECK-NEXT: %[[B_PHI:.*]] = phi i32 [ %[[B_INNER_LCSSA]], %loop_b_inner_exit ] +; CHECK-NEXT: br i1 %[[V]], label %loop_begin, label %loop_exit.split +; +; CHECK: loop_exit.split: +; CHECK-NEXT: %[[B_LCSSA:.*]] = phi i32 [ %[[B_PHI]], %latch ] +; CHECK-NEXT: br label %loop_exit + +loop_exit: + %ab.lcssa = phi i32 [ %ab.phi, %latch ] + ret i32 %ab.lcssa +; CHECK: loop_exit: +; CHECK-NEXT: %[[AB_PHI:.*]] = phi i32 [ %[[B_LCSSA]], %loop_exit.split ], [ %[[A_LCSSA]], %loop_exit.split.us ] +; CHECK-NEXT: ret i32 %[[AB_PHI]] +} + +; Test that when unswitching a deeply nested loop condition in a way that +; produces a non-loop clone that can reach multiple exit blocks which are part +; of different outer loops we correctly divide the cloned loop blocks between +; the outer loops based on reachability. 
+define i32 @test7a(i1* %ptr, i1* %cond.ptr, i32* %a.ptr, i32* %b.ptr) { +; CHECK-LABEL: @test7a( +entry: + br label %loop_begin +; CHECK-NEXT: entry: +; CHECK-NEXT: br label %loop_begin + +loop_begin: + %a = load i32, i32* %a.ptr + br label %inner_loop_begin +; CHECK: loop_begin: +; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr +; CHECK-NEXT: br label %inner_loop_begin + +inner_loop_begin: + %a.phi = phi i32 [ %a, %loop_begin ], [ %a2, %inner_inner_loop_exit ] + %cond = load i1, i1* %cond.ptr + %b = load i32, i32* %b.ptr + br label %inner_inner_loop_begin +; CHECK: inner_loop_begin: +; CHECK-NEXT: %[[A_INNER_PHI:.*]] = phi i32 [ %[[A]], %loop_begin ], [ %[[A2:.*]], %inner_inner_loop_exit ] +; CHECK-NEXT: %[[COND:.*]] = load i1, i1* %cond.ptr +; CHECK-NEXT: %[[B:.*]] = load i32, i32* %b.ptr +; CHECK-NEXT: br i1 %[[COND]], label %inner_loop_begin.split.us, label %inner_loop_begin.split + +inner_inner_loop_begin: + %v1 = load i1, i1* %ptr + br i1 %v1, label %inner_inner_loop_a, label %inner_inner_loop_b + +inner_inner_loop_a: + %v2 = load i1, i1* %ptr + br i1 %v2, label %loop_exit, label %inner_inner_loop_c + +inner_inner_loop_b: + %v3 = load i1, i1* %ptr + br i1 %v3, label %inner_inner_loop_exit, label %inner_inner_loop_c + +inner_inner_loop_c: + %v4 = load i1, i1* %ptr + br i1 %v4, label %inner_loop_exit, label %inner_inner_loop_d + +inner_inner_loop_d: + br i1 %cond, label %inner_loop_exit, label %inner_inner_loop_begin +; The cloned copy that always exits with the adjustments required to fix up +; loop exits. 
+; +; CHECK: inner_loop_begin.split.us: +; CHECK-NEXT: br label %inner_inner_loop_begin.us +; +; CHECK: inner_inner_loop_begin.us: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %inner_inner_loop_a.us, label %inner_inner_loop_b.us +; +; CHECK: inner_inner_loop_b.us: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %inner_inner_loop_exit.split.us, label %inner_inner_loop_c.us.loopexit +; +; CHECK: inner_inner_loop_a.us: +; CHECK-NEXT: %[[A_NEW_LCSSA:.*]] = phi i32 [ %[[A_INNER_PHI]], %inner_inner_loop_begin.us ] +; CHECK-NEXT: %[[B_NEW_LCSSA:.*]] = phi i32 [ %[[B]], %inner_inner_loop_begin.us ] +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %loop_exit.split.us, label %inner_inner_loop_c.us +; +; CHECK: inner_inner_loop_c.us.loopexit: +; CHECK-NEXT: br label %inner_inner_loop_c.us +; +; CHECK: inner_inner_loop_c.us: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %inner_loop_exit.loopexit.split.us, label %inner_inner_loop_d.us +; +; CHECK: inner_inner_loop_d.us: +; CHECK-NEXT: br label %inner_loop_exit.loopexit.split +; +; CHECK: inner_inner_loop_exit.split.us: +; CHECK-NEXT: br label %inner_inner_loop_exit +; +; CHECK: loop_exit.split.us: +; CHECK-NEXT: %[[A_LCSSA_US:.*]] = phi i32 [ %[[A_NEW_LCSSA]], %inner_inner_loop_a.us ] +; CHECK-NEXT: %[[B_LCSSA_US:.*]] = phi i32 [ %[[B_NEW_LCSSA]], %inner_inner_loop_a.us ] +; CHECK-NEXT: br label %loop_exit +; +; CHECK: inner_loop_exit.loopexit.split.us: +; CHECK-NEXT: br label %inner_loop_exit.loopexit +; +; The original copy that continues to loop. 
+; +; CHECK: inner_loop_begin.split: +; CHECK-NEXT: br label %inner_inner_loop_begin +; +; CHECK: inner_inner_loop_begin: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %inner_inner_loop_a, label %inner_inner_loop_b +; +; CHECK: inner_inner_loop_a: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %loop_exit.split, label %inner_inner_loop_c +; +; CHECK: inner_inner_loop_b: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %inner_inner_loop_exit.split, label %inner_inner_loop_c +; +; CHECK: inner_inner_loop_c: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %inner_loop_exit.loopexit.split, label %inner_inner_loop_d +; +; CHECK: inner_inner_loop_d: +; CHECK-NEXT: br label %inner_inner_loop_begin +; +; CHECK: inner_inner_loop_exit.split: +; CHECK-NEXT: br label %inner_inner_loop_exit + +inner_inner_loop_exit: + %a2 = load i32, i32* %a.ptr + %v5 = load i1, i1* %ptr + br i1 %v5, label %inner_loop_exit, label %inner_loop_begin +; CHECK: inner_inner_loop_exit: +; CHECK-NEXT: %[[A2]] = load i32, i32* %a.ptr +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %inner_loop_exit.loopexit1, label %inner_loop_begin + +inner_loop_exit: + br label %loop_begin +; CHECK: inner_loop_exit.loopexit.split: +; CHECK-NEXT: br label %inner_loop_exit.loopexit +; +; CHECK: inner_loop_exit.loopexit: +; CHECK-NEXT: br label %inner_loop_exit +; +; CHECK: inner_loop_exit.loopexit1: +; CHECK-NEXT: br label %inner_loop_exit +; +; CHECK: inner_loop_exit: +; CHECK-NEXT: br label %loop_begin + +loop_exit: + %a.lcssa = phi i32 [ %a.phi, %inner_inner_loop_a ] + %b.lcssa = phi i32 [ %b, %inner_inner_loop_a ] + %result = add i32 %a.lcssa, %b.lcssa + ret i32 %result +; CHECK: loop_exit.split: +; CHECK-NEXT: %[[A_LCSSA:.*]] = phi i32 [ %[[A_INNER_PHI]], %inner_inner_loop_a ] +; CHECK-NEXT: %[[B_LCSSA:.*]] = phi i32 [ %[[B]], %inner_inner_loop_a ] +; 
CHECK-NEXT: br label %loop_exit +; +; CHECK: loop_exit: +; CHECK-NEXT: %[[A_PHI:.*]] = phi i32 [ %[[A_LCSSA]], %loop_exit.split ], [ %[[A_LCSSA_US]], %loop_exit.split.us ] +; CHECK-NEXT: %[[B_PHI:.*]] = phi i32 [ %[[B_LCSSA]], %loop_exit.split ], [ %[[B_LCSSA_US]], %loop_exit.split.us ] +; CHECK-NEXT: %[[RESULT:.*]] = add i32 %[[A_PHI]], %[[B_PHI]] +; CHECK-NEXT: ret i32 %[[RESULT]] +} + +; Same pattern as @test7a but here the original loop becomes a non-loop that +; can reach multiple exit blocks which are part of different outer loops. +define i32 @test7b(i1* %ptr, i1* %cond.ptr, i32* %a.ptr, i32* %b.ptr) { +; CHECK-LABEL: @test7b( +entry: + br label %loop_begin +; CHECK-NEXT: entry: +; CHECK-NEXT: br label %loop_begin + +loop_begin: + %a = load i32, i32* %a.ptr + br label %inner_loop_begin +; CHECK: loop_begin: +; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr +; CHECK-NEXT: br label %inner_loop_begin + +inner_loop_begin: + %a.phi = phi i32 [ %a, %loop_begin ], [ %a2, %inner_inner_loop_exit ] + %cond = load i1, i1* %cond.ptr + %b = load i32, i32* %b.ptr + br label %inner_inner_loop_begin +; CHECK: inner_loop_begin: +; CHECK-NEXT: %[[A_INNER_PHI:.*]] = phi i32 [ %[[A]], %loop_begin ], [ %[[A2:.*]], %inner_inner_loop_exit ] +; CHECK-NEXT: %[[COND:.*]] = load i1, i1* %cond.ptr +; CHECK-NEXT: %[[B:.*]] = load i32, i32* %b.ptr +; CHECK-NEXT: br i1 %[[COND]], label %inner_loop_begin.split.us, label %inner_loop_begin.split + +inner_inner_loop_begin: + %v1 = load i1, i1* %ptr + br i1 %v1, label %inner_inner_loop_a, label %inner_inner_loop_b + +inner_inner_loop_a: + %v2 = load i1, i1* %ptr + br i1 %v2, label %loop_exit, label %inner_inner_loop_c + +inner_inner_loop_b: + %v3 = load i1, i1* %ptr + br i1 %v3, label %inner_inner_loop_exit, label %inner_inner_loop_c + +inner_inner_loop_c: + %v4 = load i1, i1* %ptr + br i1 %v4, label %inner_loop_exit, label %inner_inner_loop_d + +inner_inner_loop_d: + br i1 %cond, label %inner_inner_loop_begin, label %inner_loop_exit +; The 
cloned copy that continues looping. +; +; CHECK: inner_loop_begin.split.us: +; CHECK-NEXT: br label %inner_inner_loop_begin.us +; +; CHECK: inner_inner_loop_begin.us: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %inner_inner_loop_a.us, label %inner_inner_loop_b.us +; +; CHECK: inner_inner_loop_b.us: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %inner_inner_loop_exit.split.us, label %inner_inner_loop_c.us +; +; CHECK: inner_inner_loop_a.us: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %loop_exit.split.us, label %inner_inner_loop_c.us +; +; CHECK: inner_inner_loop_c.us: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %inner_loop_exit.loopexit.split.us, label %inner_inner_loop_d.us +; +; CHECK: inner_inner_loop_d.us: +; CHECK-NEXT: br label %inner_inner_loop_begin.us +; +; CHECK: inner_inner_loop_exit.split.us: +; CHECK-NEXT: br label %inner_inner_loop_exit +; +; CHECK: loop_exit.split.us: +; CHECK-NEXT: %[[A_LCSSA_US:.*]] = phi i32 [ %[[A_INNER_PHI]], %inner_inner_loop_a.us ] +; CHECK-NEXT: %[[B_LCSSA_US:.*]] = phi i32 [ %[[B]], %inner_inner_loop_a.us ] +; CHECK-NEXT: br label %loop_exit +; +; CHECK: inner_loop_exit.loopexit.split.us: +; CHECK-NEXT: br label %inner_loop_exit.loopexit +; +; The original copy that now always exits and needs adjustments for exit +; blocks. 
+; +; CHECK: inner_loop_begin.split: +; CHECK-NEXT: br label %inner_inner_loop_begin +; +; CHECK: inner_inner_loop_begin: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %inner_inner_loop_a, label %inner_inner_loop_b +; +; CHECK: inner_inner_loop_a: +; CHECK-NEXT: %[[A_NEW_LCSSA:.*]] = phi i32 [ %[[A_INNER_PHI]], %inner_inner_loop_begin ] +; CHECK-NEXT: %[[B_NEW_LCSSA:.*]] = phi i32 [ %[[B]], %inner_inner_loop_begin ] +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %loop_exit.split, label %inner_inner_loop_c +; +; CHECK: inner_inner_loop_b: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %inner_inner_loop_exit.split, label %inner_inner_loop_c.loopexit +; +; CHECK: inner_inner_loop_c.loopexit: +; CHECK-NEXT: br label %inner_inner_loop_c +; +; CHECK: inner_inner_loop_c: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %inner_loop_exit.loopexit.split, label %inner_inner_loop_d +; +; CHECK: inner_inner_loop_d: +; CHECK-NEXT: br label %inner_loop_exit.loopexit.split +; +; CHECK: inner_inner_loop_exit.split: +; CHECK-NEXT: br label %inner_inner_loop_exit + +inner_inner_loop_exit: + %a2 = load i32, i32* %a.ptr + %v5 = load i1, i1* %ptr + br i1 %v5, label %inner_loop_exit, label %inner_loop_begin +; CHECK: inner_inner_loop_exit: +; CHECK-NEXT: %[[A2]] = load i32, i32* %a.ptr +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %inner_loop_exit.loopexit1, label %inner_loop_begin + +inner_loop_exit: + br label %loop_begin +; CHECK: inner_loop_exit.loopexit.split: +; CHECK-NEXT: br label %inner_loop_exit.loopexit +; +; CHECK: inner_loop_exit.loopexit: +; CHECK-NEXT: br label %inner_loop_exit +; +; CHECK: inner_loop_exit.loopexit1: +; CHECK-NEXT: br label %inner_loop_exit +; +; CHECK: inner_loop_exit: +; CHECK-NEXT: br label %loop_begin + +loop_exit: + %a.lcssa = phi i32 [ %a.phi, %inner_inner_loop_a ] + %b.lcssa = phi i32 [ 
%b, %inner_inner_loop_a ] + %result = add i32 %a.lcssa, %b.lcssa + ret i32 %result +; CHECK: loop_exit.split: +; CHECK-NEXT: %[[A_LCSSA:.*]] = phi i32 [ %[[A_NEW_LCSSA]], %inner_inner_loop_a ] +; CHECK-NEXT: %[[B_LCSSA:.*]] = phi i32 [ %[[B_NEW_LCSSA]], %inner_inner_loop_a ] +; CHECK-NEXT: br label %loop_exit +; +; CHECK: loop_exit: +; CHECK-NEXT: %[[A_PHI:.*]] = phi i32 [ %[[A_LCSSA]], %loop_exit.split ], [ %[[A_LCSSA_US]], %loop_exit.split.us ] +; CHECK-NEXT: %[[B_PHI:.*]] = phi i32 [ %[[B_LCSSA]], %loop_exit.split ], [ %[[B_LCSSA_US]], %loop_exit.split.us ] +; CHECK-NEXT: %[[RESULT:.*]] = add i32 %[[A_PHI]], %[[B_PHI]] +; CHECK-NEXT: ret i32 %[[RESULT]] +} + +; Test that when the exit block set of an inner loop changes to start at a less +; high level of the loop nest we correctly hoist the loop up the nest. +define i32 @test8a(i1* %ptr, i1* %cond.ptr, i32* %a.ptr, i32* %b.ptr) { +; CHECK-LABEL: @test8a( +entry: + br label %loop_begin +; CHECK-NEXT: entry: +; CHECK-NEXT: br label %loop_begin + +loop_begin: + %a = load i32, i32* %a.ptr + br label %inner_loop_begin +; CHECK: loop_begin: +; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr +; CHECK-NEXT: br label %inner_loop_begin + +inner_loop_begin: + %a.phi = phi i32 [ %a, %loop_begin ], [ %a2, %inner_inner_loop_exit ] + %cond = load i1, i1* %cond.ptr + %b = load i32, i32* %b.ptr + br label %inner_inner_loop_begin +; CHECK: inner_loop_begin: +; CHECK-NEXT: %[[A_INNER_PHI:.*]] = phi i32 [ %[[A]], %loop_begin ], [ %[[A2:.*]], %inner_inner_loop_exit ] +; CHECK-NEXT: %[[COND:.*]] = load i1, i1* %cond.ptr +; CHECK-NEXT: %[[B:.*]] = load i32, i32* %b.ptr +; CHECK-NEXT: br i1 %[[COND]], label %inner_loop_begin.split.us, label %inner_loop_begin.split + +inner_inner_loop_begin: + %v1 = load i1, i1* %ptr + br i1 %v1, label %inner_inner_loop_a, label %inner_inner_loop_b + +inner_inner_loop_a: + %v2 = load i1, i1* %ptr + br i1 %v2, label %inner_inner_loop_latch, label %inner_loop_exit + +inner_inner_loop_b: + br i1 %cond, label 
%inner_inner_loop_latch, label %inner_inner_loop_exit + +inner_inner_loop_latch: + br label %inner_inner_loop_begin +; The cloned region is now an exit from the inner loop. +; +; CHECK: inner_loop_begin.split.us: +; CHECK-NEXT: %[[A_INNER_INNER_LCSSA:.*]] = phi i32 [ %[[A_INNER_PHI]], %inner_loop_begin ] +; CHECK-NEXT: br label %inner_inner_loop_begin.us +; +; CHECK: inner_inner_loop_begin.us: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %inner_inner_loop_a.us, label %inner_inner_loop_b.us +; +; CHECK: inner_inner_loop_b.us: +; CHECK-NEXT: br label %inner_inner_loop_latch.us +; +; CHECK: inner_inner_loop_a.us: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %inner_inner_loop_latch.us, label %inner_loop_exit.loopexit.split.us +; +; CHECK: inner_inner_loop_latch.us: +; CHECK-NEXT: br label %inner_inner_loop_begin.us +; +; CHECK: inner_loop_exit.loopexit.split.us: +; CHECK-NEXT: %[[A_INNER_LCSSA_US:.*]] = phi i32 [ %[[A_INNER_INNER_LCSSA]], %inner_inner_loop_a.us ] +; CHECK-NEXT: br label %inner_loop_exit.loopexit +; +; The original region exits the loop earlier. 
+; +; CHECK: inner_loop_begin.split: +; CHECK-NEXT: br label %inner_inner_loop_begin +; +; CHECK: inner_inner_loop_begin: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %inner_inner_loop_a, label %inner_inner_loop_b +; +; CHECK: inner_inner_loop_a: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %inner_inner_loop_latch, label %inner_loop_exit.loopexit.split +; +; CHECK: inner_inner_loop_b: +; CHECK-NEXT: br label %inner_inner_loop_exit +; +; CHECK: inner_inner_loop_latch: +; CHECK-NEXT: br label %inner_inner_loop_begin + +inner_inner_loop_exit: + %a2 = load i32, i32* %a.ptr + %v4 = load i1, i1* %ptr + br i1 %v4, label %inner_loop_exit, label %inner_loop_begin +; CHECK: inner_inner_loop_exit: +; CHECK-NEXT: %[[A2]] = load i32, i32* %a.ptr +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %inner_loop_exit.loopexit1, label %inner_loop_begin + +inner_loop_exit: + %v5 = load i1, i1* %ptr + br i1 %v5, label %loop_exit, label %loop_begin +; CHECK: inner_loop_exit.loopexit.split: +; CHECK-NEXT: %[[A_INNER_LCSSA:.*]] = phi i32 [ %[[A_INNER_PHI]], %inner_inner_loop_a ] +; CHECK-NEXT: br label %inner_loop_exit.loopexit +; +; CHECK: inner_loop_exit.loopexit: +; CHECK-NEXT: %[[A_INNER_US_PHI:.*]] = phi i32 [ %[[A_INNER_LCSSA]], %inner_loop_exit.loopexit.split ], [ %[[A_INNER_LCSSA_US]], %inner_loop_exit.loopexit.split.us ] +; CHECK-NEXT: br label %inner_loop_exit +; +; CHECK: inner_loop_exit.loopexit1: +; CHECK-NEXT: %[[A_INNER_LCSSA2:.*]] = phi i32 [ %[[A_INNER_PHI]], %inner_inner_loop_exit ] +; CHECK-NEXT: br label %inner_loop_exit +; +; CHECK: inner_loop_exit: +; CHECK-NEXT: %[[A_INNER_PHI:.*]] = phi i32 [ %[[A_INNER_LCSSA2]], %inner_loop_exit.loopexit1 ], [ %[[A_INNER_US_PHI]], %inner_loop_exit.loopexit ] +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %loop_exit, label %loop_begin + +loop_exit: + %a.lcssa = phi i32 [ %a.phi, %inner_loop_exit ] + 
ret i32 %a.lcssa +; CHECK: loop_exit: +; CHECK-NEXT: %[[A_LCSSA:.*]] = phi i32 [ %[[A_INNER_PHI]], %inner_loop_exit ] +; CHECK-NEXT: ret i32 %[[A_LCSSA]] +} + +; Same pattern as @test8a but where the original loop loses an exit block and +; needs to be hoisted up the nest. +define i32 @test8b(i1* %ptr, i1* %cond.ptr, i32* %a.ptr, i32* %b.ptr) { +; CHECK-LABEL: @test8b( +entry: + br label %loop_begin +; CHECK-NEXT: entry: +; CHECK-NEXT: br label %loop_begin + +loop_begin: + %a = load i32, i32* %a.ptr + br label %inner_loop_begin +; CHECK: loop_begin: +; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr +; CHECK-NEXT: br label %inner_loop_begin + +inner_loop_begin: + %a.phi = phi i32 [ %a, %loop_begin ], [ %a2, %inner_inner_loop_exit ] + %cond = load i1, i1* %cond.ptr + %b = load i32, i32* %b.ptr + br label %inner_inner_loop_begin +; CHECK: inner_loop_begin: +; CHECK-NEXT: %[[A_INNER_PHI:.*]] = phi i32 [ %[[A]], %loop_begin ], [ %[[A2:.*]], %inner_inner_loop_exit ] +; CHECK-NEXT: %[[COND:.*]] = load i1, i1* %cond.ptr +; CHECK-NEXT: %[[B:.*]] = load i32, i32* %b.ptr +; CHECK-NEXT: br i1 %[[COND]], label %inner_loop_begin.split.us, label %inner_loop_begin.split + +inner_inner_loop_begin: + %v1 = load i1, i1* %ptr + br i1 %v1, label %inner_inner_loop_a, label %inner_inner_loop_b + +inner_inner_loop_a: + %v2 = load i1, i1* %ptr + br i1 %v2, label %inner_inner_loop_latch, label %inner_loop_exit + +inner_inner_loop_b: + br i1 %cond, label %inner_inner_loop_exit, label %inner_inner_loop_latch + +inner_inner_loop_latch: + br label %inner_inner_loop_begin +; The cloned region is similar to before but with one earlier exit. 
+; +; CHECK: inner_loop_begin.split.us: +; CHECK-NEXT: br label %inner_inner_loop_begin.us +; +; CHECK: inner_inner_loop_begin.us: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %inner_inner_loop_a.us, label %inner_inner_loop_b.us +; +; CHECK: inner_inner_loop_b.us: +; CHECK-NEXT: br label %inner_inner_loop_exit.split.us +; +; CHECK: inner_inner_loop_a.us: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %inner_inner_loop_latch.us, label %inner_loop_exit.loopexit.split.us +; +; CHECK: inner_inner_loop_latch.us: +; CHECK-NEXT: br label %inner_inner_loop_begin.us +; +; CHECK: inner_inner_loop_exit.split.us: +; CHECK-NEXT: br label %inner_inner_loop_exit +; +; CHECK: inner_loop_exit.loopexit.split.us: +; CHECK-NEXT: %[[A_INNER_LCSSA_US:.*]] = phi i32 [ %[[A_INNER_PHI]], %inner_inner_loop_a.us ] +; CHECK-NEXT: br label %inner_loop_exit.loopexit +; +; The original region is now an exit in the preheader. +; +; CHECK: inner_loop_begin.split: +; CHECK-NEXT: %[[A_INNER_INNER_LCSSA:.*]] = phi i32 [ %[[A_INNER_PHI]], %inner_loop_begin ] +; CHECK-NEXT: br label %inner_inner_loop_begin +; +; CHECK: inner_inner_loop_begin: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %inner_inner_loop_a, label %inner_inner_loop_b +; +; CHECK: inner_inner_loop_a: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %inner_inner_loop_latch, label %inner_loop_exit.loopexit.split +; +; CHECK: inner_inner_loop_b: +; CHECK-NEXT: br label %inner_inner_loop_latch +; +; CHECK: inner_inner_loop_latch: +; CHECK-NEXT: br label %inner_inner_loop_begin + +inner_inner_loop_exit: + %a2 = load i32, i32* %a.ptr + %v4 = load i1, i1* %ptr + br i1 %v4, label %inner_loop_exit, label %inner_loop_begin +; CHECK: inner_inner_loop_exit: +; CHECK-NEXT: %[[A2]] = load i32, i32* %a.ptr +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %inner_loop_exit.loopexit1, label 
%inner_loop_begin + +inner_loop_exit: + %v5 = load i1, i1* %ptr + br i1 %v5, label %loop_exit, label %loop_begin +; CHECK: inner_loop_exit.loopexit.split: +; CHECK-NEXT: %[[A_INNER_LCSSA:.*]] = phi i32 [ %[[A_INNER_INNER_LCSSA]], %inner_inner_loop_a ] +; CHECK-NEXT: br label %inner_loop_exit.loopexit +; +; CHECK: inner_loop_exit.loopexit: +; CHECK-NEXT: %[[A_INNER_US_PHI:.*]] = phi i32 [ %[[A_INNER_LCSSA]], %inner_loop_exit.loopexit.split ], [ %[[A_INNER_LCSSA_US]], %inner_loop_exit.loopexit.split.us ] +; CHECK-NEXT: br label %inner_loop_exit +; +; CHECK: inner_loop_exit.loopexit1: +; CHECK-NEXT: %[[A_INNER_LCSSA2:.*]] = phi i32 [ %[[A_INNER_PHI]], %inner_inner_loop_exit ] +; CHECK-NEXT: br label %inner_loop_exit +; +; CHECK: inner_loop_exit: +; CHECK-NEXT: %[[A_INNER_PHI:.*]] = phi i32 [ %[[A_INNER_LCSSA2]], %inner_loop_exit.loopexit1 ], [ %[[A_INNER_US_PHI]], %inner_loop_exit.loopexit ] +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %loop_exit, label %loop_begin + +loop_exit: + %a.lcssa = phi i32 [ %a.phi, %inner_loop_exit ] + ret i32 %a.lcssa +; CHECK: loop_exit: +; CHECK-NEXT: %[[A_LCSSA:.*]] = phi i32 [ %[[A_INNER_PHI]], %inner_loop_exit ] +; CHECK-NEXT: ret i32 %[[A_LCSSA]] +} + +; Test for when unswitching produces a clone of an inner loop but +; the clone no longer has an exiting edge *at all* and loops infinitely. +; Because it doesn't ever exit to the outer loop it is no longer an inner loop +; but needs to be hoisted up the nest to be a top-level loop. 
+define i32 @test9a(i1* %ptr, i1* %cond.ptr, i32* %a.ptr, i32* %b.ptr) { +; CHECK-LABEL: @test9a( +entry: + br label %loop_begin +; CHECK-NEXT: entry: +; CHECK-NEXT: br label %loop_begin + +loop_begin: + %b = load i32, i32* %b.ptr + %cond = load i1, i1* %cond.ptr + br label %inner_loop_begin +; CHECK: loop_begin: +; CHECK-NEXT: %[[B:.*]] = load i32, i32* %b.ptr +; CHECK-NEXT: %[[COND:.*]] = load i1, i1* %cond.ptr +; CHECK-NEXT: br i1 %[[COND]], label %loop_begin.split.us, label %loop_begin.split + +inner_loop_begin: + %a = load i32, i32* %a.ptr + br i1 %cond, label %inner_loop_latch, label %inner_loop_exit + +inner_loop_latch: + call void @sink1(i32 %b) + br label %inner_loop_begin +; The cloned inner loop ends up as an infinite loop and thus being a top-level +; loop with the preheader as an exit block of the outer loop. +; +; CHECK: loop_begin.split.us: +; CHECK-NEXT: %[[B_LCSSA:.*]] = phi i32 [ %[[B]], %loop_begin ] +; CHECK-NEXT: br label %inner_loop_begin.us +; +; CHECK: inner_loop_begin.us: +; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr +; CHECK-NEXT: br label %inner_loop_latch.us +; +; CHECK: inner_loop_latch.us: +; CHECK-NEXT: call void @sink1(i32 %[[B_LCSSA]]) +; CHECK-NEXT: br label %inner_loop_begin.us +; +; The original loop becomes boring non-loop code. 
+; +; CHECK: loop_begin.split: +; CHECK-NEXT: br label %inner_loop_begin +; +; CHECK: inner_loop_begin: +; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr +; CHECK-NEXT: br label %inner_loop_exit + +inner_loop_exit: + %a.inner_lcssa = phi i32 [ %a, %inner_loop_begin ] + %v = load i1, i1* %ptr + br i1 %v, label %loop_begin, label %loop_exit +; CHECK: inner_loop_exit: +; CHECK-NEXT: %[[A_INNER_LCSSA:.*]] = phi i32 [ %[[A]], %inner_loop_begin ] +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %loop_begin, label %loop_exit + +loop_exit: + %a.lcssa = phi i32 [ %a.inner_lcssa, %inner_loop_exit ] + ret i32 %a.lcssa +; CHECK: loop_exit: +; CHECK-NEXT: %[[A_LCSSA:.*]] = phi i32 [ %[[A_INNER_LCSSA]], %inner_loop_exit ] +; CHECK-NEXT: ret i32 %[[A_LCSSA]] +} + +; The same core pattern as @test9a, but instead of the cloned loop becoming an +; infinite loop, the original loop has its only exit unswitched and the +; original loop becomes infinite and must be hoisted out of the loop nest. +define i32 @test9b(i1* %ptr, i1* %cond.ptr, i32* %a.ptr, i32* %b.ptr) { +; CHECK-LABEL: @test9b( +entry: + br label %loop_begin +; CHECK-NEXT: entry: +; CHECK-NEXT: br label %loop_begin + +loop_begin: + %b = load i32, i32* %b.ptr + %cond = load i1, i1* %cond.ptr + br label %inner_loop_begin +; CHECK: loop_begin: +; CHECK-NEXT: %[[B:.*]] = load i32, i32* %b.ptr +; CHECK-NEXT: %[[COND:.*]] = load i1, i1* %cond.ptr +; CHECK-NEXT: br i1 %[[COND]], label %loop_begin.split.us, label %loop_begin.split + +inner_loop_begin: + %a = load i32, i32* %a.ptr + br i1 %cond, label %inner_loop_exit, label %inner_loop_latch + +inner_loop_latch: + call void @sink1(i32 %b) + br label %inner_loop_begin +; The cloned inner loop becomes a boring non-loop. 
+; +; CHECK: loop_begin.split.us: +; CHECK-NEXT: br label %inner_loop_begin.us +; +; CHECK: inner_loop_begin.us: +; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr +; CHECK-NEXT: br label %inner_loop_exit.split.us +; +; CHECK: inner_loop_exit.split.us: +; CHECK-NEXT: %[[A_INNER_LCSSA_US:.*]] = phi i32 [ %[[A]], %inner_loop_begin.us ] +; CHECK-NEXT: br label %inner_loop_exit +; +; The original loop becomes an infinite loop and thus a top-level loop with the +; preheader as an exit block for the outer loop. +; +; CHECK: loop_begin.split: +; CHECK-NEXT: %[[B_LCSSA:.*]] = phi i32 [ %[[B]], %loop_begin ] +; CHECK-NEXT: br label %inner_loop_begin +; +; CHECK: inner_loop_begin: +; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr +; CHECK-NEXT: br label %inner_loop_latch +; +; CHECK: inner_loop_latch: +; CHECK-NEXT: call void @sink1(i32 %[[B_LCSSA]]) +; CHECK-NEXT: br label %inner_loop_begin + +inner_loop_exit: + %a.inner_lcssa = phi i32 [ %a, %inner_loop_begin ] + %v = load i1, i1* %ptr + br i1 %v, label %loop_begin, label %loop_exit +; CHECK: inner_loop_exit: +; CHECK-NEXT: %[[A_INNER_LCSSA:.*]] = phi i32 [ %[[A_INNER_LCSSA_US]], %inner_loop_exit.split.us ] +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %loop_begin, label %loop_exit + +loop_exit: + %a.lcssa = phi i32 [ %a.inner_lcssa, %inner_loop_exit ] + ret i32 %a.lcssa +; CHECK: loop_exit: +; CHECK-NEXT: %[[A_LCSSA:.*]] = phi i32 [ %[[A_INNER_LCSSA]], %inner_loop_exit ] +; CHECK-NEXT: ret i32 %[[A_LCSSA]] +} + +; Test that requires re-forming dedicated exits for the cloned loop. 
+define i32 @test10a(i1* %ptr, i1 %cond, i32* %a.ptr) { +; CHECK-LABEL: @test10a( +entry: + br label %loop_begin +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 %cond, label %entry.split.us, label %entry.split + +loop_begin: + %a = load i32, i32* %a.ptr + %v1 = load i1, i1* %ptr + br i1 %v1, label %loop_a, label %loop_b + +loop_a: + %v2 = load i1, i1* %ptr + br i1 %v2, label %loop_exit, label %loop_begin + +loop_b: + br i1 %cond, label %loop_exit, label %loop_begin +; The cloned loop with one edge as a direct exit. +; +; CHECK: entry.split.us: +; CHECK-NEXT: br label %loop_begin.us +; +; CHECK: loop_begin.us: +; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %loop_a.us, label %loop_b.us +; +; CHECK: loop_b.us: +; CHECK-NEXT: %[[A_LCSSA_B:.*]] = phi i32 [ %[[A]], %loop_begin.us ] +; CHECK-NEXT: br label %loop_exit.split.us +; +; CHECK: loop_a.us: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %loop_exit.split.us.loopexit, label %loop_begin.backedge.us +; +; CHECK: loop_begin.backedge.us: +; CHECK-NEXT: br label %loop_begin.us +; +; CHECK: loop_exit.split.us.loopexit: +; CHECK-NEXT: %[[A_LCSSA_A:.*]] = phi i32 [ %[[A]], %loop_a.us ] +; CHECK-NEXT: br label %loop_exit +; +; CHECK: loop_exit.split.us: +; CHECK-NEXT: %[[A_PHI_US:.*]] = phi i32 [ %[[A_LCSSA_B]], %loop_b.us ], [ %[[A_LCSSA_A]], %loop_exit.split.us.loopexit ] +; CHECK-NEXT: br label %loop_exit + +; The original loop without one 'loop_exit' edge. 
+; +; CHECK: entry.split: +; CHECK-NEXT: br label %loop_begin +; +; CHECK: loop_begin: +; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %loop_a, label %loop_b +; +; CHECK: loop_a: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %loop_exit.split, label %loop_begin.backedge +; +; CHECK: loop_begin.backedge: +; CHECK-NEXT: br label %loop_begin +; +; CHECK: loop_b: +; CHECK-NEXT: br label %loop_begin.backedge +; +; CHECK: loop_exit.split: +; CHECK-NEXT: %[[A_LCSSA:.*]] = phi i32 [ %[[A]], %loop_a ] +; CHECK-NEXT: br label %loop_exit + +loop_exit: + %a.lcssa = phi i32 [ %a, %loop_a ], [ %a, %loop_b ] + ret i32 %a.lcssa +; CHECK: loop_exit: +; CHECK-NEXT: %[[A_PHI:.*]] = phi i32 [ %[[A_LCSSA]], %loop_exit.split ], [ %[[A_PHI_US]], %loop_exit.split.us ] +; CHECK-NEXT: ret i32 %[[A_PHI]] +} + +; Test that requires re-forming dedicated exits for the original loop. +define i32 @test10b(i1* %ptr, i1 %cond, i32* %a.ptr) { +; CHECK-LABEL: @test10b( +entry: + br label %loop_begin +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 %cond, label %entry.split.us, label %entry.split + +loop_begin: + %a = load i32, i32* %a.ptr + %v1 = load i1, i1* %ptr + br i1 %v1, label %loop_a, label %loop_b + +loop_a: + %v2 = load i1, i1* %ptr + br i1 %v2, label %loop_begin, label %loop_exit + +loop_b: + br i1 %cond, label %loop_begin, label %loop_exit +; The cloned loop without one of the exits. 
+; +; CHECK: entry.split.us: +; CHECK-NEXT: br label %loop_begin.us +; +; CHECK: loop_begin.us: +; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %loop_a.us, label %loop_b.us +; +; CHECK: loop_b.us: +; CHECK-NEXT: br label %loop_begin.backedge.us +; +; CHECK: loop_a.us: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %loop_begin.backedge.us, label %loop_exit.split.us +; +; CHECK: loop_begin.backedge.us: +; CHECK-NEXT: br label %loop_begin.us +; +; CHECK: loop_exit.split.us: +; CHECK-NEXT: %[[A_LCSSA_US:.*]] = phi i32 [ %[[A]], %loop_a.us ] +; CHECK-NEXT: br label %loop_exit + +; The original loop with one edge as a direct exit. +; +; CHECK: entry.split: +; CHECK-NEXT: br label %loop_begin +; +; CHECK: loop_begin: +; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %loop_a, label %loop_b +; +; CHECK: loop_a: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %loop_begin.backedge, label %loop_exit.split.loopexit +; +; CHECK: loop_begin.backedge: +; CHECK-NEXT: br label %loop_begin +; +; CHECK: loop_b: +; CHECK-NEXT: %[[A_LCSSA_B:.*]] = phi i32 [ %[[A]], %loop_begin ] +; CHECK-NEXT: br label %loop_exit.split +; +; CHECK: loop_exit.split.loopexit: +; CHECK-NEXT: %[[A_LCSSA_A:.*]] = phi i32 [ %[[A]], %loop_a ] +; CHECK-NEXT: br label %loop_exit.split +; +; CHECK: loop_exit.split: +; CHECK-NEXT: %[[A_PHI_SPLIT:.*]] = phi i32 [ %[[A_LCSSA_B]], %loop_b ], [ %[[A_LCSSA_A]], %loop_exit.split.loopexit ] +; CHECK-NEXT: br label %loop_exit + +loop_exit: + %a.lcssa = phi i32 [ %a, %loop_a ], [ %a, %loop_b ] + ret i32 %a.lcssa +; CHECK: loop_exit: +; CHECK-NEXT: %[[A_PHI:.*]] = phi i32 [ %[[A_PHI_SPLIT]], %loop_exit.split ], [ %[[A_LCSSA_US]], %loop_exit.split.us ] +; CHECK-NEXT: ret i32 %[[A_PHI]] +} + +; Check that if a cloned inner loop after unswitching doesn't 
loop and directly +; exits even an outer loop, we don't add the cloned preheader to the outer +; loop and do add the needed LCSSA phi nodes for the new exit block from the +; outer loop. +define i32 @test11a(i1* %ptr, i1* %cond.ptr, i32* %a.ptr, i32* %b.ptr) { +; CHECK-LABEL: @test11a( +entry: + br label %loop_begin +; CHECK-NEXT: entry: +; CHECK-NEXT: br label %loop_begin + +loop_begin: + %b = load i32, i32* %b.ptr + %v1 = load i1, i1* %ptr + br i1 %v1, label %loop_latch, label %inner_loop_ph +; CHECK: loop_begin: +; CHECK-NEXT: %[[B:.*]] = load i32, i32* %b.ptr +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %loop_latch, label %inner_loop_ph + +inner_loop_ph: + %cond = load i1, i1* %cond.ptr + br label %inner_loop_begin +; CHECK: inner_loop_ph: +; CHECK-NEXT: %[[COND:.*]] = load i1, i1* %cond.ptr +; CHECK-NEXT: br i1 %[[COND]], label %inner_loop_ph.split.us, label %inner_loop_ph.split + +inner_loop_begin: + call void @sink1(i32 %b) + %a = load i32, i32* %a.ptr + br i1 %cond, label %loop_exit, label %inner_loop_a + +inner_loop_a: + %v2 = load i1, i1* %ptr + br i1 %v2, label %inner_loop_exit, label %inner_loop_begin +; The cloned path doesn't actually loop and is an exit from the outer loop as +; well. +; +; CHECK: inner_loop_ph.split.us: +; CHECK-NEXT: %[[B_LCSSA:.*]] = phi i32 [ %[[B]], %inner_loop_ph ] +; CHECK-NEXT: br label %inner_loop_begin.us +; +; CHECK: inner_loop_begin.us: +; CHECK-NEXT: call void @sink1(i32 %[[B_LCSSA]]) +; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr +; CHECK-NEXT: br label %loop_exit.loopexit.split.us +; +; CHECK: loop_exit.loopexit.split.us: +; CHECK-NEXT: %[[A_INNER_LCSSA_US:.*]] = phi i32 [ %[[A]], %inner_loop_begin.us ] +; CHECK-NEXT: br label %loop_exit.loopexit +; +; The original remains a loop losing the exit edge. 
+; +; CHECK: inner_loop_ph.split: +; CHECK-NEXT: br label %inner_loop_begin +; +; CHECK: inner_loop_begin: +; CHECK-NEXT: call void @sink1(i32 %[[B]]) +; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr +; CHECK-NEXT: br label %inner_loop_a +; +; CHECK: inner_loop_a: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %inner_loop_exit, label %inner_loop_begin + +inner_loop_exit: + %a.inner_lcssa = phi i32 [ %a, %inner_loop_a ] + %v3 = load i1, i1* %ptr + br i1 %v3, label %loop_latch, label %loop_exit +; CHECK: inner_loop_exit: +; CHECK-NEXT: %[[A_INNER_LCSSA:.*]] = phi i32 [ %[[A]], %inner_loop_a ] +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %loop_latch, label %loop_exit.loopexit1 + +loop_latch: + br label %loop_begin +; CHECK: loop_latch: +; CHECK-NEXT: br label %loop_begin + +loop_exit: + %a.lcssa = phi i32 [ %a, %inner_loop_begin ], [ %a.inner_lcssa, %inner_loop_exit ] + ret i32 %a.lcssa +; CHECK: loop_exit.loopexit: +; CHECK-NEXT: %[[A_LCSSA_US:.*]] = phi i32 [ %[[A_INNER_LCSSA_US]], %loop_exit.loopexit.split.us ] +; CHECK-NEXT: br label %loop_exit +; +; CHECK: loop_exit.loopexit1: +; CHECK-NEXT: %[[A_LCSSA:.*]] = phi i32 [ %[[A_INNER_LCSSA]], %inner_loop_exit ] +; CHECK-NEXT: br label %loop_exit +; +; CHECK: loop_exit: +; CHECK-NEXT: %[[A_PHI:.*]] = phi i32 [ %[[A_LCSSA_US]], %loop_exit.loopexit ], [ %[[A_LCSSA]], %loop_exit.loopexit1 ] +; CHECK-NEXT: ret i32 %[[A_PHI]] +} + +; Check that if the original inner loop after unswitching doesn't loop and +; directly exits even an outer loop, we remove the original preheader from the +; outer loop and add needed LCSSA phi nodes for the new exit block from the +; outer loop. 
+define i32 @test11b(i1* %ptr, i1* %cond.ptr, i32* %a.ptr, i32* %b.ptr) { +; CHECK-LABEL: @test11b( +entry: + br label %loop_begin +; CHECK-NEXT: entry: +; CHECK-NEXT: br label %loop_begin + +loop_begin: + %b = load i32, i32* %b.ptr + %v1 = load i1, i1* %ptr + br i1 %v1, label %loop_latch, label %inner_loop_ph +; CHECK: loop_begin: +; CHECK-NEXT: %[[B:.*]] = load i32, i32* %b.ptr +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %loop_latch, label %inner_loop_ph + +inner_loop_ph: + %cond = load i1, i1* %cond.ptr + br label %inner_loop_begin +; CHECK: inner_loop_ph: +; CHECK-NEXT: %[[COND:.*]] = load i1, i1* %cond.ptr +; CHECK-NEXT: br i1 %[[COND]], label %inner_loop_ph.split.us, label %inner_loop_ph.split + +inner_loop_begin: + call void @sink1(i32 %b) + %a = load i32, i32* %a.ptr + br i1 %cond, label %inner_loop_a, label %loop_exit + +inner_loop_a: + %v2 = load i1, i1* %ptr + br i1 %v2, label %inner_loop_exit, label %inner_loop_begin +; The cloned path continues to loop without the exit out of the entire nest. +; +; CHECK: inner_loop_ph.split.us: +; CHECK-NEXT: br label %inner_loop_begin.us +; +; CHECK: inner_loop_begin.us: +; CHECK-NEXT: call void @sink1(i32 %[[B]]) +; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr +; CHECK-NEXT: br label %inner_loop_a.us +; +; CHECK: inner_loop_a.us: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %inner_loop_exit.split.us, label %inner_loop_begin.us +; +; CHECK: inner_loop_exit.split.us: +; CHECK-NEXT: %[[A_INNER_LCSSA_US:.*]] = phi i32 [ %[[A]], %inner_loop_a.us ] +; CHECK-NEXT: br label %inner_loop_exit +; +; The original path doesn't actually loop and is an exit from the outer loop as well. 
+; +; CHECK: inner_loop_ph.split: +; CHECK-NEXT: %[[B_LCSSA:.*]] = phi i32 [ %[[B]], %inner_loop_ph ] +; CHECK-NEXT: br label %inner_loop_begin +; +; CHECK: inner_loop_begin: +; CHECK-NEXT: call void @sink1(i32 %[[B_LCSSA]]) +; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr +; CHECK-NEXT: br label %loop_exit.loopexit + +inner_loop_exit: + %a.inner_lcssa = phi i32 [ %a, %inner_loop_a ] + %v3 = load i1, i1* %ptr + br i1 %v3, label %loop_latch, label %loop_exit +; CHECK: inner_loop_exit: +; CHECK-NEXT: %[[A_INNER_PHI:.*]] = phi i32 [ %[[A_INNER_LCSSA_US]], %inner_loop_exit.split.us ] +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %loop_latch, label %loop_exit.loopexit1 + +loop_latch: + br label %loop_begin +; CHECK: loop_latch: +; CHECK-NEXT: br label %loop_begin + +loop_exit: + %a.lcssa = phi i32 [ %a, %inner_loop_begin ], [ %a.inner_lcssa, %inner_loop_exit ] + ret i32 %a.lcssa +; CHECK: loop_exit.loopexit: +; CHECK-NEXT: %[[A_LCSSA:.*]] = phi i32 [ %[[A]], %inner_loop_begin ] +; CHECK-NEXT: br label %loop_exit +; +; CHECK: loop_exit.loopexit1: +; CHECK-NEXT: %[[A_LCSSA_US:.*]] = phi i32 [ %[[A_INNER_PHI]], %inner_loop_exit ] +; CHECK-NEXT: br label %loop_exit +; +; CHECK: loop_exit: +; CHECK-NEXT: %[[A_PHI:.*]] = phi i32 [ %[[A_LCSSA]], %loop_exit.loopexit ], [ %[[A_LCSSA_US]], %loop_exit.loopexit1 ] +; CHECK-NEXT: ret i32 %[[A_PHI]] +} + +; Like test11a, but checking that when the whole thing is wrapped in yet +; another loop, we correctly attribute the cloned preheader to that outermost +; loop rather than only handling the case where the preheader is not in any loop +; at all. 
+define i32 @test12a(i1* %ptr, i1* %cond.ptr, i32* %a.ptr, i32* %b.ptr) { +; CHECK-LABEL: @test12a( +entry: + br label %loop_begin +; CHECK-NEXT: entry: +; CHECK-NEXT: br label %loop_begin + +loop_begin: + br label %inner_loop_begin +; CHECK: loop_begin: +; CHECK-NEXT: br label %inner_loop_begin + +inner_loop_begin: + %b = load i32, i32* %b.ptr + %v1 = load i1, i1* %ptr + br i1 %v1, label %inner_loop_latch, label %inner_inner_loop_ph +; CHECK: inner_loop_begin: +; CHECK-NEXT: %[[B:.*]] = load i32, i32* %b.ptr +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %inner_loop_latch, label %inner_inner_loop_ph + +inner_inner_loop_ph: + %cond = load i1, i1* %cond.ptr + br label %inner_inner_loop_begin +; CHECK: inner_inner_loop_ph: +; CHECK-NEXT: %[[COND:.*]] = load i1, i1* %cond.ptr +; CHECK-NEXT: br i1 %[[COND]], label %inner_inner_loop_ph.split.us, label %inner_inner_loop_ph.split + +inner_inner_loop_begin: + call void @sink1(i32 %b) + %a = load i32, i32* %a.ptr + br i1 %cond, label %inner_loop_exit, label %inner_inner_loop_a + +inner_inner_loop_a: + %v2 = load i1, i1* %ptr + br i1 %v2, label %inner_inner_loop_exit, label %inner_inner_loop_begin +; The cloned path doesn't actually loop and is an exit from the outer loop as +; well. +; +; CHECK: inner_inner_loop_ph.split.us: +; CHECK-NEXT: %[[B_LCSSA:.*]] = phi i32 [ %[[B]], %inner_inner_loop_ph ] +; CHECK-NEXT: br label %inner_inner_loop_begin.us +; +; CHECK: inner_inner_loop_begin.us: +; CHECK-NEXT: call void @sink1(i32 %[[B_LCSSA]]) +; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr +; CHECK-NEXT: br label %inner_loop_exit.loopexit.split.us +; +; CHECK: inner_loop_exit.loopexit.split.us: +; CHECK-NEXT: %[[A_INNER_INNER_LCSSA_US:.*]] = phi i32 [ %[[A]], %inner_inner_loop_begin.us ] +; CHECK-NEXT: br label %inner_loop_exit.loopexit +; +; The original remains a loop losing the exit edge. 
+; +; CHECK: inner_inner_loop_ph.split: +; CHECK-NEXT: br label %inner_inner_loop_begin +; +; CHECK: inner_inner_loop_begin: +; CHECK-NEXT: call void @sink1(i32 %[[B]]) +; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr +; CHECK-NEXT: br label %inner_inner_loop_a +; +; CHECK: inner_inner_loop_a: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %inner_inner_loop_exit, label %inner_inner_loop_begin + +inner_inner_loop_exit: + %a.inner_inner_lcssa = phi i32 [ %a, %inner_inner_loop_a ] + %v3 = load i1, i1* %ptr + br i1 %v3, label %inner_loop_latch, label %inner_loop_exit +; CHECK: inner_inner_loop_exit: +; CHECK-NEXT: %[[A_INNER_INNER_LCSSA:.*]] = phi i32 [ %[[A]], %inner_inner_loop_a ] +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %inner_loop_latch, label %inner_loop_exit.loopexit1 + +inner_loop_latch: + br label %inner_loop_begin +; CHECK: inner_loop_latch: +; CHECK-NEXT: br label %inner_loop_begin + +inner_loop_exit: + %a.inner_lcssa = phi i32 [ %a, %inner_inner_loop_begin ], [ %a.inner_inner_lcssa, %inner_inner_loop_exit ] + %v4 = load i1, i1* %ptr + br i1 %v4, label %loop_begin, label %loop_exit +; CHECK: inner_loop_exit.loopexit: +; CHECK-NEXT: %[[A_INNER_LCSSA_US:.*]] = phi i32 [ %[[A_INNER_INNER_LCSSA_US]], %inner_loop_exit.loopexit.split.us ] +; CHECK-NEXT: br label %inner_loop_exit +; +; CHECK: inner_loop_exit.loopexit1: +; CHECK-NEXT: %[[A_INNER_LCSSA:.*]] = phi i32 [ %[[A_INNER_INNER_LCSSA]], %inner_inner_loop_exit ] +; CHECK-NEXT: br label %inner_loop_exit +; +; CHECK: inner_loop_exit: +; CHECK-NEXT: %[[A_INNER_PHI:.*]] = phi i32 [ %[[A_INNER_LCSSA_US]], %inner_loop_exit.loopexit ], [ %[[A_INNER_LCSSA]], %inner_loop_exit.loopexit1 ] +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %loop_begin, label %loop_exit + +loop_exit: + %a.lcssa = phi i32 [ %a.inner_lcssa, %inner_loop_exit ] + ret i32 %a.lcssa +; CHECK: loop_exit: +; CHECK-NEXT: %[[A_LCSSA:.*]] = phi i32 [ 
%[[A_INNER_PHI]], %inner_loop_exit ] +; CHECK-NEXT: ret i32 %[[A_LCSSA]] +} + +; Like test11b, but checking that when the whole thing is wrapped in yet +; another loop, we correctly sink the preheader to the outermost loop rather +; than only handling the case where the preheader is completely removed from +; a loop. +define i32 @test12b(i1* %ptr, i1* %cond.ptr, i32* %a.ptr, i32* %b.ptr) { +; CHECK-LABEL: @test12b( +entry: + br label %loop_begin +; CHECK-NEXT: entry: +; CHECK-NEXT: br label %loop_begin + +loop_begin: + br label %inner_loop_begin +; CHECK: loop_begin: +; CHECK-NEXT: br label %inner_loop_begin + +inner_loop_begin: + %b = load i32, i32* %b.ptr + %v1 = load i1, i1* %ptr + br i1 %v1, label %inner_loop_latch, label %inner_inner_loop_ph +; CHECK: inner_loop_begin: +; CHECK-NEXT: %[[B:.*]] = load i32, i32* %b.ptr +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %inner_loop_latch, label %inner_inner_loop_ph + +inner_inner_loop_ph: + %cond = load i1, i1* %cond.ptr + br label %inner_inner_loop_begin +; CHECK: inner_inner_loop_ph: +; CHECK-NEXT: %[[COND:.*]] = load i1, i1* %cond.ptr +; CHECK-NEXT: br i1 %[[COND]], label %inner_inner_loop_ph.split.us, label %inner_inner_loop_ph.split + +inner_inner_loop_begin: + call void @sink1(i32 %b) + %a = load i32, i32* %a.ptr + br i1 %cond, label %inner_inner_loop_a, label %inner_loop_exit + +inner_inner_loop_a: + %v2 = load i1, i1* %ptr + br i1 %v2, label %inner_inner_loop_exit, label %inner_inner_loop_begin +; The cloned path continues to loop without the exit out of the entire nest. 
+; +; CHECK: inner_inner_loop_ph.split.us: +; CHECK-NEXT: br label %inner_inner_loop_begin.us +; +; CHECK: inner_inner_loop_begin.us: +; CHECK-NEXT: call void @sink1(i32 %[[B]]) +; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr +; CHECK-NEXT: br label %inner_inner_loop_a.us +; +; CHECK: inner_inner_loop_a.us: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %inner_inner_loop_exit.split.us, label %inner_inner_loop_begin.us +; +; CHECK: inner_inner_loop_exit.split.us: +; CHECK-NEXT: %[[A_INNER_INNER_LCSSA_US:.*]] = phi i32 [ %[[A]], %inner_inner_loop_a.us ] +; CHECK-NEXT: br label %inner_inner_loop_exit +; +; The original path doesn't actually loop and is an exit from the enclosing inner loop as well. +; +; CHECK: inner_inner_loop_ph.split: +; CHECK-NEXT: %[[B_LCSSA:.*]] = phi i32 [ %[[B]], %inner_inner_loop_ph ] +; CHECK-NEXT: br label %inner_inner_loop_begin +; +; CHECK: inner_inner_loop_begin: +; CHECK-NEXT: call void @sink1(i32 %[[B_LCSSA]]) +; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr +; CHECK-NEXT: br label %inner_loop_exit.loopexit + +inner_inner_loop_exit: + %a.inner_inner_lcssa = phi i32 [ %a, %inner_inner_loop_a ] + %v3 = load i1, i1* %ptr + br i1 %v3, label %inner_loop_latch, label %inner_loop_exit +; CHECK: inner_inner_loop_exit: +; CHECK-NEXT: %[[A_INNER_INNER_PHI:.*]] = phi i32 [ %[[A_INNER_INNER_LCSSA_US]], %inner_inner_loop_exit.split.us ] +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %inner_loop_latch, label %inner_loop_exit.loopexit1 + +inner_loop_latch: + br label %inner_loop_begin +; CHECK: inner_loop_latch: +; CHECK-NEXT: br label %inner_loop_begin + +inner_loop_exit: + %a.inner_lcssa = phi i32 [ %a, %inner_inner_loop_begin ], [ %a.inner_inner_lcssa, %inner_inner_loop_exit ] + %v4 = load i1, i1* %ptr + br i1 %v4, label %loop_begin, label %loop_exit +; CHECK: inner_loop_exit.loopexit: +; CHECK-NEXT: %[[A_INNER_LCSSA:.*]] = phi i32 [ %[[A]], %inner_inner_loop_begin ] +; CHECK-NEXT: br label %inner_loop_exit +; +; CHECK: 
inner_loop_exit.loopexit1: +; CHECK-NEXT: %[[A_INNER_LCSSA_US:.*]] = phi i32 [ %[[A_INNER_INNER_PHI]], %inner_inner_loop_exit ] +; CHECK-NEXT: br label %inner_loop_exit +; +; CHECK: inner_loop_exit: +; CHECK-NEXT: %[[A_INNER_PHI:.*]] = phi i32 [ %[[A_INNER_LCSSA]], %inner_loop_exit.loopexit ], [ %[[A_INNER_LCSSA_US]], %inner_loop_exit.loopexit1 ] +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %loop_begin, label %loop_exit + +loop_exit: + %a.lcssa = phi i32 [ %a.inner_lcssa, %inner_loop_exit ] + ret i32 %a.lcssa +; CHECK: loop_exit: +; CHECK-NEXT: %[[A_LCSSA:.*]] = phi i32 [ %[[A_INNER_PHI]], %inner_loop_exit ] +; CHECK-NEXT: ret i32 %[[A_LCSSA]] +} + +; Test where the cloned loop has an inner loop that has to be traversed to form +; the cloned loop, and where this inner loop has multiple blocks, and where the +; exiting block that connects the inner loop to the cloned loop is not the header +; block. This ensures that we correctly handle interesting corner cases of +; traversing back to the header when establishing the cloned loop. 
+define i32 @test13a(i1* %ptr, i1 %cond, i32* %a.ptr, i32* %b.ptr) { +; CHECK-LABEL: @test13a( +entry: + br label %loop_begin +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 %cond, label %entry.split.us, label %entry.split + +loop_begin: + %a = load i32, i32* %a.ptr + %v1 = load i1, i1* %ptr + br i1 %v1, label %loop_a, label %loop_b + +loop_a: + %v2 = load i1, i1* %ptr + br i1 %v2, label %loop_exit, label %loop_latch + +loop_b: + %b = load i32, i32* %b.ptr + br i1 %cond, label %loop_b_inner_ph, label %loop_exit + +loop_b_inner_ph: + br label %loop_b_inner_header + +loop_b_inner_header: + %v3 = load i1, i1* %ptr + br i1 %v3, label %loop_b_inner_latch, label %loop_b_inner_body + +loop_b_inner_body: + %v4 = load i1, i1* %ptr + br i1 %v4, label %loop_b_inner_latch, label %loop_b_inner_exit + +loop_b_inner_latch: + br label %loop_b_inner_header + +loop_b_inner_exit: + br label %loop_latch + +loop_latch: + br label %loop_begin +; The cloned loop contains an inner loop within it. +; +; CHECK: entry.split.us: +; CHECK-NEXT: br label %loop_begin.us +; +; CHECK: loop_begin.us: +; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %loop_a.us, label %loop_b.us +; +; CHECK: loop_b.us: +; CHECK-NEXT: %[[B:.*]] = load i32, i32* %b.ptr +; CHECK-NEXT: br label %loop_b_inner_ph.us +; +; CHECK: loop_b_inner_ph.us: +; CHECK-NEXT: br label %loop_b_inner_header.us +; +; CHECK: loop_b_inner_header.us: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %loop_b_inner_latch.us, label %loop_b_inner_body.us +; +; CHECK: loop_b_inner_body.us: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %loop_b_inner_latch.us, label %loop_b_inner_exit.us +; +; CHECK: loop_b_inner_exit.us: +; CHECK-NEXT: br label %loop_latch.us +; +; CHECK: loop_b_inner_latch.us: +; CHECK-NEXT: br label %loop_b_inner_header.us +; +; CHECK: loop_a.us: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr 
+; CHECK-NEXT: br i1 %[[V]], label %loop_exit.split.us, label %loop_latch.us +; +; CHECK: loop_latch.us: +; CHECK-NEXT: br label %loop_begin.us +; +; CHECK: loop_exit.split.us: +; CHECK-NEXT: %[[A_LCSSA_US:.*]] = phi i32 [ %[[A]], %loop_a.us ] +; CHECK-NEXT: br label %loop_exit +; +; And the original loop no longer contains an inner loop. +; +; CHECK: entry.split: +; CHECK-NEXT: br label %loop_begin +; +; CHECK: loop_begin: +; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %loop_a, label %loop_b +; +; CHECK: loop_a: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %loop_exit.split.loopexit, label %loop_latch +; +; CHECK: loop_b: +; CHECK-NEXT: %[[B:.*]] = load i32, i32* %b.ptr +; CHECK-NEXT: br label %loop_exit.split +; +; CHECK: loop_latch: +; CHECK-NEXT: br label %loop_begin + +loop_exit: + %lcssa = phi i32 [ %a, %loop_a ], [ %b, %loop_b ] + ret i32 %lcssa +; CHECK: loop_exit.split.loopexit: +; CHECK-NEXT: %[[A_LCSSA:.*]] = phi i32 [ %[[A]], %loop_a ] +; CHECK-NEXT: br label %loop_exit.split +; +; CHECK: loop_exit.split: +; CHECK-NEXT: %[[AB_PHI:.*]] = phi i32 [ %[[B]], %loop_b ], [ %[[A_LCSSA]], %loop_exit.split.loopexit ] +; CHECK-NEXT: br label %loop_exit +; +; CHECK: loop_exit: +; CHECK-NEXT: %[[AB_PHI_US:.*]] = phi i32 [ %[[AB_PHI]], %loop_exit.split ], [ %[[A_LCSSA_US]], %loop_exit.split.us ] +; CHECK-NEXT: ret i32 %[[AB_PHI_US]] +} + +; Test where the original loop has an inner loop that has to be traversed to +; rebuild the loop, and where this inner loop has multiple blocks, and where +; the exiting block that connects the inner loop to the original loop is not +; the header block. This ensures that we correctly handle interesting corner +; cases of traversing back to the header when re-establishing the original loop +; still exists after unswitching. 
+define i32 @test13b(i1* %ptr, i1 %cond, i32* %a.ptr, i32* %b.ptr) { +; CHECK-LABEL: @test13b( +entry: + br label %loop_begin +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 %cond, label %entry.split.us, label %entry.split + +loop_begin: + %a = load i32, i32* %a.ptr + %v1 = load i1, i1* %ptr + br i1 %v1, label %loop_a, label %loop_b + +loop_a: + %v2 = load i1, i1* %ptr + br i1 %v2, label %loop_exit, label %loop_latch + +loop_b: + %b = load i32, i32* %b.ptr + br i1 %cond, label %loop_exit, label %loop_b_inner_ph + +loop_b_inner_ph: + br label %loop_b_inner_header + +loop_b_inner_header: + %v3 = load i1, i1* %ptr + br i1 %v3, label %loop_b_inner_latch, label %loop_b_inner_body + +loop_b_inner_body: + %v4 = load i1, i1* %ptr + br i1 %v4, label %loop_b_inner_latch, label %loop_b_inner_exit + +loop_b_inner_latch: + br label %loop_b_inner_header + +loop_b_inner_exit: + br label %loop_latch + +loop_latch: + br label %loop_begin +; The cloned loop doesn't contain an inner loop. +; +; CHECK: entry.split.us: +; CHECK-NEXT: br label %loop_begin.us +; +; CHECK: loop_begin.us: +; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %loop_a.us, label %loop_b.us +; +; CHECK: loop_b.us: +; CHECK-NEXT: %[[B:.*]] = load i32, i32* %b.ptr +; CHECK-NEXT: br label %loop_exit.split.us +; +; CHECK: loop_a.us: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %loop_exit.split.us.loopexit, label %loop_latch.us +; +; CHECK: loop_latch.us: +; CHECK-NEXT: br label %loop_begin.us +; +; CHECK: loop_exit.split.us.loopexit: +; CHECK-NEXT: %[[A_LCSSA_US:.*]] = phi i32 [ %[[A]], %loop_a.us ] +; CHECK-NEXT: br label %loop_exit.split.us +; +; CHECK: loop_exit.split.us: +; CHECK-NEXT: %[[AB_PHI_US:.*]] = phi i32 [ %[[B]], %loop_b.us ], [ %[[A_LCSSA_US]], %loop_exit.split.us.loopexit ] +; CHECK-NEXT: br label %loop_exit +; +; But the original loop contains an inner loop that must be traversed. +; +; CHECK: 
entry.split: +; CHECK-NEXT: br label %loop_begin +; +; CHECK: loop_begin: +; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %loop_a, label %loop_b +; +; CHECK: loop_a: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %loop_exit.split, label %loop_latch +; +; CHECK: loop_b: +; CHECK-NEXT: %[[B:.*]] = load i32, i32* %b.ptr +; CHECK-NEXT: br label %loop_b_inner_ph +; +; CHECK: loop_b_inner_ph: +; CHECK-NEXT: br label %loop_b_inner_header +; +; CHECK: loop_b_inner_header: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %loop_b_inner_latch, label %loop_b_inner_body +; +; CHECK: loop_b_inner_body: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %loop_b_inner_latch, label %loop_b_inner_exit +; +; CHECK: loop_b_inner_latch: +; CHECK-NEXT: br label %loop_b_inner_header +; +; CHECK: loop_b_inner_exit: +; CHECK-NEXT: br label %loop_latch +; +; CHECK: loop_latch: +; CHECK-NEXT: br label %loop_begin + +loop_exit: + %lcssa = phi i32 [ %a, %loop_a ], [ %b, %loop_b ] + ret i32 %lcssa +; CHECK: loop_exit.split: +; CHECK-NEXT: %[[A_LCSSA:.*]] = phi i32 [ %[[A]], %loop_a ] +; CHECK-NEXT: br label %loop_exit +; +; CHECK: loop_exit: +; CHECK-NEXT: %[[AB_PHI:.*]] = phi i32 [ %[[A_LCSSA]], %loop_exit.split ], [ %[[AB_PHI_US]], %loop_exit.split.us ] +; CHECK-NEXT: ret i32 %[[AB_PHI]] +} + +define i32 @test20(i32* %var, i32 %cond1, i32 %cond2) { +; CHECK-LABEL: @test20( +entry: + br label %loop_begin +; CHECK-NEXT: entry: +; CHECK-NEXT: br label %loop_begin + +loop_begin: + %var_val = load i32, i32* %var + switch i32 %cond2, label %loop_a [ + i32 0, label %loop_b + i32 1, label %loop_b + i32 13, label %loop_c + i32 2, label %loop_b + i32 42, label %loop_exit + ] +; CHECK: loop_begin: +; CHECK-NEXT: %[[V:.*]] = load i32, i32* %var +; CHECK-NEXT: switch i32 %cond2, label %loop_a [ +; CHECK-NEXT: i32 0, label %loop_b +; 
CHECK-NEXT: i32 1, label %loop_b +; CHECK-NEXT: i32 13, label %loop_c +; CHECK-NEXT: i32 2, label %loop_b +; CHECK-NEXT: i32 42, label %loop_exit +; CHECK-NEXT: ] + +loop_a: + call void @a() + br label %loop_latch +; CHECK: loop_a: +; CHECK-NEXT: call void @a() +; CHECK-NEXT: br label %loop_latch + +loop_b: + call void @b() + br label %loop_latch +; CHECK: loop_b: +; CHECK-NEXT: call void @b() +; CHECK-NEXT: br label %loop_latch + +loop_c: + call void @c() noreturn nounwind + br label %loop_latch +; CHECK: loop_c: +; CHECK-NEXT: call void @c() +; CHECK-NEXT: br label %loop_latch + +loop_latch: + br label %loop_begin +; CHECK: loop_latch: +; CHECK-NEXT: br label %loop_begin + +loop_exit: + %lcssa = phi i32 [ %var_val, %loop_begin ] + ret i32 %lcssa +; CHECK: loop_exit: +; CHECK-NEXT: %[[LCSSA:.*]] = phi i32 [ %[[V]], %loop_begin ] +; CHECK-NEXT: ret i32 %[[LCSSA]] +} |