summaryrefslogtreecommitdiffstats
path: root/llvm/test/Transforms/SimpleLoopUnswitch
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test/Transforms/SimpleLoopUnswitch')
-rw-r--r--llvm/test/Transforms/SimpleLoopUnswitch/2006-06-13-SingleEntryPHI.ll36
-rw-r--r--llvm/test/Transforms/SimpleLoopUnswitch/2006-06-27-DeadSwitchCase.ll32
-rw-r--r--llvm/test/Transforms/SimpleLoopUnswitch/2007-05-09-Unreachable.ll29
-rw-r--r--llvm/test/Transforms/SimpleLoopUnswitch/2007-05-09-tl.ll96
-rw-r--r--llvm/test/Transforms/SimpleLoopUnswitch/2007-07-12-ExitDomInfo.ll46
-rw-r--r--llvm/test/Transforms/SimpleLoopUnswitch/2007-07-13-DomInfo.ll28
-rw-r--r--llvm/test/Transforms/SimpleLoopUnswitch/2007-07-18-DomInfo.ll67
-rw-r--r--llvm/test/Transforms/SimpleLoopUnswitch/2007-08-01-Dom.ll30
-rw-r--r--llvm/test/Transforms/SimpleLoopUnswitch/2007-08-01-LCSSA.ll56
-rw-r--r--llvm/test/Transforms/SimpleLoopUnswitch/2007-10-04-DomFrontier.ll29
-rw-r--r--llvm/test/Transforms/SimpleLoopUnswitch/2008-06-02-DomInfo.ll27
-rw-r--r--llvm/test/Transforms/SimpleLoopUnswitch/2008-06-17-DomFrontier.ll22
-rw-r--r--llvm/test/Transforms/SimpleLoopUnswitch/2010-11-18-LCSSA.ll29
-rw-r--r--llvm/test/Transforms/SimpleLoopUnswitch/2011-06-02-CritSwitch.ll29
-rw-r--r--llvm/test/Transforms/SimpleLoopUnswitch/2011-09-26-EHCrash.ll64
-rw-r--r--llvm/test/Transforms/SimpleLoopUnswitch/2012-04-02-IndirectBr.ll42
-rw-r--r--llvm/test/Transforms/SimpleLoopUnswitch/2012-04-30-LoopUnswitch-LPad-Crash.ll97
-rw-r--r--llvm/test/Transforms/SimpleLoopUnswitch/2012-05-20-Phi.ll26
-rw-r--r--llvm/test/Transforms/SimpleLoopUnswitch/2015-09-18-Addrspace.ll29
-rw-r--r--llvm/test/Transforms/SimpleLoopUnswitch/LIV-loop-condtion.ll37
-rw-r--r--llvm/test/Transforms/SimpleLoopUnswitch/basictest.ll185
-rw-r--r--llvm/test/Transforms/SimpleLoopUnswitch/cleanuppad.ll45
-rw-r--r--llvm/test/Transforms/SimpleLoopUnswitch/copy-metadata.ll35
-rw-r--r--llvm/test/Transforms/SimpleLoopUnswitch/crash.ll67
-rw-r--r--llvm/test/Transforms/SimpleLoopUnswitch/delete-dead-blocks.ll101
-rw-r--r--llvm/test/Transforms/SimpleLoopUnswitch/exponential-behavior.ll52
-rw-r--r--llvm/test/Transforms/SimpleLoopUnswitch/exponential-nontrivial-unswitch-nested.ll139
-rw-r--r--llvm/test/Transforms/SimpleLoopUnswitch/exponential-nontrivial-unswitch-nested2.ll149
-rw-r--r--llvm/test/Transforms/SimpleLoopUnswitch/exponential-nontrivial-unswitch.ll80
-rw-r--r--llvm/test/Transforms/SimpleLoopUnswitch/exponential-nontrivial-unswitch2.ll56
-rw-r--r--llvm/test/Transforms/SimpleLoopUnswitch/exponential-switch-unswitch.ll118
-rw-r--r--llvm/test/Transforms/SimpleLoopUnswitch/formDedicatedAfterTrivial1.ll25
-rw-r--r--llvm/test/Transforms/SimpleLoopUnswitch/formDedicatedAfterTrivial2.ll22
-rw-r--r--llvm/test/Transforms/SimpleLoopUnswitch/formDedicatedAfterTrivial3.ll37
-rw-r--r--llvm/test/Transforms/SimpleLoopUnswitch/guards.ll239
-rw-r--r--llvm/test/Transforms/SimpleLoopUnswitch/infinite-loop.ll65
-rw-r--r--llvm/test/Transforms/SimpleLoopUnswitch/msan.ll142
-rw-r--r--llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-cost.ll502
-rw-r--r--llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch.ll4216
-rw-r--r--llvm/test/Transforms/SimpleLoopUnswitch/pr37888.ll39
-rw-r--r--llvm/test/Transforms/SimpleLoopUnswitch/preserve-analyses.ll130
-rw-r--r--llvm/test/Transforms/SimpleLoopUnswitch/trivial-unswitch-iteration.ll42
-rw-r--r--llvm/test/Transforms/SimpleLoopUnswitch/trivial-unswitch.ll1245
-rw-r--r--llvm/test/Transforms/SimpleLoopUnswitch/update-scev.ll187
44 files changed, 8769 insertions, 0 deletions
diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/2006-06-13-SingleEntryPHI.ll b/llvm/test/Transforms/SimpleLoopUnswitch/2006-06-13-SingleEntryPHI.ll
new file mode 100644
index 00000000000..0a769ec5da6
--- /dev/null
+++ b/llvm/test/Transforms/SimpleLoopUnswitch/2006-06-13-SingleEntryPHI.ll
@@ -0,0 +1,36 @@
+; RUN: opt < %s -simple-loop-unswitch -disable-output
+; RUN: opt < %s -simple-loop-unswitch -enable-mssa-loop-dependency=true -verify-memoryssa -disable-output
+
+ %struct.BLEND_MAP = type { i16, i16, i16, i32, %struct.BLEND_MAP_ENTRY* }
+ %struct.BLEND_MAP_ENTRY = type { float, i8, { [5 x float], [4 x i8] } }
+ %struct.TPATTERN = type { i16, i16, i16, i32, float, float, float, %struct.WARP*, %struct.TPATTERN*, %struct.BLEND_MAP*, { %struct.anon, [4 x i8] } }
+ %struct.TURB = type { i16, %struct.WARP*, [3 x double], i32, float, float }
+ %struct.WARP = type { i16, %struct.WARP* }
+ %struct.anon = type { float, [3 x double] }
+
+define void @Parse_Pattern() { ; crash-only reproducer: three nested cycles headed by %bb1096.outer20, %bb1096.outer23 and %bb1096
+entry:
+ br label %bb1096.outer20
+bb671: ; preds = %cond_true1099
+ br label %bb1096.outer23
+bb1096.outer20.loopexit: ; preds = %cond_true1099
+ %Local_Turb.0.ph24.lcssa = phi %struct.TURB* [ %Local_Turb.0.ph24, %cond_true1099 ] ; <%struct.TURB*> [#uses=1]
+ br label %bb1096.outer20
+bb1096.outer20: ; preds = %bb1096.outer20.loopexit, %entry
+ %Local_Turb.0.ph22 = phi %struct.TURB* [ undef, %entry ], [ %Local_Turb.0.ph24.lcssa, %bb1096.outer20.loopexit ] ; <%struct.TURB*> [#uses=1]
+ %tmp1098 = icmp eq i32 0, 0 ; <i1> [#uses=1] -- constant operands; only user is the branch in %bb1096
+ br label %bb1096.outer23
+bb1096.outer23: ; preds = %bb1096.outer20, %bb671
+ %Local_Turb.0.ph24 = phi %struct.TURB* [ %Local_Turb.0.ph22, %bb1096.outer20 ], [ null, %bb671 ] ; <%struct.TURB*> [#uses=2]
+ br label %bb1096
+bb1096: ; preds = %cond_true1099, %bb1096.outer23
+ br i1 %tmp1098, label %cond_true1099, label %bb1102 ; condition is defined in %bb1096.outer20, outside the two inner cycles
+cond_true1099: ; preds = %bb1096
+ switch i32 0, label %bb1096.outer20.loopexit [ ; one switch supplies the back edge of all three cycles
+ i32 161, label %bb671
+ i32 359, label %bb1096
+ ]
+bb1102: ; preds = %bb1096
+ %Local_Turb.0.ph24.lcssa1 = phi %struct.TURB* [ %Local_Turb.0.ph24, %bb1096 ] ; <%struct.TURB*> [#uses=0] -- single-incoming phi in the exit block
+ ret void
+}
diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/2006-06-27-DeadSwitchCase.ll b/llvm/test/Transforms/SimpleLoopUnswitch/2006-06-27-DeadSwitchCase.ll
new file mode 100644
index 00000000000..85066168e1e
--- /dev/null
+++ b/llvm/test/Transforms/SimpleLoopUnswitch/2006-06-27-DeadSwitchCase.ll
@@ -0,0 +1,32 @@
+; RUN: opt < %s -simple-loop-unswitch -disable-output
+; RUN: opt < %s -simple-loop-unswitch -enable-mssa-loop-dependency=true -verify-memoryssa -disable-output
+
+define void @init_caller_save() { ; crash-only reproducer: no block returns; outer cycle through %cond_true78, inner cycle through %bb54
+entry:
+ br label %cond_true78
+
+cond_true78: ; preds = %bb75, %entry
+ %i.0.0 = phi i32 [ 0, %entry ], [ %tmp74.0, %bb75 ] ; <i32> [#uses=2]
+ br label %bb54
+
+bb54: ; preds = %cond_true78, %bb31
+ br i1 false, label %bb75, label %cond_true64 ; constant-false condition selects %cond_true64
+
+cond_true64: ; preds = %bb54
+ switch i32 %i.0.0, label %cond_next20 [ ; selector is defined in %cond_true78, outside the inner cycle
+ i32 17, label %bb31
+ i32 18, label %bb31
+ ]
+
+cond_next20: ; preds = %cond_true64
+ br label %bb31
+
+bb31: ; preds = %cond_true64, %cond_true64, %cond_next20
+ %iftmp.29.1 = phi i32 [ 0, %cond_next20 ], [ 0, %cond_true64 ], [ 0, %cond_true64 ] ; <i32> [#uses=0] -- one entry per incoming edge, including both switch cases
+ br label %bb54
+
+bb75: ; preds = %bb54
+ %tmp74.0 = add i32 %i.0.0, 1 ; <i32> [#uses=1]
+ br label %cond_true78
+}
+
diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/2007-05-09-Unreachable.ll b/llvm/test/Transforms/SimpleLoopUnswitch/2007-05-09-Unreachable.ll
new file mode 100644
index 00000000000..02c7a96deb5
--- /dev/null
+++ b/llvm/test/Transforms/SimpleLoopUnswitch/2007-05-09-Unreachable.ll
@@ -0,0 +1,29 @@
+; PR1333
+; RUN: opt < %s -simple-loop-unswitch -disable-output
+; RUN: opt < %s -simple-loop-unswitch -enable-mssa-loop-dependency=true -verify-memoryssa -disable-output
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
+target triple = "i686-pc-linux-gnu"
+ %struct.ada__streams__root_stream_type = type { %struct.ada__tags__dispatch_table* }
+ %struct.ada__tags__dispatch_table = type { [1 x i8*] }
+ %struct.quotes__T173s = type { i8, %struct.quotes__T173s__T174s, [2 x [1 x double]], [2 x i16], i64, i8 }
+ %struct.quotes__T173s__T174s = type { i8, i8, i8, i16, i16, [2 x [1 x double]] }
+
+define void @quotes__write_quote() { ; crash-only reproducer: no ret; every path cycles through %bb/%bb51 or ends in unreachable
+entry:
+ %tmp606.i = icmp eq i32 0, 0 ; <i1> [#uses=1]
+ br label %bb
+bb: ; preds = %cond_next73, %bb, %entry
+ br i1 false, label %bb51, label %bb
+bb51: ; preds = %cond_next73, %bb
+ br i1 %tmp606.i, label %quotes__bid_ask_depth_offset_matrices__get_price.exit, label %cond_true.i ; condition is defined in %entry; its false edge leads to unreachable
+cond_true.i: ; preds = %bb51
+ unreachable
+quotes__bid_ask_depth_offset_matrices__get_price.exit: ; preds = %bb51
+ br i1 false, label %cond_next73, label %cond_true72
+cond_true72: ; preds = %quotes__bid_ask_depth_offset_matrices__get_price.exit
+ unreachable
+cond_next73: ; preds = %quotes__bid_ask_depth_offset_matrices__get_price.exit
+ br i1 false, label %bb, label %bb51 ; both successors are back edges into the cycle
+}
+
diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/2007-05-09-tl.ll b/llvm/test/Transforms/SimpleLoopUnswitch/2007-05-09-tl.ll
new file mode 100644
index 00000000000..a0408c8ea6a
--- /dev/null
+++ b/llvm/test/Transforms/SimpleLoopUnswitch/2007-05-09-tl.ll
@@ -0,0 +1,96 @@
+; RUN: opt < %s -simple-loop-unswitch -disable-output
+; RUN: opt < %s -simple-loop-unswitch -enable-mssa-loop-dependency=true -verify-memoryssa -disable-output
+; PR1333
+
+define void @pp_cxx_expression() { ; crash-only reproducer: one large switch whose block is its own loop; every other target just returns
+entry:
+ %tmp6 = lshr i32 0, 24 ; <i32> [#uses=1]
+ br label %tailrecurse
+
+tailrecurse: ; preds = %tailrecurse, %tailrecurse, %entry
+ switch i32 %tmp6, label %bb96 [ ; selector is defined in %entry, outside the self-loop
+ i32 24, label %bb10
+ i32 25, label %bb10
+ i32 28, label %bb10
+ i32 29, label %bb48
+ i32 31, label %bb48
+ i32 32, label %bb48
+ i32 33, label %bb48
+ i32 34, label %bb48
+ i32 36, label %bb15
+ i32 51, label %bb89
+ i32 52, label %bb89
+ i32 54, label %bb83
+ i32 57, label %bb59
+ i32 63, label %bb80
+ i32 64, label %bb80
+ i32 68, label %bb80
+ i32 169, label %bb75
+ i32 170, label %bb19
+ i32 171, label %bb63
+ i32 172, label %bb63
+ i32 173, label %bb67
+ i32 174, label %bb67
+ i32 175, label %bb19
+ i32 176, label %bb75
+ i32 178, label %bb59
+ i32 179, label %bb89
+ i32 180, label %bb59
+ i32 182, label %bb48
+ i32 183, label %bb48
+ i32 184, label %bb48
+ i32 185, label %bb48
+ i32 186, label %bb48
+ i32 195, label %bb48
+ i32 196, label %bb59
+ i32 197, label %bb89
+ i32 198, label %bb70
+ i32 199, label %bb59
+ i32 200, label %bb59
+ i32 201, label %bb59
+ i32 202, label %bb59
+ i32 203, label %bb75
+ i32 204, label %bb59
+ i32 205, label %tailrecurse ; back edge to the switch's own block
+ i32 210, label %tailrecurse ; second back edge to the same block
+ ]
+
+bb10: ; preds = %tailrecurse, %tailrecurse, %tailrecurse
+ ret void
+
+bb15: ; preds = %tailrecurse
+ ret void
+
+bb19: ; preds = %tailrecurse, %tailrecurse
+ ret void
+
+bb48: ; preds = %tailrecurse, %tailrecurse, %tailrecurse, %tailrecurse, %tailrecurse, %tailrecurse, %tailrecurse, %tailrecurse, %tailrecurse, %tailrecurse, %tailrecurse
+ ret void
+
+bb59: ; preds = %tailrecurse, %tailrecurse, %tailrecurse, %tailrecurse, %tailrecurse, %tailrecurse, %tailrecurse, %tailrecurse, %tailrecurse
+ ret void
+
+bb63: ; preds = %tailrecurse, %tailrecurse
+ ret void
+
+bb67: ; preds = %tailrecurse, %tailrecurse
+ ret void
+
+bb70: ; preds = %tailrecurse
+ ret void
+
+bb75: ; preds = %tailrecurse, %tailrecurse, %tailrecurse
+ ret void
+
+bb80: ; preds = %tailrecurse, %tailrecurse, %tailrecurse
+ ret void
+
+bb83: ; preds = %tailrecurse
+ ret void
+
+bb89: ; preds = %tailrecurse, %tailrecurse, %tailrecurse, %tailrecurse
+ ret void
+
+bb96: ; preds = %tailrecurse
+ ret void
+}
diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/2007-07-12-ExitDomInfo.ll b/llvm/test/Transforms/SimpleLoopUnswitch/2007-07-12-ExitDomInfo.ll
new file mode 100644
index 00000000000..571e3eb6696
--- /dev/null
+++ b/llvm/test/Transforms/SimpleLoopUnswitch/2007-07-12-ExitDomInfo.ll
@@ -0,0 +1,46 @@
+; RUN: opt < %s -simple-loop-unswitch -instcombine -disable-output
+; RUN: opt < %s -simple-loop-unswitch -enable-mssa-loop-dependency=true -verify-memoryssa -instcombine -disable-output
+
+@str3 = external constant [3 x i8] ; <[3 x i8]*> [#uses=1]
+
+define i32 @stringSearch_Clib(i32 %count) { ; crash-only reproducer: cycle %bb36.outer -> ... -> %bb41 with two returns inside the cycle body
+entry:
+ %ttmp25 = icmp sgt i32 %count, 0 ; <i1> [#uses=1]
+ br i1 %ttmp25, label %bb36.preheader, label %bb44
+
+bb36.preheader: ; preds = %entry
+ %ttmp33 = icmp slt i32 0, 250 ; <i1> [#uses=1]
+ br label %bb36.outer
+
+bb36.outer: ; preds = %bb41, %bb36.preheader
+ br i1 %ttmp33, label %bb.nph, label %bb41 ; condition is defined in %bb36.preheader, outside the cycle
+
+bb.nph: ; preds = %bb36.outer
+ %ttmp8 = icmp eq i8* null, null ; <i1> [#uses=1]
+ %ttmp6 = icmp eq i8* null, null ; <i1> [#uses=1]
+ %tmp31 = call i32 @strcspn( i8* null, i8* getelementptr ([3 x i8], [3 x i8]* @str3, i64 0, i64 0) ) ; <i32> [#uses=1]
+ br i1 %ttmp8, label %cond_next, label %cond_true
+
+cond_true: ; preds = %bb.nph
+ ret i32 0
+
+cond_next: ; preds = %bb.nph
+ br i1 %ttmp6, label %cond_next28, label %cond_true20
+
+cond_true20: ; preds = %cond_next
+ ret i32 0
+
+cond_next28: ; preds = %cond_next
+ %tmp33 = add i32 %tmp31, 0 ; <i32> [#uses=1]
+ br label %bb41
+
+bb41: ; preds = %cond_next28, %bb36.outer
+ %c.2.lcssa = phi i32 [ 0, %bb36.outer ], [ %tmp33, %cond_next28 ] ; <i32> [#uses=1]
+ br i1 false, label %bb36.outer, label %bb44 ; the true edge is the back edge of the cycle
+
+bb44: ; preds = %bb41, %entry
+ %c.01.1 = phi i32 [ 0, %entry ], [ %c.2.lcssa, %bb41 ] ; <i32> [#uses=1] -- merges the cycle's exit value with the bypass from %entry
+ ret i32 %c.01.1
+}
+
+declare i32 @strcspn(i8*, i8*)
diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/2007-07-13-DomInfo.ll b/llvm/test/Transforms/SimpleLoopUnswitch/2007-07-13-DomInfo.ll
new file mode 100644
index 00000000000..626ac848cfb
--- /dev/null
+++ b/llvm/test/Transforms/SimpleLoopUnswitch/2007-07-13-DomInfo.ll
@@ -0,0 +1,28 @@
+; RUN: opt < %s -simple-loop-unswitch -disable-output
+; RUN: opt < %s -simple-loop-unswitch -enable-mssa-loop-dependency=true -verify-memoryssa -disable-output
+
+define i32 @main(i32 %argc, i8** %argv) { ; crash-only reproducer: two-block cycle %bb <-> %cond_true with exits from both blocks
+entry:
+ %tmp1785365 = icmp ult i32 0, 100 ; <i1> [#uses=1]
+ br label %bb
+
+bb: ; preds = %cond_true, %entry
+ br i1 false, label %cond_true, label %cond_next
+
+cond_true: ; preds = %bb
+ br i1 %tmp1785365, label %bb, label %bb1788 ; condition is defined in %entry, outside the cycle
+
+cond_next: ; preds = %bb
+ %iftmp.1.0 = select i1 false, i32 0, i32 0 ; <i32> [#uses=1]
+ br i1 false, label %cond_true47, label %cond_next74
+
+cond_true47: ; preds = %cond_next
+ %tmp53 = urem i32 %iftmp.1.0, 0 ; <i32> [#uses=0] -- constant-zero divisor; the result is never used
+ ret i32 0
+
+cond_next74: ; preds = %cond_next
+ ret i32 0
+
+bb1788: ; preds = %cond_true
+ ret i32 0
+}
diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/2007-07-18-DomInfo.ll b/llvm/test/Transforms/SimpleLoopUnswitch/2007-07-18-DomInfo.ll
new file mode 100644
index 00000000000..52d96893060
--- /dev/null
+++ b/llvm/test/Transforms/SimpleLoopUnswitch/2007-07-18-DomInfo.ll
@@ -0,0 +1,67 @@
+; RUN: opt < %s -simple-loop-unswitch -disable-output
+; RUN: opt < %s -simple-loop-unswitch -enable-mssa-loop-dependency=true -verify-memoryssa -disable-output
+; PR1559
+
+target triple = "i686-pc-linux-gnu"
+ %struct.re_pattern_buffer = type { i8*, i32, i32, i32, i8*, i8*, i32, i8 }
+
+define fastcc i32 @byte_regex_compile(i8* %pattern, i32 %size, i32 %syntax, %struct.re_pattern_buffer* %bufp) { ; crash-only reproducer: nested cycles headed by %bb1855.outer.outer, %bb1855.outer and %cond_next1915
+entry:
+ br i1 false, label %bb147, label %cond_next123
+
+cond_next123: ; preds = %entry
+ ret i32 0
+
+bb147: ; preds = %entry
+ switch i32 0, label %normal_char [
+ i32 91, label %bb1734
+ i32 92, label %bb5700
+ ]
+
+bb1734: ; preds = %bb147
+ br label %bb1855.outer.outer
+
+cond_true1831: ; preds = %bb1855.outer
+ br i1 %tmp1837, label %cond_next1844, label %cond_true1840 ; %tmp1837 is defined in %bb1855.outer, which appears later in the text
+
+cond_true1840: ; preds = %cond_true1831
+ ret i32 0
+
+cond_next1844: ; preds = %cond_true1831
+ br i1 false, label %bb1855.outer, label %cond_true1849
+
+cond_true1849: ; preds = %cond_next1844
+ br label %bb1855.outer.outer
+
+bb1855.outer.outer: ; preds = %cond_true1849, %bb1734
+ %b.10.ph.ph = phi i8* [ null, %cond_true1849 ], [ null, %bb1734 ] ; <i8*> [#uses=1]
+ br label %bb1855.outer
+
+bb1855.outer: ; preds = %bb1855.outer.outer, %cond_next1844
+ %b.10.ph = phi i8* [ null, %cond_next1844 ], [ %b.10.ph.ph, %bb1855.outer.outer ] ; <i8*> [#uses=1]
+ %tmp1837 = icmp eq i8* null, null ; <i1> [#uses=2] -- used by the branches in %cond_true1831 and %cond_next1915
+ br i1 false, label %cond_true1831, label %cond_next1915
+
+cond_next1915: ; preds = %cond_next1961, %bb1855.outer
+ store i8* null, i8** null
+ br i1 %tmp1837, label %cond_next1929, label %cond_true1923 ; condition is defined in %bb1855.outer, outside the %cond_next1915 cycle
+
+cond_true1923: ; preds = %cond_next1915
+ ret i32 0
+
+cond_next1929: ; preds = %cond_next1915
+ br i1 false, label %cond_next1961, label %cond_next2009
+
+cond_next1961: ; preds = %cond_next1929
+ %tmp1992 = getelementptr i8, i8* %b.10.ph, i32 0 ; <i8*> [#uses=0]
+ br label %cond_next1915
+
+cond_next2009: ; preds = %cond_next1929
+ ret i32 0
+
+bb5700: ; preds = %bb147
+ ret i32 0
+
+normal_char: ; preds = %bb147
+ ret i32 0
+}
diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/2007-08-01-Dom.ll b/llvm/test/Transforms/SimpleLoopUnswitch/2007-08-01-Dom.ll
new file mode 100644
index 00000000000..52794891c56
--- /dev/null
+++ b/llvm/test/Transforms/SimpleLoopUnswitch/2007-08-01-Dom.ll
@@ -0,0 +1,30 @@
+; RUN: opt < %s -licm -simple-loop-unswitch -disable-output
+; PR 1589
+
+ %struct.QBasicAtomic = type { i32 }
+
+define void @_ZNK5QDate9addMonthsEi(%struct.QBasicAtomic* sret %agg.result, %struct.QBasicAtomic* %this, i32 %nmonths) { ; crash-only reproducer: one cycle headed by %cond_true90 with three latches that can each also exit to %bb93
+entry:
+ br label %cond_true90
+
+bb16: ; preds = %cond_true90
+ br i1 false, label %bb93, label %cond_true90
+
+bb45: ; preds = %cond_true90
+ br i1 false, label %bb53, label %bb58
+
+bb53: ; preds = %bb45
+ br i1 false, label %bb93, label %cond_true90
+
+bb58: ; preds = %bb45
+ store i32 0, i32* null, align 4 ; store inside the cycle body
+ br i1 false, label %cond_true90, label %bb93
+
+cond_true90: ; preds = %bb58, %bb53, %bb16, %entry
+ %nmonths_addr.016.1 = phi i32 [ %nmonths, %entry ], [ 0, %bb16 ], [ 0, %bb53 ], [ %nmonths_addr.016.1, %bb58 ] ; <i32> [#uses=2] -- the %bb58 incoming value is the phi itself
+ %tmp14 = icmp slt i32 %nmonths_addr.016.1, -11 ; <i1> [#uses=1]
+ br i1 %tmp14, label %bb16, label %bb45
+
+bb93: ; preds = %bb58, %bb53, %bb16
+ ret void
+}
diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/2007-08-01-LCSSA.ll b/llvm/test/Transforms/SimpleLoopUnswitch/2007-08-01-LCSSA.ll
new file mode 100644
index 00000000000..7c65459a65c
--- /dev/null
+++ b/llvm/test/Transforms/SimpleLoopUnswitch/2007-08-01-LCSSA.ll
@@ -0,0 +1,56 @@
+; RUN: opt < %s -simple-loop-unswitch -instcombine -disable-output
+; RUN: opt < %s -simple-loop-unswitch -enable-mssa-loop-dependency=true -verify-memoryssa -instcombine -disable-output
+ %struct.ClassDef = type { %struct.QByteArray, %struct.QByteArray, %"struct.QList<ArgumentDef>", %"struct.QList<ArgumentDef>", i8, i8, %"struct.QList<ArgumentDef>", %"struct.QList<ArgumentDef>", %"struct.QList<ArgumentDef>", %"struct.QList<ArgumentDef>", %"struct.QList<ArgumentDef>", %"struct.QList<ArgumentDef>", %"struct.QMap<QByteArray,QByteArray>", %"struct.QList<ArgumentDef>", %"struct.QMap<QByteArray,QByteArray>", i32, i32 }
+ %struct.FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct.FILE*, i32, i32, i32, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i32, i32, [40 x i8] }
+ %struct.Generator = type { %struct.FILE*, %struct.ClassDef*, %"struct.QList<ArgumentDef>", %struct.QByteArray, %"struct.QList<ArgumentDef>" }
+ %struct.QBasicAtomic = type { i32 }
+ %struct.QByteArray = type { %"struct.QByteArray::Data"* }
+ %"struct.QByteArray::Data" = type { %struct.QBasicAtomic, i32, i32, i8*, [1 x i8] }
+ %"struct.QList<ArgumentDef>" = type { %"struct.QList<ArgumentDef>::._19" }
+ %"struct.QList<ArgumentDef>::._19" = type { %struct.QListData }
+ %struct.QListData = type { %"struct.QListData::Data"* }
+ %"struct.QListData::Data" = type { %struct.QBasicAtomic, i32, i32, i32, i8, [1 x i8*] }
+ %"struct.QMap<QByteArray,QByteArray>" = type { %"struct.QMap<QByteArray,QByteArray>::._56" }
+ %"struct.QMap<QByteArray,QByteArray>::._56" = type { %struct.QMapData* }
+ %struct.QMapData = type { %struct.QMapData*, [12 x %struct.QMapData*], %struct.QBasicAtomic, i32, i32, i32, i8 }
+ %struct._IO_marker = type { %struct._IO_marker*, %struct.FILE*, i32 }
+@.str9 = external constant [1 x i8] ; <[1 x i8]*> [#uses=1]
+
+declare i32 @strcmp(i8*, i8*)
+
+define i32 @_ZN9Generator6strregEPKc(%struct.Generator* %this, i8* %s) { ; crash-only reproducer: cycle headed by %bb184 whose phi %idx.0 is returned from the exit block %bb250
+entry:
+ %s_addr.0 = select i1 false, i8* getelementptr ([1 x i8], [1 x i8]* @.str9, i32 0, i32 0), i8* %s ; <i8*> [#uses=2]
+ %tmp122 = icmp eq i8* %s_addr.0, null ; <i1> [#uses=1]
+ br label %bb184
+
+bb55: ; preds = %bb184
+ ret i32 0
+
+bb88: ; preds = %bb184
+ br i1 %tmp122, label %bb154, label %bb128 ; condition is defined in %entry, outside the cycle
+
+bb128: ; preds = %bb88
+ %tmp138 = call i32 @strcmp( i8* null, i8* %s_addr.0 ) ; <i32> [#uses=1]
+ %iftmp.37.0.in4 = icmp eq i32 %tmp138, 0 ; <i1> [#uses=1]
+ br i1 %iftmp.37.0.in4, label %bb250, label %bb166
+
+bb154: ; preds = %bb88
+ br i1 false, label %bb250, label %bb166
+
+bb166: ; preds = %bb154, %bb128
+ %tmp175 = add i32 %idx.0, 1 ; <i32> [#uses=1]
+ %tmp177 = add i32 %tmp175, 0 ; <i32> [#uses=1]
+ %tmp181 = add i32 %tmp177, 0 ; <i32> [#uses=1]
+ %tmp183 = add i32 %i33.0, 1 ; <i32> [#uses=1]
+ br label %bb184
+
+bb184: ; preds = %bb166, %entry
+ %i33.0 = phi i32 [ 0, %entry ], [ %tmp183, %bb166 ] ; <i32> [#uses=2]
+ %idx.0 = phi i32 [ 0, %entry ], [ %tmp181, %bb166 ] ; <i32> [#uses=2]
+ %tmp49 = icmp slt i32 %i33.0, 0 ; <i1> [#uses=1]
+ br i1 %tmp49, label %bb88, label %bb55
+
+bb250: ; preds = %bb154, %bb128
+ ret i32 %idx.0 ; uses the header phi directly, with no phi in this exit block
+}
diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/2007-10-04-DomFrontier.ll b/llvm/test/Transforms/SimpleLoopUnswitch/2007-10-04-DomFrontier.ll
new file mode 100644
index 00000000000..efbb7619591
--- /dev/null
+++ b/llvm/test/Transforms/SimpleLoopUnswitch/2007-10-04-DomFrontier.ll
@@ -0,0 +1,29 @@
+; RUN: opt < %s -licm -loop-unroll -disable-output
+
+@resonant = external global i32 ; <i32*> [#uses=2]
+
+define void @weightadj() { ; crash-only reproducer: outer cycle %bb -> %bb158, inner cycle %g.exit -> %bb68. NOTE(review): the opt invocation at the top of this file lists -licm -loop-unroll and no unswitch pass -- confirm that is intended
+entry:
+ br label %bb
+
+bb: ; preds = %bb158, %entry
+ store i32 0, i32* @resonant, align 4 ; store in the outer cycle's header
+ br i1 false, label %g.exit, label %bb158
+
+g.exit: ; preds = %bb68, %bb
+ br i1 false, label %bb68, label %cond_true
+
+cond_true: ; preds = %g.exit
+ store i32 1, i32* @resonant, align 4 ; second store to the same global, inside the inner cycle
+ br label %bb68
+
+bb68: ; preds = %cond_true, %g.exit
+ %tmp71 = icmp slt i32 0, 0 ; <i1> [#uses=1]
+ br i1 %tmp71, label %g.exit, label %bb158 ; the true edge is the inner cycle's back edge
+
+bb158: ; preds = %bb68, %bb
+ br i1 false, label %bb, label %return ; the true edge is the outer cycle's back edge
+
+return: ; preds = %bb158
+ ret void
+}
diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/2008-06-02-DomInfo.ll b/llvm/test/Transforms/SimpleLoopUnswitch/2008-06-02-DomInfo.ll
new file mode 100644
index 00000000000..5db1ced473f
--- /dev/null
+++ b/llvm/test/Transforms/SimpleLoopUnswitch/2008-06-02-DomInfo.ll
@@ -0,0 +1,27 @@
+; RUN: opt < %s -simple-loop-unswitch -instcombine -gvn -disable-output
+; RUN: opt < %s -simple-loop-unswitch -enable-mssa-loop-dependency=true -verify-memoryssa -instcombine -gvn -disable-output
+; PR2372
+target triple = "i386-pc-linux-gnu"
+
+define i32 @func_3(i16 signext %p_5, i16 signext %p_6) nounwind { ; crash-only reproducer: cycle %bb -> ... -> %bb54 with two branches on conditions computed in %entry
+entry:
+ %tmp3 = icmp eq i16 %p_5, 0 ; <i1> [#uses=1]
+ %tmp1314 = sext i16 %p_6 to i32 ; <i32> [#uses=1]
+ %tmp28 = icmp ugt i32 %tmp1314, 3 ; <i1> [#uses=1]
+ %bothcond = or i1 %tmp28, false ; <i1> [#uses=1]
+ br label %bb
+bb: ; preds = %bb54, %entry
+ br i1 %tmp3, label %bb54, label %bb5 ; first branch on an %entry-defined condition
+bb5: ; preds = %bb
+ br i1 %bothcond, label %bb54, label %bb31 ; second branch on an %entry-defined condition
+bb31: ; preds = %bb5
+ br label %bb54
+bb54: ; preds = %bb31, %bb5, %bb
+ br i1 false, label %bb64, label %bb ; the false edge is the back edge
+bb64: ; preds = %bb54
+ %tmp6566 = sext i16 %p_6 to i32 ; <i32> [#uses=1] -- same sext of %p_6 as %tmp1314 in %entry
+ %tmp68 = tail call i32 (...) @func_18( i32 1, i32 %tmp6566, i32 1 ) nounwind ; <i32> [#uses=0]
+ ret i32 undef
+}
+
+declare i32 @func_18(...)
diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/2008-06-17-DomFrontier.ll b/llvm/test/Transforms/SimpleLoopUnswitch/2008-06-17-DomFrontier.ll
new file mode 100644
index 00000000000..e309d60a3e4
--- /dev/null
+++ b/llvm/test/Transforms/SimpleLoopUnswitch/2008-06-17-DomFrontier.ll
@@ -0,0 +1,22 @@
+; RUN: opt < %s -licm -simple-loop-unswitch -disable-output
+@g_56 = external global i16 ; <i16*> [#uses=2]
+
+define i32 @func_67(i32 %p_68, i8 signext %p_69, i8 signext %p_71) nounwind { ; crash-only reproducer: outer cycle headed by %bb containing the cycle %bb3 -> %bb19 -> %bb36 and the self-loop on %bb44
+entry:
+ br label %bb
+bb: ; preds = %bb44, %entry
+ br label %bb3
+bb3: ; preds = %bb36, %bb
+ %bothcond = or i1 false, false ; <i1> [#uses=1]
+ br i1 %bothcond, label %bb29, label %bb19
+bb19: ; preds = %bb3
+ br i1 false, label %bb36, label %bb29
+bb29: ; preds = %bb19, %bb3
+ ret i32 0
+bb36: ; preds = %bb19
+ store i16 0, i16* @g_56, align 2 ; store to the global that %bb44 loads
+ br i1 false, label %bb44, label %bb3
+bb44: ; preds = %bb44, %bb36
+ %tmp46 = load i16, i16* @g_56, align 2 ; <i16> [#uses=0] -- unused load inside the self-loop
+ br i1 false, label %bb, label %bb44
+}
diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/2010-11-18-LCSSA.ll b/llvm/test/Transforms/SimpleLoopUnswitch/2010-11-18-LCSSA.ll
new file mode 100644
index 00000000000..f3a382d813b
--- /dev/null
+++ b/llvm/test/Transforms/SimpleLoopUnswitch/2010-11-18-LCSSA.ll
@@ -0,0 +1,29 @@
+; RUN: opt < %s -simple-loop-unswitch -disable-output
+; RUN: opt < %s -simple-loop-unswitch -enable-mssa-loop-dependency=true -verify-memoryssa -disable-output
+; PR8622
+@g_38 = external global i32, align 4
+
+define void @func_67(i32 %p_68.coerce) nounwind { ; crash-only reproducer: loop %for.body -> %for.cond whose select result is stored in the exit block
+entry:
+ br i1 true, label %for.end12, label %bb.nph
+
+bb.nph: ; preds = %entry
+ %g_38.promoted = load i32, i32* @g_38
+ br label %for.body
+
+for.body: ; preds = %for.cond, %bb.nph
+ %tobool.i = icmp eq i32 %p_68.coerce, 1 ; depends only on the function argument
+ %xor4.i = xor i32 %p_68.coerce, 1
+ %call1 = select i1 %tobool.i, i32 0, i32 %xor4.i ; select on that condition, inside the loop
+ br label %for.cond
+
+for.cond: ; preds = %for.body
+ br i1 true, label %for.cond.for.end12_crit_edge, label %for.body
+
+for.cond.for.end12_crit_edge: ; preds = %for.cond
+ store i32 %call1, i32* @g_38 ; uses the in-loop value directly, with no phi in this exit block
+ br label %for.end12
+
+for.end12: ; preds = %for.cond.for.end12_crit_edge, %entry
+ ret void
+}
diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/2011-06-02-CritSwitch.ll b/llvm/test/Transforms/SimpleLoopUnswitch/2011-06-02-CritSwitch.ll
new file mode 100644
index 00000000000..b861d3029d5
--- /dev/null
+++ b/llvm/test/Transforms/SimpleLoopUnswitch/2011-06-02-CritSwitch.ll
@@ -0,0 +1,29 @@
+; RUN: opt -simple-loop-unswitch -disable-output < %s
+; RUN: opt -simple-loop-unswitch -enable-mssa-loop-dependency=true -verify-memoryssa -disable-output < %s
+; PR10031
+
+define i32 @test(i32 %command) { ; crash-only reproducer: two switches on the argument %command inside the cycle headed by %tailrecurse
+entry:
+ br label %tailrecurse
+
+tailrecurse: ; preds = %if.then14, %tailrecurse, %entry
+ br i1 undef, label %if.then, label %tailrecurse
+
+if.then: ; preds = %tailrecurse
+ switch i32 %command, label %sw.bb [ ; both cases share one successor; the default leaves the cycle
+ i32 2, label %land.lhs.true
+ i32 0, label %land.lhs.true
+ ]
+
+land.lhs.true: ; preds = %if.then, %if.then
+ br i1 undef, label %sw.bb, label %if.then14
+
+if.then14: ; preds = %land.lhs.true
+ switch i32 %command, label %tailrecurse [ ; the default edge is a back edge; both cases leave the cycle
+ i32 0, label %sw.bb
+ i32 1, label %sw.bb
+ ]
+
+sw.bb: ; preds = %if.then14, %if.then14, %land.lhs.true, %if.then
+ unreachable
+}
diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/2011-09-26-EHCrash.ll b/llvm/test/Transforms/SimpleLoopUnswitch/2011-09-26-EHCrash.ll
new file mode 100644
index 00000000000..16886bfec03
--- /dev/null
+++ b/llvm/test/Transforms/SimpleLoopUnswitch/2011-09-26-EHCrash.ll
@@ -0,0 +1,64 @@
+; RUN: opt < %s -sroa -simple-loop-unswitch -disable-output
+; RUN: opt < %s -sroa -simple-loop-unswitch -enable-mssa-loop-dependency=true -verify-memoryssa -disable-output
+; PR11016
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-macosx10.7.2"
+
+%class.MyContainer.1.3.19.29 = type { [6 x %class.MyMemVarClass.0.2.18.28*] }
+%class.MyMemVarClass.0.2.18.28 = type { i32 }
+
+define void @_ZN11MyContainer1fEi(%class.MyContainer.1.3.19.29* %this, i32 %doit) uwtable ssp align 2 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { ; crash-only reproducer: counted loop containing an invoke that unwinds to a landing pad ending in resume
+entry:
+ br label %for.cond
+
+for.cond: ; preds = %for.inc, %entry
+ %inc1 = phi i32 [ %inc, %for.inc ], [ 0, %entry ]
+ %conv = sext i32 %inc1 to i64
+ %cmp = icmp ult i64 %conv, 6
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body: ; preds = %for.cond
+ %tobool = icmp ne i32 %doit, 0 ; depends only on the function argument %doit
+ br i1 %tobool, label %for.inc, label %if.then
+
+if.then: ; preds = %for.body
+ %idxprom = sext i32 %inc1 to i64
+ %array_ = getelementptr inbounds %class.MyContainer.1.3.19.29, %class.MyContainer.1.3.19.29* %this, i32 0, i32 0
+ %arrayidx = getelementptr inbounds [6 x %class.MyMemVarClass.0.2.18.28*], [6 x %class.MyMemVarClass.0.2.18.28*]* %array_, i32 0, i64 %idxprom
+ %tmp4 = load %class.MyMemVarClass.0.2.18.28*, %class.MyMemVarClass.0.2.18.28** %arrayidx, align 8
+ %isnull = icmp eq %class.MyMemVarClass.0.2.18.28* %tmp4, null
+ br i1 %isnull, label %for.inc, label %delete.notnull
+
+delete.notnull: ; preds = %if.then
+ invoke void @_ZN13MyMemVarClassD1Ev(%class.MyMemVarClass.0.2.18.28* %tmp4)
+ to label %invoke.cont unwind label %lpad
+
+invoke.cont: ; preds = %delete.notnull
+ %0 = bitcast %class.MyMemVarClass.0.2.18.28* %tmp4 to i8*
+ call void @_ZdlPv(i8* %0) nounwind
+ br label %for.inc
+
+lpad: ; preds = %delete.notnull
+ %1 = landingpad { i8*, i32 }
+ cleanup
+ %2 = extractvalue { i8*, i32 } %1, 0
+ %3 = extractvalue { i8*, i32 } %1, 1
+ %4 = bitcast %class.MyMemVarClass.0.2.18.28* %tmp4 to i8* ; %tmp4 is defined in %if.then, inside the loop
+ call void @_ZdlPv(i8* %4) nounwind
+ %lpad.val = insertvalue { i8*, i32 } undef, i8* %2, 0
+ %lpad.val7 = insertvalue { i8*, i32 } %lpad.val, i32 %3, 1
+ resume { i8*, i32 } %lpad.val7 ; rebuilds the landingpad pair and resumes unwinding; this block never re-enters the loop
+
+for.inc: ; preds = %invoke.cont, %if.then, %for.body
+ %inc = add nsw i32 %inc1, 1
+ br label %for.cond
+
+for.end: ; preds = %for.cond
+ ret void
+}
+
+declare void @_ZN13MyMemVarClassD1Ev(%class.MyMemVarClass.0.2.18.28*)
+
+declare i32 @__gxx_personality_v0(...)
+
+declare void @_ZdlPv(i8*) nounwind
diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/2012-04-02-IndirectBr.ll b/llvm/test/Transforms/SimpleLoopUnswitch/2012-04-02-IndirectBr.ll
new file mode 100644
index 00000000000..72af9840818
--- /dev/null
+++ b/llvm/test/Transforms/SimpleLoopUnswitch/2012-04-02-IndirectBr.ll
@@ -0,0 +1,42 @@
+; RUN: opt < %s -S -simple-loop-unswitch -verify-loop-info -verify-dom-info | FileCheck %s
+; RUN: opt < %s -S -simple-loop-unswitch -verify-loop-info -verify-dom-info -enable-mssa-loop-dependency=true -verify-memoryssa | FileCheck %s
+; PR12343: -simple-loop-unswitch crash on indirect branch
+
+; CHECK: %0 = icmp eq i64 undef, 0
+; CHECK-NEXT: br i1 %0, label %"5", label %"4"
+
+; CHECK: "5": ; preds = %entry
+; CHECK-NEXT: br label %"16"
+
+; CHECK: "16": ; preds = %"22", %"5"
+; CHECK-NEXT: indirectbr i8* undef, [label %"22", label %"33"]
+
+; CHECK: "22": ; preds = %"16"
+; CHECK-NEXT: br i1 %0, label %"16", label %"26"
+
+; CHECK: "26": ; preds = %"22"
+; CHECK-NEXT: unreachable
+
+define void @foo() { ; cycle "16" <-> "22" entered through an indirectbr; the expected-output lines above this function list these same blocks unchanged
+entry:
+ %0 = icmp eq i64 undef, 0
+ br i1 %0, label %"5", label %"4"
+
+"4": ; preds = %entry
+ unreachable
+
+"5": ; preds = %entry
+ br label %"16"
+
+"16": ; preds = %"22", %"5"
+ indirectbr i8* undef, [label %"22", label %"33"] ; indirect branch in the cycle's header
+
+"22": ; preds = %"16"
+ br i1 %0, label %"16", label %"26" ; %0 is defined in %entry, outside the cycle
+
+"26": ; preds = %"22"
+ unreachable
+
+"33": ; preds = %"16"
+ unreachable
+}
diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/2012-04-30-LoopUnswitch-LPad-Crash.ll b/llvm/test/Transforms/SimpleLoopUnswitch/2012-04-30-LoopUnswitch-LPad-Crash.ll
new file mode 100644
index 00000000000..8f05e58b8eb
--- /dev/null
+++ b/llvm/test/Transforms/SimpleLoopUnswitch/2012-04-30-LoopUnswitch-LPad-Crash.ll
@@ -0,0 +1,97 @@
+; RUN: opt < %s -basicaa -instcombine -inline -functionattrs -licm -simple-loop-unswitch -gvn -verify
+; PR12573
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.7.0"
+
+%class.D.22.42.66.102.138.158.178.198.238.242.246.250.262.294.302.338.346.379 = type { %class.C.23.43.67.103.139.159.179.199.239.243.247.251.263.295.303.339.347.376*, %class.B.21.41.65.101.137.157.177.197.237.241.245.249.261.293.301.337.345.378 }
+%class.C.23.43.67.103.139.159.179.199.239.243.247.251.263.295.303.339.347.376 = type { %class.D.22.42.66.102.138.158.178.198.238.242.246.250.262.294.302.338.346.379* }
+%class.B.21.41.65.101.137.157.177.197.237.241.245.249.261.293.301.337.345.378 = type { %class.A.20.40.64.100.136.156.176.196.236.240.244.248.260.292.300.336.344.377* }
+%class.A.20.40.64.100.136.156.176.196.236.240.244.248.260.292.300.336.344.377 = type { i8 }
+
+define void @_Z23get_reconstruction_pathv() uwtable ssp personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+entry:
+ %c = alloca %class.D.22.42.66.102.138.158.178.198.238.242.246.250.262.294.302.338.346.379, align 8
+ br label %for.cond
+
+for.cond: ; preds = %for.end, %entry
+ invoke void @_ZN1DptEv(%class.D.22.42.66.102.138.158.178.198.238.242.246.250.262.294.302.338.346.379* %c)
+ to label %invoke.cont unwind label %lpad
+
+invoke.cont: ; preds = %for.cond
+ invoke void @_ZN1C3endEv()
+ to label %for.cond3 unwind label %lpad
+
+for.cond3: ; preds = %invoke.cont6, %invoke.cont
+ invoke void @_ZN1DptEv(%class.D.22.42.66.102.138.158.178.198.238.242.246.250.262.294.302.338.346.379* %c)
+ to label %invoke.cont4 unwind label %lpad
+
+invoke.cont4: ; preds = %for.cond3
+ invoke void @_ZN1C3endEv()
+ to label %invoke.cont6 unwind label %lpad
+
+invoke.cont6: ; preds = %invoke.cont4
+ br i1 undef, label %for.cond3, label %for.end
+
+lpad: ; preds = %for.end, %invoke.cont4, %for.cond3, %invoke.cont, %for.cond
+ %0 = landingpad { i8*, i32 }
+ cleanup
+ resume { i8*, i32 } undef
+
+for.end: ; preds = %invoke.cont6
+ invoke void @_ZN1C13_M_insert_auxER1D()
+ to label %for.cond unwind label %lpad
+}
+
+define void @_ZN1DptEv(%class.D.22.42.66.102.138.158.178.198.238.242.246.250.262.294.302.338.346.379* %this) uwtable ssp align 2 {
+entry:
+ %this.addr = alloca %class.D.22.42.66.102.138.158.178.198.238.242.246.250.262.294.302.338.346.379*, align 8
+ store %class.D.22.42.66.102.138.158.178.198.238.242.246.250.262.294.302.338.346.379* %this, %class.D.22.42.66.102.138.158.178.198.238.242.246.250.262.294.302.338.346.379** %this.addr, align 8
+ %this1 = load %class.D.22.42.66.102.138.158.178.198.238.242.246.250.262.294.302.338.346.379*, %class.D.22.42.66.102.138.158.178.198.238.242.246.250.262.294.302.338.346.379** %this.addr
+ %px = getelementptr inbounds %class.D.22.42.66.102.138.158.178.198.238.242.246.250.262.294.302.338.346.379, %class.D.22.42.66.102.138.158.178.198.238.242.246.250.262.294.302.338.346.379* %this1, i32 0, i32 0
+ %0 = load %class.C.23.43.67.103.139.159.179.199.239.243.247.251.263.295.303.339.347.376*, %class.C.23.43.67.103.139.159.179.199.239.243.247.251.263.295.303.339.347.376** %px, align 8
+ %tobool = icmp ne %class.C.23.43.67.103.139.159.179.199.239.243.247.251.263.295.303.339.347.376* %0, null
+ br i1 %tobool, label %cond.end, label %cond.false
+
+cond.false: ; preds = %entry
+ call void @_Z10__assert13v() noreturn
+ unreachable
+
+cond.end: ; preds = %entry
+ ret void
+}
+
+declare i32 @__gxx_personality_v0(...)
+
+declare void @_ZN1C3endEv()
+
+define void @_ZN1C13_M_insert_auxER1D() uwtable ssp align 2 {
+entry:
+ ret void
+}
+
+define void @_ZN1DD1Ev() unnamed_addr uwtable inlinehint ssp align 2 {
+entry:
+ ret void
+}
+
+define void @_ZN1DD2Ev() unnamed_addr uwtable inlinehint ssp align 2 {
+entry:
+ ret void
+}
+
+define void @_ZN1BD1Ev() unnamed_addr uwtable ssp align 2 {
+entry:
+ ret void
+}
+
+define void @_ZN1BD2Ev() unnamed_addr uwtable ssp align 2 {
+entry:
+ ret void
+}
+
+define void @_ZN1BaSERS_() uwtable ssp align 2 {
+entry:
+ unreachable
+}
+
+declare void @_Z10__assert13v() noreturn
diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/2012-05-20-Phi.ll b/llvm/test/Transforms/SimpleLoopUnswitch/2012-05-20-Phi.ll
new file mode 100644
index 00000000000..e9f35709ba7
--- /dev/null
+++ b/llvm/test/Transforms/SimpleLoopUnswitch/2012-05-20-Phi.ll
@@ -0,0 +1,26 @@
+; RUN: opt < %s -simple-loop-unswitch -disable-output
+; RUN: opt < %s -simple-loop-unswitch -enable-mssa-loop-dependency=true -verify-memoryssa -disable-output
+; PR12887
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@a = common global i32 0, align 4
+@c = common global i32 0, align 4
+@b = common global i32 0, align 4
+
+define void @func() noreturn nounwind uwtable {
+entry:
+ %0 = load i32, i32* @a, align 4
+ %tobool = icmp eq i32 %0, 0
+ %1 = load i32, i32* @b, align 4
+ br label %while.body
+
+while.body: ; preds = %while.body, %entry
+ %d.0 = phi i8 [ undef, %entry ], [ %conv2, %while.body ]
+ %conv = sext i8 %d.0 to i32
+ %cond = select i1 %tobool, i32 0, i32 %conv
+ %conv11 = zext i8 %d.0 to i32
+ %add = add i32 %1, %conv11
+ %conv2 = trunc i32 %add to i8
+ br label %while.body
+}
diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/2015-09-18-Addrspace.ll b/llvm/test/Transforms/SimpleLoopUnswitch/2015-09-18-Addrspace.ll
new file mode 100644
index 00000000000..c8de642a5d0
--- /dev/null
+++ b/llvm/test/Transforms/SimpleLoopUnswitch/2015-09-18-Addrspace.ll
@@ -0,0 +1,29 @@
+; RUN: opt < %s -simple-loop-unswitch -S | FileCheck %s
+; RUN: opt < %s -simple-loop-unswitch -enable-mssa-loop-dependency=true -verify-memoryssa -S | FileCheck %s
+
+; In cases where two address spaces do not have the same size pointer, the
+; input for the addrspacecast should not be used as a substitute for itself
+; when manipulating the pointer.
+
+target datalayout = "e-m:e-p:16:16-p1:32:16-i32:16-i64:16-n8:16"
+
+define void @foo() {
+; CHECK-LABEL: @foo
+entry:
+ %arrayidx.i1 = getelementptr inbounds i16, i16* undef, i16 undef
+ %arrayidx.i = addrspacecast i16* %arrayidx.i1 to i16 addrspace(1)*
+ br i1 undef, label %for.body.i, label %bar.exit
+
+for.body.i: ; preds = %for.body.i, %entry
+; When we call makeLoopInvariant (i.e. trivial LICM) on this load, it
+; will try to find the base object to prove dereferenceability. If we look
+; through the addrspacecast, we'll fail an assertion about bitwidths matching
+; CHECK-LABEL: for.body.i
+; CHECK: %0 = load i16, i16 addrspace(1)* %arrayidx.i, align 2
+ %0 = load i16, i16 addrspace(1)* %arrayidx.i, align 2
+ %cmp1.i = icmp eq i16 %0, 0
+ br i1 %cmp1.i, label %bar.exit, label %for.body.i
+
+bar.exit: ; preds = %for.body.i, %entry
+ ret void
+}
diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/LIV-loop-condtion.ll b/llvm/test/Transforms/SimpleLoopUnswitch/LIV-loop-condtion.ll
new file mode 100644
index 00000000000..59c14e937b6
--- /dev/null
+++ b/llvm/test/Transforms/SimpleLoopUnswitch/LIV-loop-condtion.ll
@@ -0,0 +1,37 @@
+; RUN: opt < %s -simple-loop-unswitch -S 2>&1 | FileCheck %s
+; RUN: opt < %s -simple-loop-unswitch -enable-mssa-loop-dependency=true -verify-memoryssa -S 2>&1 | FileCheck %s
+
+; This tests that trivial loop unswitch only happens when the trivial condition
+; itself is an LIV loop condition (not a partial LIV, which could occur in and/or).
+
+define i32 @test(i1 %cond1, i32 %var1) {
+; CHECK-LABEL: define i32 @test(
+entry:
+ br label %loop_begin
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 %cond1, label %entry.split, label %loop_exit.split
+;
+; CHECK: entry.split:
+; CHECK-NEXT: br label %loop_begin
+
+loop_begin:
+ %var3 = phi i32 [%var1, %entry], [%var2, %do_something]
+ %cond2 = icmp eq i32 %var3, 10
+ %cond.and = and i1 %cond1, %cond2
+ br i1 %cond.and, label %do_something, label %loop_exit
+; CHECK: loop_begin:
+; CHECK-NEXT: %[[VAR3:.*]] = phi i32
+; CHECK-NEXT: %[[COND2:.*]] = icmp eq i32 %[[VAR3]], 10
+; CHECK-NEXT: %[[COND_AND:.*]] = and i1 true, %[[COND2]]
+; CHECK-NEXT: br i1 %[[COND_AND]], label %do_something, label %loop_exit
+
+do_something:
+ %var2 = add i32 %var3, 1
+ call void @some_func() noreturn nounwind
+ br label %loop_begin
+
+loop_exit:
+ ret i32 0
+}
+
+declare void @some_func() noreturn
diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/basictest.ll b/llvm/test/Transforms/SimpleLoopUnswitch/basictest.ll
new file mode 100644
index 00000000000..240f433a8db
--- /dev/null
+++ b/llvm/test/Transforms/SimpleLoopUnswitch/basictest.ll
@@ -0,0 +1,185 @@
+; RUN: opt -passes='loop(unswitch),verify<loops>' -S < %s | FileCheck %s
+; RUN: opt -enable-mssa-loop-dependency=true -verify-memoryssa -passes='loop(unswitch),verify<loops>' -S < %s | FileCheck %s
+
+define i32 @test(i32* %A, i1 %C) {
+entry:
+ br label %no_exit
+no_exit: ; preds = %no_exit.backedge, %entry
+ %i.0.0 = phi i32 [ 0, %entry ], [ %i.0.0.be, %no_exit.backedge ] ; <i32> [#uses=3]
+ %gep.upgrd.1 = zext i32 %i.0.0 to i64 ; <i64> [#uses=1]
+ %tmp.7 = getelementptr i32, i32* %A, i64 %gep.upgrd.1 ; <i32*> [#uses=4]
+ %tmp.13 = load i32, i32* %tmp.7 ; <i32> [#uses=2]
+ %tmp.14 = add i32 %tmp.13, 1 ; <i32> [#uses=1]
+ store i32 %tmp.14, i32* %tmp.7
+ br i1 %C, label %then, label %endif
+then: ; preds = %no_exit
+ %tmp.29 = load i32, i32* %tmp.7 ; <i32> [#uses=1]
+ %tmp.30 = add i32 %tmp.29, 2 ; <i32> [#uses=1]
+ store i32 %tmp.30, i32* %tmp.7
+ %inc9 = add i32 %i.0.0, 1 ; <i32> [#uses=2]
+ %tmp.112 = icmp ult i32 %inc9, 100000 ; <i1> [#uses=1]
+ br i1 %tmp.112, label %no_exit.backedge, label %return
+no_exit.backedge: ; preds = %endif, %then
+ %i.0.0.be = phi i32 [ %inc9, %then ], [ %inc, %endif ] ; <i32> [#uses=1]
+ br label %no_exit
+endif: ; preds = %no_exit
+ %inc = add i32 %i.0.0, 1 ; <i32> [#uses=2]
+ %tmp.1 = icmp ult i32 %inc, 100000 ; <i1> [#uses=1]
+ br i1 %tmp.1, label %no_exit.backedge, label %return
+return: ; preds = %endif, %then
+ ret i32 %tmp.13
+}
+
+; This simple test would normally unswitch, but should be inhibited by the presence of
+; the noduplicate call.
+
+; CHECK-LABEL: @test2(
+define i32 @test2(i32* %var) {
+ %mem = alloca i32
+ store i32 2, i32* %mem
+ %c = load i32, i32* %mem
+
+ br label %loop_begin
+
+loop_begin:
+
+ %var_val = load i32, i32* %var
+
+ switch i32 %c, label %default [
+ i32 1, label %inc
+ i32 2, label %dec
+ ]
+
+inc:
+ call void @incf() noreturn nounwind
+ br label %loop_begin
+dec:
+; CHECK: call void @decf()
+; CHECK-NOT: call void @decf()
+ call void @decf() noreturn nounwind noduplicate
+ br label %loop_begin
+default:
+ br label %loop_exit
+loop_exit:
+ ret i32 0
+; CHECK: }
+}
+
+; This simple test would normally unswitch, but should be inhibited by the presence of
+; the convergent call that is not control-dependent on the unswitch condition.
+
+; CHECK-LABEL: @test3(
+define i32 @test3(i32* %var) {
+ %mem = alloca i32
+ store i32 2, i32* %mem
+ %c = load i32, i32* %mem
+
+ br label %loop_begin
+
+loop_begin:
+
+ %var_val = load i32, i32* %var
+
+; CHECK: call void @conv()
+; CHECK-NOT: call void @conv()
+ call void @conv() convergent
+
+ switch i32 %c, label %default [
+ i32 1, label %inc
+ i32 2, label %dec
+ ]
+
+inc:
+ call void @incf() noreturn nounwind
+ br label %loop_begin
+dec:
+ call void @decf() noreturn nounwind
+ br label %loop_begin
+default:
+ br label %loop_exit
+loop_exit:
+ ret i32 0
+; CHECK: }
+}
+
+; Make sure we don't unswitch, as we can not find an input value %a
+; that will effectively unswitch 0 or 3 out of the loop.
+;
+; CHECK: define void @and_or_i2_as_switch_input(i2
+; CHECK: entry:
+; This is an indication that the loop has NOT been unswitched.
+; CHECK-NOT: icmp
+; CHECK: br
+define void @and_or_i2_as_switch_input(i2 %a) {
+entry:
+ br label %for.body
+
+for.body:
+ %i = phi i2 [ 0, %entry ], [ %inc, %for.inc ]
+ %and = and i2 %a, %i
+ %or = or i2 %and, %i
+ switch i2 %or, label %sw.default [
+ i2 0, label %sw.bb
+ i2 3, label %sw.bb1
+ ]
+
+sw.bb:
+ br label %sw.epilog
+
+sw.bb1:
+ br label %sw.epilog
+
+sw.default:
+ br label %sw.epilog
+
+sw.epilog:
+ br label %for.inc
+
+for.inc:
+ %inc = add nsw i2 %i, 1
+ %cmp = icmp slt i2 %inc, 3
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end:
+ ret void
+}
+
+; Make sure we don't unswitch, as we can not find an input value %a
+; that will effectively unswitch true/false out of the loop.
+;
+; CHECK: define void @and_or_i1_as_branch_input(i1
+; CHECK: entry:
+; This is an indication that the loop has NOT been unswitched.
+; CHECK-NOT: icmp
+; CHECK: br
+define void @and_or_i1_as_branch_input(i1 %a) {
+entry:
+ br label %for.body
+
+for.body:
+ %i = phi i1 [ 0, %entry ], [ %inc, %for.inc ]
+ %and = and i1 %a, %i
+ %or = or i1 %and, %i
+ br i1 %or, label %sw.bb, label %sw.bb1
+
+sw.bb:
+ br label %sw.epilog
+
+sw.bb1:
+ br label %sw.epilog
+
+sw.epilog:
+ br label %for.inc
+
+for.inc:
+ %inc = add nsw i1 %i, 1
+ %cmp = icmp slt i1 %inc, 1
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end:
+ ret void
+}
+
+declare void @incf() noreturn
+declare void @decf() noreturn
+declare void @conv() convergent
diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/cleanuppad.ll b/llvm/test/Transforms/SimpleLoopUnswitch/cleanuppad.ll
new file mode 100644
index 00000000000..1cade22b659
--- /dev/null
+++ b/llvm/test/Transforms/SimpleLoopUnswitch/cleanuppad.ll
@@ -0,0 +1,45 @@
+; RUN: opt -S -simple-loop-unswitch < %s | FileCheck %s
+; RUN: opt -S -simple-loop-unswitch -enable-mssa-loop-dependency=true -verify-memoryssa < %s | FileCheck %s
+target triple = "x86_64-pc-win32"
+
+define void @f(i32 %doit, i1 %x, i1 %y) personality i32 (...)* @__CxxFrameHandler3 {
+entry:
+ %tobool = icmp eq i32 %doit, 0
+ br label %for.cond
+
+for.cond: ; preds = %for.inc, %entry
+ br i1 %x, label %for.body, label %for.end
+
+for.body: ; preds = %for.cond
+ br i1 %tobool, label %if.then, label %for.inc
+
+if.then: ; preds = %for.body
+ br i1 %y, label %for.inc, label %delete.notnull
+
+delete.notnull: ; preds = %if.then
+ invoke void @g()
+ to label %invoke.cont unwind label %lpad
+
+invoke.cont: ; preds = %delete.notnull
+ br label %for.inc
+
+lpad: ; preds = %delete.notnull
+ %cp = cleanuppad within none []
+ cleanupret from %cp unwind to caller
+
+for.inc: ; preds = %invoke.cont, %if.then, %for.body
+ br label %for.cond
+
+for.end: ; preds = %for.cond
+ ret void
+}
+
+declare void @g()
+
+declare i32 @__CxxFrameHandler3(...)
+
+; CHECK-LABEL: define void @f(
+; CHECK: cleanuppad within none []
+; CHECK-NOT: cleanuppad
+
+attributes #0 = { ssp uwtable }
diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/copy-metadata.ll b/llvm/test/Transforms/SimpleLoopUnswitch/copy-metadata.ll
new file mode 100644
index 00000000000..09d7d792c7c
--- /dev/null
+++ b/llvm/test/Transforms/SimpleLoopUnswitch/copy-metadata.ll
@@ -0,0 +1,35 @@
+; RUN: opt < %s -simple-loop-unswitch -S | FileCheck %s
+; RUN: opt < %s -simple-loop-unswitch -enable-mssa-loop-dependency=true -verify-memoryssa -S | FileCheck %s
+
+; This test checks that the unswitched condition preserves make.implicit metadata.
+define i32 @test(i1 %cond) {
+; CHECK-LABEL: @test(
+entry:
+ br label %loop_begin
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 %{{.*}}, label %entry.split, label %loop_exit, !make.implicit !0
+;
+; CHECK: entry.split:
+; CHECK-NEXT: br label %loop_begin
+
+loop_begin:
+ br i1 %cond, label %continue, label %loop_exit, !make.implicit !0
+; CHECK: loop_begin:
+; CHECK-NEXT: br label %continue
+
+continue:
+ call void @some_func()
+ br label %loop_begin
+; CHECK: continue:
+; CHECK-NEXT: call
+; CHECK-NEXT: br label %loop_begin
+
+loop_exit:
+ ret i32 0
+; CHECK: loop_exit:
+; CHECK-NEXT: ret
+}
+
+declare void @some_func()
+
+!0 = !{}
diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/crash.ll b/llvm/test/Transforms/SimpleLoopUnswitch/crash.ll
new file mode 100644
index 00000000000..cf6a19d2540
--- /dev/null
+++ b/llvm/test/Transforms/SimpleLoopUnswitch/crash.ll
@@ -0,0 +1,67 @@
+; RUN: opt < %s -simple-loop-unswitch -disable-output
+; RUN: opt < %s -simple-loop-unswitch -enable-mssa-loop-dependency=true -verify-memoryssa -disable-output
+
+define void @test1(i32* %S2) {
+entry:
+ br i1 false, label %list_Length.exit, label %cond_true.i
+cond_true.i: ; preds = %entry
+ ret void
+list_Length.exit: ; preds = %entry
+ br i1 false, label %list_Length.exit9, label %cond_true.i5
+cond_true.i5: ; preds = %list_Length.exit
+ ret void
+list_Length.exit9: ; preds = %list_Length.exit
+ br i1 false, label %bb78, label %return
+bb44: ; preds = %bb78, %cond_next68
+ br i1 %tmp49.not, label %bb62, label %bb62.loopexit
+bb62.loopexit: ; preds = %bb44
+ br label %bb62
+bb62: ; preds = %bb62.loopexit, %bb44
+ br i1 false, label %return.loopexit, label %cond_next68
+cond_next68: ; preds = %bb62
+ br i1 false, label %return.loopexit, label %bb44
+bb78: ; preds = %list_Length.exit9
+ %tmp49.not = icmp eq i32* %S2, null ; <i1> [#uses=1]
+ br label %bb44
+return.loopexit: ; preds = %cond_next68, %bb62
+ %retval.0.ph = phi i32 [ 1, %cond_next68 ], [ 0, %bb62 ] ; <i32> [#uses=1]
+ br label %return
+return: ; preds = %return.loopexit, %list_Length.exit9
+ %retval.0 = phi i32 [ 0, %list_Length.exit9 ], [ %retval.0.ph, %return.loopexit ] ; <i32> [#uses=0]
+ ret void
+}
+
+define void @test2() nounwind {
+entry:
+ br label %bb.nph
+
+bb.nph: ; preds = %entry
+ %and.i13521 = and <4 x i1> undef, undef ; <<4 x i1>> [#uses=1]
+ br label %for.body
+
+for.body: ; preds = %for.body, %bb.nph
+ %or.i = select <4 x i1> %and.i13521, <4 x i32> undef, <4 x i32> undef ; <<4 x i32>> [#uses=0]
+ br i1 false, label %for.body, label %for.end
+
+for.end: ; preds = %for.body
+ ret void
+}
+
+; PR6879
+define i32* @test3(i32** %p_45, i16 zeroext %p_46, i64 %p_47, i64 %p_48, i16 signext %p_49) nounwind {
+entry:
+ br label %for.cond
+
+for.cond: ; preds = %for.cond4, %entry
+ br i1 false, label %for.cond4, label %for.end88
+
+for.cond4: ; preds = %for.cond
+ %conv46 = trunc i32 0 to i8 ; <i8> [#uses=2]
+ %cmp60 = icmp sgt i8 %conv46, 124 ; <i1> [#uses=1]
+ %or.cond = and i1 undef, %cmp60 ; <i1> [#uses=1]
+ %cond = select i1 %or.cond, i8 %conv46, i8 undef ; <i8> [#uses=0]
+ br label %for.cond
+
+for.end88: ; preds = %for.cond
+ ret i32* undef
+}
diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/delete-dead-blocks.ll b/llvm/test/Transforms/SimpleLoopUnswitch/delete-dead-blocks.ll
new file mode 100644
index 00000000000..aa10670cb4e
--- /dev/null
+++ b/llvm/test/Transforms/SimpleLoopUnswitch/delete-dead-blocks.ll
@@ -0,0 +1,101 @@
+; RUN: opt < %s -simple-loop-unswitch -enable-nontrivial-unswitch -S 2>&1 | FileCheck %s
+; RUN: opt < %s -passes=unswitch -enable-nontrivial-unswitch -S 2>&1 | FileCheck %s
+;
+; Checking that (dead) blocks from inner loop are deleted after unswitch.
+;
+declare void @foo()
+
+; CHECK-LABEL: @Test
+define void @Test(i32) {
+entry:
+ br label %outer
+outer:
+ %oi = phi i32 [ 0, %entry ], [ %oinc, %outer_continue]
+ br label %inner
+inner:
+ %ii = phi i32 [ 0, %outer ], [ %iinc, %continue]
+ call void @foo()
+ switch i32 %0, label %get_out2 [
+ i32 0, label %continue
+ i32 1, label %case1
+ i32 2, label %get_out
+ ]
+;
+; since we unswitch on the above switch, %case1 and %continue blocks
+; become dead in the original loop
+;
+; CHECK-NOT: case1:
+case1:
+ br label %continue
+; CHECK-NOT: {{^}}continue:
+continue:
+ %iinc = add i32 %ii, 1
+ %icmp = icmp eq i32 %ii, 100
+ br i1 %icmp, label %inner, label %outer_continue
+
+outer_continue:
+ %oinc = add i32 %oi, 1
+ %ocmp = icmp eq i32 %oi, 100
+ br i1 %ocmp, label %outer, label %get_out
+
+get_out:
+ ret void
+get_out2:
+ unreachable
+}
+
+;
+; This comes from PR38778
+; CHECK-LABEL: @Test2
+define void @Test2(i32) {
+header:
+ br label %loop
+loop:
+ switch i32 %0, label %continue [
+ i32 -2147483648, label %check
+ i32 98, label %guarded1
+ i32 99, label %guarded2
+ ]
+; CHECK-NOT: {{^}}guarded1:
+guarded1:
+ br i1 undef, label %continue, label %leave
+guarded2:
+ br label %continue
+check:
+ %val = add i32 0, 1
+ br i1 undef, label %continue, label %leave
+continue:
+ br label %loop
+leave:
+ %local = phi i32 [ 0, %guarded1 ], [ %val, %check ]
+ ret void
+}
+
+;
+; Yet another test from PR38778
+;
+; CHECK-LABEL: @Test3
+define void @Test3(i32) {
+header:
+ br label %outer
+outer:
+ %bad_input.i = icmp eq i32 %0, -2147483648
+ br label %inner
+inner:
+ br i1 %bad_input.i, label %overflow, label %switchme
+overflow:
+ br label %continue
+switchme:
+ switch i32 %0, label %continue [
+ i32 88, label %go_out
+ i32 99, label %case2
+ ]
+; CHECK-NOT: {{^}}case2:
+case2:
+ br label %continue
+continue:
+ %local_11_92 = phi i32 [ 0, %switchme ], [ 18, %case2 ], [ 0, %overflow ]
+ br i1 undef, label %outer, label %inner
+go_out:
+ unreachable
+}
diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/exponential-behavior.ll b/llvm/test/Transforms/SimpleLoopUnswitch/exponential-behavior.ll
new file mode 100644
index 00000000000..1c46ddbf51a
--- /dev/null
+++ b/llvm/test/Transforms/SimpleLoopUnswitch/exponential-behavior.ll
@@ -0,0 +1,52 @@
+; RUN: opt -simple-loop-unswitch -S < %s | FileCheck %s
+; RUN: opt -simple-loop-unswitch -enable-mssa-loop-dependency=true -verify-memoryssa -S < %s | FileCheck %s
+
+define void @f(i32 %n, i32* %ptr) {
+; CHECK-LABEL: @f(
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i32 [ 0, %entry ], [ %iv.inc, %be ]
+ %iv.inc = add i32 %iv, 1
+ %unswitch_cond_root = icmp ne i32 %iv.inc, 42
+ %us.0 = and i1 %unswitch_cond_root, %unswitch_cond_root
+ %us.1 = and i1 %us.0, %us.0
+ %us.2 = and i1 %us.1, %us.1
+ %us.3 = and i1 %us.2, %us.2
+ %us.4 = and i1 %us.3, %us.3
+ %us.5 = and i1 %us.4, %us.4
+ %us.6 = and i1 %us.5, %us.5
+ %us.7 = and i1 %us.6, %us.6
+ %us.8 = and i1 %us.7, %us.7
+ %us.9 = and i1 %us.8, %us.8
+ %us.10 = and i1 %us.9, %us.9
+ %us.11 = and i1 %us.10, %us.10
+ %us.12 = and i1 %us.11, %us.11
+ %us.13 = and i1 %us.12, %us.12
+ %us.14 = and i1 %us.13, %us.13
+ %us.15 = and i1 %us.14, %us.14
+ %us.16 = and i1 %us.15, %us.15
+ %us.17 = and i1 %us.16, %us.16
+ %us.18 = and i1 %us.17, %us.17
+ %us.19 = and i1 %us.18, %us.18
+ %us.20 = and i1 %us.19, %us.19
+ %us.21 = and i1 %us.20, %us.20
+ %us.22 = and i1 %us.21, %us.21
+ %us.23 = and i1 %us.22, %us.22
+ %us.24 = and i1 %us.23, %us.23
+ %us.25 = and i1 %us.24, %us.24
+ %us.26 = and i1 %us.25, %us.25
+ %us.27 = and i1 %us.26, %us.26
+ %us.28 = and i1 %us.27, %us.27
+ %us.29 = and i1 %us.28, %us.28
+ br i1 %us.29, label %leave, label %be
+
+be:
+ store volatile i32 0, i32* %ptr
+ %becond = icmp ult i32 %iv.inc, %n
+ br i1 %becond, label %leave, label %loop
+
+leave:
+ ret void
+}
diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/exponential-nontrivial-unswitch-nested.ll b/llvm/test/Transforms/SimpleLoopUnswitch/exponential-nontrivial-unswitch-nested.ll
new file mode 100644
index 00000000000..711c476a5e5
--- /dev/null
+++ b/llvm/test/Transforms/SimpleLoopUnswitch/exponential-nontrivial-unswitch-nested.ll
@@ -0,0 +1,139 @@
+;
+; There should be just a single copy of each loop when strictest multiplier
+; candidates formula (unscaled candidates == 0) is enforced:
+
+; RUN: opt < %s -enable-nontrivial-unswitch -enable-unswitch-cost-multiplier=true \
+; RUN: -unswitch-num-initial-unscaled-candidates=0 -unswitch-siblings-toplevel-div=1 \
+; RUN: -passes='loop(unswitch),print<loops>' -disable-output 2>&1 | FileCheck %s --check-prefixes=LOOP1
+;
+; RUN: opt < %s -enable-nontrivial-unswitch -enable-unswitch-cost-multiplier=true \
+; RUN: -unswitch-num-initial-unscaled-candidates=0 -unswitch-siblings-toplevel-div=16 \
+; RUN: -passes='loop(unswitch),print<loops>' -disable-output 2>&1 | FileCheck %s --check-prefixes=LOOP1
+;
+;
+; When we relax the candidates part of a multiplier formula
+; (unscaled candidates == 4) we start getting some unswitches,
+; which leads to siblings multiplier kicking in.
+;
+; RUN: opt < %s -enable-nontrivial-unswitch -enable-unswitch-cost-multiplier=true \
+; RUN: -unswitch-num-initial-unscaled-candidates=4 -unswitch-siblings-toplevel-div=1 \
+; RUN: -passes='loop(unswitch),print<loops>' -disable-output 2>&1 | \
+; RUN: sort -b -k 1 | FileCheck %s --check-prefixes=LOOP-UNSCALE4-DIV1
+;
+; NB: sort -b is essential here and below, otherwise blanks might lead to different
+; order depending on locale.
+;
+; RUN: opt < %s -enable-nontrivial-unswitch -enable-unswitch-cost-multiplier=true \
+; RUN: -unswitch-num-initial-unscaled-candidates=4 -unswitch-siblings-toplevel-div=2 \
+; RUN: -passes='loop(unswitch),print<loops>' -disable-output 2>&1 | \
+; RUN: sort -b -k 1 | FileCheck %s --check-prefixes=LOOP-UNSCALE4-DIV2
+;
+;
+; Get
+; 2^(num conds) == 2^5 = 32
+; loop nests when cost multiplier is disabled:
+;
+; RUN: opt < %s -enable-nontrivial-unswitch -enable-unswitch-cost-multiplier=false \
+; RUN: -passes='loop(unswitch),print<loops>' -disable-output 2>&1 | \
+; RUN: sort -b -k 1 | FileCheck %s --check-prefixes=LOOP32
+;
+; Single loop nest, not unswitched
+; LOOP1: Loop at depth 1 containing:
+; LOOP1: Loop at depth 2 containing:
+; LOOP1: Loop at depth 3 containing:
+; LOOP1-NOT: Loop at depth {{[0-9]+}} containing:
+;
+; Half unswitched loop nests, with unscaled4 and div1 it gets less depth1 loops unswitched
+; since they have more cost.
+; LOOP-UNSCALE4-DIV1-COUNT-6: Loop at depth 1 containing:
+; LOOP-UNSCALE4-DIV1-COUNT-19: Loop at depth 2 containing:
+; LOOP-UNSCALE4-DIV1-COUNT-29: Loop at depth 3 containing:
+; LOOP-UNSCALE4-DIV1-NOT: Loop at depth {{[0-9]+}} containing:
+;
+; Half unswitched loop nests, with unscaled4 and div2 it gets more depth1 loops unswitched
+; as div2 kicks in.
+; LOOP-UNSCALE4-DIV2-COUNT-11: Loop at depth 1 containing:
+; LOOP-UNSCALE4-DIV2-COUNT-22: Loop at depth 2 containing:
+; LOOP-UNSCALE4-DIV2-COUNT-29: Loop at depth 3 containing:
+; LOOP-UNSCALE4-DIV2-NOT: Loop at depth {{[0-9]+}} containing:
+;
+; 32 loop nests, fully unswitched
+; LOOP32-COUNT-32: Loop at depth 1 containing:
+; LOOP32-COUNT-32: Loop at depth 2 containing:
+; LOOP32-COUNT-32: Loop at depth 3 containing:
+; LOOP32-NOT: Loop at depth {{[0-9]+}} containing:
+
+declare void @bar()
+
+define void @loop_nested3_conds5(i32* %addr, i1 %c1, i1 %c2, i1 %c3, i1 %c4, i1 %c5) {
+entry:
+ %addr1 = getelementptr i32, i32* %addr, i64 0
+ %addr2 = getelementptr i32, i32* %addr, i64 1
+ %addr3 = getelementptr i32, i32* %addr, i64 2
+ br label %outer
+outer:
+ %iv1 = phi i32 [0, %entry], [%iv1.next, %outer_latch]
+ %iv1.next = add i32 %iv1, 1
+ ;; skip nontrivial unswitch
+ call void @bar()
+ br label %middle
+middle:
+ %iv2 = phi i32 [0, %outer], [%iv2.next, %middle_latch]
+ %iv2.next = add i32 %iv2, 1
+ ;; skip nontrivial unswitch
+ call void @bar()
+ br label %loop
+loop:
+ %iv3 = phi i32 [0, %middle], [%iv3.next, %loop_latch]
+ %iv3.next = add i32 %iv3, 1
+ ;; skip nontrivial unswitch
+ call void @bar()
+ br i1 %c1, label %loop_next1_left, label %loop_next1_right
+loop_next1_left:
+ br label %loop_next1
+loop_next1_right:
+ br label %loop_next1
+
+loop_next1:
+ br i1 %c2, label %loop_next2_left, label %loop_next2_right
+loop_next2_left:
+ br label %loop_next2
+loop_next2_right:
+ br label %loop_next2
+
+loop_next2:
+ br i1 %c3, label %loop_next3_left, label %loop_next3_right
+loop_next3_left:
+ br label %loop_next3
+loop_next3_right:
+ br label %loop_next3
+
+loop_next3:
+ br i1 %c4, label %loop_next4_left, label %loop_next4_right
+loop_next4_left:
+ br label %loop_next4
+loop_next4_right:
+ br label %loop_next4
+
+loop_next4:
+ br i1 %c5, label %loop_latch_left, label %loop_latch_right
+loop_latch_left:
+ br label %loop_latch
+loop_latch_right:
+ br label %loop_latch
+
+loop_latch:
+ store volatile i32 0, i32* %addr1
+ %test_loop = icmp slt i32 %iv3, 50
+ br i1 %test_loop, label %loop, label %middle_latch
+middle_latch:
+ store volatile i32 0, i32* %addr2
+ %test_middle = icmp slt i32 %iv2, 50
+ br i1 %test_middle, label %middle, label %outer_latch
+outer_latch:
+ store volatile i32 0, i32* %addr3
+ %test_outer = icmp slt i32 %iv1, 50
+ br i1 %test_outer, label %outer, label %exit
+exit:
+ ret void
+}
diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/exponential-nontrivial-unswitch-nested2.ll b/llvm/test/Transforms/SimpleLoopUnswitch/exponential-nontrivial-unswitch-nested2.ll
new file mode 100644
index 00000000000..447d42beeb4
--- /dev/null
+++ b/llvm/test/Transforms/SimpleLoopUnswitch/exponential-nontrivial-unswitch-nested2.ll
@@ -0,0 +1,149 @@
+;
+; Here all the branches we unswitch are exiting from the inner loop.
+; That means we should not be getting exponential behavior on inner-loop
+; unswitch. In fact there should be just a single version of inner-loop,
+; with possibly some outer loop copies.
+;
+; There should be just a single copy of each loop when strictest multiplier
+; candidates formula (unscaled candidates == 0) is enforced:
+
+; RUN: opt < %s -enable-nontrivial-unswitch -enable-unswitch-cost-multiplier=true \
+; RUN: -unswitch-num-initial-unscaled-candidates=0 -unswitch-siblings-toplevel-div=1 \
+; RUN: -passes='loop(unswitch),print<loops>' -disable-output 2>&1 | FileCheck %s --check-prefixes=LOOP1
+;
+; RUN: opt < %s -enable-nontrivial-unswitch -enable-unswitch-cost-multiplier=true \
+; RUN: -unswitch-num-initial-unscaled-candidates=0 -unswitch-siblings-toplevel-div=16 \
+; RUN: -passes='loop(unswitch),print<loops>' -disable-output 2>&1 | FileCheck %s --check-prefixes=LOOP1
+;
+;
+; When we relax the candidates part of a multiplier formula
+; (unscaled candidates == 3) we start getting some unswitches in outer loops,
+; which leads to siblings multiplier kicking in.
+;
+; RUN: opt < %s -enable-nontrivial-unswitch -enable-unswitch-cost-multiplier=true \
+; RUN: -unswitch-num-initial-unscaled-candidates=3 -unswitch-siblings-toplevel-div=1 \
+; RUN: -passes='loop(unswitch),print<loops>' -disable-output 2>&1 | \
+; RUN: sort -b -k 1 | FileCheck %s --check-prefixes=LOOP-UNSCALE3-DIV1
+;
+; NB: sort -b is essential here and below, otherwise blanks might lead to different
+; order depending on locale.
+;
+; RUN: opt < %s -enable-nontrivial-unswitch -enable-unswitch-cost-multiplier=true \
+; RUN: -unswitch-num-initial-unscaled-candidates=3 -unswitch-siblings-toplevel-div=2 \
+; RUN: -passes='loop(unswitch),print<loops>' -disable-output 2>&1 | \
+; RUN: sort -b -k 1 | FileCheck %s --check-prefixes=LOOP-UNSCALE3-DIV2
+;
+; With disabled cost-multiplier we get maximal possible amount of unswitches.
+;
+; RUN: opt < %s -enable-nontrivial-unswitch -enable-unswitch-cost-multiplier=false \
+; RUN: -passes='loop(unswitch),print<loops>' -disable-output 2>&1 | \
+; RUN: sort -b -k 1 | FileCheck %s --check-prefixes=LOOP-MAX
+;
+; Single loop nest, not unswitched
+; LOOP1: Loop at depth 1 containing:
+; LOOP1-NOT: Loop at depth 1 containing:
+; LOOP1: Loop at depth 2 containing:
+; LOOP1-NOT: Loop at depth 2 containing:
+; LOOP1: Loop at depth 3 containing:
+; LOOP1-NOT: Loop at depth 3 containing:
+;
+; Half unswitched loop nests, with unscaled3 and div1 it gets fewer depth1 loops unswitched
+; since they have more cost.
+; LOOP-UNSCALE3-DIV1-COUNT-4: Loop at depth 1 containing:
+; LOOP-UNSCALE3-DIV1-NOT: Loop at depth 1 containing:
+; LOOP-UNSCALE3-DIV1-COUNT-1: Loop at depth 2 containing:
+; LOOP-UNSCALE3-DIV1-NOT: Loop at depth 2 containing:
+; LOOP-UNSCALE3-DIV1-COUNT-1: Loop at depth 3 containing:
+; LOOP-UNSCALE3-DIV1-NOT: Loop at depth 3 containing:
+;
+; Half unswitched loop nests, with unscaled3 and div2 it gets more depth1 loops unswitched
+; as div2 kicks in.
+; LOOP-UNSCALE3-DIV2-COUNT-6: Loop at depth 1 containing:
+; LOOP-UNSCALE3-DIV2-NOT: Loop at depth 1 containing:
+; LOOP-UNSCALE3-DIV2-COUNT-1: Loop at depth 2 containing:
+; LOOP-UNSCALE3-DIV2-NOT: Loop at depth 2 containing:
+; LOOP-UNSCALE3-DIV2-COUNT-1: Loop at depth 3 containing:
+; LOOP-UNSCALE3-DIV2-NOT: Loop at depth 3 containing:
+;
+; Maximally unswitched (copy of the outer loop per each condition)
+; LOOP-MAX-COUNT-6: Loop at depth 1 containing:
+; LOOP-MAX-NOT: Loop at depth 1 containing:
+; LOOP-MAX-COUNT-1: Loop at depth 2 containing:
+; LOOP-MAX-NOT: Loop at depth 2 containing:
+; LOOP-MAX-COUNT-1: Loop at depth 3 containing:
+; LOOP-MAX-NOT: Loop at depth 3 containing:
+
+declare void @bar()
+
+define void @loop_nested3_conds5(i32* %addr, i1 %c1, i1 %c2, i1 %c3, i1 %c4, i1 %c5) {
+entry:
+ %addr1 = getelementptr i32, i32* %addr, i64 0
+ %addr2 = getelementptr i32, i32* %addr, i64 1
+ %addr3 = getelementptr i32, i32* %addr, i64 2
+ br label %outer
+outer:
+ %iv1 = phi i32 [0, %entry], [%iv1.next, %outer_latch]
+ %iv1.next = add i32 %iv1, 1
+ ;; skip nontrivial unswitch
+ call void @bar()
+ br label %middle
+middle:
+ %iv2 = phi i32 [0, %outer], [%iv2.next, %middle_latch]
+ %iv2.next = add i32 %iv2, 1
+ ;; skip nontrivial unswitch
+ call void @bar()
+ br label %loop
+loop:
+ %iv3 = phi i32 [0, %middle], [%iv3.next, %loop_latch]
+ %iv3.next = add i32 %iv3, 1
+ ;; skip nontrivial unswitch
+ call void @bar()
+ br i1 %c1, label %loop_next1_left, label %outer_latch
+loop_next1_left:
+ br label %loop_next1
+loop_next1_right:
+ br label %loop_next1
+
+loop_next1:
+ br i1 %c2, label %loop_next2_left, label %outer_latch
+loop_next2_left:
+ br label %loop_next2
+loop_next2_right:
+ br label %loop_next2
+
+loop_next2:
+ br i1 %c3, label %loop_next3_left, label %outer_latch
+loop_next3_left:
+ br label %loop_next3
+loop_next3_right:
+ br label %loop_next3
+
+loop_next3:
+ br i1 %c4, label %loop_next4_left, label %outer_latch
+loop_next4_left:
+ br label %loop_next4
+loop_next4_right:
+ br label %loop_next4
+
+loop_next4:
+ br i1 %c5, label %loop_latch_left, label %outer_latch
+loop_latch_left:
+ br label %loop_latch
+loop_latch_right:
+ br label %loop_latch
+
+loop_latch:
+ store volatile i32 0, i32* %addr1
+ %test_loop = icmp slt i32 %iv3, 50
+ br i1 %test_loop, label %loop, label %middle_latch
+middle_latch:
+ store volatile i32 0, i32* %addr2
+ %test_middle = icmp slt i32 %iv2, 50
+ br i1 %test_middle, label %middle, label %outer_latch
+outer_latch:
+ store volatile i32 0, i32* %addr3
+ %test_outer = icmp slt i32 %iv1, 50
+ br i1 %test_outer, label %outer, label %exit
+exit:
+ ret void
+}
diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/exponential-nontrivial-unswitch.ll b/llvm/test/Transforms/SimpleLoopUnswitch/exponential-nontrivial-unswitch.ll
new file mode 100644
index 00000000000..d013c4f6362
--- /dev/null
+++ b/llvm/test/Transforms/SimpleLoopUnswitch/exponential-nontrivial-unswitch.ll
@@ -0,0 +1,80 @@
+;
+; There should be just a single copy of loop when strictest multiplier candidates
+; formula (unscaled candidates == 0) is enforced:
+;
+; RUN: opt < %s -enable-nontrivial-unswitch -enable-unswitch-cost-multiplier=true \
+; RUN: -unswitch-num-initial-unscaled-candidates=0 -unswitch-siblings-toplevel-div=1 \
+; RUN: -passes='loop(unswitch),print<loops>' -disable-output 2>&1 | FileCheck %s --check-prefixes=LOOP1
+;
+; RUN: opt < %s -enable-nontrivial-unswitch -enable-unswitch-cost-multiplier=true \
+; RUN: -unswitch-num-initial-unscaled-candidates=0 -unswitch-siblings-toplevel-div=8 \
+; RUN: -passes='loop(unswitch),print<loops>' -disable-output 2>&1 | FileCheck %s --check-prefixes=LOOP1
+;
+; With relaxed candidates multiplier (unscaled candidates == 8) we should allow
+; some unswitches to happen until siblings multiplier starts kicking in:
+;
+; RUN: opt < %s -enable-nontrivial-unswitch -enable-unswitch-cost-multiplier=true \
+; RUN: -unswitch-num-initial-unscaled-candidates=8 -unswitch-siblings-toplevel-div=1 \
+; RUN: -passes='loop(unswitch),print<loops>' -disable-output 2>&1 | FileCheck %s --check-prefixes=LOOP5
+;
+; With relaxed candidates multiplier (unscaled candidates == 8) and with relaxed
+; siblings multiplier for top-level loops (toplevel-div == 8) we should get
+; 2^(num conds) == 2^5 == 32
+; copies of the loop:
+;
+; RUN: opt < %s -enable-nontrivial-unswitch -enable-unswitch-cost-multiplier=true \
+; RUN: -unswitch-num-initial-unscaled-candidates=8 -unswitch-siblings-toplevel-div=8 \
+; RUN: -passes='loop(unswitch),print<loops>' -disable-output 2>&1 | FileCheck %s --check-prefixes=LOOP32
+;
+; Similarly get
+; 2^(num conds) == 2^5 == 32
+; copies of the loop when cost multiplier is disabled:
+;
+; RUN: opt < %s -enable-nontrivial-unswitch -enable-unswitch-cost-multiplier=false \
+; RUN: -passes='loop(unswitch),print<loops>' -disable-output 2>&1 | FileCheck %s --check-prefixes=LOOP32
+;
+;
+; Single loop, not unswitched
+; LOOP1: Loop at depth 1 containing:
+; LOOP1-NOT: Loop at depth 1 containing:
+
+; 5 loops, unswitched 4 times
+; LOOP5-COUNT-5: Loop at depth 1 containing:
+; LOOP5-NOT: Loop at depth 1 containing:
+
+; 32 loops, fully unswitched
+; LOOP32-COUNT-32: Loop at depth 1 containing:
+; LOOP32-NOT: Loop at depth 1 containing:
+
+define void @loop_simple5(i32* %addr, i1 %c1, i1 %c2, i1 %c3, i1 %c4, i1 %c5) {
+entry:
+ br label %loop
+loop:
+ %iv = phi i32 [0, %entry], [%iv.next, %loop_latch]
+ %iv.next = add i32 %iv, 1
+ br i1 %c1, label %loop_next1, label %loop_next1_right
+loop_next1_right:
+ br label %loop_next1
+loop_next1:
+ br i1 %c2, label %loop_next2, label %loop_next2_right
+loop_next2_right:
+ br label %loop_next2
+loop_next2:
+ br i1 %c3, label %loop_next3, label %loop_next3_right
+loop_next3_right:
+ br label %loop_next3
+loop_next3:
+ br i1 %c4, label %loop_next4, label %loop_next4_right
+loop_next4_right:
+ br label %loop_next4
+loop_next4:
+ br i1 %c5, label %loop_latch, label %loop_latch_right
+loop_latch_right:
+ br label %loop_latch
+loop_latch:
+ store volatile i32 0, i32* %addr
+ %test_loop = icmp slt i32 %iv, 50
+ br i1 %test_loop, label %loop, label %exit
+exit:
+ ret void
+}
diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/exponential-nontrivial-unswitch2.ll b/llvm/test/Transforms/SimpleLoopUnswitch/exponential-nontrivial-unswitch2.ll
new file mode 100644
index 00000000000..b9875406933
--- /dev/null
+++ b/llvm/test/Transforms/SimpleLoopUnswitch/exponential-nontrivial-unswitch2.ll
@@ -0,0 +1,56 @@
+;
+; Here all the branches are exiting ones. Checking that we don't have
+; exponential behavior with any kind of controlling heuristics here.
+;
+; There we should have just a single loop.
+;
+; RUN: opt < %s -enable-nontrivial-unswitch -enable-unswitch-cost-multiplier=true \
+; RUN: -unswitch-num-initial-unscaled-candidates=0 -unswitch-siblings-toplevel-div=1 \
+; RUN: -passes='loop(unswitch),print<loops>' -disable-output 2>&1 | FileCheck %s --check-prefixes=LOOP1
+;
+; RUN: opt < %s -enable-nontrivial-unswitch -enable-unswitch-cost-multiplier=true \
+; RUN: -unswitch-num-initial-unscaled-candidates=0 -unswitch-siblings-toplevel-div=8 \
+; RUN: -passes='loop(unswitch),print<loops>' -disable-output 2>&1 | FileCheck %s --check-prefixes=LOOP1
+;
+; RUN: opt < %s -enable-nontrivial-unswitch -enable-unswitch-cost-multiplier=true \
+; RUN: -unswitch-num-initial-unscaled-candidates=8 -unswitch-siblings-toplevel-div=1 \
+; RUN: -passes='loop(unswitch),print<loops>' -disable-output 2>&1 | FileCheck %s --check-prefixes=LOOP1
+;
+; RUN: opt < %s -enable-nontrivial-unswitch -enable-unswitch-cost-multiplier=true \
+; RUN: -unswitch-num-initial-unscaled-candidates=8 -unswitch-siblings-toplevel-div=8 \
+; RUN: -passes='loop(unswitch),print<loops>' -disable-output 2>&1 | FileCheck %s --check-prefixes=LOOP1
+;
+; RUN: opt < %s -enable-nontrivial-unswitch -enable-unswitch-cost-multiplier=false \
+; RUN: -passes='loop(unswitch),print<loops>' -disable-output 2>&1 | FileCheck %s --check-prefixes=LOOP1
+;
+;
+; Single loop, not unswitched
+; LOOP1: Loop at depth 1 containing:
+; LOOP1-NOT: Loop at depth 1 containing:
+
+declare void @bar()
+
+define void @loop_simple5(i32* %addr, i1 %c1, i1 %c2, i1 %c3, i1 %c4, i1 %c5) {
+entry:
+ br label %loop
+loop:
+ %iv = phi i32 [0, %entry], [%iv.next, %loop_latch]
+ %iv.next = add i32 %iv, 1
+ ;; disabling trivial unswitch
+ call void @bar()
+ br i1 %c1, label %loop_next1, label %exit
+loop_next1:
+ br i1 %c2, label %loop_next2, label %exit
+loop_next2:
+ br i1 %c3, label %loop_next3, label %exit
+loop_next3:
+ br i1 %c4, label %loop_next4, label %exit
+loop_next4:
+ br i1 %c5, label %loop_latch, label %exit
+loop_latch:
+ store volatile i32 0, i32* %addr
+ %test_loop = icmp slt i32 %iv, 50
+ br i1 %test_loop, label %loop, label %exit
+exit:
+ ret void
+}
diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/exponential-switch-unswitch.ll b/llvm/test/Transforms/SimpleLoopUnswitch/exponential-switch-unswitch.ll
new file mode 100644
index 00000000000..407b632764e
--- /dev/null
+++ b/llvm/test/Transforms/SimpleLoopUnswitch/exponential-switch-unswitch.ll
@@ -0,0 +1,118 @@
+;
+; Here we have 5-way unswitchable switch with each successor also having an unswitchable
+; exiting branch in it. If we start unswitching those branches we start duplicating the
+; whole switch. This can easily lead to exponential behavior w/o proper control.
+; On a real-life testcase there was 16-way switch and that took forever to compile w/o
+; a cost control.
+;
+;
+; When we use the strictest multiplier candidates formula (unscaled candidates == 0)
+; we should be getting just a single loop.
+;
+; RUN: opt < %s -enable-nontrivial-unswitch -enable-unswitch-cost-multiplier=true \
+; RUN: -unswitch-num-initial-unscaled-candidates=0 -unswitch-siblings-toplevel-div=1 \
+; RUN: -passes='loop(unswitch),print<loops>' -disable-output 2>&1 | FileCheck %s --check-prefixes=LOOP1
+;
+; RUN: opt < %s -enable-nontrivial-unswitch -enable-unswitch-cost-multiplier=true \
+; RUN: -unswitch-num-initial-unscaled-candidates=0 -unswitch-siblings-toplevel-div=16 \
+; RUN: -passes='loop(unswitch),print<loops>' -disable-output 2>&1 | FileCheck %s --check-prefixes=LOOP1
+;
+;
+; With relaxed candidates multiplier (unscaled candidates == 8) we should allow
+; some unswitches to happen until siblings multiplier starts kicking in:
+;
+; RUN: opt < %s -enable-nontrivial-unswitch -enable-unswitch-cost-multiplier=true \
+; RUN: -unswitch-num-initial-unscaled-candidates=8 -unswitch-siblings-toplevel-div=1 \
+; RUN: -passes='loop(unswitch),print<loops>' -disable-output 2>&1 | \
+; RUN: sort -b -k 1 | FileCheck %s --check-prefixes=LOOP-RELAX
+;
+; With relaxed candidates multiplier (unscaled candidates == 8) and with relaxed
+; siblings multiplier for top-level loops (toplevel-div == 8) we should get
+; considerably more copies of the loop (especially top-level ones).
+;
+; RUN: opt < %s -enable-nontrivial-unswitch -enable-unswitch-cost-multiplier=true \
+; RUN: -unswitch-num-initial-unscaled-candidates=8 -unswitch-siblings-toplevel-div=8 \
+; RUN: -passes='loop(unswitch),print<loops>' -disable-output 2>&1 | \
+; RUN: sort -b -k 1 | FileCheck %s --check-prefixes=LOOP-RELAX2
+;
+; We get hundreds of copies of the loop when cost multiplier is disabled:
+;
+; RUN: opt < %s -enable-nontrivial-unswitch -enable-unswitch-cost-multiplier=false \
+; RUN: -passes='loop(unswitch),print<loops>' -disable-output 2>&1 | \
+; RUN: sort -b -k 1 | FileCheck %s --check-prefixes=LOOP-MAX
+;
+
+; Single loop nest, not unswitched
+; LOOP1: Loop at depth 1 containing:
+; LOOP1-NOT: Loop at depth 1 containing:
+; LOOP1: Loop at depth 2 containing:
+; LOOP1-NOT: Loop at depth 2 containing:
+;
+; Somewhat relaxed restrictions on candidates:
+; LOOP-RELAX-COUNT-5: Loop at depth 1 containing:
+; LOOP-RELAX-NOT: Loop at depth 1 containing:
+; LOOP-RELAX-COUNT-32: Loop at depth 2 containing:
+; LOOP-RELAX-NOT: Loop at depth 2 containing:
+;
+; Even more relaxed restrictions on candidates and siblings.
+; LOOP-RELAX2-COUNT-11: Loop at depth 1 containing:
+; LOOP-RELAX2-NOT: Loop at depth 1 containing:
+; LOOP-RELAX2-COUNT-40: Loop at depth 2 containing:
+; LOOP-RELAX2-NOT: Loop at depth 2 containing:
+;
+; Unswitched as much as it could (with multiplier disabled).
+; LOOP-MAX-COUNT-56: Loop at depth 1 containing:
+; LOOP-MAX-NOT: Loop at depth 1 containing:
+; LOOP-MAX-COUNT-111: Loop at depth 2 containing:
+; LOOP-MAX-NOT: Loop at depth 2 containing:
+
+define i32 @loop_switch(i32* %addr, i32 %c1, i32 %c2) {
+entry:
+ %addr1 = getelementptr i32, i32* %addr, i64 0
+ %addr2 = getelementptr i32, i32* %addr, i64 1
+ %check0 = icmp eq i32 %c2, 0
+ %check1 = icmp eq i32 %c2, 31
+ %check2 = icmp eq i32 %c2, 32
+ %check3 = icmp eq i32 %c2, 33
+ %check4 = icmp eq i32 %c2, 34
+ br label %outer_loop
+
+outer_loop:
+ %iv1 = phi i32 [0, %entry], [%iv1.next, %outer_latch]
+ %iv1.next = add i32 %iv1, 1
+ br label %inner_loop
+inner_loop:
+ %iv2 = phi i32 [0, %outer_loop], [%iv2.next, %inner_latch]
+ %iv2.next = add i32 %iv2, 1
+ switch i32 %c1, label %inner_latch [
+ i32 0, label %case0
+ i32 1, label %case1
+ i32 2, label %case2
+ i32 3, label %case3
+ i32 4, label %case4
+ ]
+
+case4:
+ br i1 %check4, label %exit, label %inner_latch
+case3:
+ br i1 %check3, label %exit, label %inner_latch
+case2:
+ br i1 %check2, label %exit, label %inner_latch
+case1:
+ br i1 %check1, label %exit, label %inner_latch
+case0:
+ br i1 %check0, label %exit, label %inner_latch
+
+inner_latch:
+ store volatile i32 0, i32* %addr1
+ %test_inner = icmp slt i32 %iv2, 50
+ br i1 %test_inner, label %inner_loop, label %outer_latch
+
+outer_latch:
+ store volatile i32 0, i32* %addr2
+ %test_outer = icmp slt i32 %iv1, 50
+ br i1 %test_outer, label %outer_loop, label %exit
+
+exit: ; preds = %outer_latch, %case0, %case1, %case2, %case3, %case4
+ ret i32 1
+}
diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/formDedicatedAfterTrivial1.ll b/llvm/test/Transforms/SimpleLoopUnswitch/formDedicatedAfterTrivial1.ll
new file mode 100644
index 00000000000..b6cbbc48a4d
--- /dev/null
+++ b/llvm/test/Transforms/SimpleLoopUnswitch/formDedicatedAfterTrivial1.ll
@@ -0,0 +1,25 @@
+; RUN: opt < %s -simple-loop-unswitch -disable-output
+
+; PR38283
+; PR38737
+define void @f1() {
+for.cond1thread-pre-split.lr.ph.lr.ph:
+ %tobool4 = icmp eq i16 undef, 0
+ br label %for.cond1thread-pre-split
+
+for.cond1thread-pre-split: ; preds = %if.end, %for.cond1thread-pre-split.lr.ph.lr.ph
+ %tobool3 = icmp eq i16 undef, 0
+ br label %for.body2
+
+for.body2: ; preds = %if.end6, %for.cond1thread-pre-split
+ br i1 %tobool3, label %if.end, label %for.end
+
+if.end: ; preds = %for.body2
+ br i1 %tobool4, label %if.end6, label %for.cond1thread-pre-split
+
+if.end6: ; preds = %if.end
+ br i1 undef, label %for.body2, label %for.end
+
+for.end: ; preds = %if.end6, %for.body2
+ ret void
+}
diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/formDedicatedAfterTrivial2.ll b/llvm/test/Transforms/SimpleLoopUnswitch/formDedicatedAfterTrivial2.ll
new file mode 100644
index 00000000000..1ac4bbd21b5
--- /dev/null
+++ b/llvm/test/Transforms/SimpleLoopUnswitch/formDedicatedAfterTrivial2.ll
@@ -0,0 +1,22 @@
+; RUN: opt < %s -simple-loop-unswitch -disable-output
+
+; PR38283
+; PR38737
+define void @Test(i32) {
+entry:
+ %trunc = trunc i32 %0 to i3
+ br label %outer
+outer:
+ br label %inner
+inner:
+ switch i3 %trunc, label %crit_edge [
+ i3 2, label %break
+ i3 1, label %loopexit
+ ]
+crit_edge:
+ br i1 true, label %loopexit, label %inner
+loopexit:
+ ret void
+break:
+ br label %outer
+}
diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/formDedicatedAfterTrivial3.ll b/llvm/test/Transforms/SimpleLoopUnswitch/formDedicatedAfterTrivial3.ll
new file mode 100644
index 00000000000..64f285db17f
--- /dev/null
+++ b/llvm/test/Transforms/SimpleLoopUnswitch/formDedicatedAfterTrivial3.ll
@@ -0,0 +1,37 @@
+; RUN: opt < %s -simple-loop-unswitch -disable-output
+
+; PR38283
+; PR38737
+declare void @func_1()
+
+define void @func_9(i32 signext %arg) {
+bb:
+ br label %bb5
+bb5: ; preds = %bb24, %bb
+ %tmp3.0 = phi i32 [ undef, %bb ], [ %tmp29, %bb24 ]
+ %tmp11 = icmp eq i32 %arg, 0
+ %tmp15 = icmp eq i32 %tmp3.0, 0
+ %spec.select = select i1 %tmp15, i32 0, i32 49
+ %tmp1.2 = select i1 %tmp11, i32 %spec.select, i32 9
+ %trunc = trunc i32 %tmp1.2 to i6
+ br label %bb9
+
+bb9: ; preds = %bb5, %bb19
+ %tmp2.03 = phi i32 [ 0, %bb5 ], [ %tmp21, %bb19 ]
+ switch i6 %trunc, label %bb24 [
+ i6 0, label %bb19
+ i6 -15, label %bb22
+ ]
+
+bb19: ; preds = %bb9
+ %tmp21 = add nuw nsw i32 %tmp2.03, 1
+ %tmp8 = icmp eq i32 %tmp21, 25
+ br i1 %tmp8, label %bb22, label %bb9
+
+bb22: ; preds = %bb19, %bb9
+ unreachable
+
+bb24: ; preds = %bb9
+ %tmp29 = or i32 %tmp3.0, 1
+ br label %bb5
+}
diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/guards.ll b/llvm/test/Transforms/SimpleLoopUnswitch/guards.ll
new file mode 100644
index 00000000000..f1b92524332
--- /dev/null
+++ b/llvm/test/Transforms/SimpleLoopUnswitch/guards.ll
@@ -0,0 +1,239 @@
+; RUN: opt -passes='loop(unswitch),verify<loops>' -enable-nontrivial-unswitch -simple-loop-unswitch-guards -S < %s | FileCheck %s
+; RUN: opt -simple-loop-unswitch -enable-nontrivial-unswitch -simple-loop-unswitch-guards -S < %s | FileCheck %s
+; RUN: opt -passes='loop(unswitch),verify<loops>' -enable-nontrivial-unswitch -simple-loop-unswitch-guards -enable-mssa-loop-dependency=true -verify-memoryssa -S < %s | FileCheck %s
+
+declare void @llvm.experimental.guard(i1, ...)
+
+define void @test_simple_case(i1 %cond, i32 %N) {
+; CHECK-LABEL: @test_simple_case(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 [[COND:%.*]], label [[ENTRY_SPLIT_US:%.*]], label [[ENTRY_SPLIT:%.*]]
+; CHECK: entry.split.us:
+; CHECK-NEXT: br label [[LOOP_US:%.*]]
+; CHECK: loop.us:
+; CHECK-NEXT: [[IV_US:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT_US]] ], [ [[IV_NEXT_US:%.*]], [[GUARDED_US:%.*]] ]
+; CHECK-NEXT: br label [[GUARDED_US]]
+; CHECK: guarded.us:
+; CHECK-NEXT: [[IV_NEXT_US]] = add i32 [[IV_US]], 1
+; CHECK-NEXT: [[LOOP_COND_US:%.*]] = icmp slt i32 [[IV_NEXT_US]], [[N:%.*]]
+; CHECK-NEXT: br i1 [[LOOP_COND_US]], label [[LOOP_US]], label [[EXIT_SPLIT_US:%.*]]
+; CHECK: deopt:
+; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 false) [ "deopt"() ]
+; CHECK-NEXT: unreachable
+;
+
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+ call void (i1, ...) @llvm.experimental.guard(i1 %cond) [ "deopt"() ]
+ %iv.next = add i32 %iv, 1
+ %loop.cond = icmp slt i32 %iv.next, %N
+ br i1 %loop.cond, label %loop, label %exit
+
+exit:
+ ret void
+}
+
+define void @test_two_guards(i1 %cond1, i1 %cond2, i32 %N) {
+; CHECK-LABEL: @test_two_guards(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 [[COND1:%.*]], label [[ENTRY_SPLIT_US:%.*]], label [[ENTRY_SPLIT:%.*]]
+; CHECK: entry.split.us:
+; CHECK-NEXT: br i1 [[COND2:%.*]], label [[ENTRY_SPLIT_US_SPLIT_US:%.*]], label [[ENTRY_SPLIT_US_SPLIT:%.*]]
+; CHECK: entry.split.us.split.us:
+; CHECK-NEXT: br label [[LOOP_US_US:%.*]]
+; CHECK: loop.us.us:
+; CHECK-NEXT: [[IV_US_US:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT_US_SPLIT_US]] ], [ [[IV_NEXT_US_US:%.*]], [[GUARDED_US2:%.*]] ]
+; CHECK-NEXT: br label [[GUARDED_US_US:%.*]]
+; CHECK: guarded.us.us:
+; CHECK-NEXT: br label [[GUARDED_US2]]
+; CHECK: guarded.us2:
+; CHECK-NEXT: [[IV_NEXT_US_US]] = add i32 [[IV_US_US]], 1
+; CHECK-NEXT: [[LOOP_COND_US_US:%.*]] = icmp slt i32 [[IV_NEXT_US_US]], [[N:%.*]]
+; CHECK-NEXT: br i1 [[LOOP_COND_US_US]], label [[LOOP_US_US]], label [[EXIT_SPLIT_US_SPLIT_US:%.*]]
+; CHECK: deopt1:
+; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 false) [ "deopt"() ]
+; CHECK-NEXT: unreachable
+; CHECK: deopt:
+; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 false) [ "deopt"() ]
+; CHECK-NEXT: unreachable
+; CHECK: exit:
+; CHECK-NEXT: ret void
+;
+
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+ call void (i1, ...) @llvm.experimental.guard(i1 %cond1) [ "deopt"() ]
+ call void (i1, ...) @llvm.experimental.guard(i1 %cond2) [ "deopt"() ]
+ %iv.next = add i32 %iv, 1
+ %loop.cond = icmp slt i32 %iv.next, %N
+ br i1 %loop.cond, label %loop, label %exit
+
+exit:
+ ret void
+}
+
+define void @test_conditional_guards(i1 %cond, i32 %N) {
+; CHECK-LABEL: @test_conditional_guards(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 [[COND:%.*]], label [[ENTRY_SPLIT_US:%.*]], label [[ENTRY_SPLIT:%.*]]
+; CHECK: entry.split.us:
+; CHECK-NEXT: br label [[LOOP_US:%.*]]
+; CHECK: loop.us:
+; CHECK-NEXT: [[IV_US:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT_US]] ], [ [[IV_NEXT_US:%.*]], [[BACKEDGE_US:%.*]] ]
+; CHECK-NEXT: [[CONDITION_US:%.*]] = icmp eq i32 [[IV_US]], 123
+; CHECK-NEXT: br i1 [[CONDITION_US]], label [[GUARD_US:%.*]], label [[BACKEDGE_US]]
+; CHECK: guard.us:
+; CHECK-NEXT: br label [[GUARDED_US:%.*]]
+; CHECK: backedge.us:
+; CHECK-NEXT: [[IV_NEXT_US]] = add i32 [[IV_US]], 1
+; CHECK-NEXT: [[LOOP_COND_US:%.*]] = icmp slt i32 [[IV_NEXT_US]], [[N:%.*]]
+; CHECK-NEXT: br i1 [[LOOP_COND_US]], label [[LOOP_US]], label [[EXIT_SPLIT_US:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ]
+; CHECK-NEXT: [[CONDITION:%.*]] = icmp eq i32 [[IV]], 123
+; CHECK-NEXT: br i1 [[CONDITION]], label [[GUARD:%.*]], label [[BACKEDGE]]
+; CHECK: guard:
+; CHECK-NEXT: br label [[DEOPT:%.*]]
+; CHECK: deopt:
+; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 false) [ "deopt"() ]
+; CHECK-NEXT: unreachable
+; CHECK: backedge:
+; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
+; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp slt i32 [[IV_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[LOOP_COND]], label %loop, label [[EXIT_SPLIT:%.*]]
+;
+
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i32 [ 0, %entry ], [ %iv.next, %backedge ]
+ %condition = icmp eq i32 %iv, 123
+ br i1 %condition, label %guard, label %backedge
+
+guard:
+ call void (i1, ...) @llvm.experimental.guard(i1 %cond) [ "deopt"() ]
+ br label %backedge
+
+backedge:
+ %iv.next = add i32 %iv, 1
+ %loop.cond = icmp slt i32 %iv.next, %N
+ br i1 %loop.cond, label %loop, label %exit
+
+exit:
+ ret void
+}
+
+define void @test_nested_loop(i1 %cond, i32 %N) {
+; CHECK-LABEL: @test_nested_loop(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 [[COND:%.*]], label [[ENTRY_SPLIT:%.*]], label [[OUTER_LOOP_SPLIT:%.*]]
+; CHECK: entry.split:
+; CHECK-NEXT: br label [[OUTER_LOOP:%.*]]
+; CHECK: outer_loop:
+; CHECK-NEXT: br label [[OUTER_LOOP_SPLIT_US:%.*]]
+; CHECK: outer_loop.split.us:
+; CHECK-NEXT: br label [[LOOP_US:%.*]]
+; CHECK: loop.us:
+; CHECK-NEXT: [[IV_US:%.*]] = phi i32 [ 0, [[OUTER_LOOP_SPLIT_US]] ], [ [[IV_NEXT_US:%.*]], [[GUARDED_US:%.*]] ]
+; CHECK-NEXT: br label [[GUARDED_US]]
+; CHECK: guarded.us:
+; CHECK-NEXT: [[IV_NEXT_US]] = add i32 [[IV_US]], 1
+; CHECK-NEXT: [[LOOP_COND_US:%.*]] = icmp slt i32 [[IV_NEXT_US]], [[N:%.*]]
+; CHECK-NEXT: br i1 [[LOOP_COND_US]], label [[LOOP_US]], label [[OUTER_BACKEDGE_SPLIT_US:%.*]]
+; CHECK: outer_backedge.split.us:
+; CHECK-NEXT: br label [[OUTER_BACKEDGE:%.*]]
+; CHECK: deopt:
+; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 false) [ "deopt"() ]
+; CHECK-NEXT: unreachable
+; CHECK: outer_backedge:
+; CHECK-NEXT: br i1 false, label [[OUTER_LOOP]], label [[EXIT:%.*]]
+;
+
+entry:
+ br label %outer_loop
+
+outer_loop:
+ br label %loop
+
+loop:
+ %iv = phi i32 [ 0, %outer_loop ], [ %iv.next, %loop ]
+ call void (i1, ...) @llvm.experimental.guard(i1 %cond) [ "deopt"() ]
+ %iv.next = add i32 %iv, 1
+ %loop.cond = icmp slt i32 %iv.next, %N
+ br i1 %loop.cond, label %loop, label %outer_backedge
+
+outer_backedge:
+ br i1 undef, label %outer_loop, label %exit
+
+exit:
+ ret void
+}
+
+define void @test_sibling_loops(i1 %cond1, i1 %cond2, i32 %N) {
+; CHECK-LABEL: @test_sibling_loops(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 [[COND1:%.*]], label [[ENTRY_SPLIT_US:%.*]], label [[ENTRY_SPLIT:%.*]]
+; CHECK: [[IV1_US:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT_US]] ], [ [[IV1_NEXT_US:%.*]], [[GUARDED_US:%.*]] ]
+; CHECK-NEXT: br label [[GUARDED_US]]
+; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 false) [ "deopt"() ]
+; CHECK-NEXT: unreachable
+; CHECK: [[IV2_US:%.*]] = phi i32 [ 0, [[BETWEEN:%.*]] ], [ [[IV1_NEXT_US2:%.*]], [[GUARDED_US2:%.*]] ]
+; CHECK-NEXT: br label [[GUARDED_US2]]
+; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 false) [ "deopt"() ]
+; CHECK-NEXT: unreachable
+;
+
+entry:
+ br label %loop1
+
+loop1:
+ %iv1 = phi i32 [ 0, %entry ], [ %iv1.next, %loop1 ]
+ call void (i1, ...) @llvm.experimental.guard(i1 %cond1) [ "deopt"() ]
+ %iv1.next = add i32 %iv1, 1
+ %loop1.cond = icmp slt i32 %iv1.next, %N
+ br i1 %loop1.cond, label %loop1, label %between
+
+between:
+ br label %loop2
+
+loop2:
+ %iv2 = phi i32 [ 0, %between ], [ %iv2.next, %loop2 ]
+ call void (i1, ...) @llvm.experimental.guard(i1 %cond2) [ "deopt"() ]
+ %iv2.next = add i32 %iv2, 1
+ %loop2.cond = icmp slt i32 %iv2.next, %N
+ br i1 %loop2.cond, label %loop2, label %exit
+
+exit:
+ ret void
+}
+
+; Check that we don't do anything because of cleanuppad.
+; CHECK-LABEL: @test_cleanuppad(
+; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 %cond) [ "deopt"() ]
+; CHECK-NOT: call void (i1, ...) @llvm.experimental.guard(
+define void @test_cleanuppad(i1 %cond, i32 %N) personality i32 (...)* @__CxxFrameHandler3 {
+
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+ call void (i1, ...) @llvm.experimental.guard(i1 %cond) [ "deopt"() ]
+ %iv.next = add i32 %iv, 1
+ invoke void @may_throw(i32 %iv) to label %loop unwind label %exit
+
+exit:
+ %cp = cleanuppad within none []
+ cleanupret from %cp unwind to caller
+
+}
+
+declare void @may_throw(i32 %i)
+declare i32 @__CxxFrameHandler3(...)
diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/infinite-loop.ll b/llvm/test/Transforms/SimpleLoopUnswitch/infinite-loop.ll
new file mode 100644
index 00000000000..91e1f486b82
--- /dev/null
+++ b/llvm/test/Transforms/SimpleLoopUnswitch/infinite-loop.ll
@@ -0,0 +1,65 @@
+; REQUIRES: asserts
+; RUN: opt -simple-loop-unswitch -disable-output -stats -info-output-file - < %s | FileCheck --check-prefix=STATS %s
+; RUN: opt -simple-loop-unswitch -S < %s | FileCheck %s
+; RUN: opt -simple-loop-unswitch -enable-mssa-loop-dependency=true -verify-memoryssa -S < %s | FileCheck %s
+; PR5373
+
+; Loop unswitching shouldn't trivially unswitch the true case of condition %a
+; in the code here because it leads to an infinite loop. While this doesn't
+; contain any instructions with side effects, it's still a kind of side effect.
+; It can trivially unswitch on the false case of condition %a though.
+
+; STATS: 2 simple-loop-unswitch - Number of branches unswitched
+; STATS: 2 simple-loop-unswitch - Number of unswitches that are trivial
+
+; CHECK-LABEL: @func_16(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 %a, label %entry.split, label %abort0
+
+; CHECK: entry.split:
+; CHECK-NEXT: br i1 %b, label %entry.split.split, label %abort1
+
+; CHECK: entry.split.split:
+; CHECK-NEXT: br label %for.body
+
+; CHECK: for.body:
+; CHECK-NEXT: br label %cond.end
+
+; CHECK: cond.end:
+; CHECK-NEXT: br label %for.body
+
+; CHECK: abort0:
+; CHECK-NEXT: call void @end0() [[NOR_NUW:#[0-9]+]]
+; CHECK-NEXT: unreachable
+
+; CHECK: abort1:
+; CHECK-NEXT: call void @end1() [[NOR_NUW]]
+; CHECK-NEXT: unreachable
+
+; CHECK: }
+
+define void @func_16(i1 %a, i1 %b) nounwind {
+entry:
+ br label %for.body
+
+for.body:
+ br i1 %a, label %cond.end, label %abort0
+
+cond.end:
+ br i1 %b, label %for.body, label %abort1
+
+abort0:
+ call void @end0() noreturn nounwind
+ unreachable
+
+abort1:
+ call void @end1() noreturn nounwind
+ unreachable
+}
+
+declare void @end0() noreturn
+declare void @end1() noreturn
+
+; CHECK: attributes #0 = { nounwind }
+; CHECK: attributes #1 = { noreturn }
+; CHECK: attributes [[NOR_NUW]] = { noreturn nounwind }
diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/msan.ll b/llvm/test/Transforms/SimpleLoopUnswitch/msan.ll
new file mode 100644
index 00000000000..8a296bcd279
--- /dev/null
+++ b/llvm/test/Transforms/SimpleLoopUnswitch/msan.ll
@@ -0,0 +1,142 @@
+; RUN: opt -passes='loop(unswitch),verify<loops>' -S < %s | FileCheck %s
+; RUN: opt -enable-mssa-loop-dependency=true -verify-memoryssa -passes='loop(unswitch),verify<loops>' -S < %s | FileCheck %s
+
+declare void @unknown()
+declare void @unknown2()
+
+@y = global i64 0, align 8
+
+; The following is approximately:
+; void f(bool *x) {
+; for (int i = 0; i < 1; ++i) {
+; if (*x) {
+; if (y)
+; unknown();
+; else
+; break;
+; }
+; }
+; }
+; With MemorySanitizer, the loop can not be unswitched on "y", because "y" could
+; be uninitialized when x == false.
+; Test that the branch on "y" is inside the loop (after the first unconditional
+; branch).
+
+define void @may_not_execute_trivial(i1* %x) sanitize_memory {
+; CHECK-LABEL: @may_not_execute_trivial(
+entry:
+; The load of @y and the compare already sit outside the loop; only the branch
+; on the compare result lives inside it.
+ %y = load i64, i64* @y, align 8
+ %y.cmp = icmp eq i64 %y, 0
+ br label %for.body
+; CHECK: %[[Y:.*]] = load i64, i64* @y
+; CHECK: %[[YCMP:.*]] = icmp eq i64 %[[Y]], 0
+; No conditional branch is allowed ahead of the jump into the loop.
+; CHECK-NOT: br i1
+; CHECK: br label %for.body
+
+for.body:
+ %i = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
+ %x.load = load i1, i1* %x
+; if.then is entered only on the false edge of this branch, so the branch on
+; %y.cmp below is not reached on every iteration.
+ br i1 %x.load, label %for.inc, label %if.then
+; CHECK: %[[XLOAD:.*]] = load i1, i1* %x
+; CHECK: br i1 %[[XLOAD]]
+
+if.then:
+ br i1 %y.cmp, label %for.end, label %if.then4
+; CHECK: br i1 %[[YCMP]]
+
+if.then4:
+ call void @unknown()
+ br label %for.inc
+
+for.inc:
+ %inc = add nsw i32 %i, 1
+ %cmp = icmp slt i32 %inc, 1
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end:
+ ret void
+}
+
+
+; The same as above, but "y" is a function parameter instead of a global.
+; This shows that it is not enough to suppress hoisting of load instructions,
+; the actual problem is in the speculative branching.
+
+define void @may_not_execute2_trivial(i1* %x, i1 %y) sanitize_memory {
+; CHECK-LABEL: @may_not_execute2_trivial(
+entry:
+ br label %for.body
+; No conditional branch is allowed ahead of the jump into the loop.
+; CHECK-NOT: br i1
+; CHECK: br label %for.body
+
+for.body:
+ %i = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
+ %x.load = load i1, i1* %x
+; if.then is entered only on the false edge of this branch, so the branch on
+; the argument %y below is not reached on every iteration.
+ br i1 %x.load, label %for.inc, label %if.then
+; CHECK: %[[XLOAD:.*]] = load i1, i1* %x
+; CHECK: br i1 %[[XLOAD]]
+
+if.then:
+; %y is used directly as the branch condition; no load is involved.
+ br i1 %y, label %for.end, label %if.then4
+; CHECK: br i1 %y
+
+if.then4:
+ call void @unknown()
+ br label %for.inc
+
+for.inc:
+ %inc = add nsw i32 %i, 1
+ %cmp = icmp slt i32 %inc, 1
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end:
+ ret void
+}
+
+
+; The following is approximately:
+; void f() {
+; for (int i = 0; i < 1; ++i) {
+; if (y)
+; unknown();
+; else
+; break;
+; }
+; }
+; "if (y)" is guaranteed to execute; the loop can be unswitched.
+
+define void @must_execute_trivial() sanitize_memory {
+; CHECK-LABEL: @must_execute_trivial(
+entry:
+ %y = load i64, i64* @y, align 8
+ %y.cmp = icmp eq i64 %y, 0
+ br label %for.body
+; The expected output branches on the compare result directly in the entry
+; block, before the loop is entered.
+; CHECK: %[[Y:.*]] = load i64, i64* @y
+; CHECK: %[[YCMP:.*]] = icmp eq i64 %[[Y]], 0
+; CHECK: br i1 %[[YCMP]], label %[[EXIT_SPLIT:.*]], label %[[PH:.*]]
+;
+; CHECK: [[PH]]:
+; CHECK: br label %for.body
+
+for.body:
+ %i = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
+; The branch on %y.cmp is in the loop header with no other branch ahead of it.
+ br i1 %y.cmp, label %for.end, label %if.then4
+; CHECK: br label %if.then4
+
+if.then4:
+ call void @unknown()
+ br label %for.inc
+
+for.inc:
+ %inc = add nsw i32 %i, 1
+ %cmp = icmp slt i32 %inc, 1
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end:
+ ret void
+; CHECK: for.end:
+; CHECK: br label %[[EXIT_SPLIT]]
+;
+; CHECK: [[EXIT_SPLIT]]:
+; CHECK: ret void
+}
diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-cost.ll b/llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-cost.ll
new file mode 100644
index 00000000000..333378a0984
--- /dev/null
+++ b/llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-cost.ll
@@ -0,0 +1,502 @@
+; Specifically exercise the cost modeling for non-trivial loop unswitching.
+;
+; RUN: opt -passes='loop(unswitch),verify<loops>' -enable-nontrivial-unswitch -unswitch-threshold=5 -S < %s | FileCheck %s
+; RUN: opt -simple-loop-unswitch -enable-nontrivial-unswitch -unswitch-threshold=5 -S < %s | FileCheck %s
+; RUN: opt -simple-loop-unswitch -enable-nontrivial-unswitch -unswitch-threshold=5 -enable-mssa-loop-dependency=true -verify-memoryssa -S < %s | FileCheck %s
+
+declare void @a()
+declare void @b()
+declare void @x()
+
+; First establish enough code size in the duplicated 'loop_begin' block to
+; suppress unswitching.
+define void @test_no_unswitch(i1* %ptr, i1 %cond) {
+; CHECK-LABEL: @test_no_unswitch(
+entry:
+ br label %loop_begin
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label %loop_begin
+;
+; We shouldn't have unswitched into any other block either.
+; CHECK-NOT: br i1 %cond
+
+loop_begin:
+; Four calls precede the branch on the argument %cond; this block is reached
+; on every iteration regardless of which way that branch goes.
+ call void @x()
+ call void @x()
+ call void @x()
+ call void @x()
+ br i1 %cond, label %loop_a, label %loop_b
+; CHECK: loop_begin:
+; CHECK-NEXT: call void @x()
+; CHECK-NEXT: call void @x()
+; CHECK-NEXT: call void @x()
+; CHECK-NEXT: call void @x()
+; CHECK-NEXT: br i1 %cond, label %loop_a, label %loop_b
+
+loop_a:
+ call void @a()
+ br label %loop_latch
+
+loop_b:
+ call void @b()
+ br label %loop_latch
+
+loop_latch:
+; The loop continues or exits based on a value re-loaded from %ptr each time.
+ %v = load i1, i1* %ptr
+ br i1 %v, label %loop_begin, label %loop_exit
+
+loop_exit:
+ ret void
+}
+
+; Now check that the smaller formulation of 'loop_begin' does in fact unswitch
+; with our low threshold.
+define void @test_unswitch(i1* %ptr, i1 %cond) {
+; CHECK-LABEL: @test_unswitch(
+entry:
+ br label %loop_begin
+; The expected output branches on %cond in the entry block, selecting one of
+; two copies of the loop.
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 %cond, label %entry.split.us, label %entry.split
+
+loop_begin:
+; Only a single call precedes the branch on %cond in this function.
+ call void @x()
+ br i1 %cond, label %loop_a, label %loop_b
+
+loop_a:
+ call void @a()
+ br label %loop_latch
+; The 'loop_a' unswitched loop.
+;
+; CHECK: entry.split.us:
+; CHECK-NEXT: br label %loop_begin.us
+;
+; CHECK: loop_begin.us:
+; CHECK-NEXT: call void @x()
+; CHECK-NEXT: br label %loop_a.us
+;
+; CHECK: loop_a.us:
+; CHECK-NEXT: call void @a()
+; CHECK-NEXT: br label %loop_latch.us
+;
+; CHECK: loop_latch.us:
+; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT: br i1 %[[V]], label %loop_begin.us, label %loop_exit.split.us
+;
+; CHECK: loop_exit.split.us:
+; CHECK-NEXT: br label %loop_exit
+
+loop_b:
+ call void @b()
+ br label %loop_latch
+; The 'loop_b' unswitched loop.
+;
+; CHECK: entry.split:
+; CHECK-NEXT: br label %loop_begin
+;
+; CHECK: loop_begin:
+; CHECK-NEXT: call void @x()
+; CHECK-NEXT: br label %loop_b
+;
+; CHECK: loop_b:
+; CHECK-NEXT: call void @b()
+; CHECK-NEXT: br label %loop_latch
+;
+; CHECK: loop_latch:
+; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT: br i1 %[[V]], label %loop_begin, label %loop_exit.split
+;
+; CHECK: loop_exit.split:
+; CHECK-NEXT: br label %loop_exit
+
+loop_latch:
+ %v = load i1, i1* %ptr
+ br i1 %v, label %loop_begin, label %loop_exit
+
+loop_exit:
+ ret void
+; Both copies of the loop rejoin at the single loop_exit block.
+; CHECK: loop_exit:
+; CHECK-NEXT: ret void
+}
+
+; Check that even with large amounts of code on either side of the unswitched
+; branch, if that code would be kept in only one of the unswitched clones it
+; doesn't contribute to the cost.
+define void @test_unswitch_non_dup_code(i1* %ptr, i1 %cond) {
+; CHECK-LABEL: @test_unswitch_non_dup_code(
+entry:
+ br label %loop_begin
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 %cond, label %entry.split.us, label %entry.split
+
+loop_begin:
+ call void @x()
+ br i1 %cond, label %loop_a, label %loop_b
+
+loop_a:
+; Four calls on the true side of the branch; in the expected output they
+; appear only in the '.us' copy of the loop.
+ call void @a()
+ call void @a()
+ call void @a()
+ call void @a()
+ br label %loop_latch
+; The 'loop_a' unswitched loop.
+;
+; CHECK: entry.split.us:
+; CHECK-NEXT: br label %loop_begin.us
+;
+; CHECK: loop_begin.us:
+; CHECK-NEXT: call void @x()
+; CHECK-NEXT: br label %loop_a.us
+;
+; CHECK: loop_a.us:
+; CHECK-NEXT: call void @a()
+; CHECK-NEXT: call void @a()
+; CHECK-NEXT: call void @a()
+; CHECK-NEXT: call void @a()
+; CHECK-NEXT: br label %loop_latch.us
+;
+; CHECK: loop_latch.us:
+; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT: br i1 %[[V]], label %loop_begin.us, label %loop_exit.split.us
+;
+; CHECK: loop_exit.split.us:
+; CHECK-NEXT: br label %loop_exit
+
+loop_b:
+; Four calls on the false side of the branch; in the expected output they
+; appear only in the copy of the loop that keeps the original block names.
+ call void @b()
+ call void @b()
+ call void @b()
+ call void @b()
+ br label %loop_latch
+; The 'loop_b' unswitched loop.
+;
+; CHECK: entry.split:
+; CHECK-NEXT: br label %loop_begin
+;
+; CHECK: loop_begin:
+; CHECK-NEXT: call void @x()
+; CHECK-NEXT: br label %loop_b
+;
+; CHECK: loop_b:
+; CHECK-NEXT: call void @b()
+; CHECK-NEXT: call void @b()
+; CHECK-NEXT: call void @b()
+; CHECK-NEXT: call void @b()
+; CHECK-NEXT: br label %loop_latch
+;
+; CHECK: loop_latch:
+; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT: br i1 %[[V]], label %loop_begin, label %loop_exit.split
+;
+; CHECK: loop_exit.split:
+; CHECK-NEXT: br label %loop_exit
+
+loop_latch:
+ %v = load i1, i1* %ptr
+ br i1 %v, label %loop_begin, label %loop_exit
+
+loop_exit:
+ ret void
+; CHECK: loop_exit:
+; CHECK-NEXT: ret void
+}
+
+; Much like with non-duplicated code directly in the successor, we also won't
+; duplicate even interesting CFGs.
+define void @test_unswitch_non_dup_code_in_cfg(i1* %ptr, i1 %cond) {
+; CHECK-LABEL: @test_unswitch_non_dup_code_in_cfg(
+entry:
+ br label %loop_begin
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 %cond, label %entry.split.us, label %entry.split
+
+loop_begin:
+ call void @x()
+ br i1 %cond, label %loop_a, label %loop_b
+
+loop_a:
+; The true side of the branch on %cond is itself a diamond
+; (loop_a -> loop_a_a / loop_a_b -> loop_latch) keyed on a loaded value.
+ %v1 = load i1, i1* %ptr
+ br i1 %v1, label %loop_a_a, label %loop_a_b
+
+loop_a_a:
+ call void @a()
+ br label %loop_latch
+
+loop_a_b:
+ call void @a()
+ br label %loop_latch
+; The 'loop_a' unswitched loop.
+;
+; CHECK: entry.split.us:
+; CHECK-NEXT: br label %loop_begin.us
+;
+; CHECK: loop_begin.us:
+; CHECK-NEXT: call void @x()
+; CHECK-NEXT: br label %loop_a.us
+;
+; CHECK: loop_a.us:
+; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT: br i1 %[[V]], label %loop_a_a.us, label %loop_a_b.us
+;
+; CHECK: loop_a_b.us:
+; CHECK-NEXT: call void @a()
+; CHECK-NEXT: br label %loop_latch.us
+;
+; CHECK: loop_a_a.us:
+; CHECK-NEXT: call void @a()
+; CHECK-NEXT: br label %loop_latch.us
+;
+; CHECK: loop_latch.us:
+; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT: br i1 %[[V]], label %loop_begin.us, label %loop_exit.split.us
+;
+; CHECK: loop_exit.split.us:
+; CHECK-NEXT: br label %loop_exit
+
+loop_b:
+; The false side mirrors the true side with its own diamond
+; (loop_b -> loop_b_a / loop_b_b -> loop_latch).
+ %v2 = load i1, i1* %ptr
+ br i1 %v2, label %loop_b_a, label %loop_b_b
+
+loop_b_a:
+ call void @b()
+ br label %loop_latch
+
+loop_b_b:
+ call void @b()
+ br label %loop_latch
+; The 'loop_b' unswitched loop.
+;
+; CHECK: entry.split:
+; CHECK-NEXT: br label %loop_begin
+;
+; CHECK: loop_begin:
+; CHECK-NEXT: call void @x()
+; CHECK-NEXT: br label %loop_b
+;
+; CHECK: loop_b:
+; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT: br i1 %[[V]], label %loop_b_a, label %loop_b_b
+;
+; CHECK: loop_b_a:
+; CHECK-NEXT: call void @b()
+; CHECK-NEXT: br label %loop_latch
+;
+; CHECK: loop_b_b:
+; CHECK-NEXT: call void @b()
+; CHECK-NEXT: br label %loop_latch
+;
+; CHECK: loop_latch:
+; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT: br i1 %[[V]], label %loop_begin, label %loop_exit.split
+;
+; CHECK: loop_exit.split:
+; CHECK-NEXT: br label %loop_exit
+
+loop_latch:
+ %v3 = load i1, i1* %ptr
+ br i1 %v3, label %loop_begin, label %loop_exit
+
+loop_exit:
+ ret void
+; CHECK: loop_exit:
+; CHECK-NEXT: ret void
+}
+
+; Check that even if there is *some* non-duplicated code on one side of an
+; unswitch, we still count any other code in the loop that will in fact have to
+; be duplicated, and that this can be enough to suppress unswitching.
+define void @test_no_unswitch_non_dup_code(i1* %ptr, i1 %cond) {
+; CHECK-LABEL: @test_no_unswitch_non_dup_code(
+entry:
+ br label %loop_begin
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label %loop_begin
+;
+; We shouldn't have unswitched into any other block either.
+; CHECK-NOT: br i1 %cond
+
+loop_begin:
+ call void @x()
+ br i1 %cond, label %loop_a, label %loop_b
+; CHECK: loop_begin:
+; CHECK-NEXT: call void @x()
+; CHECK-NEXT: br i1 %cond, label %loop_a, label %loop_b
+
+loop_a:
+ %v1 = load i1, i1* %ptr
+ br i1 %v1, label %loop_a_a, label %loop_a_b
+
+loop_a_a:
+ call void @a()
+ br label %loop_latch
+
+loop_a_b:
+ call void @a()
+ br label %loop_latch
+
+loop_b:
+ %v2 = load i1, i1* %ptr
+ br i1 %v2, label %loop_b_a, label %loop_b_b
+
+loop_b_a:
+ call void @b()
+ br label %loop_latch
+
+loop_b_b:
+ call void @b()
+ br label %loop_latch
+
+loop_latch:
+; Unlike @test_unswitch_non_dup_code_in_cfg, the latch here carries two extra
+; calls; the latch is reached from both sides of the branch on %cond.
+ call void @x()
+ call void @x()
+ %v = load i1, i1* %ptr
+ br i1 %v, label %loop_begin, label %loop_exit
+
+loop_exit:
+ ret void
+}
+
+; Check that we still unswitch when the exit block contains lots of code, even
+; though we do clone the exit block as part of unswitching. This should work
+; because we should split the exit block before anything inside it.
+define void @test_unswitch_large_exit(i1* %ptr, i1 %cond) {
+; CHECK-LABEL: @test_unswitch_large_exit(
+entry:
+ br label %loop_begin
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 %cond, label %entry.split.us, label %entry.split
+
+loop_begin:
+ call void @x()
+ br i1 %cond, label %loop_a, label %loop_b
+
+loop_a:
+ call void @a()
+ br label %loop_latch
+; The 'loop_a' unswitched loop.
+;
+; CHECK: entry.split.us:
+; CHECK-NEXT: br label %loop_begin.us
+;
+; CHECK: loop_begin.us:
+; CHECK-NEXT: call void @x()
+; CHECK-NEXT: br label %loop_a.us
+;
+; CHECK: loop_a.us:
+; CHECK-NEXT: call void @a()
+; CHECK-NEXT: br label %loop_latch.us
+;
+; CHECK: loop_latch.us:
+; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT: br i1 %[[V]], label %loop_begin.us, label %loop_exit.split.us
+;
+; CHECK: loop_exit.split.us:
+; CHECK-NEXT: br label %loop_exit
+
+loop_b:
+ call void @b()
+ br label %loop_latch
+; The 'loop_b' unswitched loop.
+;
+; CHECK: entry.split:
+; CHECK-NEXT: br label %loop_begin
+;
+; CHECK: loop_begin:
+; CHECK-NEXT: call void @x()
+; CHECK-NEXT: br label %loop_b
+;
+; CHECK: loop_b:
+; CHECK-NEXT: call void @b()
+; CHECK-NEXT: br label %loop_latch
+;
+; CHECK: loop_latch:
+; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT: br i1 %[[V]], label %loop_begin, label %loop_exit.split
+;
+; CHECK: loop_exit.split:
+; CHECK-NEXT: br label %loop_exit
+
+loop_latch:
+ %v = load i1, i1* %ptr
+ br i1 %v, label %loop_begin, label %loop_exit
+
+loop_exit:
+; The four calls live in the exit block, outside the loop body. The expected
+; output keeps a single copy of them, reached through two small '.split'
+; blocks that contain only a branch.
+ call void @x()
+ call void @x()
+ call void @x()
+ call void @x()
+ ret void
+; CHECK: loop_exit:
+; CHECK-NEXT: call void @x()
+; CHECK-NEXT: call void @x()
+; CHECK-NEXT: call void @x()
+; CHECK-NEXT: call void @x()
+; CHECK-NEXT: ret void
+}
+
+; Check that we handle a dedicated exit edge unswitch which is still
+; non-trivial and has lots of code in the exit.
+define void @test_unswitch_dedicated_exiting(i1* %ptr, i1 %cond) {
+; CHECK-LABEL: @test_unswitch_dedicated_exiting(
+entry:
+ br label %loop_begin
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 %cond, label %entry.split.us, label %entry.split
+
+loop_begin:
+; The false edge of this branch leaves the loop: loop_b_exit returns and has
+; no other predecessor.
+ call void @x()
+ br i1 %cond, label %loop_a, label %loop_b_exit
+
+loop_a:
+ call void @a()
+ br label %loop_latch
+; The 'loop_a' unswitched loop.
+;
+; CHECK: entry.split.us:
+; CHECK-NEXT: br label %loop_begin.us
+;
+; CHECK: loop_begin.us:
+; CHECK-NEXT: call void @x()
+; CHECK-NEXT: br label %loop_a.us
+;
+; CHECK: loop_a.us:
+; CHECK-NEXT: call void @a()
+; CHECK-NEXT: br label %loop_latch.us
+;
+; CHECK: loop_latch.us:
+; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT: br i1 %[[V]], label %loop_begin.us, label %loop_exit.split.us
+;
+; CHECK: loop_exit.split.us:
+; CHECK-NEXT: br label %loop_exit
+
+loop_b_exit:
+ call void @b()
+ call void @b()
+ call void @b()
+ call void @b()
+ ret void
+; The 'loop_b_exit' unswitched exit path.
+;
+; CHECK: entry.split:
+; CHECK-NEXT: br label %loop_begin
+;
+; CHECK: loop_begin:
+; CHECK-NEXT: call void @x()
+; CHECK-NEXT: br label %loop_b_exit
+;
+; CHECK: loop_b_exit:
+; CHECK-NEXT: call void @b()
+; CHECK-NEXT: call void @b()
+; CHECK-NEXT: call void @b()
+; CHECK-NEXT: call void @b()
+; CHECK-NEXT: ret void
+
+loop_latch:
+ %v = load i1, i1* %ptr
+ br i1 %v, label %loop_begin, label %loop_exit
+
+loop_exit:
+ ret void
+; CHECK: loop_exit:
+; CHECK-NEXT: ret void
+}
diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch.ll b/llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch.ll
new file mode 100644
index 00000000000..f07c812819f
--- /dev/null
+++ b/llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch.ll
@@ -0,0 +1,4216 @@
+; RUN: opt -passes='loop(unswitch),verify<loops>' -enable-nontrivial-unswitch -S < %s | FileCheck %s
+; RUN: opt -simple-loop-unswitch -enable-nontrivial-unswitch -S < %s | FileCheck %s
+; RUN: opt -simple-loop-unswitch -enable-nontrivial-unswitch -enable-mssa-loop-dependency=true -verify-memoryssa -S < %s | FileCheck %s
+
+declare i32 @a()
+declare i32 @b()
+declare i32 @c()
+declare i32 @d()
+
+declare void @sink1(i32)
+declare void @sink2(i32)
+
+declare i1 @cond()
+declare i32 @cond.i32()
+
+; Negative test: we cannot unswitch convergent calls.
+define void @test_no_unswitch_convergent(i1* %ptr, i1 %cond) {
+; CHECK-LABEL: @test_no_unswitch_convergent(
+entry:
+ br label %loop_begin
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label %loop_begin
+;
+; We shouldn't have unswitched into any other block either.
+; CHECK-NOT: br i1 %cond
+
+loop_begin:
+ br i1 %cond, label %loop_a, label %loop_b
+; CHECK: loop_begin:
+; CHECK-NEXT: br i1 %cond, label %loop_a, label %loop_b
+
+loop_a:
+; The 'convergent' attribute on this call site is the only thing that
+; distinguishes this loop from a plain two-sided loop body.
+ call i32 @a() convergent
+ br label %loop_latch
+
+loop_b:
+ call i32 @b()
+ br label %loop_latch
+
+loop_latch:
+ %v = load i1, i1* %ptr
+ br i1 %v, label %loop_begin, label %loop_exit
+
+loop_exit:
+ ret void
+}
+
+; Negative test: we cannot unswitch noduplicate calls.
+define void @test_no_unswitch_noduplicate(i1* %ptr, i1 %cond) {
+; CHECK-LABEL: @test_no_unswitch_noduplicate(
+entry:
+ br label %loop_begin
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label %loop_begin
+;
+; We shouldn't have unswitched into any other block either.
+; CHECK-NOT: br i1 %cond
+
+loop_begin:
+ br i1 %cond, label %loop_a, label %loop_b
+; CHECK: loop_begin:
+; CHECK-NEXT: br i1 %cond, label %loop_a, label %loop_b
+
+loop_a:
+; Same shape as @test_no_unswitch_convergent, but the call site carries
+; 'noduplicate' instead of 'convergent'.
+ call i32 @a() noduplicate
+ br label %loop_latch
+
+loop_b:
+ call i32 @b()
+ br label %loop_latch
+
+loop_latch:
+ %v = load i1, i1* %ptr
+ br i1 %v, label %loop_begin, label %loop_exit
+
+loop_exit:
+ ret void
+}
+
+declare i32 @__CxxFrameHandler3(...)
+
+; Negative test: we cannot unswitch when tokens are used across blocks as we
+; might introduce PHIs.
+define void @test_no_unswitch_cross_block_token(i1* %ptr, i1 %cond) nounwind personality i32 (...)* @__CxxFrameHandler3 {
+; CHECK-LABEL: @test_no_unswitch_cross_block_token(
+entry:
+ br label %loop_begin
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label %loop_begin
+;
+; We shouldn't have unswitched into any other block either.
+; CHECK-NOT: br i1 %cond
+
+loop_begin:
+ br i1 %cond, label %loop_a, label %loop_b
+; CHECK: loop_begin:
+; CHECK-NEXT: br i1 %cond, label %loop_a, label %loop_b
+
+loop_a:
+ call i32 @a()
+ br label %loop_cont
+
+loop_b:
+ call i32 @b()
+ br label %loop_cont
+
+loop_cont:
+; Both sides of the branch on %cond merge here before the invoke.
+ invoke i32 @a()
+ to label %loop_latch unwind label %loop_catch
+
+loop_latch:
+ br label %loop_begin
+
+loop_catch:
+; %catch is defined in this block and used by the catchpads in two other
+; blocks (loop_catch_latch and loop_exit).
+ %catch = catchswitch within none [label %loop_catch_latch, label %loop_exit] unwind to caller
+
+loop_catch_latch:
+; This handler resumes at loop_begin, i.e. it stays inside the loop.
+ %catchpad_latch = catchpad within %catch []
+ catchret from %catchpad_latch to label %loop_begin
+
+loop_exit:
+; This handler resumes at 'exit', i.e. it leaves the loop.
+ %catchpad_exit = catchpad within %catch []
+ catchret from %catchpad_exit to label %exit
+
+exit:
+ ret void
+}
+
+
+; Non-trivial loop unswitching where there are two distinct trivial conditions
+; to unswitch within the loop.
+define i32 @test1(i1* %ptr, i1 %cond1, i1 %cond2) {
+; CHECK-LABEL: @test1(
+entry:
+ br label %loop_begin
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 %cond1, label %entry.split.us, label %entry.split
+
+loop_begin:
+; First invariant condition: the argument %cond1.
+ br i1 %cond1, label %loop_a, label %loop_b
+
+loop_a:
+ call i32 @a()
+ br label %latch
+; The 'loop_a' unswitched loop.
+;
+; CHECK: entry.split.us:
+; CHECK-NEXT: br label %loop_begin.us
+;
+; CHECK: loop_begin.us:
+; CHECK-NEXT: br label %loop_a.us
+;
+; CHECK: loop_a.us:
+; CHECK-NEXT: call i32 @a()
+; CHECK-NEXT: br label %latch.us
+;
+; CHECK: latch.us:
+; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT: br i1 %[[V]], label %loop_begin.us, label %loop_exit.split.us
+;
+; CHECK: loop_exit.split.us:
+; CHECK-NEXT: br label %loop_exit
+
+loop_b:
+; Second invariant condition: the argument %cond2, tested only on the false
+; side of the branch on %cond1.
+ br i1 %cond2, label %loop_b_a, label %loop_b_b
+; The second unswitched condition.
+;
+; CHECK: entry.split:
+; CHECK-NEXT: br i1 %cond2, label %entry.split.split.us, label %entry.split.split
+
+loop_b_a:
+ call i32 @b()
+ br label %latch
+; The 'loop_b_a' unswitched loop.
+;
+; CHECK: entry.split.split.us:
+; CHECK-NEXT: br label %loop_begin.us1
+;
+; CHECK: loop_begin.us1:
+; CHECK-NEXT: br label %loop_b.us
+;
+; CHECK: loop_b.us:
+; CHECK-NEXT: br label %loop_b_a.us
+;
+; CHECK: loop_b_a.us:
+; CHECK-NEXT: call i32 @b()
+; CHECK-NEXT: br label %latch.us2
+;
+; CHECK: latch.us2:
+; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT: br i1 %[[V]], label %loop_begin.us1, label %loop_exit.split.split.us
+;
+; CHECK: loop_exit.split.split.us:
+; CHECK-NEXT: br label %loop_exit.split
+
+loop_b_b:
+ call i32 @c()
+ br label %latch
+; The 'loop_b_b' unswitched loop.
+;
+; CHECK: entry.split.split:
+; CHECK-NEXT: br label %loop_begin
+;
+; CHECK: loop_begin:
+; CHECK-NEXT: br label %loop_b
+;
+; CHECK: loop_b:
+; CHECK-NEXT: br label %loop_b_b
+;
+; CHECK: loop_b_b:
+; CHECK-NEXT: call i32 @c()
+; CHECK-NEXT: br label %latch
+;
+; CHECK: latch:
+; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT: br i1 %[[V]], label %loop_begin, label %loop_exit.split.split
+;
+; CHECK: loop_exit.split.split:
+; CHECK-NEXT: br label %loop_exit.split
+
+latch:
+ %v = load i1, i1* %ptr
+ br i1 %v, label %loop_begin, label %loop_exit
+
+loop_exit:
+ ret i32 0
+; The two loops split on %cond2 rejoin at loop_exit.split, which then joins
+; the 'loop_a' loop at loop_exit.
+; CHECK: loop_exit.split:
+; CHECK-NEXT: br label %loop_exit
+;
+; CHECK: loop_exit:
+; CHECK-NEXT: ret
+}
+
+; Test a non-trivial unswitch of %cond1 where each side of the branch loads its
+; own values and then either takes the backedge or exits to a shared exit block
+; whose PHI nodes merge the values from both sides.
+define i32 @test2(i1* %ptr, i1 %cond1, i32* %a.ptr, i32* %b.ptr, i32* %c.ptr) {
+; CHECK-LABEL: @test2(
+entry:
+ br label %loop_begin
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 %cond1, label %entry.split.us, label %entry.split
+
+loop_begin:
+; %v is loaded before the invariant branch and consumed by the exit branch on
+; both sides.
+ %v = load i1, i1* %ptr
+ br i1 %cond1, label %loop_a, label %loop_b
+
+loop_a:
+ %a = load i32, i32* %a.ptr
+ %ac = load i32, i32* %c.ptr
+ br i1 %v, label %loop_begin, label %loop_exit
+; The 'loop_a' unswitched loop.
+;
+; CHECK: entry.split.us:
+; CHECK-NEXT: br label %loop_begin.us
+;
+; CHECK: loop_begin.us:
+; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT: br label %loop_a.us
+;
+; CHECK: loop_a.us:
+; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr
+; CHECK-NEXT: %[[AC:.*]] = load i32, i32* %c.ptr
+; CHECK-NEXT: br i1 %[[V]], label %loop_begin.backedge.us, label %loop_exit.split.us
+;
+; CHECK: loop_exit.split.us:
+; CHECK-NEXT: %[[A_LCSSA:.*]] = phi i32 [ %[[A]], %loop_a.us ]
+; CHECK-NEXT: %[[AC_LCSSA:.*]] = phi i32 [ %[[AC]], %loop_a.us ]
+; CHECK-NEXT: br label %loop_exit
+
+loop_b:
+ %b = load i32, i32* %b.ptr
+ %bc = load i32, i32* %c.ptr
+ br i1 %v, label %loop_begin, label %loop_exit
+; The 'loop_b' unswitched loop.
+;
+; CHECK: entry.split:
+; CHECK-NEXT: br label %loop_begin
+;
+; CHECK: loop_begin:
+; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT: br label %loop_b
+;
+; CHECK: loop_b:
+; CHECK-NEXT: %[[B:.*]] = load i32, i32* %b.ptr
+; CHECK-NEXT: %[[BC:.*]] = load i32, i32* %c.ptr
+; CHECK-NEXT: br i1 %[[V]], label %loop_begin.backedge, label %loop_exit.split
+;
+; CHECK: loop_exit.split:
+; CHECK-NEXT: %[[B_LCSSA:.*]] = phi i32 [ %[[B]], %loop_b ]
+; CHECK-NEXT: %[[BC_LCSSA:.*]] = phi i32 [ %[[BC]], %loop_b ]
+; CHECK-NEXT: br label %loop_exit
+
+loop_exit:
+ %ab.phi = phi i32 [ %a, %loop_a ], [ %b, %loop_b ]
+ %c.phi = phi i32 [ %ac, %loop_a ], [ %bc, %loop_b ]
+ %result = add i32 %ab.phi, %c.phi
+ ret i32 %result
+; In the expected output the exit PHIs take their inputs from the two '.split'
+; blocks rather than directly from loop_a and loop_b.
+; CHECK: loop_exit:
+; CHECK-NEXT: %[[AB_PHI:.*]] = phi i32 [ %[[B_LCSSA]], %loop_exit.split ], [ %[[A_LCSSA]], %loop_exit.split.us ]
+; CHECK-NEXT: %[[C_PHI:.*]] = phi i32 [ %[[BC_LCSSA]], %loop_exit.split ], [ %[[AC_LCSSA]], %loop_exit.split.us ]
+; CHECK-NEXT: %[[RESULT:.*]] = add i32 %[[AB_PHI]], %[[C_PHI]]
+; CHECK-NEXT: ret i32 %[[RESULT]]
+}
+
+; Test a non-trivial unswitch of an exiting edge to an exit block with other
+; in-loop predecessors.
+define i32 @test3a(i1* %ptr, i1 %cond1, i32* %a.ptr, i32* %b.ptr) {
+; CHECK-LABEL: @test3a(
+entry:
+ br label %loop_begin
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 %cond1, label %entry.split.us, label %entry.split
+
+loop_begin:
+ %v = load i1, i1* %ptr
+ %a = load i32, i32* %a.ptr
+; The true edge exits straight to loop_exit, which loop_b also branches to.
+ br i1 %cond1, label %loop_exit, label %loop_b
+; The 'loop_exit' clone.
+;
+; CHECK: entry.split.us:
+; CHECK-NEXT: br label %loop_begin.us
+;
+; CHECK: loop_begin.us:
+; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr
+; CHECK-NEXT: br label %loop_exit.split.us
+;
+; CHECK: loop_exit.split.us:
+; CHECK-NEXT: %[[A_LCSSA:.*]] = phi i32 [ %[[A]], %loop_begin.us ]
+; CHECK-NEXT: br label %loop_exit
+
+loop_b:
+ %b = load i32, i32* %b.ptr
+ br i1 %v, label %loop_begin, label %loop_exit
+; The 'loop_b' unswitched loop.
+;
+; CHECK: entry.split:
+; CHECK-NEXT: br label %loop_begin
+;
+; CHECK: loop_begin:
+; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr
+; CHECK-NEXT: br label %loop_b
+;
+; CHECK: loop_b:
+; CHECK-NEXT: %[[B:.*]] = load i32, i32* %b.ptr
+; CHECK-NEXT: br i1 %[[V]], label %loop_begin, label %loop_exit.split
+;
+; CHECK: loop_exit.split:
+; CHECK-NEXT: %[[B_LCSSA:.*]] = phi i32 [ %[[B]], %loop_b ]
+; CHECK-NEXT: br label %loop_exit
+
+loop_exit:
+; Merges %a (exit taken from loop_begin) with %b (exit taken from loop_b).
+ %ab.phi = phi i32 [ %a, %loop_begin ], [ %b, %loop_b ]
+ ret i32 %ab.phi
+; CHECK: loop_exit:
+; CHECK-NEXT: %[[AB_PHI:.*]] = phi i32 [ %[[B_LCSSA]], %loop_exit.split ], [ %[[A_LCSSA]], %loop_exit.split.us ]
+; CHECK-NEXT: ret i32 %[[AB_PHI]]
+}
+
+; Test a non-trivial unswitch of an exiting edge to an exit block with other
+; in-loop predecessors. This is the same as @test3a but with the reversed order
+; of successors so that the exiting edge is *not* the cloned edge.
+define i32 @test3b(i1* %ptr, i1 %cond1, i32* %a.ptr, i32* %b.ptr) {
+; CHECK-LABEL: @test3b(
+entry:
+ br label %loop_begin
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 %cond1, label %entry.split.us, label %entry.split
+
+loop_begin:
+ %v = load i1, i1* %ptr
+ %a = load i32, i32* %a.ptr
+; Here the false edge is the one that exits; the true edge continues to loop_b.
+ br i1 %cond1, label %loop_b, label %loop_exit
+; The 'loop_b' unswitched loop.
+;
+; CHECK: entry.split.us:
+; CHECK-NEXT: br label %loop_begin.us
+;
+; CHECK: loop_begin.us:
+; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr
+; CHECK-NEXT: br label %loop_b.us
+;
+; CHECK: loop_b.us:
+; CHECK-NEXT: %[[B:.*]] = load i32, i32* %b.ptr
+; CHECK-NEXT: br i1 %[[V]], label %loop_begin.us, label %loop_exit.split.us
+;
+; CHECK: loop_exit.split.us:
+; CHECK-NEXT: %[[B_LCSSA:.*]] = phi i32 [ %[[B]], %loop_b.us ]
+; CHECK-NEXT: br label %loop_exit
+
+loop_b:
+ %b = load i32, i32* %b.ptr
+ br i1 %v, label %loop_begin, label %loop_exit
+; The original loop, now non-looping due to unswitching.
+;
+; CHECK: entry.split:
+; CHECK-NEXT: br label %loop_begin
+;
+; CHECK: loop_begin:
+; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr
+; CHECK-NEXT: br label %loop_exit.split
+;
+; CHECK: loop_exit.split:
+; CHECK-NEXT: br label %loop_exit
+
+loop_exit:
+ %ab.phi = phi i32 [ %b, %loop_b ], [ %a, %loop_begin ]
+ ret i32 %ab.phi
+; CHECK: loop_exit:
+; CHECK-NEXT: %[[AB_PHI:.*]] = phi i32 [ %[[A]], %loop_exit.split ], [ %[[B_LCSSA]], %loop_exit.split.us ]
+; CHECK-NEXT: ret i32 %[[AB_PHI]]
+}
+
+; Test a non-trivial unswitch of an exiting edge to an exit block with no other
+; in-loop predecessors.
+define void @test4a(i1* %ptr, i1 %cond1, i32* %a.ptr, i32* %b.ptr) {
+; CHECK-LABEL: @test4a(
+entry:
+ br label %loop_begin
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 %cond1, label %entry.split.us, label %entry.split
+
+loop_begin:
+ %v = load i1, i1* %ptr
+ %a = load i32, i32* %a.ptr
+; The true edge exits to loop_exit1, whose only predecessor is this block.
+ br i1 %cond1, label %loop_exit1, label %loop_b
+; The 'loop_exit' clone.
+;
+; CHECK: entry.split.us:
+; CHECK-NEXT: br label %loop_begin.us
+;
+; CHECK: loop_begin.us:
+; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr
+; CHECK-NEXT: br label %loop_exit1.split.us
+;
+; CHECK: loop_exit1.split.us:
+; CHECK-NEXT: %[[A_LCSSA:.*]] = phi i32 [ %[[A]], %loop_begin.us ]
+; CHECK-NEXT: br label %loop_exit1
+
+loop_b:
+ %b = load i32, i32* %b.ptr
+; The other exit, loop_exit2, is reached only from here.
+ br i1 %v, label %loop_begin, label %loop_exit2
+; The 'loop_b' unswitched loop.
+;
+; CHECK: entry.split:
+; CHECK-NEXT: br label %loop_begin
+;
+; CHECK: loop_begin:
+; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr
+; CHECK-NEXT: br label %loop_b
+;
+; CHECK: loop_b:
+; CHECK-NEXT: %[[B:.*]] = load i32, i32* %b.ptr
+; CHECK-NEXT: br i1 %[[V]], label %loop_begin, label %loop_exit2
+
+loop_exit1:
+ %a.phi = phi i32 [ %a, %loop_begin ]
+ call void @sink1(i32 %a.phi)
+ ret void
+; CHECK: loop_exit1:
+; CHECK-NEXT: call void @sink1(i32 %[[A_LCSSA]])
+; CHECK-NEXT: ret void
+
+loop_exit2:
+ %b.phi = phi i32 [ %b, %loop_b ]
+ call void @sink2(i32 %b.phi)
+ ret void
+; CHECK: loop_exit2:
+; CHECK-NEXT: %[[B_LCSSA:.*]] = phi i32 [ %[[B]], %loop_b ]
+; CHECK-NEXT: call void @sink2(i32 %[[B_LCSSA]])
+; CHECK-NEXT: ret void
+}
+
+; Test a non-trivial unswitch of an exiting edge to an exit block with no other
+; in-loop predecessors. This is the same as @test4a but with the edges reversed
+; so that the exiting edge is *not* the cloned edge.
+define void @test4b(i1* %ptr, i1 %cond1, i32* %a.ptr, i32* %b.ptr) {
+; CHECK-LABEL: @test4b(
+entry:
+ br label %loop_begin
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 %cond1, label %entry.split.us, label %entry.split
+
+loop_begin:
+ %v = load i1, i1* %ptr
+ %a = load i32, i32* %a.ptr
+; Here the false edge is the one that exits (to loop_exit1); the true edge
+; continues to loop_b.
+ br i1 %cond1, label %loop_b, label %loop_exit1
+; The 'loop_b' clone.
+;
+; CHECK: entry.split.us:
+; CHECK-NEXT: br label %loop_begin.us
+;
+; CHECK: loop_begin.us:
+; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr
+; CHECK-NEXT: br label %loop_b.us
+;
+; CHECK: loop_b.us:
+; CHECK-NEXT: %[[B:.*]] = load i32, i32* %b.ptr
+; CHECK-NEXT: br i1 %[[V]], label %loop_begin.us, label %loop_exit2.split.us
+;
+; CHECK: loop_exit2.split.us:
+; CHECK-NEXT: %[[B_LCSSA:.*]] = phi i32 [ %[[B]], %loop_b.us ]
+; CHECK-NEXT: br label %loop_exit2
+
+loop_b:
+ %b = load i32, i32* %b.ptr
+ br i1 %v, label %loop_begin, label %loop_exit2
+; The 'loop_exit' unswitched path.
+;
+; CHECK: entry.split:
+; CHECK-NEXT: br label %loop_begin
+;
+; CHECK: loop_begin:
+; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr
+; CHECK-NEXT: br label %loop_exit1
+
+loop_exit1:
+ %a.phi = phi i32 [ %a, %loop_begin ]
+ call void @sink1(i32 %a.phi)
+ ret void
+; CHECK: loop_exit1:
+; CHECK-NEXT: %[[A_PHI:.*]] = phi i32 [ %[[A]], %loop_begin ]
+; CHECK-NEXT: call void @sink1(i32 %[[A_PHI]])
+; CHECK-NEXT: ret void
+
+loop_exit2:
+ %b.phi = phi i32 [ %b, %loop_b ]
+ call void @sink2(i32 %b.phi)
+ ret void
+; CHECK: loop_exit2:
+; CHECK-NEXT: call void @sink2(i32 %[[B_LCSSA]])
+; CHECK-NEXT: ret void
+}
+
+; Test a non-trivial unswitch of an exiting edge to an exit block with no other
+; in-loop predecessors. This is the same as @test4a but with a common merge
+; block after the independent loop exits. This requires a different structural
+; update to the dominator tree.
+define void @test4c(i1* %ptr, i1 %cond1, i32* %a.ptr, i32* %b.ptr) {
+; CHECK-LABEL: @test4c(
+entry:
+ br label %loop_begin
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 %cond1, label %entry.split.us, label %entry.split
+
+loop_begin:
+ %v = load i1, i1* %ptr
+ %a = load i32, i32* %a.ptr
+ br i1 %cond1, label %loop_exit1, label %loop_b
+; The 'loop_exit' clone.
+;
+; CHECK: entry.split.us:
+; CHECK-NEXT: br label %loop_begin.us
+;
+; CHECK: loop_begin.us:
+; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr
+; CHECK-NEXT: br label %loop_exit1.split.us
+;
+; CHECK: loop_exit1.split.us:
+; CHECK-NEXT: %[[A_LCSSA:.*]] = phi i32 [ %[[A]], %loop_begin.us ]
+; CHECK-NEXT: br label %loop_exit1
+
+loop_b:
+ %b = load i32, i32* %b.ptr
+ br i1 %v, label %loop_begin, label %loop_exit2
+; The 'loop_b' unswitched loop.
+;
+; CHECK: entry.split:
+; CHECK-NEXT: br label %loop_begin
+;
+; CHECK: loop_begin:
+; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr
+; CHECK-NEXT: br label %loop_b
+;
+; CHECK: loop_b:
+; CHECK-NEXT: %[[B:.*]] = load i32, i32* %b.ptr
+; CHECK-NEXT: br i1 %[[V]], label %loop_begin, label %loop_exit2
+
+loop_exit1:
+ %a.phi = phi i32 [ %a, %loop_begin ]
+ call void @sink1(i32 %a.phi)
+; Unlike @test4a, this exit branches to a shared 'exit' block instead of
+; returning.
+ br label %exit
+; CHECK: loop_exit1:
+; CHECK-NEXT: call void @sink1(i32 %[[A_LCSSA]])
+; CHECK-NEXT: br label %exit
+
+loop_exit2:
+ %b.phi = phi i32 [ %b, %loop_b ]
+ call void @sink2(i32 %b.phi)
+ br label %exit
+; CHECK: loop_exit2:
+; CHECK-NEXT: %[[B_LCSSA:.*]] = phi i32 [ %[[B]], %loop_b ]
+; CHECK-NEXT: call void @sink2(i32 %[[B_LCSSA]])
+; CHECK-NEXT: br label %exit
+
+exit:
+; Common merge point reached from both loop exits.
+ ret void
+; CHECK: exit:
+; CHECK-NEXT: ret void
+}
+
+; Test that we can unswitch a condition out of multiple layers of a loop nest.
+;
+; Input shape: the loop headed by %loop_begin contains the loop headed by
+; %inner_loop_begin. %inner_loop_begin branches on the argument %cond1 either
+; directly to %loop_exit (outside both loops) or on to %inner_loop_b.
+define i32 @test5(i1* %ptr, i1 %cond1, i32* %a.ptr, i32* %b.ptr) {
+; CHECK-LABEL: @test5(
+entry:
+ br label %loop_begin
+; The expected branch on %cond1 sits in the entry block, ahead of both loops.
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 %cond1, label %loop_begin.split.us, label %entry.split
+;
+; CHECK: entry.split:
+; CHECK-NEXT: br label %loop_begin
+;
+; CHECK: loop_begin:
+; CHECK-NEXT: br label %loop_begin.split
+
+loop_begin:
+ br label %inner_loop_begin
+
+inner_loop_begin:
+ %v = load i1, i1* %ptr
+ %a = load i32, i32* %a.ptr
+ br i1 %cond1, label %loop_exit, label %inner_loop_b
+; The 'loop_exit' clone.
+;
+; CHECK: loop_begin.split.us:
+; CHECK-NEXT: br label %inner_loop_begin.us
+;
+; CHECK: inner_loop_begin.us:
+; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr
+; CHECK-NEXT: br label %loop_exit.loopexit.split.us
+;
+; CHECK: loop_exit.loopexit.split.us:
+; CHECK-NEXT: %[[A_LCSSA:.*]] = phi i32 [ %[[A]], %inner_loop_begin.us ]
+; CHECK-NEXT: br label %loop_exit
+
+inner_loop_b:
+ %b = load i32, i32* %b.ptr
+ ; Backedge of the inner loop on true, fall out to the outer latch on false.
+ br i1 %v, label %inner_loop_begin, label %loop_latch
+; The 'inner_loop_b' unswitched loop.
+;
+; CHECK: loop_begin.split:
+; CHECK-NEXT: br label %inner_loop_begin
+;
+; CHECK: inner_loop_begin:
+; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr
+; CHECK-NEXT: br label %inner_loop_b
+;
+; CHECK: inner_loop_b:
+; CHECK-NEXT: %[[B:.*]] = load i32, i32* %b.ptr
+; CHECK-NEXT: br i1 %[[V]], label %inner_loop_begin, label %loop_latch
+
+loop_latch:
+ ; Single-entry phi carrying %b out of the inner loop.
+ %b.phi = phi i32 [ %b, %inner_loop_b ]
+ %v2 = load i1, i1* %ptr
+ ; Backedge of the outer loop on true, exit on false.
+ br i1 %v2, label %loop_begin, label %loop_exit
+; In the expected output the latch leaves through %loop_exit.loopexit1 rather
+; than branching to %loop_exit directly.
+; CHECK: loop_latch:
+; CHECK-NEXT: %[[B_INNER_LCSSA:.*]] = phi i32 [ %[[B]], %inner_loop_b ]
+; CHECK-NEXT: %[[V2:.*]] = load i1, i1* %ptr
+; CHECK-NEXT: br i1 %[[V2]], label %loop_begin, label %loop_exit.loopexit1
+
+loop_exit:
+ ; Merges the value from the early exit (%a) with the one from the latch (%b.phi).
+ %ab.phi = phi i32 [ %a, %inner_loop_begin ], [ %b.phi, %loop_latch ]
+ ret i32 %ab.phi
+; CHECK: loop_exit.loopexit:
+; CHECK-NEXT: br label %loop_exit
+;
+; CHECK: loop_exit.loopexit1:
+; CHECK-NEXT: %[[B_LCSSA:.*]] = phi i32 [ %[[B_INNER_LCSSA]], %loop_latch ]
+; CHECK-NEXT: br label %loop_exit
+;
+; CHECK: loop_exit:
+; CHECK-NEXT: %[[AB_PHI:.*]] = phi i32 [ %[[A_LCSSA]], %loop_exit.loopexit ], [ %[[B_LCSSA]], %loop_exit.loopexit1 ]
+; CHECK-NEXT: ret i32 %[[AB_PHI]]
+}
+
+; Test that we can unswitch a condition where we end up only cloning some of
+; the nested loops and needing to delete some of the nested loops.
+;
+; Input shape: %loop_begin branches on the argument %cond1 to one of two arms,
+; %loop_a or %loop_b. Each arm contains its own self-looping inner block
+; (%loop_a_inner / %loop_b_inner) and both arms rejoin at %latch, which either
+; takes the backedge to %loop_begin or exits to %loop_exit.
+define i32 @test6(i1* %ptr, i1 %cond1, i32* %a.ptr, i32* %b.ptr) {
+; CHECK-LABEL: @test6(
+entry:
+ br label %loop_begin
+; The expected entry block branches on %cond1 to pick one of two split
+; preheaders.
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 %cond1, label %entry.split.us, label %entry.split
+
+loop_begin:
+ %v = load i1, i1* %ptr
+ br i1 %cond1, label %loop_a, label %loop_b
+
+loop_a:
+ br label %loop_a_inner
+
+loop_a_inner:
+ %va = load i1, i1* %ptr
+ %a = load i32, i32* %a.ptr
+ ; Self-loop on true, leave the inner loop on false.
+ br i1 %va, label %loop_a_inner, label %loop_a_inner_exit
+
+loop_a_inner_exit:
+ ; Single-entry phi carrying %a out of the inner loop.
+ %a.lcssa = phi i32 [ %a, %loop_a_inner ]
+ br label %latch
+; The 'loop_a' cloned loop.
+;
+; The expected '.us' copy contains only the 'a' arm; no '.us' copies of the
+; 'b' blocks are expected in this region.
+;
+; CHECK: entry.split.us:
+; CHECK-NEXT: br label %loop_begin.us
+;
+; CHECK: loop_begin.us:
+; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT: br label %loop_a.us
+;
+; CHECK: loop_a.us:
+; CHECK-NEXT: br label %loop_a_inner.us
+;
+; CHECK: loop_a_inner.us:
+; CHECK-NEXT: %[[VA:.*]] = load i1, i1* %ptr
+; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr
+; CHECK-NEXT: br i1 %[[VA]], label %loop_a_inner.us, label %loop_a_inner_exit.us
+;
+; CHECK: loop_a_inner_exit.us:
+; CHECK-NEXT: %[[A_INNER_LCSSA:.*]] = phi i32 [ %[[A]], %loop_a_inner.us ]
+; CHECK-NEXT: br label %latch.us
+;
+; CHECK: latch.us:
+; CHECK-NEXT: %[[A_PHI:.*]] = phi i32 [ %[[A_INNER_LCSSA]], %loop_a_inner_exit.us ]
+; CHECK-NEXT: br i1 %[[V]], label %loop_begin.us, label %loop_exit.split.us
+;
+; CHECK: loop_exit.split.us:
+; CHECK-NEXT: %[[A_LCSSA:.*]] = phi i32 [ %[[A_PHI]], %latch.us ]
+; CHECK-NEXT: br label %loop_exit
+
+loop_b:
+ br label %loop_b_inner
+
+loop_b_inner:
+ %vb = load i1, i1* %ptr
+ %b = load i32, i32* %b.ptr
+ ; Self-loop on true, leave the inner loop on false.
+ br i1 %vb, label %loop_b_inner, label %loop_b_inner_exit
+
+loop_b_inner_exit:
+ ; Single-entry phi carrying %b out of the inner loop.
+ %b.lcssa = phi i32 [ %b, %loop_b_inner ]
+ br label %latch
+
+latch:
+ ; Merges the values produced by the two arms.
+ %ab.phi = phi i32 [ %a.lcssa, %loop_a_inner_exit ], [ %b.lcssa, %loop_b_inner_exit ]
+ br i1 %v, label %loop_begin, label %loop_exit
+; The 'loop_b' unswitched loop.
+;
+; The expected original copy keeps only the 'b' arm, and the expected %latch no
+; longer starts with the merge phi.
+;
+; CHECK: entry.split:
+; CHECK-NEXT: br label %loop_begin
+;
+; CHECK: loop_begin:
+; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT: br label %loop_b
+;
+; CHECK: loop_b:
+; CHECK-NEXT: br label %loop_b_inner
+;
+; CHECK: loop_b_inner:
+; CHECK-NEXT: %[[VB:.*]] = load i1, i1* %ptr
+; CHECK-NEXT: %[[B:.*]] = load i32, i32* %b.ptr
+; CHECK-NEXT: br i1 %[[VB]], label %loop_b_inner, label %loop_b_inner_exit
+;
+; CHECK: loop_b_inner_exit:
+; CHECK-NEXT: %[[B_INNER_LCSSA:.*]] = phi i32 [ %[[B]], %loop_b_inner ]
+; CHECK-NEXT: br label %latch
+;
+; CHECK: latch:
+; CHECK-NEXT: br i1 %[[V]], label %loop_begin, label %loop_exit.split
+;
+; CHECK: loop_exit.split:
+; CHECK-NEXT: %[[B_LCSSA:.*]] = phi i32 [ %[[B_INNER_LCSSA]], %latch ]
+; CHECK-NEXT: br label %loop_exit
+
+loop_exit:
+ %ab.lcssa = phi i32 [ %ab.phi, %latch ]
+ ret i32 %ab.lcssa
+; The expected exit phi merges the values arriving from the two split exits.
+; CHECK: loop_exit:
+; CHECK-NEXT: %[[AB_PHI:.*]] = phi i32 [ %[[B_LCSSA]], %loop_exit.split ], [ %[[A_LCSSA]], %loop_exit.split.us ]
+; CHECK-NEXT: ret i32 %[[AB_PHI]]
+}
+
+; Test that when unswitching a deeply nested loop condition in a way that
+; produces a non-loop clone that can reach multiple exit blocks which are part
+; of different outer loops we correctly divide the cloned loop blocks between
+; the outer loops based on reachability.
+;
+; Input shape: three nested loops headed by %loop_begin, %inner_loop_begin and
+; %inner_inner_loop_begin. %inner_inner_loop_d branches on %cond (loaded in
+; %inner_loop_begin): true leaves the innermost loop for %inner_loop_exit and
+; false takes the backedge to %inner_inner_loop_begin. The innermost loop also
+; exits to %loop_exit (from 'a'), %inner_inner_loop_exit (from 'b') and
+; %inner_loop_exit (from 'c').
+define i32 @test7a(i1* %ptr, i1* %cond.ptr, i32* %a.ptr, i32* %b.ptr) {
+; CHECK-LABEL: @test7a(
+entry:
+ br label %loop_begin
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label %loop_begin
+
+loop_begin:
+ %a = load i32, i32* %a.ptr
+ br label %inner_loop_begin
+; CHECK: loop_begin:
+; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr
+; CHECK-NEXT: br label %inner_loop_begin
+
+inner_loop_begin:
+ %a.phi = phi i32 [ %a, %loop_begin ], [ %a2, %inner_inner_loop_exit ]
+ %cond = load i1, i1* %cond.ptr
+ %b = load i32, i32* %b.ptr
+ br label %inner_inner_loop_begin
+; The expected branch on the loaded condition sits at the end of
+; %inner_loop_begin, the block that loads it.
+; CHECK: inner_loop_begin:
+; CHECK-NEXT: %[[A_INNER_PHI:.*]] = phi i32 [ %[[A]], %loop_begin ], [ %[[A2:.*]], %inner_inner_loop_exit ]
+; CHECK-NEXT: %[[COND:.*]] = load i1, i1* %cond.ptr
+; CHECK-NEXT: %[[B:.*]] = load i32, i32* %b.ptr
+; CHECK-NEXT: br i1 %[[COND]], label %inner_loop_begin.split.us, label %inner_loop_begin.split
+
+inner_inner_loop_begin:
+ %v1 = load i1, i1* %ptr
+ br i1 %v1, label %inner_inner_loop_a, label %inner_inner_loop_b
+
+inner_inner_loop_a:
+ %v2 = load i1, i1* %ptr
+ br i1 %v2, label %loop_exit, label %inner_inner_loop_c
+
+inner_inner_loop_b:
+ %v3 = load i1, i1* %ptr
+ br i1 %v3, label %inner_inner_loop_exit, label %inner_inner_loop_c
+
+inner_inner_loop_c:
+ %v4 = load i1, i1* %ptr
+ br i1 %v4, label %inner_loop_exit, label %inner_inner_loop_d
+
+inner_inner_loop_d:
+ ; True exits the innermost loop, false is its backedge.
+ br i1 %cond, label %inner_loop_exit, label %inner_inner_loop_begin
+; The cloned copy that always exits with the adjustments required to fix up
+; loop exits.
+;
+; The expected %inner_inner_loop_a.us starts with phis for the values defined
+; in %inner_loop_begin, and %inner_inner_loop_b.us reaches
+; %inner_inner_loop_c.us through a separate '.loopexit' block.
+;
+; CHECK: inner_loop_begin.split.us:
+; CHECK-NEXT: br label %inner_inner_loop_begin.us
+;
+; CHECK: inner_inner_loop_begin.us:
+; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT: br i1 %[[V]], label %inner_inner_loop_a.us, label %inner_inner_loop_b.us
+;
+; CHECK: inner_inner_loop_b.us:
+; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT: br i1 %[[V]], label %inner_inner_loop_exit.split.us, label %inner_inner_loop_c.us.loopexit
+;
+; CHECK: inner_inner_loop_a.us:
+; CHECK-NEXT: %[[A_NEW_LCSSA:.*]] = phi i32 [ %[[A_INNER_PHI]], %inner_inner_loop_begin.us ]
+; CHECK-NEXT: %[[B_NEW_LCSSA:.*]] = phi i32 [ %[[B]], %inner_inner_loop_begin.us ]
+; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT: br i1 %[[V]], label %loop_exit.split.us, label %inner_inner_loop_c.us
+;
+; CHECK: inner_inner_loop_c.us.loopexit:
+; CHECK-NEXT: br label %inner_inner_loop_c.us
+;
+; CHECK: inner_inner_loop_c.us:
+; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT: br i1 %[[V]], label %inner_loop_exit.loopexit.split.us, label %inner_inner_loop_d.us
+;
+; CHECK: inner_inner_loop_d.us:
+; CHECK-NEXT: br label %inner_loop_exit.loopexit.split
+;
+; CHECK: inner_inner_loop_exit.split.us:
+; CHECK-NEXT: br label %inner_inner_loop_exit
+;
+; CHECK: loop_exit.split.us:
+; CHECK-NEXT: %[[A_LCSSA_US:.*]] = phi i32 [ %[[A_NEW_LCSSA]], %inner_inner_loop_a.us ]
+; CHECK-NEXT: %[[B_LCSSA_US:.*]] = phi i32 [ %[[B_NEW_LCSSA]], %inner_inner_loop_a.us ]
+; CHECK-NEXT: br label %loop_exit
+;
+; CHECK: inner_loop_exit.loopexit.split.us:
+; CHECK-NEXT: br label %inner_loop_exit.loopexit
+;
+; The original copy that continues to loop.
+;
+; CHECK: inner_loop_begin.split:
+; CHECK-NEXT: br label %inner_inner_loop_begin
+;
+; CHECK: inner_inner_loop_begin:
+; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT: br i1 %[[V]], label %inner_inner_loop_a, label %inner_inner_loop_b
+;
+; CHECK: inner_inner_loop_a:
+; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT: br i1 %[[V]], label %loop_exit.split, label %inner_inner_loop_c
+;
+; CHECK: inner_inner_loop_b:
+; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT: br i1 %[[V]], label %inner_inner_loop_exit.split, label %inner_inner_loop_c
+;
+; CHECK: inner_inner_loop_c:
+; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT: br i1 %[[V]], label %inner_loop_exit.loopexit.split, label %inner_inner_loop_d
+;
+; CHECK: inner_inner_loop_d:
+; CHECK-NEXT: br label %inner_inner_loop_begin
+;
+; CHECK: inner_inner_loop_exit.split:
+; CHECK-NEXT: br label %inner_inner_loop_exit
+
+inner_inner_loop_exit:
+ %a2 = load i32, i32* %a.ptr
+ %v5 = load i1, i1* %ptr
+ ; True leaves the middle loop, false is its backedge.
+ br i1 %v5, label %inner_loop_exit, label %inner_loop_begin
+; CHECK: inner_inner_loop_exit:
+; CHECK-NEXT: %[[A2]] = load i32, i32* %a.ptr
+; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT: br i1 %[[V]], label %inner_loop_exit.loopexit1, label %inner_loop_begin
+
+inner_loop_exit:
+ ; Unconditional backedge of the outermost loop.
+ br label %loop_begin
+; CHECK: inner_loop_exit.loopexit.split:
+; CHECK-NEXT: br label %inner_loop_exit.loopexit
+;
+; CHECK: inner_loop_exit.loopexit:
+; CHECK-NEXT: br label %inner_loop_exit
+;
+; CHECK: inner_loop_exit.loopexit1:
+; CHECK-NEXT: br label %inner_loop_exit
+;
+; CHECK: inner_loop_exit:
+; CHECK-NEXT: br label %loop_begin
+
+loop_exit:
+ ; Single-entry phis carrying %a.phi and %b out of the whole nest.
+ %a.lcssa = phi i32 [ %a.phi, %inner_inner_loop_a ]
+ %b.lcssa = phi i32 [ %b, %inner_inner_loop_a ]
+ %result = add i32 %a.lcssa, %b.lcssa
+ ret i32 %result
+; CHECK: loop_exit.split:
+; CHECK-NEXT: %[[A_LCSSA:.*]] = phi i32 [ %[[A_INNER_PHI]], %inner_inner_loop_a ]
+; CHECK-NEXT: %[[B_LCSSA:.*]] = phi i32 [ %[[B]], %inner_inner_loop_a ]
+; CHECK-NEXT: br label %loop_exit
+;
+; CHECK: loop_exit:
+; CHECK-NEXT: %[[A_PHI:.*]] = phi i32 [ %[[A_LCSSA]], %loop_exit.split ], [ %[[A_LCSSA_US]], %loop_exit.split.us ]
+; CHECK-NEXT: %[[B_PHI:.*]] = phi i32 [ %[[B_LCSSA]], %loop_exit.split ], [ %[[B_LCSSA_US]], %loop_exit.split.us ]
+; CHECK-NEXT: %[[RESULT:.*]] = add i32 %[[A_PHI]], %[[B_PHI]]
+; CHECK-NEXT: ret i32 %[[RESULT]]
+}
+
+; Same pattern as @test7a but here the original loop becomes a non-loop that
+; can reach multiple exit blocks which are part of different outer loops.
+;
+; The only difference in the input is the successor order in
+; %inner_inner_loop_d: here true on %cond takes the backedge to
+; %inner_inner_loop_begin and false leaves for %inner_loop_exit.
+define i32 @test7b(i1* %ptr, i1* %cond.ptr, i32* %a.ptr, i32* %b.ptr) {
+; CHECK-LABEL: @test7b(
+entry:
+ br label %loop_begin
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label %loop_begin
+
+loop_begin:
+ %a = load i32, i32* %a.ptr
+ br label %inner_loop_begin
+; CHECK: loop_begin:
+; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr
+; CHECK-NEXT: br label %inner_loop_begin
+
+inner_loop_begin:
+ %a.phi = phi i32 [ %a, %loop_begin ], [ %a2, %inner_inner_loop_exit ]
+ %cond = load i1, i1* %cond.ptr
+ %b = load i32, i32* %b.ptr
+ br label %inner_inner_loop_begin
+; The expected branch on the loaded condition sits at the end of
+; %inner_loop_begin, the block that loads it.
+; CHECK: inner_loop_begin:
+; CHECK-NEXT: %[[A_INNER_PHI:.*]] = phi i32 [ %[[A]], %loop_begin ], [ %[[A2:.*]], %inner_inner_loop_exit ]
+; CHECK-NEXT: %[[COND:.*]] = load i1, i1* %cond.ptr
+; CHECK-NEXT: %[[B:.*]] = load i32, i32* %b.ptr
+; CHECK-NEXT: br i1 %[[COND]], label %inner_loop_begin.split.us, label %inner_loop_begin.split
+
+inner_inner_loop_begin:
+ %v1 = load i1, i1* %ptr
+ br i1 %v1, label %inner_inner_loop_a, label %inner_inner_loop_b
+
+inner_inner_loop_a:
+ %v2 = load i1, i1* %ptr
+ br i1 %v2, label %loop_exit, label %inner_inner_loop_c
+
+inner_inner_loop_b:
+ %v3 = load i1, i1* %ptr
+ br i1 %v3, label %inner_inner_loop_exit, label %inner_inner_loop_c
+
+inner_inner_loop_c:
+ %v4 = load i1, i1* %ptr
+ br i1 %v4, label %inner_loop_exit, label %inner_inner_loop_d
+
+inner_inner_loop_d:
+ ; True is the backedge of the innermost loop, false exits it.
+ br i1 %cond, label %inner_inner_loop_begin, label %inner_loop_exit
+; The cloned copy that continues looping.
+;
+; CHECK: inner_loop_begin.split.us:
+; CHECK-NEXT: br label %inner_inner_loop_begin.us
+;
+; CHECK: inner_inner_loop_begin.us:
+; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT: br i1 %[[V]], label %inner_inner_loop_a.us, label %inner_inner_loop_b.us
+;
+; CHECK: inner_inner_loop_b.us:
+; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT: br i1 %[[V]], label %inner_inner_loop_exit.split.us, label %inner_inner_loop_c.us
+;
+; CHECK: inner_inner_loop_a.us:
+; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT: br i1 %[[V]], label %loop_exit.split.us, label %inner_inner_loop_c.us
+;
+; CHECK: inner_inner_loop_c.us:
+; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT: br i1 %[[V]], label %inner_loop_exit.loopexit.split.us, label %inner_inner_loop_d.us
+;
+; CHECK: inner_inner_loop_d.us:
+; CHECK-NEXT: br label %inner_inner_loop_begin.us
+;
+; CHECK: inner_inner_loop_exit.split.us:
+; CHECK-NEXT: br label %inner_inner_loop_exit
+;
+; CHECK: loop_exit.split.us:
+; CHECK-NEXT: %[[A_LCSSA_US:.*]] = phi i32 [ %[[A_INNER_PHI]], %inner_inner_loop_a.us ]
+; CHECK-NEXT: %[[B_LCSSA_US:.*]] = phi i32 [ %[[B]], %inner_inner_loop_a.us ]
+; CHECK-NEXT: br label %loop_exit
+;
+; CHECK: inner_loop_exit.loopexit.split.us:
+; CHECK-NEXT: br label %inner_loop_exit.loopexit
+;
+; The original copy that now always exits and needs adjustments for exit
+; blocks.
+;
+; The expected %inner_inner_loop_a starts with phis for the values defined in
+; %inner_loop_begin, and %inner_inner_loop_b reaches %inner_inner_loop_c
+; through a separate '.loopexit' block.
+;
+; CHECK: inner_loop_begin.split:
+; CHECK-NEXT: br label %inner_inner_loop_begin
+;
+; CHECK: inner_inner_loop_begin:
+; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT: br i1 %[[V]], label %inner_inner_loop_a, label %inner_inner_loop_b
+;
+; CHECK: inner_inner_loop_a:
+; CHECK-NEXT: %[[A_NEW_LCSSA:.*]] = phi i32 [ %[[A_INNER_PHI]], %inner_inner_loop_begin ]
+; CHECK-NEXT: %[[B_NEW_LCSSA:.*]] = phi i32 [ %[[B]], %inner_inner_loop_begin ]
+; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT: br i1 %[[V]], label %loop_exit.split, label %inner_inner_loop_c
+;
+; CHECK: inner_inner_loop_b:
+; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT: br i1 %[[V]], label %inner_inner_loop_exit.split, label %inner_inner_loop_c.loopexit
+;
+; CHECK: inner_inner_loop_c.loopexit:
+; CHECK-NEXT: br label %inner_inner_loop_c
+;
+; CHECK: inner_inner_loop_c:
+; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT: br i1 %[[V]], label %inner_loop_exit.loopexit.split, label %inner_inner_loop_d
+;
+; CHECK: inner_inner_loop_d:
+; CHECK-NEXT: br label %inner_loop_exit.loopexit.split
+;
+; CHECK: inner_inner_loop_exit.split:
+; CHECK-NEXT: br label %inner_inner_loop_exit
+
+inner_inner_loop_exit:
+ %a2 = load i32, i32* %a.ptr
+ %v5 = load i1, i1* %ptr
+ ; True leaves the middle loop, false is its backedge.
+ br i1 %v5, label %inner_loop_exit, label %inner_loop_begin
+; CHECK: inner_inner_loop_exit:
+; CHECK-NEXT: %[[A2]] = load i32, i32* %a.ptr
+; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT: br i1 %[[V]], label %inner_loop_exit.loopexit1, label %inner_loop_begin
+
+inner_loop_exit:
+ ; Unconditional backedge of the outermost loop.
+ br label %loop_begin
+; CHECK: inner_loop_exit.loopexit.split:
+; CHECK-NEXT: br label %inner_loop_exit.loopexit
+;
+; CHECK: inner_loop_exit.loopexit:
+; CHECK-NEXT: br label %inner_loop_exit
+;
+; CHECK: inner_loop_exit.loopexit1:
+; CHECK-NEXT: br label %inner_loop_exit
+;
+; CHECK: inner_loop_exit:
+; CHECK-NEXT: br label %loop_begin
+
+loop_exit:
+ ; Single-entry phis carrying %a.phi and %b out of the whole nest.
+ %a.lcssa = phi i32 [ %a.phi, %inner_inner_loop_a ]
+ %b.lcssa = phi i32 [ %b, %inner_inner_loop_a ]
+ %result = add i32 %a.lcssa, %b.lcssa
+ ret i32 %result
+; CHECK: loop_exit.split:
+; CHECK-NEXT: %[[A_LCSSA:.*]] = phi i32 [ %[[A_NEW_LCSSA]], %inner_inner_loop_a ]
+; CHECK-NEXT: %[[B_LCSSA:.*]] = phi i32 [ %[[B_NEW_LCSSA]], %inner_inner_loop_a ]
+; CHECK-NEXT: br label %loop_exit
+;
+; CHECK: loop_exit:
+; CHECK-NEXT: %[[A_PHI:.*]] = phi i32 [ %[[A_LCSSA]], %loop_exit.split ], [ %[[A_LCSSA_US]], %loop_exit.split.us ]
+; CHECK-NEXT: %[[B_PHI:.*]] = phi i32 [ %[[B_LCSSA]], %loop_exit.split ], [ %[[B_LCSSA_US]], %loop_exit.split.us ]
+; CHECK-NEXT: %[[RESULT:.*]] = add i32 %[[A_PHI]], %[[B_PHI]]
+; CHECK-NEXT: ret i32 %[[RESULT]]
+}
+
+; Test that when the exit block set of an inner loop changes to start at a less
+; high level of the loop nest we correctly hoist the loop up the nest.
+;
+; Input shape: three nested loops headed by %loop_begin, %inner_loop_begin and
+; %inner_inner_loop_begin. %inner_inner_loop_b branches on %cond (loaded in
+; %inner_loop_begin): true continues to %inner_inner_loop_latch and false
+; leaves the innermost loop for %inner_inner_loop_exit. The innermost loop's
+; other exit, from %inner_inner_loop_a, goes to %inner_loop_exit.
+define i32 @test8a(i1* %ptr, i1* %cond.ptr, i32* %a.ptr, i32* %b.ptr) {
+; CHECK-LABEL: @test8a(
+entry:
+ br label %loop_begin
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label %loop_begin
+
+loop_begin:
+ %a = load i32, i32* %a.ptr
+ br label %inner_loop_begin
+; CHECK: loop_begin:
+; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr
+; CHECK-NEXT: br label %inner_loop_begin
+
+inner_loop_begin:
+ %a.phi = phi i32 [ %a, %loop_begin ], [ %a2, %inner_inner_loop_exit ]
+ %cond = load i1, i1* %cond.ptr
+ %b = load i32, i32* %b.ptr
+ br label %inner_inner_loop_begin
+; The expected branch on the loaded condition sits at the end of
+; %inner_loop_begin, the block that loads it.
+; CHECK: inner_loop_begin:
+; CHECK-NEXT: %[[A_INNER_PHI:.*]] = phi i32 [ %[[A]], %loop_begin ], [ %[[A2:.*]], %inner_inner_loop_exit ]
+; CHECK-NEXT: %[[COND:.*]] = load i1, i1* %cond.ptr
+; CHECK-NEXT: %[[B:.*]] = load i32, i32* %b.ptr
+; CHECK-NEXT: br i1 %[[COND]], label %inner_loop_begin.split.us, label %inner_loop_begin.split
+
+inner_inner_loop_begin:
+ %v1 = load i1, i1* %ptr
+ br i1 %v1, label %inner_inner_loop_a, label %inner_inner_loop_b
+
+inner_inner_loop_a:
+ %v2 = load i1, i1* %ptr
+ br i1 %v2, label %inner_inner_loop_latch, label %inner_loop_exit
+
+inner_inner_loop_b:
+ ; True stays in the innermost loop, false exits it.
+ br i1 %cond, label %inner_inner_loop_latch, label %inner_inner_loop_exit
+
+inner_inner_loop_latch:
+ br label %inner_inner_loop_begin
+; The cloned region is now an exit from the inner loop.
+;
+; The expected '.us' preheader holds a phi for the %inner_loop_begin phi value,
+; and the expected '.us' copy has no edge to %inner_inner_loop_exit.
+;
+; CHECK: inner_loop_begin.split.us:
+; CHECK-NEXT: %[[A_INNER_INNER_LCSSA:.*]] = phi i32 [ %[[A_INNER_PHI]], %inner_loop_begin ]
+; CHECK-NEXT: br label %inner_inner_loop_begin.us
+;
+; CHECK: inner_inner_loop_begin.us:
+; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT: br i1 %[[V]], label %inner_inner_loop_a.us, label %inner_inner_loop_b.us
+;
+; CHECK: inner_inner_loop_b.us:
+; CHECK-NEXT: br label %inner_inner_loop_latch.us
+;
+; CHECK: inner_inner_loop_a.us:
+; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT: br i1 %[[V]], label %inner_inner_loop_latch.us, label %inner_loop_exit.loopexit.split.us
+;
+; CHECK: inner_inner_loop_latch.us:
+; CHECK-NEXT: br label %inner_inner_loop_begin.us
+;
+; CHECK: inner_loop_exit.loopexit.split.us:
+; CHECK-NEXT: %[[A_INNER_LCSSA_US:.*]] = phi i32 [ %[[A_INNER_INNER_LCSSA]], %inner_inner_loop_a.us ]
+; CHECK-NEXT: br label %inner_loop_exit.loopexit
+;
+; The original region exits the loop earlier.
+;
+; CHECK: inner_loop_begin.split:
+; CHECK-NEXT: br label %inner_inner_loop_begin
+;
+; CHECK: inner_inner_loop_begin:
+; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT: br i1 %[[V]], label %inner_inner_loop_a, label %inner_inner_loop_b
+;
+; CHECK: inner_inner_loop_a:
+; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT: br i1 %[[V]], label %inner_inner_loop_latch, label %inner_loop_exit.loopexit.split
+;
+; CHECK: inner_inner_loop_b:
+; CHECK-NEXT: br label %inner_inner_loop_exit
+;
+; CHECK: inner_inner_loop_latch:
+; CHECK-NEXT: br label %inner_inner_loop_begin
+
+inner_inner_loop_exit:
+ %a2 = load i32, i32* %a.ptr
+ %v4 = load i1, i1* %ptr
+ ; True leaves the middle loop, false is its backedge.
+ br i1 %v4, label %inner_loop_exit, label %inner_loop_begin
+; CHECK: inner_inner_loop_exit:
+; CHECK-NEXT: %[[A2]] = load i32, i32* %a.ptr
+; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT: br i1 %[[V]], label %inner_loop_exit.loopexit1, label %inner_loop_begin
+
+inner_loop_exit:
+ %v5 = load i1, i1* %ptr
+ ; True leaves the outermost loop, false is its backedge.
+ br i1 %v5, label %loop_exit, label %loop_begin
+; CHECK: inner_loop_exit.loopexit.split:
+; CHECK-NEXT: %[[A_INNER_LCSSA:.*]] = phi i32 [ %[[A_INNER_PHI]], %inner_inner_loop_a ]
+; CHECK-NEXT: br label %inner_loop_exit.loopexit
+;
+; CHECK: inner_loop_exit.loopexit:
+; CHECK-NEXT: %[[A_INNER_US_PHI:.*]] = phi i32 [ %[[A_INNER_LCSSA]], %inner_loop_exit.loopexit.split ], [ %[[A_INNER_LCSSA_US]], %inner_loop_exit.loopexit.split.us ]
+; CHECK-NEXT: br label %inner_loop_exit
+;
+; CHECK: inner_loop_exit.loopexit1:
+; CHECK-NEXT: %[[A_INNER_LCSSA2:.*]] = phi i32 [ %[[A_INNER_PHI]], %inner_inner_loop_exit ]
+; CHECK-NEXT: br label %inner_loop_exit
+;
+; Note: the pattern for %inner_loop_exit rebinds the A_INNER_PHI variable to
+; the merge phi in that block; the %loop_exit pattern uses the new binding.
+; CHECK: inner_loop_exit:
+; CHECK-NEXT: %[[A_INNER_PHI:.*]] = phi i32 [ %[[A_INNER_LCSSA2]], %inner_loop_exit.loopexit1 ], [ %[[A_INNER_US_PHI]], %inner_loop_exit.loopexit ]
+; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT: br i1 %[[V]], label %loop_exit, label %loop_begin
+
+loop_exit:
+ ; Single-entry phi carrying %a.phi out of the whole nest.
+ %a.lcssa = phi i32 [ %a.phi, %inner_loop_exit ]
+ ret i32 %a.lcssa
+; CHECK: loop_exit:
+; CHECK-NEXT: %[[A_LCSSA:.*]] = phi i32 [ %[[A_INNER_PHI]], %inner_loop_exit ]
+; CHECK-NEXT: ret i32 %[[A_LCSSA]]
+}
+
+; Same pattern as @test8a but where the original loop loses an exit block and
+; needs to be hoisted up the nest.
+;
+; The only difference in the input is the successor order in
+; %inner_inner_loop_b: here true on %cond leaves the innermost loop for
+; %inner_inner_loop_exit and false continues to %inner_inner_loop_latch.
+define i32 @test8b(i1* %ptr, i1* %cond.ptr, i32* %a.ptr, i32* %b.ptr) {
+; CHECK-LABEL: @test8b(
+entry:
+ br label %loop_begin
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label %loop_begin
+
+loop_begin:
+ %a = load i32, i32* %a.ptr
+ br label %inner_loop_begin
+; CHECK: loop_begin:
+; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr
+; CHECK-NEXT: br label %inner_loop_begin
+
+inner_loop_begin:
+ %a.phi = phi i32 [ %a, %loop_begin ], [ %a2, %inner_inner_loop_exit ]
+ %cond = load i1, i1* %cond.ptr
+ %b = load i32, i32* %b.ptr
+ br label %inner_inner_loop_begin
+; The expected branch on the loaded condition sits at the end of
+; %inner_loop_begin, the block that loads it.
+; CHECK: inner_loop_begin:
+; CHECK-NEXT: %[[A_INNER_PHI:.*]] = phi i32 [ %[[A]], %loop_begin ], [ %[[A2:.*]], %inner_inner_loop_exit ]
+; CHECK-NEXT: %[[COND:.*]] = load i1, i1* %cond.ptr
+; CHECK-NEXT: %[[B:.*]] = load i32, i32* %b.ptr
+; CHECK-NEXT: br i1 %[[COND]], label %inner_loop_begin.split.us, label %inner_loop_begin.split
+
+inner_inner_loop_begin:
+ %v1 = load i1, i1* %ptr
+ br i1 %v1, label %inner_inner_loop_a, label %inner_inner_loop_b
+
+inner_inner_loop_a:
+ %v2 = load i1, i1* %ptr
+ br i1 %v2, label %inner_inner_loop_latch, label %inner_loop_exit
+
+inner_inner_loop_b:
+ ; True exits the innermost loop, false stays in it.
+ br i1 %cond, label %inner_inner_loop_exit, label %inner_inner_loop_latch
+
+inner_inner_loop_latch:
+ br label %inner_inner_loop_begin
+; The cloned region is similar to before but with one earlier exit.
+;
+; CHECK: inner_loop_begin.split.us:
+; CHECK-NEXT: br label %inner_inner_loop_begin.us
+;
+; CHECK: inner_inner_loop_begin.us:
+; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT: br i1 %[[V]], label %inner_inner_loop_a.us, label %inner_inner_loop_b.us
+;
+; CHECK: inner_inner_loop_b.us:
+; CHECK-NEXT: br label %inner_inner_loop_exit.split.us
+;
+; CHECK: inner_inner_loop_a.us:
+; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT: br i1 %[[V]], label %inner_inner_loop_latch.us, label %inner_loop_exit.loopexit.split.us
+;
+; CHECK: inner_inner_loop_latch.us:
+; CHECK-NEXT: br label %inner_inner_loop_begin.us
+;
+; CHECK: inner_inner_loop_exit.split.us:
+; CHECK-NEXT: br label %inner_inner_loop_exit
+;
+; CHECK: inner_loop_exit.loopexit.split.us:
+; CHECK-NEXT: %[[A_INNER_LCSSA_US:.*]] = phi i32 [ %[[A_INNER_PHI]], %inner_inner_loop_a.us ]
+; CHECK-NEXT: br label %inner_loop_exit.loopexit
+;
+; The original region is now an exit in the preheader.
+;
+; The expected non-'.us' preheader holds a phi for the %inner_loop_begin phi
+; value, and the expected original copy has no edge to %inner_inner_loop_exit.
+;
+; CHECK: inner_loop_begin.split:
+; CHECK-NEXT: %[[A_INNER_INNER_LCSSA:.*]] = phi i32 [ %[[A_INNER_PHI]], %inner_loop_begin ]
+; CHECK-NEXT: br label %inner_inner_loop_begin
+;
+; CHECK: inner_inner_loop_begin:
+; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT: br i1 %[[V]], label %inner_inner_loop_a, label %inner_inner_loop_b
+;
+; CHECK: inner_inner_loop_a:
+; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT: br i1 %[[V]], label %inner_inner_loop_latch, label %inner_loop_exit.loopexit.split
+;
+; CHECK: inner_inner_loop_b:
+; CHECK-NEXT: br label %inner_inner_loop_latch
+;
+; CHECK: inner_inner_loop_latch:
+; CHECK-NEXT: br label %inner_inner_loop_begin
+
+inner_inner_loop_exit:
+ %a2 = load i32, i32* %a.ptr
+ %v4 = load i1, i1* %ptr
+ ; True leaves the middle loop, false is its backedge.
+ br i1 %v4, label %inner_loop_exit, label %inner_loop_begin
+; CHECK: inner_inner_loop_exit:
+; CHECK-NEXT: %[[A2]] = load i32, i32* %a.ptr
+; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT: br i1 %[[V]], label %inner_loop_exit.loopexit1, label %inner_loop_begin
+
+inner_loop_exit:
+ %v5 = load i1, i1* %ptr
+ ; True leaves the outermost loop, false is its backedge.
+ br i1 %v5, label %loop_exit, label %loop_begin
+; CHECK: inner_loop_exit.loopexit.split:
+; CHECK-NEXT: %[[A_INNER_LCSSA:.*]] = phi i32 [ %[[A_INNER_INNER_LCSSA]], %inner_inner_loop_a ]
+; CHECK-NEXT: br label %inner_loop_exit.loopexit
+;
+; CHECK: inner_loop_exit.loopexit:
+; CHECK-NEXT: %[[A_INNER_US_PHI:.*]] = phi i32 [ %[[A_INNER_LCSSA]], %inner_loop_exit.loopexit.split ], [ %[[A_INNER_LCSSA_US]], %inner_loop_exit.loopexit.split.us ]
+; CHECK-NEXT: br label %inner_loop_exit
+;
+; CHECK: inner_loop_exit.loopexit1:
+; CHECK-NEXT: %[[A_INNER_LCSSA2:.*]] = phi i32 [ %[[A_INNER_PHI]], %inner_inner_loop_exit ]
+; CHECK-NEXT: br label %inner_loop_exit
+;
+; Note: the pattern for %inner_loop_exit rebinds the A_INNER_PHI variable to
+; the merge phi in that block; the %loop_exit pattern uses the new binding.
+; CHECK: inner_loop_exit:
+; CHECK-NEXT: %[[A_INNER_PHI:.*]] = phi i32 [ %[[A_INNER_LCSSA2]], %inner_loop_exit.loopexit1 ], [ %[[A_INNER_US_PHI]], %inner_loop_exit.loopexit ]
+; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT: br i1 %[[V]], label %loop_exit, label %loop_begin
+
+loop_exit:
+ ; Single-entry phi carrying %a.phi out of the whole nest.
+ %a.lcssa = phi i32 [ %a.phi, %inner_loop_exit ]
+ ret i32 %a.lcssa
+; CHECK: loop_exit:
+; CHECK-NEXT: %[[A_LCSSA:.*]] = phi i32 [ %[[A_INNER_PHI]], %inner_loop_exit ]
+; CHECK-NEXT: ret i32 %[[A_LCSSA]]
+}
+
+; Test for when unswitching produces a clone of an inner loop but
+; the clone no longer has an exiting edge *at all* and loops infinitely.
+; Because it doesn't ever exit to the outer loop it is no longer an inner loop
+; but needs to be hoisted up the nest to be a top-level loop.
+;
+; Input shape: the loop headed by %loop_begin loads %cond and contains the loop
+; headed by %inner_loop_begin. %inner_loop_begin branches on %cond: true
+; continues to %inner_loop_latch (which takes the backedge) and false is the
+; inner loop's only exit, to %inner_loop_exit.
+define i32 @test9a(i1* %ptr, i1* %cond.ptr, i32* %a.ptr, i32* %b.ptr) {
+; CHECK-LABEL: @test9a(
+entry:
+ br label %loop_begin
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label %loop_begin
+
+loop_begin:
+ %b = load i32, i32* %b.ptr
+ %cond = load i1, i1* %cond.ptr
+ br label %inner_loop_begin
+; The expected branch on the loaded condition sits at the end of %loop_begin,
+; the block that loads it.
+; CHECK: loop_begin:
+; CHECK-NEXT: %[[B:.*]] = load i32, i32* %b.ptr
+; CHECK-NEXT: %[[COND:.*]] = load i1, i1* %cond.ptr
+; CHECK-NEXT: br i1 %[[COND]], label %loop_begin.split.us, label %loop_begin.split
+
+inner_loop_begin:
+ %a = load i32, i32* %a.ptr
+ ; True stays in the inner loop, false exits it.
+ br i1 %cond, label %inner_loop_latch, label %inner_loop_exit
+
+inner_loop_latch:
+ call void @sink1(i32 %b)
+ br label %inner_loop_begin
+; The cloned inner loop ends up as an infinite loop and thus being a top-level
+; loop with the preheader as an exit block of the outer loop.
+;
+; The expected '.us' preheader holds a phi for %b, which the cloned latch
+; passes to @sink1.
+;
+; CHECK: loop_begin.split.us:
+; CHECK-NEXT: %[[B_LCSSA:.*]] = phi i32 [ %[[B]], %loop_begin ]
+; CHECK-NEXT: br label %inner_loop_begin.us
+;
+; CHECK: inner_loop_begin.us:
+; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr
+; CHECK-NEXT: br label %inner_loop_latch.us
+;
+; CHECK: inner_loop_latch.us:
+; CHECK-NEXT: call void @sink1(i32 %[[B_LCSSA]])
+; CHECK-NEXT: br label %inner_loop_begin.us
+;
+; The original loop becomes boring non-loop code.
+;
+; CHECK: loop_begin.split:
+; CHECK-NEXT: br label %inner_loop_begin
+;
+; CHECK: inner_loop_begin:
+; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr
+; CHECK-NEXT: br label %inner_loop_exit
+
+inner_loop_exit:
+ ; Single-entry phi carrying %a out of the inner loop.
+ %a.inner_lcssa = phi i32 [ %a, %inner_loop_begin ]
+ %v = load i1, i1* %ptr
+ ; True is the backedge of the outer loop, false exits it.
+ br i1 %v, label %loop_begin, label %loop_exit
+; CHECK: inner_loop_exit:
+; CHECK-NEXT: %[[A_INNER_LCSSA:.*]] = phi i32 [ %[[A]], %inner_loop_begin ]
+; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT: br i1 %[[V]], label %loop_begin, label %loop_exit
+
+loop_exit:
+ %a.lcssa = phi i32 [ %a.inner_lcssa, %inner_loop_exit ]
+ ret i32 %a.lcssa
+; CHECK: loop_exit:
+; CHECK-NEXT: %[[A_LCSSA:.*]] = phi i32 [ %[[A_INNER_LCSSA]], %inner_loop_exit ]
+; CHECK-NEXT: ret i32 %[[A_LCSSA]]
+}
+
+; The same core pattern as @test9a, but instead of the cloned loop becoming an
+; infinite loop, the original loop has its only exit unswitched and the
+; original loop becomes infinite and must be hoisted out of the loop nest.
+;
+; The only difference in the input is the successor order in
+; %inner_loop_begin: here true on %cond exits to %inner_loop_exit and false
+; continues to %inner_loop_latch.
+define i32 @test9b(i1* %ptr, i1* %cond.ptr, i32* %a.ptr, i32* %b.ptr) {
+; CHECK-LABEL: @test9b(
+entry:
+ br label %loop_begin
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label %loop_begin
+
+loop_begin:
+ %b = load i32, i32* %b.ptr
+ %cond = load i1, i1* %cond.ptr
+ br label %inner_loop_begin
+; The expected branch on the loaded condition sits at the end of %loop_begin,
+; the block that loads it.
+; CHECK: loop_begin:
+; CHECK-NEXT: %[[B:.*]] = load i32, i32* %b.ptr
+; CHECK-NEXT: %[[COND:.*]] = load i1, i1* %cond.ptr
+; CHECK-NEXT: br i1 %[[COND]], label %loop_begin.split.us, label %loop_begin.split
+
+inner_loop_begin:
+ %a = load i32, i32* %a.ptr
+ ; True exits the inner loop, false stays in it.
+ br i1 %cond, label %inner_loop_exit, label %inner_loop_latch
+
+inner_loop_latch:
+ call void @sink1(i32 %b)
+ br label %inner_loop_begin
+; The cloned inner loop becomes a boring non-loop.
+;
+; CHECK: loop_begin.split.us:
+; CHECK-NEXT: br label %inner_loop_begin.us
+;
+; CHECK: inner_loop_begin.us:
+; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr
+; CHECK-NEXT: br label %inner_loop_exit.split.us
+;
+; CHECK: inner_loop_exit.split.us:
+; CHECK-NEXT: %[[A_INNER_LCSSA_US:.*]] = phi i32 [ %[[A]], %inner_loop_begin.us ]
+; CHECK-NEXT: br label %inner_loop_exit
+;
+; The original loop becomes an infinite loop and thus a top-level loop with the
+; preheader as an exit block for the outer loop.
+;
+; The expected non-'.us' preheader holds a phi for %b, which the latch passes
+; to @sink1.
+;
+; CHECK: loop_begin.split:
+; CHECK-NEXT: %[[B_LCSSA:.*]] = phi i32 [ %[[B]], %loop_begin ]
+; CHECK-NEXT: br label %inner_loop_begin
+;
+; CHECK: inner_loop_begin:
+; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr
+; CHECK-NEXT: br label %inner_loop_latch
+;
+; CHECK: inner_loop_latch:
+; CHECK-NEXT: call void @sink1(i32 %[[B_LCSSA]])
+; CHECK-NEXT: br label %inner_loop_begin
+
+inner_loop_exit:
+ ; Single-entry phi carrying %a out of the inner loop.
+ %a.inner_lcssa = phi i32 [ %a, %inner_loop_begin ]
+ %v = load i1, i1* %ptr
+ ; True is the backedge of the outer loop, false exits it.
+ br i1 %v, label %loop_begin, label %loop_exit
+; The expected %inner_loop_exit starts directly with the load; the value
+; reaching %loop_exit is the phi from %inner_loop_exit.split.us.
+; CHECK: inner_loop_exit:
+; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT: br i1 %[[V]], label %loop_begin, label %loop_exit
+
+loop_exit:
+ %a.lcssa = phi i32 [ %a.inner_lcssa, %inner_loop_exit ]
+ ret i32 %a.lcssa
+; CHECK: loop_exit:
+; CHECK-NEXT: %[[A_LCSSA:.*]] = phi i32 [ %[[A_INNER_LCSSA_US]], %inner_loop_exit ]
+; CHECK-NEXT: ret i32 %[[A_LCSSA]]
+}
+
+; Test that requires re-forming dedicated exits for the cloned loop.
+define i32 @test10a(i1* %ptr, i1 %cond, i32* %a.ptr) {
+; %cond is a function argument and so invariant in the loop; the branch on it
+; in %loop_b is the one expected to be unswitched into %entry. When %cond is
+; true, %loop_b leaves the loop; when false, it takes the backedge.
+; CHECK-LABEL: @test10a(
+entry:
+ br label %loop_begin
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 %cond, label %entry.split.us, label %entry.split
+
+loop_begin:
+ %a = load i32, i32* %a.ptr
+ %v1 = load i1, i1* %ptr
+ br i1 %v1, label %loop_a, label %loop_b
+
+loop_a:
+ %v2 = load i1, i1* %ptr
+ br i1 %v2, label %loop_exit, label %loop_begin
+
+loop_b:
+ br i1 %cond, label %loop_exit, label %loop_begin
+; The cloned loop with one edge as a direct exit.
+;
+; CHECK: entry.split.us:
+; CHECK-NEXT: br label %loop_begin.us
+;
+; CHECK: loop_begin.us:
+; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr
+; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT: br i1 %[[V]], label %loop_a.us, label %loop_b.us
+;
+; CHECK: loop_b.us:
+; CHECK-NEXT: %[[A_LCSSA_B:.*]] = phi i32 [ %[[A]], %loop_begin.us ]
+; CHECK-NEXT: br label %loop_exit.split.us
+;
+; CHECK: loop_a.us:
+; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT: br i1 %[[V]], label %loop_exit.split.us.loopexit, label %loop_begin.backedge.us
+;
+; CHECK: loop_begin.backedge.us:
+; CHECK-NEXT: br label %loop_begin.us
+;
+; CHECK: loop_exit.split.us.loopexit:
+; CHECK-NEXT: %[[A_LCSSA_A:.*]] = phi i32 [ %[[A]], %loop_a.us ]
+; CHECK-NEXT: br label %loop_exit
+;
+; CHECK: loop_exit.split.us:
+; CHECK-NEXT: %[[A_PHI_US:.*]] = phi i32 [ %[[A_LCSSA_B]], %loop_b.us ], [ %[[A_LCSSA_A]], %loop_exit.split.us.loopexit ]
+; CHECK-NEXT: br label %loop_exit
+
+; The original loop without one 'loop_exit' edge.
+;
+; CHECK: entry.split:
+; CHECK-NEXT: br label %loop_begin
+;
+; CHECK: loop_begin:
+; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr
+; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT: br i1 %[[V]], label %loop_a, label %loop_b
+;
+; CHECK: loop_a:
+; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT: br i1 %[[V]], label %loop_exit.split, label %loop_begin.backedge
+;
+; CHECK: loop_begin.backedge:
+; CHECK-NEXT: br label %loop_begin
+;
+; CHECK: loop_b:
+; CHECK-NEXT: br label %loop_begin.backedge
+;
+; CHECK: loop_exit.split:
+; CHECK-NEXT: %[[A_LCSSA:.*]] = phi i32 [ %[[A]], %loop_a ]
+; CHECK-NEXT: br label %loop_exit
+
+loop_exit:
+ %a.lcssa = phi i32 [ %a, %loop_a ], [ %a, %loop_b ]
+ ret i32 %a.lcssa
+; The final phi merges the values arriving from the two split exit blocks.
+; CHECK: loop_exit:
+; CHECK-NEXT: %[[A_PHI:.*]] = phi i32 [ %[[A_LCSSA]], %loop_exit.split ], [ %[[A_PHI_US]], %loop_exit.split.us ]
+; CHECK-NEXT: ret i32 %[[A_PHI]]
+}
+
+; Test that requires re-forming dedicated exits for the original loop.
+define i32 @test10b(i1* %ptr, i1 %cond, i32* %a.ptr) {
+; Mirror image of the branch polarity in @test10a: here %loop_b takes the
+; backedge when %cond is true and leaves the loop when it is false, so it is
+; the original (not the cloned) loop that ends up with a direct exit edge.
+; CHECK-LABEL: @test10b(
+entry:
+ br label %loop_begin
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 %cond, label %entry.split.us, label %entry.split
+
+loop_begin:
+ %a = load i32, i32* %a.ptr
+ %v1 = load i1, i1* %ptr
+ br i1 %v1, label %loop_a, label %loop_b
+
+loop_a:
+ %v2 = load i1, i1* %ptr
+ br i1 %v2, label %loop_begin, label %loop_exit
+
+loop_b:
+ br i1 %cond, label %loop_begin, label %loop_exit
+; The cloned loop without one of the exits.
+;
+; CHECK: entry.split.us:
+; CHECK-NEXT: br label %loop_begin.us
+;
+; CHECK: loop_begin.us:
+; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr
+; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT: br i1 %[[V]], label %loop_a.us, label %loop_b.us
+;
+; CHECK: loop_b.us:
+; CHECK-NEXT: br label %loop_begin.backedge.us
+;
+; CHECK: loop_a.us:
+; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT: br i1 %[[V]], label %loop_begin.backedge.us, label %loop_exit.split.us
+;
+; CHECK: loop_begin.backedge.us:
+; CHECK-NEXT: br label %loop_begin.us
+;
+; CHECK: loop_exit.split.us:
+; CHECK-NEXT: %[[A_LCSSA_US:.*]] = phi i32 [ %[[A]], %loop_a.us ]
+; CHECK-NEXT: br label %loop_exit
+
+; The original loop with one edge ('loop_b') as a direct exit.
+;
+; CHECK: entry.split:
+; CHECK-NEXT: br label %loop_begin
+;
+; CHECK: loop_begin:
+; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr
+; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT: br i1 %[[V]], label %loop_a, label %loop_b
+;
+; CHECK: loop_a:
+; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT: br i1 %[[V]], label %loop_begin.backedge, label %loop_exit.split.loopexit
+;
+; CHECK: loop_begin.backedge:
+; CHECK-NEXT: br label %loop_begin
+;
+; CHECK: loop_b:
+; CHECK-NEXT: %[[A_LCSSA_B:.*]] = phi i32 [ %[[A]], %loop_begin ]
+; CHECK-NEXT: br label %loop_exit.split
+;
+; CHECK: loop_exit.split.loopexit:
+; CHECK-NEXT: %[[A_LCSSA_A:.*]] = phi i32 [ %[[A]], %loop_a ]
+; CHECK-NEXT: br label %loop_exit.split
+;
+; CHECK: loop_exit.split:
+; CHECK-NEXT: %[[A_PHI_SPLIT:.*]] = phi i32 [ %[[A_LCSSA_B]], %loop_b ], [ %[[A_LCSSA_A]], %loop_exit.split.loopexit ]
+; CHECK-NEXT: br label %loop_exit
+
+loop_exit:
+ %a.lcssa = phi i32 [ %a, %loop_a ], [ %a, %loop_b ]
+ ret i32 %a.lcssa
+; The final phi merges the values arriving from the two split exit blocks.
+; CHECK: loop_exit:
+; CHECK-NEXT: %[[A_PHI:.*]] = phi i32 [ %[[A_PHI_SPLIT]], %loop_exit.split ], [ %[[A_LCSSA_US]], %loop_exit.split.us ]
+; CHECK-NEXT: ret i32 %[[A_PHI]]
+}
+
+; Check that if a cloned inner loop after unswitching doesn't loop and directly
+; exits even an outer loop, we don't add the cloned preheader to the outer
+; loop and do add the needed LCSSA phi nodes for the new exit block from the
+; outer loop.
+define i32 @test11a(i1* %ptr, i1* %cond.ptr, i32* %a.ptr, i32* %b.ptr) {
+; %cond is loaded in %inner_loop_ph, outside the inner loop, and guards an
+; edge from %inner_loop_begin directly to %loop_exit (taken when %cond is
+; true), bypassing %inner_loop_exit.
+; CHECK-LABEL: @test11a(
+entry:
+ br label %loop_begin
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label %loop_begin
+
+loop_begin:
+ %b = load i32, i32* %b.ptr
+ %v1 = load i1, i1* %ptr
+ br i1 %v1, label %loop_latch, label %inner_loop_ph
+; CHECK: loop_begin:
+; CHECK-NEXT: %[[B:.*]] = load i32, i32* %b.ptr
+; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT: br i1 %[[V]], label %loop_latch, label %inner_loop_ph
+
+inner_loop_ph:
+ %cond = load i1, i1* %cond.ptr
+ br label %inner_loop_begin
+; The unswitched branch is expected in the inner loop's preheader.
+; CHECK: inner_loop_ph:
+; CHECK-NEXT: %[[COND:.*]] = load i1, i1* %cond.ptr
+; CHECK-NEXT: br i1 %[[COND]], label %inner_loop_ph.split.us, label %inner_loop_ph.split
+
+inner_loop_begin:
+ call void @sink1(i32 %b)
+ %a = load i32, i32* %a.ptr
+ br i1 %cond, label %loop_exit, label %inner_loop_a
+
+inner_loop_a:
+ %v2 = load i1, i1* %ptr
+ br i1 %v2, label %inner_loop_exit, label %inner_loop_begin
+; The cloned path doesn't actually loop and is an exit from the outer loop as
+; well.
+;
+; CHECK: inner_loop_ph.split.us:
+; CHECK-NEXT: %[[B_LCSSA:.*]] = phi i32 [ %[[B]], %inner_loop_ph ]
+; CHECK-NEXT: br label %inner_loop_begin.us
+;
+; CHECK: inner_loop_begin.us:
+; CHECK-NEXT: call void @sink1(i32 %[[B_LCSSA]])
+; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr
+; CHECK-NEXT: br label %loop_exit.loopexit.split.us
+;
+; CHECK: loop_exit.loopexit.split.us:
+; CHECK-NEXT: %[[A_INNER_LCSSA_US:.*]] = phi i32 [ %[[A]], %inner_loop_begin.us ]
+; CHECK-NEXT: br label %loop_exit.loopexit
+;
+; The original remains a loop losing the exit edge.
+;
+; CHECK: inner_loop_ph.split:
+; CHECK-NEXT: br label %inner_loop_begin
+;
+; CHECK: inner_loop_begin:
+; CHECK-NEXT: call void @sink1(i32 %[[B]])
+; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr
+; CHECK-NEXT: br label %inner_loop_a
+;
+; CHECK: inner_loop_a:
+; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT: br i1 %[[V]], label %inner_loop_exit, label %inner_loop_begin
+
+inner_loop_exit:
+ %a.inner_lcssa = phi i32 [ %a, %inner_loop_a ]
+ %v3 = load i1, i1* %ptr
+ br i1 %v3, label %loop_latch, label %loop_exit
+; CHECK: inner_loop_exit:
+; CHECK-NEXT: %[[A_INNER_LCSSA:.*]] = phi i32 [ %[[A]], %inner_loop_a ]
+; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT: br i1 %[[V]], label %loop_latch, label %loop_exit.loopexit1
+
+loop_latch:
+ br label %loop_begin
+; CHECK: loop_latch:
+; CHECK-NEXT: br label %loop_begin
+
+loop_exit:
+ %a.lcssa = phi i32 [ %a, %inner_loop_begin ], [ %a.inner_lcssa, %inner_loop_exit ]
+ ret i32 %a.lcssa
+; Two separate exit blocks are expected in front of %loop_exit: one reached
+; from the cloned path and one reached from %inner_loop_exit.
+; CHECK: loop_exit.loopexit:
+; CHECK-NEXT: br label %loop_exit
+;
+; CHECK: loop_exit.loopexit1:
+; CHECK-NEXT: %[[A_LCSSA:.*]] = phi i32 [ %[[A_INNER_LCSSA]], %inner_loop_exit ]
+; CHECK-NEXT: br label %loop_exit
+;
+; CHECK: loop_exit:
+; CHECK-NEXT: %[[A_PHI:.*]] = phi i32 [ %[[A_INNER_LCSSA_US]], %loop_exit.loopexit ], [ %[[A_LCSSA]], %loop_exit.loopexit1 ]
+; CHECK-NEXT: ret i32 %[[A_PHI]]
+}
+
+; Check that if the original inner loop after unswitching doesn't loop and
+; directly exits even an outer loop, we remove the original preheader from the
+; outer loop and add needed LCSSA phi nodes for the new exit block from the
+; outer loop.
+define i32 @test11b(i1* %ptr, i1* %cond.ptr, i32* %a.ptr, i32* %b.ptr) {
+; Same shape as @test11a with the polarity of the %cond branch flipped: the
+; edge from %inner_loop_begin to %loop_exit is taken when %cond is false, so
+; it is the original (not the cloned) path that stops looping.
+; CHECK-LABEL: @test11b(
+entry:
+ br label %loop_begin
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label %loop_begin
+
+loop_begin:
+ %b = load i32, i32* %b.ptr
+ %v1 = load i1, i1* %ptr
+ br i1 %v1, label %loop_latch, label %inner_loop_ph
+; CHECK: loop_begin:
+; CHECK-NEXT: %[[B:.*]] = load i32, i32* %b.ptr
+; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT: br i1 %[[V]], label %loop_latch, label %inner_loop_ph
+
+inner_loop_ph:
+ %cond = load i1, i1* %cond.ptr
+ br label %inner_loop_begin
+; The unswitched branch is expected in the inner loop's preheader.
+; CHECK: inner_loop_ph:
+; CHECK-NEXT: %[[COND:.*]] = load i1, i1* %cond.ptr
+; CHECK-NEXT: br i1 %[[COND]], label %inner_loop_ph.split.us, label %inner_loop_ph.split
+
+inner_loop_begin:
+ call void @sink1(i32 %b)
+ %a = load i32, i32* %a.ptr
+ br i1 %cond, label %inner_loop_a, label %loop_exit
+
+inner_loop_a:
+ %v2 = load i1, i1* %ptr
+ br i1 %v2, label %inner_loop_exit, label %inner_loop_begin
+; The cloned path continues to loop without the exit out of the entire nest.
+;
+; CHECK: inner_loop_ph.split.us:
+; CHECK-NEXT: br label %inner_loop_begin.us
+;
+; CHECK: inner_loop_begin.us:
+; CHECK-NEXT: call void @sink1(i32 %[[B]])
+; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr
+; CHECK-NEXT: br label %inner_loop_a.us
+;
+; CHECK: inner_loop_a.us:
+; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT: br i1 %[[V]], label %inner_loop_exit.split.us, label %inner_loop_begin.us
+;
+; CHECK: inner_loop_exit.split.us:
+; CHECK-NEXT: %[[A_INNER_LCSSA_US:.*]] = phi i32 [ %[[A]], %inner_loop_a.us ]
+; CHECK-NEXT: br label %inner_loop_exit
+;
+; The original path no longer loops and directly exits the outer loop as well.
+;
+; CHECK: inner_loop_ph.split:
+; CHECK-NEXT: %[[B_LCSSA:.*]] = phi i32 [ %[[B]], %inner_loop_ph ]
+; CHECK-NEXT: br label %inner_loop_begin
+;
+; CHECK: inner_loop_begin:
+; CHECK-NEXT: call void @sink1(i32 %[[B_LCSSA]])
+; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr
+; CHECK-NEXT: br label %loop_exit.loopexit
+
+inner_loop_exit:
+ %a.inner_lcssa = phi i32 [ %a, %inner_loop_a ]
+ %v3 = load i1, i1* %ptr
+ br i1 %v3, label %loop_latch, label %loop_exit
+; The load is expected to directly follow the label, i.e. the %a.inner_lcssa
+; phi is gone from this block.
+; CHECK: inner_loop_exit:
+; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT: br i1 %[[V]], label %loop_latch, label %loop_exit.loopexit1
+
+loop_latch:
+ br label %loop_begin
+; CHECK: loop_latch:
+; CHECK-NEXT: br label %loop_begin
+
+loop_exit:
+ %a.lcssa = phi i32 [ %a, %inner_loop_begin ], [ %a.inner_lcssa, %inner_loop_exit ]
+ ret i32 %a.lcssa
+; Two separate exit blocks are expected in front of %loop_exit: one reached
+; from the original (now non-looping) path and one from %inner_loop_exit.
+; CHECK: loop_exit.loopexit:
+; CHECK-NEXT: %[[A_LCSSA:.*]] = phi i32 [ %[[A]], %inner_loop_begin ]
+; CHECK-NEXT: br label %loop_exit
+;
+; CHECK: loop_exit.loopexit1:
+; CHECK-NEXT: %[[A_LCSSA_US:.*]] = phi i32 [ %[[A_INNER_LCSSA_US]], %inner_loop_exit ]
+; CHECK-NEXT: br label %loop_exit
+;
+; CHECK: loop_exit:
+; CHECK-NEXT: %[[A_PHI:.*]] = phi i32 [ %[[A_LCSSA]], %loop_exit.loopexit ], [ %[[A_LCSSA_US]], %loop_exit.loopexit1 ]
+; CHECK-NEXT: ret i32 %[[A_PHI]]
+}
+
+; Like test11a, but checking that when the whole thing is wrapped in yet
+; another loop, we correctly attribute the cloned preheader to that outermost
+; loop rather than only handling the case where the preheader is not in any loop
+; at all.
+define i32 @test12a(i1* %ptr, i1* %cond.ptr, i32* %a.ptr, i32* %b.ptr) {
+; Three-deep nest: %loop_begin heads the outermost loop, %inner_loop_begin the
+; middle one, and %inner_inner_loop_begin the innermost. %cond is loaded in
+; %inner_inner_loop_ph and guards an edge from %inner_inner_loop_begin to
+; %inner_loop_exit (taken when %cond is true), bypassing
+; %inner_inner_loop_exit.
+; CHECK-LABEL: @test12a(
+entry:
+ br label %loop_begin
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label %loop_begin
+
+loop_begin:
+ br label %inner_loop_begin
+; CHECK: loop_begin:
+; CHECK-NEXT: br label %inner_loop_begin
+
+inner_loop_begin:
+ %b = load i32, i32* %b.ptr
+ %v1 = load i1, i1* %ptr
+ br i1 %v1, label %inner_loop_latch, label %inner_inner_loop_ph
+; CHECK: inner_loop_begin:
+; CHECK-NEXT: %[[B:.*]] = load i32, i32* %b.ptr
+; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT: br i1 %[[V]], label %inner_loop_latch, label %inner_inner_loop_ph
+
+inner_inner_loop_ph:
+ %cond = load i1, i1* %cond.ptr
+ br label %inner_inner_loop_begin
+; The unswitched branch is expected in the innermost loop's preheader.
+; CHECK: inner_inner_loop_ph:
+; CHECK-NEXT: %[[COND:.*]] = load i1, i1* %cond.ptr
+; CHECK-NEXT: br i1 %[[COND]], label %inner_inner_loop_ph.split.us, label %inner_inner_loop_ph.split
+
+inner_inner_loop_begin:
+ call void @sink1(i32 %b)
+ %a = load i32, i32* %a.ptr
+ br i1 %cond, label %inner_loop_exit, label %inner_inner_loop_a
+
+inner_inner_loop_a:
+ %v2 = load i1, i1* %ptr
+ br i1 %v2, label %inner_inner_loop_exit, label %inner_inner_loop_begin
+; The cloned path doesn't actually loop and is an exit from the outer loop as
+; well.
+;
+; CHECK: inner_inner_loop_ph.split.us:
+; CHECK-NEXT: %[[B_LCSSA:.*]] = phi i32 [ %[[B]], %inner_inner_loop_ph ]
+; CHECK-NEXT: br label %inner_inner_loop_begin.us
+;
+; CHECK: inner_inner_loop_begin.us:
+; CHECK-NEXT: call void @sink1(i32 %[[B_LCSSA]])
+; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr
+; CHECK-NEXT: br label %inner_loop_exit.loopexit.split.us
+;
+; CHECK: inner_loop_exit.loopexit.split.us:
+; CHECK-NEXT: %[[A_INNER_INNER_LCSSA_US:.*]] = phi i32 [ %[[A]], %inner_inner_loop_begin.us ]
+; CHECK-NEXT: br label %inner_loop_exit.loopexit
+;
+; The original remains a loop losing the exit edge.
+;
+; CHECK: inner_inner_loop_ph.split:
+; CHECK-NEXT: br label %inner_inner_loop_begin
+;
+; CHECK: inner_inner_loop_begin:
+; CHECK-NEXT: call void @sink1(i32 %[[B]])
+; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr
+; CHECK-NEXT: br label %inner_inner_loop_a
+;
+; CHECK: inner_inner_loop_a:
+; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT: br i1 %[[V]], label %inner_inner_loop_exit, label %inner_inner_loop_begin
+
+inner_inner_loop_exit:
+ %a.inner_inner_lcssa = phi i32 [ %a, %inner_inner_loop_a ]
+ %v3 = load i1, i1* %ptr
+ br i1 %v3, label %inner_loop_latch, label %inner_loop_exit
+; CHECK: inner_inner_loop_exit:
+; CHECK-NEXT: %[[A_INNER_INNER_LCSSA:.*]] = phi i32 [ %[[A]], %inner_inner_loop_a ]
+; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT: br i1 %[[V]], label %inner_loop_latch, label %inner_loop_exit.loopexit1
+
+inner_loop_latch:
+ br label %inner_loop_begin
+; CHECK: inner_loop_latch:
+; CHECK-NEXT: br label %inner_loop_begin
+
+inner_loop_exit:
+ %a.inner_lcssa = phi i32 [ %a, %inner_inner_loop_begin ], [ %a.inner_inner_lcssa, %inner_inner_loop_exit ]
+ %v4 = load i1, i1* %ptr
+ br i1 %v4, label %loop_begin, label %loop_exit
+; Two separate exit blocks are expected in front of %inner_loop_exit: one
+; reached from the cloned path and one from %inner_inner_loop_exit.
+; CHECK: inner_loop_exit.loopexit:
+; CHECK-NEXT: br label %inner_loop_exit
+;
+; CHECK: inner_loop_exit.loopexit1:
+; CHECK-NEXT: %[[A_INNER_LCSSA:.*]] = phi i32 [ %[[A_INNER_INNER_LCSSA]], %inner_inner_loop_exit ]
+; CHECK-NEXT: br label %inner_loop_exit
+;
+; CHECK: inner_loop_exit:
+; CHECK-NEXT: %[[A_INNER_PHI:.*]] = phi i32 [ %[[A_INNER_INNER_LCSSA_US]], %inner_loop_exit.loopexit ], [ %[[A_INNER_LCSSA]], %inner_loop_exit.loopexit1 ]
+; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT: br i1 %[[V]], label %loop_begin, label %loop_exit
+
+loop_exit:
+ %a.lcssa = phi i32 [ %a.inner_lcssa, %inner_loop_exit ]
+ ret i32 %a.lcssa
+; CHECK: loop_exit:
+; CHECK-NEXT: %[[A_LCSSA:.*]] = phi i32 [ %[[A_INNER_PHI]], %inner_loop_exit ]
+; CHECK-NEXT: ret i32 %[[A_LCSSA]]
+}
+
+; Like test11b, but checking that when the whole thing is wrapped in yet
+; another loop, we correctly sink the preheader to the outermost loop rather
+; than only handling the case where the preheader is completely removed from
+; a loop.
+define i32 @test12b(i1* %ptr, i1* %cond.ptr, i32* %a.ptr, i32* %b.ptr) {
+; Same three-deep nest as @test12a with the polarity of the %cond branch
+; flipped: the edge from %inner_inner_loop_begin to %inner_loop_exit is taken
+; when %cond is false, so it is the original (not the cloned) path that stops
+; looping.
+; CHECK-LABEL: @test12b(
+entry:
+ br label %loop_begin
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label %loop_begin
+
+loop_begin:
+ br label %inner_loop_begin
+; CHECK: loop_begin:
+; CHECK-NEXT: br label %inner_loop_begin
+
+inner_loop_begin:
+ %b = load i32, i32* %b.ptr
+ %v1 = load i1, i1* %ptr
+ br i1 %v1, label %inner_loop_latch, label %inner_inner_loop_ph
+; CHECK: inner_loop_begin:
+; CHECK-NEXT: %[[B:.*]] = load i32, i32* %b.ptr
+; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT: br i1 %[[V]], label %inner_loop_latch, label %inner_inner_loop_ph
+
+inner_inner_loop_ph:
+ %cond = load i1, i1* %cond.ptr
+ br label %inner_inner_loop_begin
+; The unswitched branch is expected in the innermost loop's preheader.
+; CHECK: inner_inner_loop_ph:
+; CHECK-NEXT: %[[COND:.*]] = load i1, i1* %cond.ptr
+; CHECK-NEXT: br i1 %[[COND]], label %inner_inner_loop_ph.split.us, label %inner_inner_loop_ph.split
+
+inner_inner_loop_begin:
+ call void @sink1(i32 %b)
+ %a = load i32, i32* %a.ptr
+ br i1 %cond, label %inner_inner_loop_a, label %inner_loop_exit
+
+inner_inner_loop_a:
+ %v2 = load i1, i1* %ptr
+ br i1 %v2, label %inner_inner_loop_exit, label %inner_inner_loop_begin
+; The cloned path continues to loop without the exit out of the entire nest.
+;
+; CHECK: inner_inner_loop_ph.split.us:
+; CHECK-NEXT: br label %inner_inner_loop_begin.us
+;
+; CHECK: inner_inner_loop_begin.us:
+; CHECK-NEXT: call void @sink1(i32 %[[B]])
+; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr
+; CHECK-NEXT: br label %inner_inner_loop_a.us
+;
+; CHECK: inner_inner_loop_a.us:
+; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT: br i1 %[[V]], label %inner_inner_loop_exit.split.us, label %inner_inner_loop_begin.us
+;
+; CHECK: inner_inner_loop_exit.split.us:
+; CHECK-NEXT: %[[A_INNER_INNER_LCSSA_US:.*]] = phi i32 [ %[[A]], %inner_inner_loop_a.us ]
+; CHECK-NEXT: br label %inner_inner_loop_exit
+;
+; The original path no longer loops and directly exits to the outermost loop.
+;
+; CHECK: inner_inner_loop_ph.split:
+; CHECK-NEXT: %[[B_LCSSA:.*]] = phi i32 [ %[[B]], %inner_inner_loop_ph ]
+; CHECK-NEXT: br label %inner_inner_loop_begin
+;
+; CHECK: inner_inner_loop_begin:
+; CHECK-NEXT: call void @sink1(i32 %[[B_LCSSA]])
+; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr
+; CHECK-NEXT: br label %inner_loop_exit.loopexit
+
+inner_inner_loop_exit:
+ %a.inner_inner_lcssa = phi i32 [ %a, %inner_inner_loop_a ]
+ %v3 = load i1, i1* %ptr
+ br i1 %v3, label %inner_loop_latch, label %inner_loop_exit
+; The load is expected to directly follow the label, i.e. the
+; %a.inner_inner_lcssa phi is gone from this block.
+; CHECK: inner_inner_loop_exit:
+; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT: br i1 %[[V]], label %inner_loop_latch, label %inner_loop_exit.loopexit1
+
+inner_loop_latch:
+ br label %inner_loop_begin
+; CHECK: inner_loop_latch:
+; CHECK-NEXT: br label %inner_loop_begin
+
+inner_loop_exit:
+ %a.inner_lcssa = phi i32 [ %a, %inner_inner_loop_begin ], [ %a.inner_inner_lcssa, %inner_inner_loop_exit ]
+ %v4 = load i1, i1* %ptr
+ br i1 %v4, label %loop_begin, label %loop_exit
+; Two separate exit blocks are expected in front of %inner_loop_exit: one
+; reached from the original (now non-looping) path and one from
+; %inner_inner_loop_exit.
+; CHECK: inner_loop_exit.loopexit:
+; CHECK-NEXT: %[[A_INNER_LCSSA:.*]] = phi i32 [ %[[A]], %inner_inner_loop_begin ]
+; CHECK-NEXT: br label %inner_loop_exit
+;
+; CHECK: inner_loop_exit.loopexit1:
+; CHECK-NEXT: %[[A_INNER_LCSSA_US:.*]] = phi i32 [ %[[A_INNER_INNER_LCSSA_US]], %inner_inner_loop_exit ]
+; CHECK-NEXT: br label %inner_loop_exit
+;
+; CHECK: inner_loop_exit:
+; CHECK-NEXT: %[[A_INNER_PHI:.*]] = phi i32 [ %[[A_INNER_LCSSA]], %inner_loop_exit.loopexit ], [ %[[A_INNER_LCSSA_US]], %inner_loop_exit.loopexit1 ]
+; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT: br i1 %[[V]], label %loop_begin, label %loop_exit
+
+loop_exit:
+ %a.lcssa = phi i32 [ %a.inner_lcssa, %inner_loop_exit ]
+ ret i32 %a.lcssa
+; CHECK: loop_exit:
+; CHECK-NEXT: %[[A_LCSSA:.*]] = phi i32 [ %[[A_INNER_PHI]], %inner_loop_exit ]
+; CHECK-NEXT: ret i32 %[[A_LCSSA]]
+}
+
+; Test where the cloned loop has an inner loop that has to be traversed to form
+; the cloned loop, and where this inner loop has multiple blocks, and where the
+; exiting block that connects the inner loop to the cloned loop is not the header
+; block. This ensures that we correctly handle interesting corner cases of
+; traversing back to the header when establishing the cloned loop.
+define i32 @test13a(i1* %ptr, i1 %cond, i32* %a.ptr, i32* %b.ptr) {
+; %cond is a function argument. In %loop_b it selects between entering the
+; inner loop (true) and leaving through %loop_exit (false). The inner loop
+; exits from %loop_b_inner_body, which is not its header.
+; CHECK-LABEL: @test13a(
+entry:
+ br label %loop_begin
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 %cond, label %entry.split.us, label %entry.split
+
+loop_begin:
+ %a = load i32, i32* %a.ptr
+ %v1 = load i1, i1* %ptr
+ br i1 %v1, label %loop_a, label %loop_b
+
+loop_a:
+ %v2 = load i1, i1* %ptr
+ br i1 %v2, label %loop_exit, label %loop_latch
+
+loop_b:
+ %b = load i32, i32* %b.ptr
+ br i1 %cond, label %loop_b_inner_ph, label %loop_exit
+
+loop_b_inner_ph:
+ br label %loop_b_inner_header
+
+loop_b_inner_header:
+ %v3 = load i1, i1* %ptr
+ br i1 %v3, label %loop_b_inner_latch, label %loop_b_inner_body
+
+loop_b_inner_body:
+ %v4 = load i1, i1* %ptr
+ br i1 %v4, label %loop_b_inner_latch, label %loop_b_inner_exit
+
+loop_b_inner_latch:
+ br label %loop_b_inner_header
+
+loop_b_inner_exit:
+ br label %loop_latch
+
+loop_latch:
+ br label %loop_begin
+; The cloned loop contains an inner loop within it.
+;
+; CHECK: entry.split.us:
+; CHECK-NEXT: br label %loop_begin.us
+;
+; CHECK: loop_begin.us:
+; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr
+; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT: br i1 %[[V]], label %loop_a.us, label %loop_b.us
+;
+; CHECK: loop_b.us:
+; CHECK-NEXT: %[[B:.*]] = load i32, i32* %b.ptr
+; CHECK-NEXT: br label %loop_b_inner_ph.us
+;
+; CHECK: loop_b_inner_ph.us:
+; CHECK-NEXT: br label %loop_b_inner_header.us
+;
+; CHECK: loop_b_inner_header.us:
+; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT: br i1 %[[V]], label %loop_b_inner_latch.us, label %loop_b_inner_body.us
+;
+; CHECK: loop_b_inner_body.us:
+; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT: br i1 %[[V]], label %loop_b_inner_latch.us, label %loop_b_inner_exit.us
+;
+; CHECK: loop_b_inner_exit.us:
+; CHECK-NEXT: br label %loop_latch.us
+;
+; CHECK: loop_b_inner_latch.us:
+; CHECK-NEXT: br label %loop_b_inner_header.us
+;
+; CHECK: loop_a.us:
+; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT: br i1 %[[V]], label %loop_exit.split.us, label %loop_latch.us
+;
+; CHECK: loop_latch.us:
+; CHECK-NEXT: br label %loop_begin.us
+;
+; CHECK: loop_exit.split.us:
+; CHECK-NEXT: %[[A_LCSSA_US:.*]] = phi i32 [ %[[A]], %loop_a.us ]
+; CHECK-NEXT: br label %loop_exit
+;
+; And the original loop no longer contains an inner loop.
+;
+; CHECK: entry.split:
+; CHECK-NEXT: br label %loop_begin
+;
+; CHECK: loop_begin:
+; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr
+; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT: br i1 %[[V]], label %loop_a, label %loop_b
+;
+; CHECK: loop_a:
+; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT: br i1 %[[V]], label %loop_exit.split.loopexit, label %loop_latch
+;
+; CHECK: loop_b:
+; CHECK-NEXT: %[[B:.*]] = load i32, i32* %b.ptr
+; CHECK-NEXT: br label %loop_exit.split
+;
+; CHECK: loop_latch:
+; CHECK-NEXT: br label %loop_begin
+
+loop_exit:
+ %lcssa = phi i32 [ %a, %loop_a ], [ %b, %loop_b ]
+ ret i32 %lcssa
+; The exit from %loop_a is expected to get its own block in front of the split
+; exit block, which %loop_b reaches directly.
+; CHECK: loop_exit.split.loopexit:
+; CHECK-NEXT: %[[A_LCSSA:.*]] = phi i32 [ %[[A]], %loop_a ]
+; CHECK-NEXT: br label %loop_exit.split
+;
+; CHECK: loop_exit.split:
+; CHECK-NEXT: %[[AB_PHI:.*]] = phi i32 [ %[[B]], %loop_b ], [ %[[A_LCSSA]], %loop_exit.split.loopexit ]
+; CHECK-NEXT: br label %loop_exit
+;
+; CHECK: loop_exit:
+; CHECK-NEXT: %[[AB_PHI_US:.*]] = phi i32 [ %[[AB_PHI]], %loop_exit.split ], [ %[[A_LCSSA_US]], %loop_exit.split.us ]
+; CHECK-NEXT: ret i32 %[[AB_PHI_US]]
+}
+
+; Test where the original loop has an inner loop that has to be traversed to
+; rebuild the loop, and where this inner loop has multiple blocks, and where
+; the exiting block that connects the inner loop to the original loop is not
+; the header block. This ensures that we correctly handle interesting corner
+; cases of traversing back to the header when re-establishing that the
+; original loop still exists after unswitching.
+define i32 @test13b(i1* %ptr, i1 %cond, i32* %a.ptr, i32* %b.ptr) {
+; CHECK-LABEL: @test13b(
+entry:
+ br label %loop_begin
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 %cond, label %entry.split.us, label %entry.split
+
+loop_begin:
+ %a = load i32, i32* %a.ptr
+ %v1 = load i1, i1* %ptr
+ br i1 %v1, label %loop_a, label %loop_b
+
+loop_a:
+ %v2 = load i1, i1* %ptr
+ br i1 %v2, label %loop_exit, label %loop_latch
+
+loop_b:
+ %b = load i32, i32* %b.ptr
+ br i1 %cond, label %loop_exit, label %loop_b_inner_ph
+
+loop_b_inner_ph:
+ br label %loop_b_inner_header
+
+loop_b_inner_header:
+ %v3 = load i1, i1* %ptr
+ br i1 %v3, label %loop_b_inner_latch, label %loop_b_inner_body
+
+loop_b_inner_body:
+ %v4 = load i1, i1* %ptr
+ br i1 %v4, label %loop_b_inner_latch, label %loop_b_inner_exit
+
+loop_b_inner_latch:
+ br label %loop_b_inner_header
+
+loop_b_inner_exit:
+ br label %loop_latch
+
+loop_latch:
+ br label %loop_begin
+; The cloned loop doesn't contain an inner loop.
+;
+; CHECK: entry.split.us:
+; CHECK-NEXT: br label %loop_begin.us
+;
+; CHECK: loop_begin.us:
+; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr
+; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT: br i1 %[[V]], label %loop_a.us, label %loop_b.us
+;
+; CHECK: loop_b.us:
+; CHECK-NEXT: %[[B:.*]] = load i32, i32* %b.ptr
+; CHECK-NEXT: br label %loop_exit.split.us
+;
+; CHECK: loop_a.us:
+; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT: br i1 %[[V]], label %loop_exit.split.us.loopexit, label %loop_latch.us
+;
+; CHECK: loop_latch.us:
+; CHECK-NEXT: br label %loop_begin.us
+;
+; CHECK: loop_exit.split.us.loopexit:
+; CHECK-NEXT: %[[A_LCSSA_US:.*]] = phi i32 [ %[[A]], %loop_a.us ]
+; CHECK-NEXT: br label %loop_exit.split.us
+;
+; CHECK: loop_exit.split.us:
+; CHECK-NEXT: %[[AB_PHI_US:.*]] = phi i32 [ %[[B]], %loop_b.us ], [ %[[A_LCSSA_US]], %loop_exit.split.us.loopexit ]
+; CHECK-NEXT: br label %loop_exit
+;
+; But the original loop contains an inner loop that must be traversed.
+;
+; CHECK: entry.split:
+; CHECK-NEXT: br label %loop_begin
+;
+; CHECK: loop_begin:
+; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr
+; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT: br i1 %[[V]], label %loop_a, label %loop_b
+;
+; CHECK: loop_a:
+; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT: br i1 %[[V]], label %loop_exit.split, label %loop_latch
+;
+; CHECK: loop_b:
+; CHECK-NEXT: %[[B:.*]] = load i32, i32* %b.ptr
+; CHECK-NEXT: br label %loop_b_inner_ph
+;
+; CHECK: loop_b_inner_ph:
+; CHECK-NEXT: br label %loop_b_inner_header
+;
+; CHECK: loop_b_inner_header:
+; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT: br i1 %[[V]], label %loop_b_inner_latch, label %loop_b_inner_body
+;
+; CHECK: loop_b_inner_body:
+; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT: br i1 %[[V]], label %loop_b_inner_latch, label %loop_b_inner_exit
+;
+; CHECK: loop_b_inner_latch:
+; CHECK-NEXT: br label %loop_b_inner_header
+;
+; CHECK: loop_b_inner_exit:
+; CHECK-NEXT: br label %loop_latch
+;
+; CHECK: loop_latch:
+; CHECK-NEXT: br label %loop_begin
+
+loop_exit:
+ %lcssa = phi i32 [ %a, %loop_a ], [ %b, %loop_b ]
+ ret i32 %lcssa
+; CHECK: loop_exit.split:
+; CHECK-NEXT: %[[A_LCSSA:.*]] = phi i32 [ %[[A]], %loop_a ]
+; CHECK-NEXT: br label %loop_exit
+;
+; CHECK: loop_exit:
+; CHECK-NEXT: %[[AB_PHI:.*]] = phi i32 [ %[[A_LCSSA]], %loop_exit.split ], [ %[[AB_PHI_US]], %loop_exit.split.us ]
+; CHECK-NEXT: ret i32 %[[AB_PHI]]
+}
+
+; Test unswitching a loop-invariant switch. %cond2 is a function argument and
+; the switch on it in %loop_begin is expected to be hoisted into %entry, with
+; one copy of the loop per distinct in-loop successor (%loop_a, %loop_b,
+; %loop_c) plus a non-looping copy for the default exit edge. Cases 0, 1 and 2
+; all target %loop_a and are expected to share a single unswitched copy.
+; Note that %cond1 is not referenced anywhere in the body.
+define i32 @test20(i32* %var, i32 %cond1, i32 %cond2) {
+; CHECK-LABEL: @test20(
+entry:
+ br label %loop_begin
+; CHECK-NEXT: entry:
+; CHECK-NEXT: switch i32 %cond2, label %[[ENTRY_SPLIT_EXIT:.*]] [
+; CHECK-NEXT: i32 0, label %[[ENTRY_SPLIT_A:.*]]
+; CHECK-NEXT: i32 1, label %[[ENTRY_SPLIT_A]]
+; CHECK-NEXT: i32 13, label %[[ENTRY_SPLIT_B:.*]]
+; CHECK-NEXT: i32 2, label %[[ENTRY_SPLIT_A]]
+; CHECK-NEXT: i32 42, label %[[ENTRY_SPLIT_C:.*]]
+; CHECK-NEXT: ]
+
+loop_begin:
+ %var_val = load i32, i32* %var
+ switch i32 %cond2, label %loop_exit [
+ i32 0, label %loop_a
+ i32 1, label %loop_a
+ i32 13, label %loop_b
+ i32 2, label %loop_a
+ i32 42, label %loop_c
+ ]
+
+loop_a:
+ call i32 @a()
+ br label %loop_latch
+; Unswitched 'a' loop.
+;
+; CHECK: [[ENTRY_SPLIT_A]]:
+; CHECK-NEXT: br label %[[LOOP_BEGIN_A:.*]]
+;
+; CHECK: [[LOOP_BEGIN_A]]:
+; CHECK-NEXT: %{{.*}} = load i32, i32* %var
+; CHECK-NEXT: br label %[[LOOP_A:.*]]
+;
+; CHECK: [[LOOP_A]]:
+; CHECK-NEXT: call i32 @a()
+; CHECK-NEXT: br label %[[LOOP_LATCH_A:.*]]
+;
+; CHECK: [[LOOP_LATCH_A]]:
+; CHECK: br label %[[LOOP_BEGIN_A]]
+
+loop_b:
+ call i32 @b()
+ br label %loop_latch
+; Unswitched 'b' loop.
+;
+; CHECK: [[ENTRY_SPLIT_B]]:
+; CHECK-NEXT: br label %[[LOOP_BEGIN_B:.*]]
+;
+; CHECK: [[LOOP_BEGIN_B]]:
+; CHECK-NEXT: %{{.*}} = load i32, i32* %var
+; CHECK-NEXT: br label %[[LOOP_B:.*]]
+;
+; CHECK: [[LOOP_B]]:
+; CHECK-NEXT: call i32 @b()
+; CHECK-NEXT: br label %[[LOOP_LATCH_B:.*]]
+;
+; CHECK: [[LOOP_LATCH_B]]:
+; CHECK: br label %[[LOOP_BEGIN_B]]
+
+loop_c:
+ call i32 @c() noreturn nounwind
+ br label %loop_latch
+; Unswitched 'c' loop.
+;
+; CHECK: [[ENTRY_SPLIT_C]]:
+; CHECK-NEXT: br label %[[LOOP_BEGIN_C:.*]]
+;
+; CHECK: [[LOOP_BEGIN_C]]:
+; CHECK-NEXT: %{{.*}} = load i32, i32* %var
+; CHECK-NEXT: br label %[[LOOP_C:.*]]
+;
+; CHECK: [[LOOP_C]]:
+; CHECK-NEXT: call i32 @c()
+; CHECK-NEXT: br label %[[LOOP_LATCH_C:.*]]
+;
+; CHECK: [[LOOP_LATCH_C]]:
+; CHECK: br label %[[LOOP_BEGIN_C]]
+
+loop_latch:
+ br label %loop_begin
+
+loop_exit:
+ %lcssa = phi i32 [ %var_val, %loop_begin ]
+ ret i32 %lcssa
+; Unswitched exit edge (no longer a loop).
+;
+; CHECK: [[ENTRY_SPLIT_EXIT]]:
+; CHECK-NEXT: br label %loop_begin
+;
+; CHECK: loop_begin:
+; CHECK-NEXT: %[[V:.*]] = load i32, i32* %var
+; CHECK-NEXT: br label %loop_exit
+;
+; CHECK: loop_exit:
+; CHECK-NEXT: %[[LCSSA:.*]] = phi i32 [ %[[V]], %loop_begin ]
+; CHECK-NEXT: ret i32 %[[LCSSA]]
+}
+
+; Negative test: we do not unswitch when the loop contains unstructured control
+; flow, as it would significantly complicate the process since novel loops
+; might be formed, etc.
+;
+; Here %loop_left and %loop_right branch to each other, forming a cycle that
+; can be entered at either block from %loop_begin.
+;
+; NOTE(review): only the function label is verified below; nothing asserts
+; that the branch on %cond stays inside the loop. Presumably the intent is
+; just that the pass runs cleanly on this input -- confirm.
+define void @test_no_unswitch_unstructured_cfg(i1* %ptr, i1 %cond) {
+; CHECK-LABEL: @test_no_unswitch_unstructured_cfg(
+entry:
+ br label %loop_begin
+
+loop_begin:
+ br i1 %cond, label %loop_left, label %loop_right
+
+loop_left:
+ %v1 = load i1, i1* %ptr
+ br i1 %v1, label %loop_right, label %loop_merge
+
+loop_right:
+ %v2 = load i1, i1* %ptr
+ br i1 %v2, label %loop_left, label %loop_merge
+
+loop_merge:
+ %v3 = load i1, i1* %ptr
+ br i1 %v3, label %loop_latch, label %loop_exit
+
+loop_latch:
+ br label %loop_begin
+
+loop_exit:
+ ret void
+}
+
+; A test reduced out of 403.gcc with interesting nested loops that trigger
+; multiple unswitches. A key component of this test is that there are multiple
+; paths to reach an inner loop after unswitching, and one of them is via the
+; predecessors of the unswitched loop header. That can allow us to find the loop
+; through multiple different paths.
+define void @test21(i1 %a, i1 %b) {
+; CHECK-LABEL: @test21(
+bb:
+ br label %bb3
+; The directives below only pin down how many branches on the two arguments
+; remain, not the full block structure: exactly one branch on %a (whose
+; successors are captured as the two split blocks), then, after the non-cloned
+; split block, exactly one branch on %b. The negative directives in between
+; and afterwards reject any additional branch on %a or %b.
+; CHECK-NOT: br i1 %a
+;
+; CHECK: br i1 %a, label %[[BB_SPLIT_US:.*]], label %[[BB_SPLIT:.*]]
+;
+; CHECK-NOT: br i1 %a
+; CHECK-NOT: br i1 %b
+;
+; CHECK: [[BB_SPLIT]]:
+; CHECK: br i1 %b
+;
+; CHECK-NOT: br i1 %a
+; CHECK-NOT: br i1 %b
+
+; Outermost loop header; its backedge comes from %bb23.
+bb3:
+ %tmp1.0 = phi i32 [ 0, %bb ], [ %tmp1.3, %bb23 ]
+ br label %bb7
+
+; Header of the middle loop; its backedge comes from %bb19.
+bb7:
+ %tmp.0 = phi i1 [ true, %bb3 ], [ false, %bb19 ]
+ %tmp1.1 = phi i32 [ %tmp1.0, %bb3 ], [ %tmp1.2.lcssa, %bb19 ]
+ br i1 %tmp.0, label %bb11.preheader, label %bb23
+
+; Invariant branch on %a: either skip straight to %bb19 or enter the %bb14
+; self-loop.
+bb11.preheader:
+ br i1 %a, label %bb19, label %bb14.lr.ph
+
+bb14.lr.ph:
+ br label %bb14
+
+; Single-block innermost loop whose exit condition is the invariant %b.
+bb14:
+ %tmp2.02 = phi i32 [ 0, %bb14.lr.ph ], [ 1, %bb14 ]
+ br i1 %b, label %bb11.bb19_crit_edge, label %bb14
+
+bb11.bb19_crit_edge:
+ %split = phi i32 [ %tmp2.02, %bb14 ]
+ br label %bb19
+
+bb19:
+ %tmp1.2.lcssa = phi i32 [ %split, %bb11.bb19_crit_edge ], [ %tmp1.1, %bb11.preheader ]
+ %tmp21 = icmp eq i32 %tmp1.2.lcssa, 0
+ br i1 %tmp21, label %bb23, label %bb7
+
+; Unconditionally returns to %bb3; this function has no return instruction.
+bb23:
+ %tmp1.3 = phi i32 [ %tmp1.2.lcssa, %bb19 ], [ %tmp1.1, %bb7 ]
+ br label %bb3
+}
+
+; A test reduced out of 400.perlbench that when unswitching the `%stop`
+; condition clones a loop nest outside of a containing loop. This exercises a
+; different cloning path from our other test cases and in turn verifying the
+; resulting structure can catch any failures to correctly clone these nested
+; loops.
+;
+; External functions called by this test; only their declarations are given.
+; @f and @g are also called by @test23.
+declare void @f()
+declare void @g()
+declare i32 @h(i32 %arg)
+define void @test22(i32 %arg) {
+; CHECK-LABEL: define void @test22(
+entry:
+ br label %loop1.header
+
+loop1.header:
+; %stop is false when arriving from entry and true when arriving from
+; loop1.latch.
+ %stop = phi i1 [ true, %loop1.latch ], [ false, %entry ]
+ %i = phi i32 [ %i.lcssa, %loop1.latch ], [ %arg, %entry ]
+; CHECK: %[[I:.*]] = phi i32 [ %{{.*}}, %loop1.latch ], [ %arg, %entry ]
+ br i1 %stop, label %loop1.exit, label %loop1.body.loop2.ph
+; CHECK: br i1 %stop, label %loop1.exit, label %loop1.body.loop2.ph
+
+loop1.body.loop2.ph:
+ br label %loop2.header
+; Just check that we unswitched the key condition and that leads to the
+; inner loop header.
+;
+; CHECK: loop1.body.loop2.ph:
+; CHECK-NEXT: br i1 %stop, label %[[SPLIT_US:.*]], label %[[SPLIT:.*]]
+;
+; CHECK: [[SPLIT_US]]:
+; CHECK-NEXT: br label %[[LOOP2_HEADER_US:.*]]
+;
+; CHECK: [[LOOP2_HEADER_US]]:
+; CHECK-NEXT: %{{.*}} = phi i32 [ %[[I]], %[[SPLIT_US]] ]
+;
+; CHECK: [[SPLIT]]:
+; CHECK-NEXT: br label %[[LOOP2_HEADER:.*]]
+;
+; CHECK: [[LOOP2_HEADER]]:
+; CHECK-NEXT: %{{.*}} = phi i32 [ %[[I]], %[[SPLIT]] ]
+
+loop2.header:
+ %i.inner = phi i32 [ %i, %loop1.body.loop2.ph ], [ %i.next, %loop2.latch ]
+ br label %loop3.header
+
+; The switch below has one edge back to loop3.header, one to loop2.latch, one
+; to loop1.latch, and a default edge that leaves to loop3.exit.
+loop3.header:
+ %sw = call i32 @h(i32 %i.inner)
+ switch i32 %sw, label %loop3.exit [
+ i32 32, label %loop3.header
+ i32 59, label %loop2.latch
+ i32 36, label %loop1.latch
+ ]
+
+; The branch on %stop here is the one whose unswitching is checked above.
+loop2.latch:
+ %i.next = add i32 %i.inner, 1
+ br i1 %stop, label %loop2.exit, label %loop2.header
+
+loop1.latch:
+ %i.lcssa = phi i32 [ %i.inner, %loop3.header ]
+ br label %loop1.header
+
+loop3.exit:
+ call void @f()
+ ret void
+
+loop2.exit:
+ call void @g()
+ ret void
+
+loop1.exit:
+ call void @g()
+ ret void
+}
+
+; Test that when we are unswitching and need to rebuild the loop block set we
+; correctly skip past inner loops. We want to use the inner loop to efficiently
+; skip whole subregions of the outer loop blocks but just because the header of
+; the outer loop is also the preheader of an inner loop shouldn't confuse this
+; walk.
+define void @test23(i1 %arg, i1* %ptr) {
+; CHECK-LABEL: define void @test23(
+entry:
+ br label %outer.header
+; The branch on %arg is expected directly in the entry block.
+; CHECK: entry:
+; CHECK-NEXT: br i1 %arg,
+;
+; Just verify that we unswitched the correct bits. We should call `@f` twice in
+; one unswitch and `@f` and then `@g` in the other.
+; CHECK: call void
+; CHECK-SAME: @f
+; CHECK: call void
+; CHECK-SAME: @f
+;
+; CHECK: call void
+; CHECK-SAME: @f
+; CHECK: call void
+; CHECK-SAME: @g
+
+; outer.header branches straight to inner.header, so it serves as both the
+; outer loop's header and the inner loop's preheader.
+outer.header:
+ br label %inner.header
+
+inner.header:
+ call void @f()
+ br label %inner.latch
+
+inner.latch:
+ %inner.cond = load i1, i1* %ptr
+ br i1 %inner.cond, label %inner.header, label %outer.body
+
+; The branch on the argument %arg that the test expects to be unswitched.
+outer.body:
+ br i1 %arg, label %outer.body.left, label %outer.body.right
+
+outer.body.left:
+ call void @f()
+ br label %outer.latch
+
+outer.body.right:
+ call void @g()
+ br label %outer.latch
+
+outer.latch:
+ %outer.cond = load i1, i1* %ptr
+ br i1 %outer.cond, label %outer.header, label %exit
+
+exit:
+ ret void
+}
+
+; Non-trivial loop unswitching where there are two invariant conditions, but the
+; second one is only in the cloned copy of the loop after unswitching.
+;
+; The expectations describe three loops: the doubly unswitched 'loop_a_a' copy
+; (block names ending in '.us.us'), the 'loop_a_c' copy (names ending in '.us')
+; and the remaining 'loop_b' loop, which keeps the original block names.
+define i32 @test24(i1* %ptr, i1 %cond1, i1 %cond2) {
+; CHECK-LABEL: @test24(
+entry:
+ br label %loop_begin
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 %cond1, label %entry.split.us, label %entry.split
+
+loop_begin:
+ br i1 %cond1, label %loop_a, label %loop_b
+
+loop_a:
+ br i1 %cond2, label %loop_a_a, label %loop_a_c
+; The second unswitched condition.
+;
+; CHECK: entry.split.us:
+; CHECK-NEXT: br i1 %cond2, label %entry.split.us.split.us, label %entry.split.us.split
+
+loop_a_a:
+ call i32 @a()
+ br label %latch
+; The 'loop_a_a' unswitched loop.
+;
+; CHECK: entry.split.us.split.us:
+; CHECK-NEXT: br label %loop_begin.us.us
+;
+; CHECK: loop_begin.us.us:
+; CHECK-NEXT: br label %loop_a.us.us
+;
+; CHECK: loop_a.us.us:
+; CHECK-NEXT: br label %loop_a_a.us.us
+;
+; CHECK: loop_a_a.us.us:
+; CHECK-NEXT: call i32 @a()
+; CHECK-NEXT: br label %latch.us.us
+;
+; CHECK: latch.us.us:
+; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT: br i1 %[[V]], label %loop_begin.us.us, label %loop_exit.split.us.split.us
+;
+; CHECK: loop_exit.split.us.split.us:
+; CHECK-NEXT: br label %loop_exit.split
+
+loop_a_c:
+ call i32 @c()
+ br label %latch
+; The 'loop_a_c' unswitched loop.
+;
+; CHECK: entry.split.us.split:
+; CHECK-NEXT: br label %loop_begin.us
+;
+; CHECK: loop_begin.us:
+; CHECK-NEXT: br label %loop_a.us
+;
+; CHECK: loop_a.us:
+; CHECK-NEXT: br label %loop_a_c.us
+;
+; The branch target is spelled out in full as '%latch.us'. A bare '%latch'
+; would also be accepted as a substring, and so would not tell this loop's
+; latch apart from the 'loop_b' loop's latch.
+; CHECK: loop_a_c.us:
+; CHECK-NEXT: call i32 @c()
+; CHECK-NEXT: br label %latch.us
+;
+; CHECK: latch.us:
+; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT: br i1 %[[V]], label %loop_begin.us, label %loop_exit.split.us.split
+;
+; CHECK: loop_exit.split.us.split:
+; CHECK-NEXT: br label %loop_exit.split
+
+loop_b:
+ call i32 @b()
+ br label %latch
+; The 'loop_b' unswitched loop.
+;
+; CHECK: entry.split:
+; CHECK-NEXT: br label %loop_begin
+;
+; CHECK: loop_begin:
+; CHECK-NEXT: br label %loop_b
+;
+; CHECK: loop_b:
+; CHECK-NEXT: call i32 @b()
+; CHECK-NEXT: br label %latch
+;
+; CHECK: latch:
+; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT: br i1 %[[V]], label %loop_begin, label %loop_exit.split
+;
+; CHECK: loop_exit.split:
+; CHECK-NEXT: br label %loop_exit
+
+latch:
+ %v = load i1, i1* %ptr
+ br i1 %v, label %loop_begin, label %loop_exit
+
+loop_exit:
+ ret i32 0
+; CHECK: loop_exit:
+; CHECK-NEXT: ret
+}
+
+; Non-trivial partial loop unswitching of an invariant input to an 'or'.
+;
+; The expectations below describe two loops. In the unswitched ('.us') copy
+; %cond is replaced by true in the 'or' and the branch on it becomes an
+; unconditional branch to loop_a. In the original copy %cond is replaced by
+; false and the conditional branch is kept.
+define i32 @test25(i1* %ptr, i1 %cond) {
+; CHECK-LABEL: @test25(
+entry:
+ br label %loop_begin
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 %cond, label %entry.split.us, label %entry.split
+
+loop_begin:
+; %v1 is loaded inside the loop; %cond is a function argument.
+ %v1 = load i1, i1* %ptr
+ %cond_or = or i1 %v1, %cond
+ br i1 %cond_or, label %loop_a, label %loop_b
+
+loop_a:
+ call i32 @a()
+ br label %latch
+; The 'loop_a' unswitched loop.
+;
+; CHECK: entry.split.us:
+; CHECK-NEXT: br label %loop_begin.us
+;
+; CHECK: loop_begin.us:
+; CHECK-NEXT: %[[V1_US:.*]] = load i1, i1* %ptr
+; CHECK-NEXT: %[[OR_US:.*]] = or i1 %[[V1_US]], true
+; CHECK-NEXT: br label %loop_a.us
+;
+; CHECK: loop_a.us:
+; CHECK-NEXT: call i32 @a()
+; CHECK-NEXT: br label %latch.us
+;
+; CHECK: latch.us:
+; CHECK-NEXT: %[[V2_US:.*]] = load i1, i1* %ptr
+; CHECK-NEXT: br i1 %[[V2_US]], label %loop_begin.us, label %loop_exit.split.us
+;
+; CHECK: loop_exit.split.us:
+; CHECK-NEXT: br label %loop_exit
+
+loop_b:
+ call i32 @b()
+ br label %latch
+; The original loop.
+;
+; CHECK: entry.split:
+; CHECK-NEXT: br label %loop_begin
+;
+; CHECK: loop_begin:
+; CHECK-NEXT: %[[V1:.*]] = load i1, i1* %ptr
+; CHECK-NEXT: %[[OR:.*]] = or i1 %[[V1]], false
+; CHECK-NEXT: br i1 %[[OR]], label %loop_a, label %loop_b
+;
+; CHECK: loop_a:
+; CHECK-NEXT: call i32 @a()
+; CHECK-NEXT: br label %latch
+;
+; CHECK: loop_b:
+; CHECK-NEXT: call i32 @b()
+; CHECK-NEXT: br label %latch
+
+latch:
+ %v2 = load i1, i1* %ptr
+ br i1 %v2, label %loop_begin, label %loop_exit
+; CHECK: latch:
+; CHECK-NEXT: %[[V2:.*]] = load i1, i1* %ptr
+; CHECK-NEXT: br i1 %[[V2]], label %loop_begin, label %loop_exit.split
+
+loop_exit:
+ ret i32 0
+; CHECK: loop_exit.split:
+; CHECK-NEXT: br label %loop_exit
+;
+; CHECK: loop_exit:
+; CHECK-NEXT: ret
+}
+
+; Non-trivial partial loop unswitching of multiple invariant inputs to an `and`
+; chain.
+;
+; The expected entry block combines only %cond1 and %cond3 with an `and`.
+; %cond2 is an operand of an `or` inside the chain and is expected to stay in
+; place in both copies of the loop. The unswitched ('.us') copy is the one
+; reached when the combined condition is false, and it branches
+; unconditionally to loop_b.
+define i32 @test26(i1* %ptr1, i1* %ptr2, i1* %ptr3, i1 %cond1, i1 %cond2, i1 %cond3) {
+; CHECK-LABEL: @test26(
+entry:
+ br label %loop_begin
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %[[INV_AND:.*]] = and i1 %cond3, %cond1
+; CHECK-NEXT: br i1 %[[INV_AND]], label %entry.split, label %entry.split.us
+
+loop_begin:
+; %v1 and %v2 are loaded inside the loop; the %cond* values are arguments.
+ %v1 = load i1, i1* %ptr1
+ %v2 = load i1, i1* %ptr2
+ %cond_and1 = and i1 %v1, %cond1
+ %cond_or1 = or i1 %v2, %cond2
+ %cond_and2 = and i1 %cond_and1, %cond_or1
+ %cond_and3 = and i1 %cond_and2, %cond3
+ br i1 %cond_and3, label %loop_a, label %loop_b
+; The 'loop_b' unswitched loop.
+;
+; CHECK: entry.split.us:
+; CHECK-NEXT: br label %loop_begin.us
+;
+; CHECK: loop_begin.us:
+; CHECK-NEXT: %[[V1_US:.*]] = load i1, i1* %ptr1
+; CHECK-NEXT: %[[V2_US:.*]] = load i1, i1* %ptr2
+; CHECK-NEXT: %[[AND1_US:.*]] = and i1 %[[V1_US]], %cond1
+; CHECK-NEXT: %[[OR1_US:.*]] = or i1 %[[V2_US]], %cond2
+; CHECK-NEXT: %[[AND2_US:.*]] = and i1 %[[AND1_US]], %[[OR1_US]]
+; CHECK-NEXT: %[[AND3_US:.*]] = and i1 %[[AND2_US]], %cond3
+; CHECK-NEXT: br label %loop_b.us
+;
+; CHECK: loop_b.us:
+; CHECK-NEXT: call i32 @b()
+; CHECK-NEXT: br label %latch.us
+;
+; CHECK: latch.us:
+; CHECK-NEXT: %[[V3_US:.*]] = load i1, i1* %ptr3
+; CHECK-NEXT: br i1 %[[V3_US]], label %loop_begin.us, label %loop_exit.split.us
+;
+; CHECK: loop_exit.split.us:
+; CHECK-NEXT: br label %loop_exit
+
+; The original loop.
+;
+; CHECK: entry.split:
+; CHECK-NEXT: br label %loop_begin
+;
+; CHECK: loop_begin:
+; CHECK-NEXT: %[[V1:.*]] = load i1, i1* %ptr1
+; CHECK-NEXT: %[[V2:.*]] = load i1, i1* %ptr2
+; CHECK-NEXT: %[[AND1:.*]] = and i1 %[[V1]], true
+; CHECK-NEXT: %[[OR1:.*]] = or i1 %[[V2]], %cond2
+; CHECK-NEXT: %[[AND2:.*]] = and i1 %[[AND1]], %[[OR1]]
+; CHECK-NEXT: %[[AND3:.*]] = and i1 %[[AND2]], true
+; CHECK-NEXT: br i1 %[[AND3]], label %loop_a, label %loop_b
+
+loop_a:
+ call i32 @a()
+ br label %latch
+; CHECK: loop_a:
+; CHECK-NEXT: call i32 @a()
+; CHECK-NEXT: br label %latch
+
+loop_b:
+ call i32 @b()
+ br label %latch
+; CHECK: loop_b:
+; CHECK-NEXT: call i32 @b()
+; CHECK-NEXT: br label %latch
+
+latch:
+ %v3 = load i1, i1* %ptr3
+ br i1 %v3, label %loop_begin, label %loop_exit
+; CHECK: latch:
+; CHECK-NEXT: %[[V3:.*]] = load i1, i1* %ptr3
+; CHECK-NEXT: br i1 %[[V3]], label %loop_begin, label %loop_exit.split
+
+loop_exit:
+ ret i32 0
+; CHECK: loop_exit.split:
+; CHECK-NEXT: br label %loop_exit
+;
+; CHECK: loop_exit:
+; CHECK-NEXT: ret
+}
+
+; Non-trivial partial loop unswitching of multiple invariant inputs to an `or`
+; chain. Basically an inverted version of corresponding `and` test (test26).
+;
+; The expected entry block combines only %cond1 and %cond3 with an `or`.
+; %cond2 is an operand of an `and` inside the chain and is expected to stay in
+; place in both copies of the loop. The unswitched ('.us') copy is the one
+; reached when the combined condition is true, and it branches unconditionally
+; to loop_b.
+define i32 @test27(i1* %ptr1, i1* %ptr2, i1* %ptr3, i1 %cond1, i1 %cond2, i1 %cond3) {
+; CHECK-LABEL: @test27(
+entry:
+ br label %loop_begin
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %[[INV_OR:.*]] = or i1 %cond3, %cond1
+; CHECK-NEXT: br i1 %[[INV_OR]], label %entry.split.us, label %entry.split
+
+loop_begin:
+; %v1 and %v2 are loaded inside the loop; the %cond* values are arguments.
+; Unlike test26, the true edge of the branch below goes to loop_b.
+ %v1 = load i1, i1* %ptr1
+ %v2 = load i1, i1* %ptr2
+ %cond_or1 = or i1 %v1, %cond1
+ %cond_and1 = and i1 %v2, %cond2
+ %cond_or2 = or i1 %cond_or1, %cond_and1
+ %cond_or3 = or i1 %cond_or2, %cond3
+ br i1 %cond_or3, label %loop_b, label %loop_a
+; The 'loop_b' unswitched loop.
+;
+; CHECK: entry.split.us:
+; CHECK-NEXT: br label %loop_begin.us
+;
+; CHECK: loop_begin.us:
+; CHECK-NEXT: %[[V1_US:.*]] = load i1, i1* %ptr1
+; CHECK-NEXT: %[[V2_US:.*]] = load i1, i1* %ptr2
+; CHECK-NEXT: %[[OR1_US:.*]] = or i1 %[[V1_US]], %cond1
+; CHECK-NEXT: %[[AND1_US:.*]] = and i1 %[[V2_US]], %cond2
+; CHECK-NEXT: %[[OR2_US:.*]] = or i1 %[[OR1_US]], %[[AND1_US]]
+; CHECK-NEXT: %[[OR3_US:.*]] = or i1 %[[OR2_US]], %cond3
+; CHECK-NEXT: br label %loop_b.us
+;
+; CHECK: loop_b.us:
+; CHECK-NEXT: call i32 @b()
+; CHECK-NEXT: br label %latch.us
+;
+; CHECK: latch.us:
+; CHECK-NEXT: %[[V3_US:.*]] = load i1, i1* %ptr3
+; CHECK-NEXT: br i1 %[[V3_US]], label %loop_begin.us, label %loop_exit.split.us
+;
+; CHECK: loop_exit.split.us:
+; CHECK-NEXT: br label %loop_exit
+
+; The original loop.
+;
+; CHECK: entry.split:
+; CHECK-NEXT: br label %loop_begin
+;
+; CHECK: loop_begin:
+; CHECK-NEXT: %[[V1:.*]] = load i1, i1* %ptr1
+; CHECK-NEXT: %[[V2:.*]] = load i1, i1* %ptr2
+; CHECK-NEXT: %[[OR1:.*]] = or i1 %[[V1]], false
+; CHECK-NEXT: %[[AND1:.*]] = and i1 %[[V2]], %cond2
+; CHECK-NEXT: %[[OR2:.*]] = or i1 %[[OR1]], %[[AND1]]
+; CHECK-NEXT: %[[OR3:.*]] = or i1 %[[OR2]], false
+; CHECK-NEXT: br i1 %[[OR3]], label %loop_b, label %loop_a
+
+loop_a:
+ call i32 @a()
+ br label %latch
+; CHECK: loop_a:
+; CHECK-NEXT: call i32 @a()
+; CHECK-NEXT: br label %latch
+
+loop_b:
+ call i32 @b()
+ br label %latch
+; CHECK: loop_b:
+; CHECK-NEXT: call i32 @b()
+; CHECK-NEXT: br label %latch
+
+latch:
+ %v3 = load i1, i1* %ptr3
+ br i1 %v3, label %loop_begin, label %loop_exit
+; CHECK: latch:
+; CHECK-NEXT: %[[V3:.*]] = load i1, i1* %ptr3
+; CHECK-NEXT: br i1 %[[V3]], label %loop_begin, label %loop_exit.split
+
+loop_exit:
+ ret i32 0
+; CHECK: loop_exit.split:
+; CHECK-NEXT: br label %loop_exit
+;
+; CHECK: loop_exit:
+; CHECK-NEXT: ret
+}
+
+; Non-trivial unswitching of a switch.
+;
+; The switch on the argument %cond is expected to appear in the entry block,
+; with one copy of the loop per case ('a', 'b', 'c') plus one copy for the
+; default destination, which contains only the latch.
+define i32 @test28(i1* %ptr, i32 %cond) {
+; CHECK-LABEL: @test28(
+entry:
+ br label %loop_begin
+; CHECK-NEXT: entry:
+; CHECK-NEXT: switch i32 %cond, label %[[ENTRY_SPLIT_LATCH:.*]] [
+; CHECK-NEXT: i32 0, label %[[ENTRY_SPLIT_A:.*]]
+; CHECK-NEXT: i32 1, label %[[ENTRY_SPLIT_B:.*]]
+; CHECK-NEXT: i32 2, label %[[ENTRY_SPLIT_C:.*]]
+; CHECK-NEXT: ]
+
+loop_begin:
+; The default destination is the latch itself, so the default path runs none
+; of the loop_a/loop_b/loop_c bodies.
+ switch i32 %cond, label %latch [
+ i32 0, label %loop_a
+ i32 1, label %loop_b
+ i32 2, label %loop_c
+ ]
+
+loop_a:
+ call i32 @a()
+ br label %latch
+; Unswitched 'a' loop.
+;
+; CHECK: [[ENTRY_SPLIT_A]]:
+; CHECK-NEXT: br label %[[LOOP_BEGIN_A:.*]]
+;
+; CHECK: [[LOOP_BEGIN_A]]:
+; CHECK-NEXT: br label %[[LOOP_A:.*]]
+;
+; CHECK: [[LOOP_A]]:
+; CHECK-NEXT: call i32 @a()
+; CHECK-NEXT: br label %[[LOOP_LATCH_A:.*]]
+;
+; CHECK: [[LOOP_LATCH_A]]:
+; CHECK-NEXT: %[[V_A:.*]] = load i1, i1* %ptr
+; CHECK: br i1 %[[V_A]], label %[[LOOP_BEGIN_A]], label %[[LOOP_EXIT_A:.*]]
+;
+; CHECK: [[LOOP_EXIT_A]]:
+; CHECK-NEXT: br label %loop_exit
+
+loop_b:
+ call i32 @b()
+ br label %latch
+; Unswitched 'b' loop.
+;
+; CHECK: [[ENTRY_SPLIT_B]]:
+; CHECK-NEXT: br label %[[LOOP_BEGIN_B:.*]]
+;
+; CHECK: [[LOOP_BEGIN_B]]:
+; CHECK-NEXT: br label %[[LOOP_B:.*]]
+;
+; CHECK: [[LOOP_B]]:
+; CHECK-NEXT: call i32 @b()
+; CHECK-NEXT: br label %[[LOOP_LATCH_B:.*]]
+;
+; CHECK: [[LOOP_LATCH_B]]:
+; CHECK-NEXT: %[[V_B:.*]] = load i1, i1* %ptr
+; CHECK: br i1 %[[V_B]], label %[[LOOP_BEGIN_B]], label %[[LOOP_EXIT_B:.*]]
+;
+; CHECK: [[LOOP_EXIT_B]]:
+; CHECK-NEXT: br label %loop_exit
+
+loop_c:
+ call i32 @c()
+ br label %latch
+; Unswitched 'c' loop.
+;
+; CHECK: [[ENTRY_SPLIT_C]]:
+; CHECK-NEXT: br label %[[LOOP_BEGIN_C:.*]]
+;
+; CHECK: [[LOOP_BEGIN_C]]:
+; CHECK-NEXT: br label %[[LOOP_C:.*]]
+;
+; CHECK: [[LOOP_C]]:
+; CHECK-NEXT: call i32 @c()
+; CHECK-NEXT: br label %[[LOOP_LATCH_C:.*]]
+;
+; CHECK: [[LOOP_LATCH_C]]:
+; CHECK-NEXT: %[[V_C:.*]] = load i1, i1* %ptr
+; CHECK: br i1 %[[V_C]], label %[[LOOP_BEGIN_C]], label %[[LOOP_EXIT_C:.*]]
+;
+; CHECK: [[LOOP_EXIT_C]]:
+; CHECK-NEXT: br label %loop_exit
+
+latch:
+ %v = load i1, i1* %ptr
+ br i1 %v, label %loop_begin, label %loop_exit
+; Unswitched the 'latch' only loop.
+;
+; CHECK: [[ENTRY_SPLIT_LATCH]]:
+; CHECK-NEXT: br label %[[LOOP_BEGIN_LATCH:.*]]
+;
+; CHECK: [[LOOP_BEGIN_LATCH]]:
+; CHECK-NEXT: br label %[[LOOP_LATCH_LATCH:.*]]
+;
+; CHECK: [[LOOP_LATCH_LATCH]]:
+; CHECK-NEXT: %[[V_LATCH:.*]] = load i1, i1* %ptr
+; CHECK: br i1 %[[V_LATCH]], label %[[LOOP_BEGIN_LATCH]], label %[[LOOP_EXIT_LATCH:.*]]
+;
+; CHECK: [[LOOP_EXIT_LATCH]]:
+; CHECK-NEXT: br label %loop_exit
+
+loop_exit:
+ ret i32 0
+; CHECK: loop_exit:
+; CHECK-NEXT: ret i32 0
+}
+
+; A test case designed to exercise unusual properties of switches: they
+; can introduce multiple edges to successors. These need lots of special case
+; handling as they get collapsed in many cases (domtree, the unswitch itself)
+; but not in all cases (the PHI node operands).
+;
+; The expectations describe three copies of the loop, one per distinct switch
+; successor (body.a, body.b, body.c). In each copy the dispatch block is
+; expected to end in an unconditional branch, and the PHI fed by it to carry a
+; single incoming value from dispatch.
+define i32 @test29(i32 %arg) {
+; CHECK-LABEL: @test29(
+entry:
+ br label %header
+; CHECK-NEXT: entry:
+; CHECK-NEXT: switch i32 %arg, label %[[ENTRY_SPLIT_C:.*]] [
+; CHECK-NEXT: i32 0, label %[[ENTRY_SPLIT_A:.*]]
+; CHECK-NEXT: i32 1, label %[[ENTRY_SPLIT_A]]
+; CHECK-NEXT: i32 2, label %[[ENTRY_SPLIT_B:.*]]
+; CHECK-NEXT: i32 3, label %[[ENTRY_SPLIT_C]]
+; CHECK-NEXT: ]
+
+header:
+ %tmp = call i32 @d()
+ %cmp1 = icmp eq i32 %tmp, 0
+ ; We set up a chain through all the successors of the switch that doesn't
+ ; involve the switch so that we can have interesting PHI nodes in them.
+ br i1 %cmp1, label %body.a, label %dispatch
+
+dispatch:
+ ; Switch with multiple successors. We arrange the last successor to be the
+ ; default to make the test case easier to read. This has a duplicate edge
+ ; both to the default destination (which is completely superfluous but
+ ; technically valid IR) and to a regular successor.
+ switch i32 %arg, label %body.c [
+ i32 0, label %body.a
+ i32 1, label %body.a
+ i32 2, label %body.b
+ i32 3, label %body.c
+ ]
+
+body.a:
+; Two incoming entries from %dispatch, one per switch edge (cases 0 and 1).
+ %tmp.a.phi = phi i32 [ 0, %header ], [ %tmp, %dispatch ], [ %tmp, %dispatch ]
+ %tmp.a = call i32 @a()
+ %tmp.a.sum = add i32 %tmp.a.phi, %tmp.a
+ br label %body.b
+; Unswitched 'a' loop.
+;
+; CHECK: [[ENTRY_SPLIT_A]]:
+; CHECK-NEXT: br label %[[HEADER_A:.*]]
+;
+; CHECK: [[HEADER_A]]:
+; CHECK-NEXT: %[[TMP_A:.*]] = call i32 @d()
+; CHECK-NEXT: %[[CMP1_A:.*]] = icmp eq i32 %[[TMP_A]], 0
+; CHECK-NEXT: br i1 %[[CMP1_A]], label %[[BODY_A_A:.*]], label %[[DISPATCH_A:.*]]
+;
+; CHECK: [[DISPATCH_A]]:
+; CHECK-NEXT: br label %[[BODY_A_A]]
+;
+; CHECK: [[BODY_A_A]]:
+; CHECK-NEXT: %[[TMP_A_PHI_A:.*]] = phi i32 [ 0, %[[HEADER_A]] ], [ %[[TMP_A]], %[[DISPATCH_A]] ]
+; CHECK-NEXT: %[[TMP_A_A:.*]] = call i32 @a()
+; CHECK-NEXT: %[[TMP_A_SUM_A:.*]] = add i32 %[[TMP_A_PHI_A]], %[[TMP_A_A]]
+; CHECK-NEXT: br label %[[BODY_B_A:.*]]
+;
+; CHECK: [[BODY_B_A]]:
+; CHECK-NEXT: %[[TMP_B_PHI_A:.*]] = phi i32 [ %[[TMP_A_SUM_A]], %[[BODY_A_A]] ]
+; CHECK-NEXT: %[[TMP_B_A:.*]] = call i32 @b()
+; CHECK-NEXT: %[[TMP_B_SUM_A:.*]] = add i32 %[[TMP_B_PHI_A]], %[[TMP_B_A]]
+; CHECK-NEXT: br label %[[BODY_C_A:.*]]
+;
+; CHECK: [[BODY_C_A]]:
+; CHECK-NEXT: %[[TMP_C_PHI_A:.*]] = phi i32 [ %[[TMP_B_SUM_A]], %[[BODY_B_A]] ]
+; CHECK-NEXT: %[[TMP_C_A:.*]] = call i32 @c()
+; CHECK-NEXT: %[[TMP_C_SUM_A:.*]] = add i32 %[[TMP_C_PHI_A]], %[[TMP_C_A]]
+; CHECK-NEXT: br label %[[LATCH_A:.*]]
+;
+; CHECK: [[LATCH_A]]:
+; CHECK-NEXT: %[[CMP2_A:.*]] = icmp slt i32 %[[TMP_C_SUM_A]], 42
+; CHECK: br i1 %[[CMP2_A]], label %[[HEADER_A]], label %[[LOOP_EXIT_A:.*]]
+;
+; CHECK: [[LOOP_EXIT_A]]:
+; CHECK-NEXT: %[[LCSSA_A:.*]] = phi i32 [ %[[TMP_C_SUM_A]], %[[LATCH_A]] ]
+; CHECK-NEXT: br label %exit
+
+body.b:
+ %tmp.b.phi = phi i32 [ %tmp, %dispatch ], [ %tmp.a.sum, %body.a ]
+ %tmp.b = call i32 @b()
+ %tmp.b.sum = add i32 %tmp.b.phi, %tmp.b
+ br label %body.c
+; Unswitched 'b' loop.
+;
+; CHECK: [[ENTRY_SPLIT_B]]:
+; CHECK-NEXT: br label %[[HEADER_B:.*]]
+;
+; CHECK: [[HEADER_B]]:
+; CHECK-NEXT: %[[TMP_B:.*]] = call i32 @d()
+; CHECK-NEXT: %[[CMP1_B:.*]] = icmp eq i32 %[[TMP_B]], 0
+; CHECK-NEXT: br i1 %[[CMP1_B]], label %[[BODY_A_B:.*]], label %[[DISPATCH_B:.*]]
+;
+; CHECK: [[DISPATCH_B]]:
+; CHECK-NEXT: br label %[[BODY_B_B:.*]]
+;
+; CHECK: [[BODY_A_B]]:
+; CHECK-NEXT: %[[TMP_A_PHI_B:.*]] = phi i32 [ 0, %[[HEADER_B]] ]
+; CHECK-NEXT: %[[TMP_A_B:.*]] = call i32 @a()
+; CHECK-NEXT: %[[TMP_A_SUM_B:.*]] = add i32 %[[TMP_A_PHI_B]], %[[TMP_A_B]]
+; CHECK-NEXT: br label %[[BODY_B_B:.*]]
+;
+; CHECK: [[BODY_B_B]]:
+; CHECK-NEXT: %[[TMP_B_PHI_B:.*]] = phi i32 [ %[[TMP_B]], %[[DISPATCH_B]] ], [ %[[TMP_A_SUM_B]], %[[BODY_A_B]] ]
+; CHECK-NEXT: %[[TMP_B_B:.*]] = call i32 @b()
+; CHECK-NEXT: %[[TMP_B_SUM_B:.*]] = add i32 %[[TMP_B_PHI_B]], %[[TMP_B_B]]
+; CHECK-NEXT: br label %[[BODY_C_B:.*]]
+;
+; CHECK: [[BODY_C_B]]:
+; CHECK-NEXT: %[[TMP_C_PHI_B:.*]] = phi i32 [ %[[TMP_B_SUM_B]], %[[BODY_B_B]] ]
+; CHECK-NEXT: %[[TMP_C_B:.*]] = call i32 @c()
+; CHECK-NEXT: %[[TMP_C_SUM_B:.*]] = add i32 %[[TMP_C_PHI_B]], %[[TMP_C_B]]
+; CHECK-NEXT: br label %[[LATCH_B:.*]]
+;
+; CHECK: [[LATCH_B]]:
+; CHECK-NEXT: %[[CMP2_B:.*]] = icmp slt i32 %[[TMP_C_SUM_B]], 42
+; CHECK: br i1 %[[CMP2_B]], label %[[HEADER_B]], label %[[LOOP_EXIT_B:.*]]
+;
+; CHECK: [[LOOP_EXIT_B]]:
+; CHECK-NEXT: %[[LCSSA_B:.*]] = phi i32 [ %[[TMP_C_SUM_B]], %[[LATCH_B]] ]
+; CHECK-NEXT: br label %[[EXIT_SPLIT:.*]]
+
+body.c:
+; Two incoming entries from %dispatch, one for the default edge and one for
+; case 3.
+ %tmp.c.phi = phi i32 [ %tmp, %dispatch ], [ %tmp, %dispatch ], [ %tmp.b.sum, %body.b ]
+ %tmp.c = call i32 @c()
+ %tmp.c.sum = add i32 %tmp.c.phi, %tmp.c
+ br label %latch
+; Unswitched 'c' loop.
+;
+; CHECK: [[ENTRY_SPLIT_C]]:
+; CHECK-NEXT: br label %[[HEADER_C:.*]]
+;
+; CHECK: [[HEADER_C]]:
+; CHECK-NEXT: %[[TMP_C:.*]] = call i32 @d()
+; CHECK-NEXT: %[[CMP1_C:.*]] = icmp eq i32 %[[TMP_C]], 0
+; CHECK-NEXT: br i1 %[[CMP1_C]], label %[[BODY_A_C:.*]], label %[[DISPATCH_C:.*]]
+;
+; CHECK: [[DISPATCH_C]]:
+; CHECK-NEXT: br label %[[BODY_C_C:.*]]
+;
+; CHECK: [[BODY_A_C]]:
+; CHECK-NEXT: %[[TMP_A_PHI_C:.*]] = phi i32 [ 0, %[[HEADER_C]] ]
+; CHECK-NEXT: %[[TMP_A_C:.*]] = call i32 @a()
+; CHECK-NEXT: %[[TMP_A_SUM_C:.*]] = add i32 %[[TMP_A_PHI_C]], %[[TMP_A_C]]
+; CHECK-NEXT: br label %[[BODY_B_C:.*]]
+;
+; CHECK: [[BODY_B_C]]:
+; CHECK-NEXT: %[[TMP_B_PHI_C:.*]] = phi i32 [ %[[TMP_A_SUM_C]], %[[BODY_A_C]] ]
+; CHECK-NEXT: %[[TMP_B_C:.*]] = call i32 @b()
+; CHECK-NEXT: %[[TMP_B_SUM_C:.*]] = add i32 %[[TMP_B_PHI_C]], %[[TMP_B_C]]
+; CHECK-NEXT: br label %[[BODY_C_C:.*]]
+;
+; CHECK: [[BODY_C_C]]:
+; CHECK-NEXT: %[[TMP_C_PHI_C:.*]] = phi i32 [ %[[TMP_C]], %[[DISPATCH_C]] ], [ %[[TMP_B_SUM_C]], %[[BODY_B_C]] ]
+; CHECK-NEXT: %[[TMP_C_C:.*]] = call i32 @c()
+; CHECK-NEXT: %[[TMP_C_SUM_C:.*]] = add i32 %[[TMP_C_PHI_C]], %[[TMP_C_C]]
+; CHECK-NEXT: br label %[[LATCH_C:.*]]
+;
+; CHECK: [[LATCH_C]]:
+; CHECK-NEXT: %[[CMP2_C:.*]] = icmp slt i32 %[[TMP_C_SUM_C]], 42
+; CHECK: br i1 %[[CMP2_C]], label %[[HEADER_C]], label %[[LOOP_EXIT_C:.*]]
+;
+; CHECK: [[LOOP_EXIT_C]]:
+; CHECK-NEXT: %[[LCSSA_C:.*]] = phi i32 [ %[[TMP_C_SUM_C]], %[[LATCH_C]] ]
+; CHECK-NEXT: br label %[[EXIT_SPLIT]]
+
+latch:
+ %cmp2 = icmp slt i32 %tmp.c.sum, 42
+ br i1 %cmp2, label %header, label %exit
+
+exit:
+ %lcssa.phi = phi i32 [ %tmp.c.sum, %latch ]
+ ret i32 %lcssa.phi
+; The 'b' and 'c' loop exits are expected to merge in a split block first,
+; which then joins the 'a' loop exit in the final exit block.
+; CHECK: [[EXIT_SPLIT]]:
+; CHECK-NEXT: %[[EXIT_PHI1:.*]] = phi i32 [ %[[LCSSA_C]], %[[LOOP_EXIT_C]] ], [ %[[LCSSA_B]], %[[LOOP_EXIT_B]] ]
+; CHECK-NEXT: br label %exit
+
+; CHECK: exit:
+; CHECK-NEXT: %[[EXIT_PHI2:.*]] = phi i32 [ %[[EXIT_PHI1]], %[[EXIT_SPLIT]] ], [ %[[LCSSA_A]], %[[LOOP_EXIT_A]] ]
+; CHECK-NEXT: ret i32 %[[EXIT_PHI2]]
+}
+
+; Similar to @test29 but designed to have one of the duplicate edges be
+; a loop exit edge as those can in some cases be special. Among other things,
+; this includes an LCSSA phi with multiple entries despite being a dedicated
+; exit block.
+;
+; The expectations describe three copies of the loop: one for the 'a' case, one
+; for the two 'b' cases, and one for the default and -1 cases, whose dispatch
+; block leaves the loop to loop.exit1.
+define i32 @test30(i32 %arg) {
+; CHECK-LABEL: define i32 @test30(
+entry:
+ br label %header
+; CHECK-NEXT: entry:
+; CHECK-NEXT: switch i32 %arg, label %[[ENTRY_SPLIT_EXIT:.*]] [
+; CHECK-NEXT: i32 -1, label %[[ENTRY_SPLIT_EXIT]]
+; CHECK-NEXT: i32 0, label %[[ENTRY_SPLIT_A:.*]]
+; CHECK-NEXT: i32 1, label %[[ENTRY_SPLIT_B:.*]]
+; CHECK-NEXT: i32 2, label %[[ENTRY_SPLIT_B]]
+; CHECK-NEXT: ]
+
+header:
+ %tmp = call i32 @d()
+ %cmp1 = icmp eq i32 %tmp, 0
+ br i1 %cmp1, label %body.a, label %dispatch
+
+dispatch:
+; Both the default edge and the -1 case leave the loop to %loop.exit1; cases 1
+; and 2 both go to %body.b.
+ switch i32 %arg, label %loop.exit1 [
+ i32 -1, label %loop.exit1
+ i32 0, label %body.a
+ i32 1, label %body.b
+ i32 2, label %body.b
+ ]
+
+body.a:
+ %tmp.a.phi = phi i32 [ 0, %header ], [ %tmp, %dispatch ]
+ %tmp.a = call i32 @a()
+ %tmp.a.sum = add i32 %tmp.a.phi, %tmp.a
+ br label %body.b
+; Unswitched 'a' loop.
+;
+; CHECK: [[ENTRY_SPLIT_A]]:
+; CHECK-NEXT: br label %[[HEADER_A:.*]]
+;
+; CHECK: [[HEADER_A]]:
+; CHECK-NEXT: %[[TMP_A:.*]] = call i32 @d()
+; CHECK-NEXT: %[[CMP1_A:.*]] = icmp eq i32 %[[TMP_A]], 0
+; CHECK-NEXT: br i1 %[[CMP1_A]], label %[[BODY_A_A:.*]], label %[[DISPATCH_A:.*]]
+;
+; CHECK: [[DISPATCH_A]]:
+; CHECK-NEXT: br label %[[BODY_A_A]]
+;
+; CHECK: [[BODY_A_A]]:
+; CHECK-NEXT: %[[TMP_A_PHI_A:.*]] = phi i32 [ 0, %[[HEADER_A]] ], [ %[[TMP_A]], %[[DISPATCH_A]] ]
+; CHECK-NEXT: %[[TMP_A_A:.*]] = call i32 @a()
+; CHECK-NEXT: %[[TMP_A_SUM_A:.*]] = add i32 %[[TMP_A_PHI_A]], %[[TMP_A_A]]
+; CHECK-NEXT: br label %[[BODY_B_A:.*]]
+;
+; CHECK: [[BODY_B_A]]:
+; CHECK-NEXT: %[[TMP_B_PHI_A:.*]] = phi i32 [ %[[TMP_A_SUM_A]], %[[BODY_A_A]] ]
+; CHECK-NEXT: %[[TMP_B_A:.*]] = call i32 @b()
+; CHECK-NEXT: %[[TMP_B_SUM_A:.*]] = add i32 %[[TMP_B_PHI_A]], %[[TMP_B_A]]
+; CHECK-NEXT: br label %[[LATCH_A:.*]]
+;
+; CHECK: [[LATCH_A]]:
+; CHECK-NEXT: %[[CMP2_A:.*]] = icmp slt i32 %[[TMP_B_SUM_A]], 42
+; CHECK: br i1 %[[CMP2_A]], label %[[HEADER_A]], label %[[LOOP_EXIT_A:.*]]
+;
+; CHECK: [[LOOP_EXIT_A]]:
+; CHECK-NEXT: %[[LCSSA_A:.*]] = phi i32 [ %[[TMP_B_SUM_A]], %[[LATCH_A]] ]
+; CHECK-NEXT: br label %loop.exit2
+
+body.b:
+; Two incoming entries from %dispatch, one per switch edge (cases 1 and 2).
+ %tmp.b.phi = phi i32 [ %tmp, %dispatch ], [ %tmp, %dispatch ], [ %tmp.a.sum, %body.a ]
+ %tmp.b = call i32 @b()
+ %tmp.b.sum = add i32 %tmp.b.phi, %tmp.b
+ br label %latch
+; Unswitched 'b' loop.
+;
+; CHECK: [[ENTRY_SPLIT_B]]:
+; CHECK-NEXT: br label %[[HEADER_B:.*]]
+;
+; CHECK: [[HEADER_B]]:
+; CHECK-NEXT: %[[TMP_B:.*]] = call i32 @d()
+; CHECK-NEXT: %[[CMP1_B:.*]] = icmp eq i32 %[[TMP_B]], 0
+; CHECK-NEXT: br i1 %[[CMP1_B]], label %[[BODY_A_B:.*]], label %[[DISPATCH_B:.*]]
+;
+; BODY_B_B is captured on the dispatch branch below (as @test29 does) so that
+; this function does not depend on a value captured while matching an earlier
+; function.
+; CHECK: [[DISPATCH_B]]:
+; CHECK-NEXT: br label %[[BODY_B_B:.*]]
+;
+; CHECK: [[BODY_A_B]]:
+; CHECK-NEXT: %[[TMP_A_PHI_B:.*]] = phi i32 [ 0, %[[HEADER_B]] ]
+; CHECK-NEXT: %[[TMP_A_B:.*]] = call i32 @a()
+; CHECK-NEXT: %[[TMP_A_SUM_B:.*]] = add i32 %[[TMP_A_PHI_B]], %[[TMP_A_B]]
+; CHECK-NEXT: br label %[[BODY_B_B:.*]]
+;
+; CHECK: [[BODY_B_B]]:
+; CHECK-NEXT: %[[TMP_B_PHI_B:.*]] = phi i32 [ %[[TMP_B]], %[[DISPATCH_B]] ], [ %[[TMP_A_SUM_B]], %[[BODY_A_B]] ]
+; CHECK-NEXT: %[[TMP_B_B:.*]] = call i32 @b()
+; CHECK-NEXT: %[[TMP_B_SUM_B:.*]] = add i32 %[[TMP_B_PHI_B]], %[[TMP_B_B]]
+; CHECK-NEXT: br label %[[LATCH_B:.*]]
+;
+; CHECK: [[LATCH_B]]:
+; CHECK-NEXT: %[[CMP2_B:.*]] = icmp slt i32 %[[TMP_B_SUM_B]], 42
+; CHECK: br i1 %[[CMP2_B]], label %[[HEADER_B]], label %[[LOOP_EXIT_B:.*]]
+;
+; CHECK: [[LOOP_EXIT_B]]:
+; CHECK-NEXT: %[[LCSSA_B:.*]] = phi i32 [ %[[TMP_B_SUM_B]], %[[LATCH_B]] ]
+; CHECK-NEXT: br label %[[LOOP_EXIT2_SPLIT:.*]]
+
+latch:
+ %cmp2 = icmp slt i32 %tmp.b.sum, 42
+ br i1 %cmp2, label %header, label %loop.exit2
+
+loop.exit1:
+; Two incoming entries from %dispatch, one for the default edge and one for
+; case -1.
+ %l1.phi = phi i32 [ %tmp, %dispatch ], [ %tmp, %dispatch ]
+ br label %exit
+; Unswitched 'exit' loop.
+;
+; CHECK: [[ENTRY_SPLIT_EXIT]]:
+; CHECK-NEXT: br label %[[HEADER_EXIT:.*]]
+;
+; CHECK: [[HEADER_EXIT]]:
+; CHECK-NEXT: %[[TMP_EXIT:.*]] = call i32 @d()
+; CHECK-NEXT: %[[CMP1_EXIT:.*]] = icmp eq i32 %[[TMP_EXIT]], 0
+; CHECK-NEXT: br i1 %[[CMP1_EXIT]], label %[[BODY_A_EXIT:.*]], label %[[DISPATCH_EXIT:.*]]
+;
+; CHECK: [[DISPATCH_EXIT]]:
+; CHECK-NEXT: %[[TMP_LCSSA:.*]] = phi i32 [ %[[TMP_EXIT]], %[[HEADER_EXIT]] ]
+; CHECK-NEXT: br label %loop.exit1
+;
+; CHECK: [[BODY_A_EXIT]]:
+; CHECK-NEXT: %[[TMP_A_PHI_EXIT:.*]] = phi i32 [ 0, %[[HEADER_EXIT]] ]
+; CHECK-NEXT: %[[TMP_A_EXIT:.*]] = call i32 @a()
+; CHECK-NEXT: %[[TMP_A_SUM_EXIT:.*]] = add i32 %[[TMP_A_PHI_EXIT]], %[[TMP_A_EXIT]]
+; CHECK-NEXT: br label %[[BODY_B_EXIT:.*]]
+;
+; CHECK: [[BODY_B_EXIT]]:
+; CHECK-NEXT: %[[TMP_B_PHI_EXIT:.*]] = phi i32 [ %[[TMP_A_SUM_EXIT]], %[[BODY_A_EXIT]] ]
+; CHECK-NEXT: %[[TMP_B_EXIT:.*]] = call i32 @b()
+; CHECK-NEXT: %[[TMP_B_SUM_EXIT:.*]] = add i32 %[[TMP_B_PHI_EXIT]], %[[TMP_B_EXIT]]
+; CHECK-NEXT: br label %[[LATCH_EXIT:.*]]
+;
+; CHECK: [[LATCH_EXIT]]:
+; CHECK-NEXT: %[[CMP2_EXIT:.*]] = icmp slt i32 %[[TMP_B_SUM_EXIT]], 42
+; CHECK: br i1 %[[CMP2_EXIT]], label %[[HEADER_EXIT]], label %[[LOOP_EXIT_EXIT:.*]]
+;
+; CHECK: loop.exit1:
+; CHECK-NEXT: %[[L1_PHI:.*]] = phi i32 [ %[[TMP_LCSSA]], %[[DISPATCH_EXIT]] ]
+; CHECK-NEXT: br label %exit
+;
+; CHECK: [[LOOP_EXIT_EXIT]]:
+; CHECK-NEXT: %[[L2_PHI:.*]] = phi i32 [ %[[TMP_B_SUM_EXIT]], %[[LATCH_EXIT]] ]
+; CHECK-NEXT: br label %[[LOOP_EXIT2_SPLIT]]
+
+loop.exit2:
+ %l2.phi = phi i32 [ %tmp.b.sum, %latch ]
+ br label %exit
+; CHECK: [[LOOP_EXIT2_SPLIT]]:
+; CHECK-NEXT: %[[LOOP_EXIT_PHI1:.*]] = phi i32 [ %[[L2_PHI]], %[[LOOP_EXIT_EXIT]] ], [ %[[LCSSA_B]], %[[LOOP_EXIT_B]] ]
+; CHECK-NEXT: br label %loop.exit2
+;
+; CHECK: loop.exit2:
+; CHECK-NEXT: %[[LOOP_EXIT_PHI2:.*]] = phi i32 [ %[[LOOP_EXIT_PHI1]], %[[LOOP_EXIT2_SPLIT]] ], [ %[[LCSSA_A]], %[[LOOP_EXIT_A]] ]
+; CHECK-NEXT: br label %exit
+
+exit:
+ %l.phi = phi i32 [ %l1.phi, %loop.exit1 ], [ %l2.phi, %loop.exit2 ]
+ ret i32 %l.phi
+; CHECK: exit:
+; CHECK-NEXT: %[[EXIT_PHI:.*]] = phi i32 [ %[[L1_PHI]], %loop.exit1 ], [ %[[LOOP_EXIT_PHI2]], %loop.exit2 ]
+; CHECK-NEXT: ret i32 %[[EXIT_PHI]]
+}
+
+; Unswitch will not actually change the loop nest from:
+; A < B < C
+;
+; %v1 is computed in b.header and used by the branch in c.header, so it does
+; not change while the innermost (C) loop runs. When %v1 is true, c.header
+; leaves C for b.latch, which stays inside B.
+define void @hoist_inner_loop0() {
+; CHECK-LABEL: define void @hoist_inner_loop0(
+entry:
+ br label %a.header
+; CHECK: entry:
+; CHECK-NEXT: br label %a.header
+
+a.header:
+ br label %b.header
+; CHECK: a.header:
+; CHECK-NEXT: br label %b.header
+
+b.header:
+ %v1 = call i1 @cond()
+ br label %c.header
+; The branch on %v1 is expected at the end of b.header. The '.us' side holds a
+; copy of c.header that goes straight on to b.latch.
+; CHECK: b.header:
+; CHECK-NEXT: %v1 = call i1 @cond()
+; CHECK-NEXT: br i1 %v1, label %[[B_HEADER_SPLIT_US:.*]], label %[[B_HEADER_SPLIT:.*]]
+;
+; CHECK: [[B_HEADER_SPLIT_US]]:
+; CHECK-NEXT: br label %[[C_HEADER_US:.*]]
+;
+; CHECK: [[C_HEADER_US]]:
+; CHECK-NEXT: call i32 @c()
+; CHECK-NEXT: br label %[[B_LATCH_SPLIT_US:.*]]
+;
+; CHECK: [[B_LATCH_SPLIT_US]]:
+; CHECK-NEXT: br label %b.latch
+;
+; CHECK: [[B_HEADER_SPLIT]]:
+; CHECK-NEXT: br label %c.header
+
+c.header:
+ call i32 @c()
+ br i1 %v1, label %b.latch, label %c.latch
+; CHECK: c.header:
+; CHECK-NEXT: call i32 @c()
+; CHECK-NEXT: br label %c.latch
+
+c.latch:
+ %v2 = call i1 @cond()
+ br i1 %v2, label %c.header, label %b.latch
+; CHECK: c.latch:
+; CHECK-NEXT: %v2 = call i1 @cond()
+; CHECK-NEXT: br i1 %v2, label %c.header, label %[[B_LATCH_SPLIT:.*]]
+
+b.latch:
+ %v3 = call i1 @cond()
+ br i1 %v3, label %b.header, label %a.latch
+; CHECK: [[B_LATCH_SPLIT]]:
+; CHECK-NEXT: br label %b.latch
+;
+; CHECK: b.latch:
+; CHECK-NEXT: %v3 = call i1 @cond()
+; CHECK-NEXT: br i1 %v3, label %b.header, label %a.latch
+
+a.latch:
+ br label %a.header
+; CHECK: a.latch:
+; CHECK-NEXT: br label %a.header
+
+; No block in this function branches to %exit.
+exit:
+ ret void
+; CHECK: exit:
+; CHECK-NEXT: ret void
+}
+
+; Unswitch will transform the loop nest from:
+; A < B < C
+; into
+; A < (B, C)
+;
+; %v1 is computed in b.header and used by the branch in c.header. Unlike
+; @hoist_inner_loop0, c.latch here exits to a.exit.c, which leads to a.latch
+; rather than b.latch.
+define void @hoist_inner_loop1(i32* %ptr) {
+; CHECK-LABEL: define void @hoist_inner_loop1(
+entry:
+ br label %a.header
+; CHECK: entry:
+; CHECK-NEXT: br label %a.header
+
+a.header:
+ %x.a = load i32, i32* %ptr
+ br label %b.header
+; CHECK: a.header:
+; CHECK-NEXT: %x.a = load i32, i32* %ptr
+; CHECK-NEXT: br label %b.header
+
+b.header:
+ %x.b = load i32, i32* %ptr
+ %v1 = call i1 @cond()
+ br label %c.header
+; CHECK: b.header:
+; CHECK-NEXT: %x.b = load i32, i32* %ptr
+; CHECK-NEXT: %v1 = call i1 @cond()
+; CHECK-NEXT: br i1 %v1, label %[[B_HEADER_SPLIT_US:.*]], label %[[B_HEADER_SPLIT:.*]]
+;
+; CHECK: [[B_HEADER_SPLIT_US]]:
+; CHECK-NEXT: br label %[[C_HEADER_US:.*]]
+;
+; CHECK: [[C_HEADER_US]]:
+; CHECK-NEXT: call i32 @c()
+; CHECK-NEXT: br label %[[B_LATCH_US:.*]]
+;
+; CHECK: [[B_LATCH_US]]:
+; CHECK-NEXT: br label %b.latch
+;
+; An LCSSA phi for %x.b is expected in the non-'us' split block; the store in
+; c.latch is expected to use it instead of %x.b directly.
+; CHECK: [[B_HEADER_SPLIT]]:
+; CHECK-NEXT: %[[X_B_LCSSA:.*]] = phi i32 [ %x.b, %b.header ]
+; CHECK-NEXT: br label %c.header
+
+c.header:
+ call i32 @c()
+ br i1 %v1, label %b.latch, label %c.latch
+; CHECK: c.header:
+; CHECK-NEXT: call i32 @c()
+; CHECK-NEXT: br label %c.latch
+
+c.latch:
+ ; Use values from other loops to check LCSSA form.
+ store i32 %x.a, i32* %ptr
+ store i32 %x.b, i32* %ptr
+ %v2 = call i1 @cond()
+ br i1 %v2, label %c.header, label %a.exit.c
+; CHECK: c.latch:
+; CHECK-NEXT: store i32 %x.a, i32* %ptr
+; CHECK-NEXT: store i32 %[[X_B_LCSSA]], i32* %ptr
+; CHECK-NEXT: %v2 = call i1 @cond()
+; CHECK-NEXT: br i1 %v2, label %c.header, label %a.exit.c
+
+b.latch:
+ %v3 = call i1 @cond()
+ br i1 %v3, label %b.header, label %a.exit.b
+; CHECK: b.latch:
+; CHECK-NEXT: %v3 = call i1 @cond()
+; CHECK-NEXT: br i1 %v3, label %b.header, label %a.exit.b
+
+a.exit.c:
+ br label %a.latch
+; The trailing colon anchors this pattern to the block label itself, matching
+; the form used for every other block label in this function.
+; CHECK: a.exit.c:
+; CHECK-NEXT: br label %a.latch
+
+a.exit.b:
+ br label %a.latch
+; CHECK: a.exit.b:
+; CHECK-NEXT: br label %a.latch
+
+a.latch:
+ br label %a.header
+; CHECK: a.latch:
+; CHECK-NEXT: br label %a.header
+
+; No block in this function branches to %exit.
+exit:
+ ret void
+; CHECK: exit:
+; CHECK-NEXT: ret void
+}
+
+; Unswitch will transform the loop nest from:
+; A < B < C
+; into
+; (A < B), C
+define void @hoist_inner_loop2(i32* %ptr) { ; like @hoist_inner_loop1, but c.latch exits to %exit instead of into loop A
+; CHECK-LABEL: define void @hoist_inner_loop2(
+entry:
+ br label %a.header
+; CHECK: entry:
+; CHECK-NEXT: br label %a.header
+
+a.header:
+ %x.a = load i32, i32* %ptr
+ br label %b.header
+; CHECK: a.header:
+; CHECK-NEXT: %x.a = load i32, i32* %ptr
+; CHECK-NEXT: br label %b.header
+
+b.header:
+ %x.b = load i32, i32* %ptr
+ %v1 = call i1 @cond() ; computed outside loop C, branched on in c.header
+ br label %c.header
+; CHECK: b.header:
+; CHECK-NEXT: %x.b = load i32, i32* %ptr
+; CHECK-NEXT: %v1 = call i1 @cond()
+; CHECK-NEXT: br i1 %v1, label %[[B_HEADER_SPLIT_US:.*]], label %[[B_HEADER_SPLIT:.*]]
+;
+; CHECK: [[B_HEADER_SPLIT_US]]:
+; CHECK-NEXT: br label %[[C_HEADER_US:.*]]
+;
+; CHECK: [[C_HEADER_US]]:
+; CHECK-NEXT: call i32 @c()
+; CHECK-NEXT: br label %[[B_LATCH_US:.*]]
+;
+; CHECK: [[B_LATCH_US]]:
+; CHECK-NEXT: br label %b.latch
+;
+; CHECK: [[B_HEADER_SPLIT]]:
+; CHECK-NEXT: %[[X_A_LCSSA:.*]] = phi i32 [ %x.a, %b.header ]
+; CHECK-NEXT: %[[X_B_LCSSA:.*]] = phi i32 [ %x.b, %b.header ]
+; CHECK-NEXT: br label %c.header
+
+c.header:
+ call i32 @c()
+ br i1 %v1, label %b.latch, label %c.latch ; the branch expected to be unswitched out of loop C
+; CHECK: c.header:
+; CHECK-NEXT: call i32 @c()
+; CHECK-NEXT: br label %c.latch
+
+c.latch:
+ ; Use values from other loops to check LCSSA form.
+ store i32 %x.a, i32* %ptr
+ store i32 %x.b, i32* %ptr
+ %v2 = call i1 @cond()
+ br i1 %v2, label %c.header, label %exit ; the only way out of C on this path leaves the whole nest
+; CHECK: c.latch:
+; CHECK-NEXT: store i32 %[[X_A_LCSSA]], i32* %ptr
+; CHECK-NEXT: store i32 %[[X_B_LCSSA]], i32* %ptr
+; CHECK-NEXT: %v2 = call i1 @cond()
+; CHECK-NEXT: br i1 %v2, label %c.header, label %exit
+
+b.latch:
+ %v3 = call i1 @cond()
+ br i1 %v3, label %b.header, label %a.latch
+; CHECK: b.latch:
+; CHECK-NEXT: %v3 = call i1 @cond()
+; CHECK-NEXT: br i1 %v3, label %b.header, label %a.latch
+
+a.latch:
+ br label %a.header
+; CHECK: a.latch:
+; CHECK-NEXT: br label %a.header
+
+exit: ; reached only from c.latch
+ ret void
+; CHECK: exit:
+; CHECK-NEXT: ret void
+}
+
+; Same as @hoist_inner_loop2 but with a nested loop inside the hoisted loop.
+; Unswitch will transform the loop nest from:
+; A < B < C < D
+; into
+; (A < B), (C < D)
+define void @hoist_inner_loop3(i32* %ptr) { ; loop C contains the single-block loop D; c.latch exits to %exit
+; CHECK-LABEL: define void @hoist_inner_loop3(
+entry:
+ br label %a.header
+; CHECK: entry:
+; CHECK-NEXT: br label %a.header
+
+a.header:
+ %x.a = load i32, i32* %ptr
+ br label %b.header
+; CHECK: a.header:
+; CHECK-NEXT: %x.a = load i32, i32* %ptr
+; CHECK-NEXT: br label %b.header
+
+b.header:
+ %x.b = load i32, i32* %ptr
+ %v1 = call i1 @cond() ; computed outside loop C, branched on in c.header
+ br label %c.header
+; CHECK: b.header:
+; CHECK-NEXT: %x.b = load i32, i32* %ptr
+; CHECK-NEXT: %v1 = call i1 @cond()
+; CHECK-NEXT: br i1 %v1, label %[[B_HEADER_SPLIT_US:.*]], label %[[B_HEADER_SPLIT:.*]]
+;
+; CHECK: [[B_HEADER_SPLIT_US]]:
+; CHECK-NEXT: br label %[[C_HEADER_US:.*]]
+;
+; CHECK: [[C_HEADER_US]]:
+; CHECK-NEXT: call i32 @c()
+; CHECK-NEXT: br label %[[B_LATCH_US:.*]]
+;
+; CHECK: [[B_LATCH_US]]:
+; CHECK-NEXT: br label %b.latch
+;
+; CHECK: [[B_HEADER_SPLIT]]:
+; CHECK-NEXT: %[[X_A_LCSSA:.*]] = phi i32 [ %x.a, %b.header ]
+; CHECK-NEXT: %[[X_B_LCSSA:.*]] = phi i32 [ %x.b, %b.header ]
+; CHECK-NEXT: br label %c.header
+
+c.header:
+ call i32 @c()
+ br i1 %v1, label %b.latch, label %c.body ; the branch expected to be unswitched out of loop C
+; CHECK: c.header:
+; CHECK-NEXT: call i32 @c()
+; CHECK-NEXT: br label %c.body
+
+c.body:
+ %x.c = load i32, i32* %ptr
+ br label %d.header
+; CHECK: c.body:
+; CHECK-NEXT: %x.c = load i32, i32* %ptr
+; CHECK-NEXT: br label %d.header
+
+d.header:
+ ; Use values from other loops to check LCSSA form.
+ store i32 %x.a, i32* %ptr
+ store i32 %x.b, i32* %ptr
+ store i32 %x.c, i32* %ptr
+ %v2 = call i1 @cond()
+ br i1 %v2, label %d.header, label %c.latch ; D is this one block: it either repeats or falls into c.latch
+; CHECK: d.header:
+; CHECK-NEXT: store i32 %[[X_A_LCSSA]], i32* %ptr
+; CHECK-NEXT: store i32 %[[X_B_LCSSA]], i32* %ptr
+; CHECK-NEXT: store i32 %x.c, i32* %ptr
+; CHECK-NEXT: %v2 = call i1 @cond()
+; CHECK-NEXT: br i1 %v2, label %d.header, label %c.latch
+
+c.latch:
+ %v3 = call i1 @cond()
+ br i1 %v3, label %c.header, label %exit ; leaving C here leaves the whole nest
+; CHECK: c.latch:
+; CHECK-NEXT: %v3 = call i1 @cond()
+; CHECK-NEXT: br i1 %v3, label %c.header, label %exit
+
+b.latch:
+ %v4 = call i1 @cond()
+ br i1 %v4, label %b.header, label %a.latch
+; CHECK: b.latch:
+; CHECK-NEXT: %v4 = call i1 @cond()
+; CHECK-NEXT: br i1 %v4, label %b.header, label %a.latch
+
+a.latch:
+ br label %a.header
+; CHECK: a.latch:
+; CHECK-NEXT: br label %a.header
+
+exit: ; reached only from c.latch
+ ret void
+; CHECK: exit:
+; CHECK-NEXT: ret void
+}
+
+; This test is designed to exercise checking multiple remaining exits from the
+; loop being unswitched.
+; Unswitch will transform the loop nest from:
+; A < B < C < D
+; into
+; A < B < (C, D)
+define void @hoist_inner_loop4() { ; loop D has three exiting blocks besides the unswitched branch in d.header
+; CHECK-LABEL: define void @hoist_inner_loop4(
+entry:
+ br label %a.header
+; CHECK: entry:
+; CHECK-NEXT: br label %a.header
+
+a.header:
+ br label %b.header
+; CHECK: a.header:
+; CHECK-NEXT: br label %b.header
+
+b.header:
+ br label %c.header
+; CHECK: b.header:
+; CHECK-NEXT: br label %c.header
+
+c.header:
+ %v1 = call i1 @cond() ; computed outside loop D, branched on in d.header
+ br label %d.header
+; CHECK: c.header:
+; CHECK-NEXT: %v1 = call i1 @cond()
+; CHECK-NEXT: br i1 %v1, label %[[C_HEADER_SPLIT_US:.*]], label %[[C_HEADER_SPLIT:.*]]
+;
+; CHECK: [[C_HEADER_SPLIT_US]]:
+; CHECK-NEXT: br label %[[D_HEADER_US:.*]]
+;
+; CHECK: [[D_HEADER_US]]:
+; CHECK-NEXT: call i32 @d()
+; CHECK-NEXT: br label %[[C_LATCH_US:.*]]
+;
+; CHECK: [[C_LATCH_US]]:
+; CHECK-NEXT: br label %c.latch
+;
+; CHECK: [[C_HEADER_SPLIT]]:
+; CHECK-NEXT: br label %d.header
+
+d.header:
+ call i32 @d()
+ br i1 %v1, label %c.latch, label %d.exiting1 ; the branch expected to be unswitched out of loop D
+; CHECK: d.header:
+; CHECK-NEXT: call i32 @d()
+; CHECK-NEXT: br label %d.exiting1
+
+d.exiting1:
+ %v2 = call i1 @cond()
+ br i1 %v2, label %d.exiting2, label %a.latch ; exit edge into loop A's latch
+; CHECK: d.exiting1:
+; CHECK-NEXT: %v2 = call i1 @cond()
+; CHECK-NEXT: br i1 %v2, label %d.exiting2, label %a.latch
+
+d.exiting2:
+ %v3 = call i1 @cond()
+ br i1 %v3, label %d.exiting3, label %loopexit.d ; exit edge out of the whole nest (loopexit.d branches to exit)
+; CHECK: d.exiting2:
+; CHECK-NEXT: %v3 = call i1 @cond()
+; CHECK-NEXT: br i1 %v3, label %d.exiting3, label %loopexit.d
+
+d.exiting3:
+ %v4 = call i1 @cond()
+ br i1 %v4, label %d.latch, label %b.latch ; exit edge into loop B's latch
+; CHECK: d.exiting3:
+; CHECK-NEXT: %v4 = call i1 @cond()
+; CHECK-NEXT: br i1 %v4, label %d.latch, label %b.latch
+
+d.latch:
+ br label %d.header
+; CHECK: d.latch:
+; CHECK-NEXT: br label %d.header
+
+c.latch:
+ %v5 = call i1 @cond()
+ br i1 %v5, label %c.header, label %loopexit.c ; leaving C here leaves the whole nest
+; CHECK: c.latch:
+; CHECK-NEXT: %v5 = call i1 @cond()
+; CHECK-NEXT: br i1 %v5, label %c.header, label %loopexit.c
+
+b.latch:
+ br label %b.header
+; CHECK: b.latch:
+; CHECK-NEXT: br label %b.header
+
+a.latch:
+ br label %a.header
+; CHECK: a.latch:
+; CHECK-NEXT: br label %a.header
+
+loopexit.d: ; reached only from d.exiting2
+ br label %exit
+; CHECK: loopexit.d:
+; CHECK-NEXT: br label %exit
+
+loopexit.c: ; reached only from c.latch
+ br label %exit
+; CHECK: loopexit.c:
+; CHECK-NEXT: br label %exit
+
+exit:
+ ret void
+; CHECK: exit:
+; CHECK-NEXT: ret void
+}
+
+; Unswitch will transform the loop nest from:
+; A < B < C < D
+; into
+; A < ((B < C), D)
+define void @hoist_inner_loop5(i32* %ptr) { ; loop D's latch exits straight to a.latch, bypassing loops C and B
+; CHECK-LABEL: define void @hoist_inner_loop5(
+entry:
+ br label %a.header
+; CHECK: entry:
+; CHECK-NEXT: br label %a.header
+
+a.header:
+ %x.a = load i32, i32* %ptr
+ br label %b.header
+; CHECK: a.header:
+; CHECK-NEXT: %x.a = load i32, i32* %ptr
+; CHECK-NEXT: br label %b.header
+
+b.header:
+ %x.b = load i32, i32* %ptr
+ br label %c.header
+; CHECK: b.header:
+; CHECK-NEXT: %x.b = load i32, i32* %ptr
+; CHECK-NEXT: br label %c.header
+
+c.header:
+ %x.c = load i32, i32* %ptr
+ %v1 = call i1 @cond() ; computed outside loop D, branched on in d.header
+ br label %d.header
+; CHECK: c.header:
+; CHECK-NEXT: %x.c = load i32, i32* %ptr
+; CHECK-NEXT: %v1 = call i1 @cond()
+; CHECK-NEXT: br i1 %v1, label %[[C_HEADER_SPLIT_US:.*]], label %[[C_HEADER_SPLIT:.*]]
+;
+; CHECK: [[C_HEADER_SPLIT_US]]:
+; CHECK-NEXT: br label %[[D_HEADER_US:.*]]
+;
+; CHECK: [[D_HEADER_US]]:
+; CHECK-NEXT: call i32 @d()
+; CHECK-NEXT: br label %[[C_LATCH_US:.*]]
+;
+; CHECK: [[C_LATCH_US]]:
+; CHECK-NEXT: br label %c.latch
+;
+; CHECK: [[C_HEADER_SPLIT]]:
+; CHECK-NEXT: %[[X_B_LCSSA:.*]] = phi i32 [ %x.b, %c.header ]
+; CHECK-NEXT: %[[X_C_LCSSA:.*]] = phi i32 [ %x.c, %c.header ]
+; CHECK-NEXT: br label %d.header
+
+d.header:
+ call i32 @d()
+ br i1 %v1, label %c.latch, label %d.latch ; the branch expected to be unswitched out of loop D
+; CHECK: d.header:
+; CHECK-NEXT: call i32 @d()
+; CHECK-NEXT: br label %d.latch
+
+d.latch:
+ ; Use values from other loops to check LCSSA form.
+ store i32 %x.a, i32* %ptr
+ store i32 %x.b, i32* %ptr
+ store i32 %x.c, i32* %ptr
+ %v2 = call i1 @cond()
+ br i1 %v2, label %d.header, label %a.latch ; the only way out of D on this path is into loop A's latch
+; CHECK: d.latch:
+; CHECK-NEXT: store i32 %x.a, i32* %ptr
+; CHECK-NEXT: store i32 %[[X_B_LCSSA]], i32* %ptr
+; CHECK-NEXT: store i32 %[[X_C_LCSSA]], i32* %ptr
+; CHECK-NEXT: %v2 = call i1 @cond()
+; CHECK-NEXT: br i1 %v2, label %d.header, label %a.latch
+
+c.latch:
+ %v3 = call i1 @cond()
+ br i1 %v3, label %c.header, label %b.latch
+; CHECK: c.latch:
+; CHECK-NEXT: %v3 = call i1 @cond()
+; CHECK-NEXT: br i1 %v3, label %c.header, label %b.latch
+
+b.latch:
+ br label %b.header
+; CHECK: b.latch:
+; CHECK-NEXT: br label %b.header
+
+a.latch:
+ br label %a.header
+; CHECK: a.latch:
+; CHECK-NEXT: br label %a.header
+
+exit: ; no branch in this function targets this block; a.latch always loops back
+ ret void
+; CHECK: exit:
+; CHECK-NEXT: ret void
+}
+
+define void @hoist_inner_loop_switch(i32* %ptr) { ; same CFG shape as @hoist_inner_loop2, but the unswitched terminator is an i32 switch
+; CHECK-LABEL: define void @hoist_inner_loop_switch(
+entry:
+ br label %a.header
+; CHECK: entry:
+; CHECK-NEXT: br label %a.header
+
+a.header:
+ %x.a = load i32, i32* %ptr
+ br label %b.header
+; CHECK: a.header:
+; CHECK-NEXT: %x.a = load i32, i32* %ptr
+; CHECK-NEXT: br label %b.header
+
+b.header:
+ %x.b = load i32, i32* %ptr
+ %v1 = call i32 @cond.i32() ; computed outside loop C, switched on in c.header
+ br label %c.header
+; CHECK: b.header:
+; CHECK-NEXT: %x.b = load i32, i32* %ptr
+; CHECK-NEXT: %v1 = call i32 @cond.i32()
+; CHECK-NEXT: switch i32 %v1, label %[[B_HEADER_SPLIT:.*]] [
+; CHECK-NEXT: i32 1, label %[[B_HEADER_SPLIT_US:.*]]
+; CHECK-NEXT: i32 2, label %[[B_HEADER_SPLIT_US]]
+; CHECK-NEXT: i32 3, label %[[B_HEADER_SPLIT_US]]
+; CHECK-NEXT: ]
+;
+; CHECK: [[B_HEADER_SPLIT_US]]:
+; CHECK-NEXT: br label %[[C_HEADER_US:.*]]
+;
+; CHECK: [[C_HEADER_US]]:
+; CHECK-NEXT: call i32 @c()
+; CHECK-NEXT: br label %[[B_LATCH_US:.*]]
+;
+; CHECK: [[B_LATCH_US]]:
+; CHECK-NEXT: br label %b.latch
+;
+; CHECK: [[B_HEADER_SPLIT]]:
+; CHECK-NEXT: %[[X_A_LCSSA:.*]] = phi i32 [ %x.a, %b.header ]
+; CHECK-NEXT: %[[X_B_LCSSA:.*]] = phi i32 [ %x.b, %b.header ]
+; CHECK-NEXT: br label %c.header
+
+c.header:
+ call i32 @c()
+ switch i32 %v1, label %c.latch [ ; all three cases leave loop C for b.latch; only the default stays in C
+ i32 1, label %b.latch
+ i32 2, label %b.latch
+ i32 3, label %b.latch
+ ]
+; CHECK: c.header:
+; CHECK-NEXT: call i32 @c()
+; CHECK-NEXT: br label %c.latch
+
+c.latch:
+ ; Use values from other loops to check LCSSA form.
+ store i32 %x.a, i32* %ptr
+ store i32 %x.b, i32* %ptr
+ %v2 = call i1 @cond()
+ br i1 %v2, label %c.header, label %exit ; leaving C here leaves the whole nest
+; CHECK: c.latch:
+; CHECK-NEXT: store i32 %[[X_A_LCSSA]], i32* %ptr
+; CHECK-NEXT: store i32 %[[X_B_LCSSA]], i32* %ptr
+; CHECK-NEXT: %v2 = call i1 @cond()
+; CHECK-NEXT: br i1 %v2, label %c.header, label %exit
+
+b.latch:
+ %v3 = call i1 @cond()
+ br i1 %v3, label %b.header, label %a.latch
+; CHECK: b.latch:
+; CHECK-NEXT: %v3 = call i1 @cond()
+; CHECK-NEXT: br i1 %v3, label %b.header, label %a.latch
+
+a.latch:
+ br label %a.header
+; CHECK: a.latch:
+; CHECK-NEXT: br label %a.header
+
+exit: ; reached only from c.latch
+ ret void
+; CHECK: exit:
+; CHECK-NEXT: ret void
+}
+
+; A devilish pattern. This is a crafty, crafty test case designed to risk
+; creating indirect cycles with trivial and non-trivial unswitching. The inner
+; loop has a switch with a trivial exit edge that can be unswitched, but the
+; rest of the switch cannot be unswitched because its cost is too high.
+; However, the unswitching of the trivial edge creates a new switch in the
+; outer loop. *This* switch isn't trivial, but has a low cost to unswitch. When
+; we unswitch this switch from the outer loop, we will remove it completely and
+; create a clone of the inner loop on one side. This clone will then again be
+; viable for unswitching the inner-most loop. This lets us check that the
+; unswitching doesn't end up cycling infinitely even when the cycle is
+; indirect and due to revisiting a loop after cloning.
+define void @test31(i32 %arg) { ; %arg is a function argument, so the inner switch condition never changes
+; CHECK-LABEL: define void @test31(
+entry:
+ br label %outer.header
+; CHECK-NEXT: entry:
+; CHECK-NEXT: switch i32 %arg, label %[[ENTRY_SPLIT:.*]] [
+; CHECK-NEXT: i32 1, label %[[ENTRY_SPLIT_US:.*]]
+; CHECK-NEXT: i32 2, label %[[ENTRY_SPLIT_US]]
+; CHECK-NEXT: ]
+;
+; CHECK: [[ENTRY_SPLIT_US]]:
+; CHECK-NEXT: switch i32 %arg, label %[[ENTRY_SPLIT_US_SPLIT:.*]] [
+; CHECK-NEXT: i32 1, label %[[ENTRY_SPLIT_US_SPLIT_US:.*]]
+; CHECK-NEXT: ]
+
+outer.header:
+ br label %inner.header
+
+inner.header:
+ switch i32 %arg, label %inner.loopexit1 [ ; the default edge leaves the inner loop; cases 1 and 2 stay inside it
+ i32 1, label %inner.body1
+ i32 2, label %inner.body2
+ ]
+
+inner.body1:
+ %a = call i32 @a()
+ br label %inner.latch
+; The (super convoluted) fully unswitched loop around `@a`.
+;
+; CHECK: [[ENTRY_SPLIT_US_SPLIT_US]]:
+; CHECK-NEXT: br label %[[OUTER_HEADER_US_US:.*]]
+;
+; CHECK: [[OUTER_HEADER_US_US]]:
+; CHECK-NEXT: br label %[[OUTER_HEADER_SPLIT_US_US:.*]]
+;
+; CHECK: [[OUTER_LATCH_US_US:.*]]:
+; CHECK-NEXT: %[[OUTER_COND_US_US:.*]] = call i1 @cond()
+; CHECK-NEXT: br i1 %[[OUTER_COND_US_US]], label %[[OUTER_HEADER_US_US]], label %[[EXIT_SPLIT_US_SPLIT_US:.*]]
+;
+; CHECK: [[OUTER_HEADER_SPLIT_US_US]]:
+; CHECK-NEXT: br label %[[OUTER_HEADER_SPLIT_SPLIT_US_US_US:.*]]
+;
+; CHECK: [[INNER_LOOPEXIT2_US_US:.*]]:
+; CHECK-NEXT: br label %[[OUTER_LATCH_US_US]]
+;
+; CHECK: [[OUTER_HEADER_SPLIT_SPLIT_US_US_US]]:
+; CHECK-NEXT: br label %[[INNER_HEADER_US_US_US:.*]]
+;
+; CHECK: [[INNER_HEADER_US_US_US]]:
+; CHECK-NEXT: br label %[[INNER_BODY1_US_US_US:.*]]
+;
+; CHECK: [[INNER_BODY1_US_US_US]]:
+; CHECK-NEXT: %[[A:.*]] = call i32 @a()
+; CHECK-NEXT: br label %[[INNER_LATCH_US_US_US:.*]]
+;
+; CHECK: [[INNER_LATCH_US_US_US]]:
+; CHECK-NEXT: %[[PHI_A:.*]] = phi i32 [ %[[A]], %[[INNER_BODY1_US_US_US]] ]
+; CHECK-NEXT: call void @sink1(i32 0)
+; CHECK-NEXT: call void @sink1(i32 0)
+; CHECK-NEXT: call void @sink1(i32 0)
+; CHECK-NEXT: call void @sink1(i32 0)
+; CHECK-NEXT: call void @sink1(i32 0)
+; CHECK-NEXT: call void @sink1(i32 0)
+; CHECK-NEXT: call void @sink1(i32 0)
+; CHECK-NEXT: call void @sink1(i32 0)
+; CHECK-NEXT: call void @sink1(i32 0)
+; CHECK-NEXT: call void @sink1(i32 0)
+; CHECK-NEXT: call void @sink1(i32 %[[PHI_A]])
+; CHECK-NEXT: %[[INNER_COND_US_US_US:.*]] = call i1 @cond()
+; CHECK-NEXT: br i1 %[[INNER_COND_US_US_US]], label %[[INNER_HEADER_US_US_US]], label %[[INNER_LOOPEXIT2_SPLIT_US_US_US:.*]]
+;
+; CHECK: [[INNER_LOOPEXIT2_SPLIT_US_US_US]]:
+; CHECK-NEXT: br label %[[INNER_LOOPEXIT2_US_US]]
+;
+; CHECK: [[EXIT_SPLIT_US_SPLIT_US]]:
+; CHECK-NEXT: br label %[[EXIT_SPLIT_US:.*]]
+
+
+inner.body2:
+ %b = call i32 @b()
+ br label %inner.latch
+; The fully unswitched loop around `@b`.
+;
+; CHECK: [[ENTRY_SPLIT_US_SPLIT]]:
+; CHECK-NEXT: br label %[[OUTER_HEADER_US:.*]]
+;
+; CHECK: [[OUTER_HEADER_US]]:
+; CHECK-NEXT: br label %[[OUTER_HEADER_SPLIT_US:.*]]
+;
+; CHECK: [[INNER_HEADER_US:.*]]:
+; CHECK-NEXT: br label %[[INNER_BODY2_US:.*]]
+;
+; CHECK: [[INNER_BODY2_US]]:
+; CHECK-NEXT: %[[B:.*]] = call i32 @b()
+; CHECK-NEXT: br label %[[INNER_LATCH_US:.*]]
+;
+; CHECK: [[INNER_LATCH_US]]:
+; CHECK-NEXT: call void @sink1(i32 0)
+; CHECK-NEXT: call void @sink1(i32 0)
+; CHECK-NEXT: call void @sink1(i32 0)
+; CHECK-NEXT: call void @sink1(i32 0)
+; CHECK-NEXT: call void @sink1(i32 0)
+; CHECK-NEXT: call void @sink1(i32 0)
+; CHECK-NEXT: call void @sink1(i32 0)
+; CHECK-NEXT: call void @sink1(i32 0)
+; CHECK-NEXT: call void @sink1(i32 0)
+; CHECK-NEXT: call void @sink1(i32 0)
+; CHECK-NEXT: call void @sink1(i32 %[[B]])
+; CHECK-NEXT: %[[INNER_COND_US:.*]] = call i1 @cond()
+; CHECK-NEXT: br i1 %[[INNER_COND_US]], label %[[INNER_HEADER_US]], label %[[INNER_LOOPEXIT2_SPLIT_US:.*]]
+;
+; CHECK: [[INNER_LOOPEXIT2_SPLIT_US]]:
+; CHECK-NEXT: br label %[[INNER_LOOPEXIT2_US:.*]]
+;
+; CHECK: [[OUTER_LATCH_US:.*]]:
+; CHECK-NEXT: %[[OUTER_COND_US:.*]] = call i1 @cond()
+; CHECK-NEXT: br i1 %[[OUTER_COND_US]], label %[[OUTER_HEADER_US]], label %[[EXIT_SPLIT_US_SPLIT:.*]]
+;
+; CHECK: [[OUTER_HEADER_SPLIT_US]]:
+; CHECK-NEXT: br label %[[OUTER_HEADER_SPLIT_SPLIT_US:.*]]
+;
+; CHECK: [[OUTER_HEADER_SPLIT_SPLIT_US]]:
+; CHECK-NEXT: br label %[[INNER_HEADER_US]]
+;
+; CHECK: [[INNER_LOOPEXIT2_US]]:
+; CHECK-NEXT: br label %[[OUTER_LATCH_US]]
+;
+; CHECK: [[EXIT_SPLIT_US]]:
+; CHECK-NEXT: br label %exit
+
+inner.latch:
+ %phi = phi i32 [ %a, %inner.body1 ], [ %b, %inner.body2 ] ; merges the results of the two in-loop switch cases
+ ; Make 10 junk calls here to ensure we're over the "50" cost threshold of
+ ; non-trivial unswitching for this inner switch.
+ call void @sink1(i32 0)
+ call void @sink1(i32 0)
+ call void @sink1(i32 0)
+ call void @sink1(i32 0)
+ call void @sink1(i32 0)
+ call void @sink1(i32 0)
+ call void @sink1(i32 0)
+ call void @sink1(i32 0)
+ call void @sink1(i32 0)
+ call void @sink1(i32 0)
+ call void @sink1(i32 %phi)
+ %inner.cond = call i1 @cond()
+ br i1 %inner.cond, label %inner.header, label %inner.loopexit2 ; inner backedge, or the second inner-loop exit
+
+inner.loopexit1: ; reached from the default edge of the switch in inner.header
+ br label %outer.latch
+; The unswitched `loopexit1` path.
+;
+; CHECK: [[ENTRY_SPLIT]]:
+; CHECK-NEXT: br label %[[OUTER_HEADER:.*]]
+;
+; CHECK: outer.header:
+; CHECK-NEXT: br label %inner.loopexit1
+;
+; CHECK: inner.loopexit1:
+; CHECK-NEXT: br label %outer.latch
+;
+; CHECK: outer.latch:
+; CHECK-NEXT: %outer.cond = call i1 @cond()
+; CHECK-NEXT: br i1 %outer.cond, label %outer.header, label %[[EXIT_SPLIT:.*]]
+;
+; CHECK: [[EXIT_SPLIT]]:
+; CHECK-NEXT: br label %exit
+
+inner.loopexit2: ; reached from inner.latch when %inner.cond is false
+ br label %outer.latch
+
+outer.latch:
+ %outer.cond = call i1 @cond()
+ br i1 %outer.cond, label %outer.header, label %exit
+
+exit:
+ ret void
+; CHECK: exit:
+; CHECK-NEXT: ret void
+}
diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/pr37888.ll b/llvm/test/Transforms/SimpleLoopUnswitch/pr37888.ll
new file mode 100644
index 00000000000..e8e34a2e882
--- /dev/null
+++ b/llvm/test/Transforms/SimpleLoopUnswitch/pr37888.ll
@@ -0,0 +1,39 @@
+; RUN: opt -simple-loop-unswitch -loop-deletion -S < %s | FileCheck %s
+; RUN: opt -simple-loop-unswitch -enable-mssa-loop-dependency=true -verify-memoryssa -loop-deletion -S < %s | FileCheck %s
+;
+; Check that when we do unswitching where we re-enqueue the loop to be processed
+; again, but manage to delete the loop before ever getting to iterate on it, it
+; doesn't crash the legacy pass manager.
+
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @pr37888() { ; single loop for.body -> if.end -> for.inc whose first branch tests a value computed in entry
+; CHECK-LABEL: define void @pr37888()
+entry:
+ %tobool = icmp ne i16 undef, 0 ; defined before the loop, so it does not change across iterations
+ br label %for.body
+; CHECK: %[[TOBOOL:.*]] = icmp ne
+; CHECK-NEXT: br i1 %[[TOBOOL]], label %if.then, label %[[ENTRY_SPLIT:.*]]
+;
+; CHECK: [[ENTRY_SPLIT]]:
+; CHECK-NEXT: br label %for.end
+
+for.body:
+ br i1 %tobool, label %if.then, label %if.end ; the checks expect this branch to end up in entry
+
+if.then: ; leaves the loop and never returns
+ unreachable
+; CHECK: if.then:
+; CHECK-NEXT: unreachable
+
+if.end:
+ br label %for.inc
+
+for.inc:
+ br i1 undef, label %for.body, label %for.end ; backedge taken on an undef condition
+
+for.end:
+ ret void
+; CHECK: for.end:
+; CHECK-NEXT: ret void
+}
diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/preserve-analyses.ll b/llvm/test/Transforms/SimpleLoopUnswitch/preserve-analyses.ll
new file mode 100644
index 00000000000..114825348da
--- /dev/null
+++ b/llvm/test/Transforms/SimpleLoopUnswitch/preserve-analyses.ll
@@ -0,0 +1,130 @@
+; RUN: opt -simple-loop-unswitch -verify-loop-info -verify-dom-info -disable-output < %s
+; RUN: opt -simple-loop-unswitch -verify-loop-info -verify-dom-info -enable-mssa-loop-dependency=true -verify-memoryssa -disable-output < %s
+
+; Loop unswitch should be able to unswitch these loops and
+; preserve LCSSA and LoopSimplify forms.
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:64"
+target triple = "armv6-apple-darwin9"
+
+@delim1 = external global i32 ; <i32*> [#uses=1]
+@delim2 = external global i32 ; <i32*> [#uses=1]
+
+define i32 @ineqn(i8* %s, i8* %p) nounwind readonly { ; two-level nest: outer loop headed by bb8.outer, inner loop headed by bb8
+entry:
+ %0 = load i32, i32* @delim1, align 4 ; <i32> [#uses=1]
+ %1 = load i32, i32* @delim2, align 4 ; <i32> [#uses=1]
+ br label %bb8.outer
+
+bb: ; preds = %bb8
+ %2 = icmp eq i8* %p_addr.0, %s ; <i1> [#uses=1]
+ br i1 %2, label %bb10, label %bb2
+
+bb2: ; preds = %bb
+ %3 = getelementptr inbounds i8, i8* %p_addr.0, i32 1 ; <i8*> [#uses=3]
+ switch i32 %ineq.0.ph, label %bb8.backedge [ ; %ineq.0.ph is a phi in bb8.outer, so it is fixed while the inner loop runs
+ i32 0, label %bb3
+ i32 1, label %bb6
+ ]
+
+bb8.backedge: ; preds = %bb6, %bb5, %bb2
+ br label %bb8
+
+bb3: ; preds = %bb2
+ %4 = icmp eq i32 %8, %0 ; <i1> [#uses=1]
+ br i1 %4, label %bb8.outer.loopexit, label %bb5
+
+bb5: ; preds = %bb3
+ br i1 %6, label %bb6, label %bb8.backedge ; %6 is defined in bb8.outer, outside the inner loop
+
+bb6: ; preds = %bb5, %bb2
+ %5 = icmp eq i32 %8, %1 ; <i1> [#uses=1]
+ br i1 %5, label %bb7, label %bb8.backedge
+
+bb7: ; preds = %bb6
+ %.lcssa1 = phi i8* [ %3, %bb6 ] ; <i8*> [#uses=1]
+ br label %bb8.outer.backedge
+
+bb8.outer.backedge: ; preds = %bb8.outer.loopexit, %bb7
+ %.lcssa2 = phi i8* [ %.lcssa1, %bb7 ], [ %.lcssa, %bb8.outer.loopexit ] ; <i8*> [#uses=1]
+ %ineq.0.ph.be = phi i32 [ 0, %bb7 ], [ 1, %bb8.outer.loopexit ] ; <i32> [#uses=1]
+ br label %bb8.outer
+
+bb8.outer.loopexit: ; preds = %bb3
+ %.lcssa = phi i8* [ %3, %bb3 ] ; <i8*> [#uses=1]
+ br label %bb8.outer.backedge
+
+bb8.outer: ; preds = %bb8.outer.backedge, %entry
+ %ineq.0.ph = phi i32 [ 0, %entry ], [ %ineq.0.ph.be, %bb8.outer.backedge ] ; <i32> [#uses=3]
+ %p_addr.0.ph = phi i8* [ %p, %entry ], [ %.lcssa2, %bb8.outer.backedge ] ; <i8*> [#uses=1]
+ %6 = icmp eq i32 %ineq.0.ph, 1 ; <i1> [#uses=1]
+ br label %bb8
+
+bb8: ; preds = %bb8.outer, %bb8.backedge
+ %p_addr.0 = phi i8* [ %p_addr.0.ph, %bb8.outer ], [ %3, %bb8.backedge ] ; <i8*> [#uses=3]
+ %7 = load i8, i8* %p_addr.0, align 1 ; <i8> [#uses=2]
+ %8 = sext i8 %7 to i32 ; <i32> [#uses=2]
+ %9 = icmp eq i8 %7, 0 ; <i1> [#uses=1]
+ br i1 %9, label %bb10, label %bb
+
+bb10: ; preds = %bb8, %bb
+ %.0 = phi i32 [ %ineq.0.ph, %bb ], [ 0, %bb8 ] ; <i32> [#uses=1]
+ ret i32 %.0
+}
+
+; This is a simplified form of ineqn from above. It triggers some
+; different cases in the loop-unswitch code.
+
+define void @simplified_ineqn() nounwind readonly { ; bb2 and bb6 form a cycle nested inside the loop headed by bb8.outer
+entry:
+ br label %bb8.outer
+
+bb8.outer: ; preds = %bb6, %bb2, %entry
+ %x = phi i32 [ 0, %entry ], [ 0, %bb6 ], [ 1, %bb2 ] ; <i32> [#uses=1]
+ br i1 undef, label %return, label %bb2
+
+bb2: ; preds = %bb6, %bb8.outer
+ switch i32 %x, label %bb6 [ ; %x is defined in bb8.outer; case 0 branches back to bb8.outer
+ i32 0, label %bb8.outer
+ ]
+
+bb6: ; preds = %bb2
+ br i1 undef, label %bb8.outer, label %bb2
+
+return: ; preds = %bb8.outer
+ ret void
+}
+
+; This function requires special handling to preserve LCSSA form.
+; PR4934
+
+define void @pnp_check_irq() nounwind noredzone { ; a value defined inside the loop (%call.i25) is used by a phi in a block outside it
+entry:
+ %conv56 = trunc i64 undef to i32 ; <i32> [#uses=1]
+ br label %while.cond.i
+
+while.cond.i: ; preds = %while.cond.i.backedge, %entry
+ %call.i25 = call i8* @pci_get_device() nounwind noredzone ; <i8*> [#uses=2]
+ br i1 undef, label %if.then65, label %while.body.i
+
+while.body.i: ; preds = %while.cond.i
+ br i1 undef, label %if.then31.i.i, label %while.cond.i.backedge
+
+while.cond.i.backedge: ; preds = %if.then31.i.i, %while.body.i
+ br label %while.cond.i
+
+if.then31.i.i: ; preds = %while.body.i
+ switch i32 %conv56, label %while.cond.i.backedge [ ; %conv56 is defined in entry; both cases leave the loop via the same block
+ i32 14, label %if.then42.i.i
+ i32 15, label %if.then42.i.i
+ ]
+
+if.then42.i.i: ; preds = %if.then31.i.i, %if.then31.i.i
+ %call.i25.lcssa48 = phi i8* [ %call.i25, %if.then31.i.i ], [ %call.i25, %if.then31.i.i ] ; <i8*> [#uses=0]
+ unreachable
+
+if.then65: ; preds = %while.cond.i
+ unreachable
+}
+
+declare i8* @pci_get_device() noredzone
diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/trivial-unswitch-iteration.ll b/llvm/test/Transforms/SimpleLoopUnswitch/trivial-unswitch-iteration.ll
new file mode 100644
index 00000000000..18b39ca8082
--- /dev/null
+++ b/llvm/test/Transforms/SimpleLoopUnswitch/trivial-unswitch-iteration.ll
@@ -0,0 +1,42 @@
+; RUN: opt -passes='loop(loop-instsimplify,simplify-cfg,unswitch),verify<loops>' -S < %s | FileCheck %s
+; RUN: opt -enable-mssa-loop-dependency=true -verify-memoryssa -passes='loop(loop-instsimplify,simplify-cfg,unswitch),verify<loops>' -S < %s | FileCheck %s
+
+declare void @some_func() noreturn
+
+define i32 @test1(i32* %var, i1 %cond1, i1 %cond2) { ; the second exit condition only becomes %cond2 once the select on %cond1 is simplified
+; CHECK-LABEL: @test1(
+entry:
+ br label %loop_begin
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 %{{.*}}, label %entry.split, label %loop_exit.split
+;
+; CHECK: entry.split:
+; CHECK-NEXT: br i1 %{{.*}}, label %entry.split.split, label %loop_exit
+;
+; CHECK: entry.split.split:
+; CHECK-NEXT: br label %loop_begin
+
+loop_begin:
+ br i1 %cond1, label %continue, label %loop_exit ; first trivial condition
+
+continue:
+ %var_val = load i32, i32* %var
+ %var_cond = trunc i32 %var_val to i1
+ %maybe_cond = select i1 %cond1, i1 %cond2, i1 %var_cond ; yields %cond2 whenever %cond1 is true, which holds on every path into this block
+ br i1 %maybe_cond, label %do_something, label %loop_exit ; second trivial condition
+
+do_something:
+ call void @some_func() noreturn nounwind
+ br label %loop_begin
+; CHECK: loop_begin:
+; CHECK-NEXT: call
+; CHECK-NEXT: br label %loop_begin
+
+loop_exit:
+ ret i32 0
+; CHECK: loop_exit:
+; CHECK-NEXT: br label %loop_exit.split
+;
+; CHECK: loop_exit.split:
+; CHECK-NEXT: ret
+}
diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/trivial-unswitch.ll b/llvm/test/Transforms/SimpleLoopUnswitch/trivial-unswitch.ll
new file mode 100644
index 00000000000..56a9bac8980
--- /dev/null
+++ b/llvm/test/Transforms/SimpleLoopUnswitch/trivial-unswitch.ll
@@ -0,0 +1,1245 @@
+; RUN: opt -passes='loop(unswitch),verify<loops>' -S < %s | FileCheck %s
+; RUN: opt -enable-mssa-loop-dependency=true -verify-memoryssa -passes='loop(unswitch),verify<loops>' -S < %s | FileCheck %s
+
+declare void @some_func() noreturn
+declare void @sink(i32)
+
+declare i1 @cond()
+declare i32 @cond.i32()
+
+; This test contains two trivial unswitch conditions in one loop.
+; LoopUnswitch pass should be able to unswitch the second one
+; after unswitching the first one.
+define i32 @test1(i32* %var, i1 %cond1, i1 %cond2) { ; both exit conditions are function arguments
+; CHECK-LABEL: @test1(
+entry:
+ br label %loop_begin
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 %{{.*}}, label %entry.split, label %loop_exit.split
+;
+; CHECK: entry.split:
+; CHECK-NEXT: br i1 %{{.*}}, label %entry.split.split, label %loop_exit
+;
+; CHECK: entry.split.split:
+; CHECK-NEXT: br label %loop_begin
+
+loop_begin:
+ br i1 %cond1, label %continue, label %loop_exit ; first trivial condition
+; CHECK: loop_begin:
+; CHECK-NEXT: br label %continue
+
+continue:
+ %var_val = load i32, i32* %var ; result is not used anywhere in this function
+ br i1 %cond2, label %do_something, label %loop_exit ; second trivial condition
+; CHECK: continue:
+; CHECK-NEXT: load
+; CHECK-NEXT: br label %do_something
+
+do_something:
+ call void @some_func() noreturn nounwind
+ br label %loop_begin
+; CHECK: do_something:
+; CHECK-NEXT: call
+; CHECK-NEXT: br label %loop_begin
+
+loop_exit: ; shared by both exiting branches
+ ret i32 0
+; CHECK: loop_exit:
+; CHECK-NEXT: br label %loop_exit.split
+;
+; CHECK: loop_exit.split:
+; CHECK-NEXT: ret
+}
+
+; Test for two trivially unswitchable switches.
+define i32 @test3(i32* %var, i32 %cond1, i32 %cond2) { ; both switch conditions are function arguments
+; CHECK-LABEL: @test3(
+entry:
+ br label %loop_begin
+; CHECK-NEXT: entry:
+; CHECK-NEXT: switch i32 %cond1, label %entry.split [
+; CHECK-NEXT: i32 0, label %loop_exit1
+; CHECK-NEXT: ]
+;
+; CHECK: entry.split:
+; CHECK-NEXT: switch i32 %cond2, label %loop_exit2 [
+; CHECK-NEXT: i32 42, label %loop_exit2
+; CHECK-NEXT: i32 0, label %entry.split.split
+; CHECK-NEXT: ]
+;
+; CHECK: entry.split.split:
+; CHECK-NEXT: br label %loop_begin
+
+loop_begin:
+ switch i32 %cond1, label %continue [ ; case 0 exits the loop; the default stays inside it
+ i32 0, label %loop_exit1
+ ]
+; CHECK: loop_begin:
+; CHECK-NEXT: br label %continue
+
+continue:
+ %var_val = load i32, i32* %var
+ switch i32 %cond2, label %loop_exit2 [ ; only case 0 stays in the loop; case 42 and the default share one exit block
+ i32 0, label %do_something
+ i32 42, label %loop_exit2
+ ]
+; CHECK: continue:
+; CHECK-NEXT: load
+; CHECK-NEXT: br label %do_something
+
+do_something:
+ call void @some_func() noreturn nounwind
+ br label %loop_begin
+; CHECK: do_something:
+; CHECK-NEXT: call
+; CHECK-NEXT: br label %loop_begin
+
+loop_exit1:
+ ret i32 0
+; CHECK: loop_exit1:
+; CHECK-NEXT: ret
+
+loop_exit2:
+ ret i32 0
+; CHECK: loop_exit2:
+; CHECK-NEXT: ret
+;
+; We shouldn't have any unreachable blocks here because the unswitched switches
+; turn into branches instead.
+; CHECK-NOT: unreachable
+}
+
+; Test for a trivially unswitchable switch with multiple exiting cases and
+; multiple looping cases.
+define i32 @test4(i32* %var, i32 %cond1, i32 %cond2) { ; %cond1 is not used in the body; the switch is on %cond2
+; CHECK-LABEL: @test4(
+entry:
+ br label %loop_begin
+; CHECK-NEXT: entry:
+; CHECK-NEXT: switch i32 %cond2, label %loop_exit2 [
+; CHECK-NEXT: i32 13, label %loop_exit1
+; CHECK-NEXT: i32 42, label %loop_exit3
+; CHECK-NEXT: i32 0, label %entry.split
+; CHECK-NEXT: i32 1, label %entry.split
+; CHECK-NEXT: i32 2, label %entry.split
+; CHECK-NEXT: ]
+;
+; CHECK: entry.split:
+; CHECK-NEXT: br label %loop_begin
+
+loop_begin:
+ %var_val = load i32, i32* %var
+ switch i32 %cond2, label %loop_exit2 [ ; cases 0, 1 and 2 stay in the loop; cases 13, 42 and the default exit it
+ i32 0, label %loop0
+ i32 1, label %loop1
+ i32 13, label %loop_exit1
+ i32 2, label %loop2
+ i32 42, label %loop_exit3
+ ]
+; CHECK: loop_begin:
+; CHECK-NEXT: load
+; CHECK-NEXT: switch i32 %cond2, label %loop2 [
+; CHECK-NEXT: i32 0, label %loop0
+; CHECK-NEXT: i32 1, label %loop1
+; CHECK-NEXT: ]
+
+loop0:
+ call void @some_func() noreturn nounwind
+ br label %loop_latch
+; CHECK: loop0:
+; CHECK-NEXT: call
+; CHECK-NEXT: br label %loop_latch
+
+loop1:
+ call void @some_func() noreturn nounwind
+ br label %loop_latch
+; CHECK: loop1:
+; CHECK-NEXT: call
+; CHECK-NEXT: br label %loop_latch
+
+loop2:
+ call void @some_func() noreturn nounwind
+ br label %loop_latch
+; CHECK: loop2:
+; CHECK-NEXT: call
+; CHECK-NEXT: br label %loop_latch
+
+loop_latch: ; common latch for the three in-loop cases
+ br label %loop_begin
+; CHECK: loop_latch:
+; CHECK-NEXT: br label %loop_begin
+
+loop_exit1:
+ ret i32 0
+; CHECK: loop_exit1:
+; CHECK-NEXT: ret
+
+loop_exit2:
+ ret i32 0
+; CHECK: loop_exit2:
+; CHECK-NEXT: ret
+
+loop_exit3:
+ ret i32 0
+; CHECK: loop_exit3:
+; CHECK-NEXT: ret
+}
+
+; This test contains a trivially unswitchable branch with an LCSSA phi node in
+; a loop exit block.
+define i32 @test5(i1 %cond1, i32 %x, i32 %y) { ; the exit block's phis have loop_begin as their only incoming block
+; CHECK-LABEL: @test5(
+entry:
+ br label %loop_begin
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 %{{.*}}, label %entry.split, label %loop_exit
+;
+; CHECK: entry.split:
+; CHECK-NEXT: br label %loop_begin
+
+loop_begin:
+ br i1 %cond1, label %latch, label %loop_exit ; condition is a function argument
+; CHECK: loop_begin:
+; CHECK-NEXT: br label %latch
+
+latch:
+ call void @some_func() noreturn nounwind
+ br label %loop_begin
+; CHECK: latch:
+; CHECK-NEXT: call
+; CHECK-NEXT: br label %loop_begin
+
+loop_exit:
+ %result1 = phi i32 [ %x, %loop_begin ] ; the checks expect the incoming block to become %entry
+ %result2 = phi i32 [ %y, %loop_begin ]
+ %result = add i32 %result1, %result2
+ ret i32 %result
+; CHECK: loop_exit:
+; CHECK-NEXT: %[[R1:.*]] = phi i32 [ %x, %entry ]
+; CHECK-NEXT: %[[R2:.*]] = phi i32 [ %y, %entry ]
+; CHECK-NEXT: %[[R:.*]] = add i32 %[[R1]], %[[R2]]
+; CHECK-NEXT: ret i32 %[[R]]
+}
+
+; This test contains a trivially unswitchable branch with a real phi node in LCSSA
+; position in a shared exit block where a different path through the loop
+; produces a non-invariant input to the PHI node.
+;
+; The checks below expect the exit block to be split: %loop_exit keeps only
+; the inputs coming from %continue and branches to %loop_exit.split, whose
+; phis merge the invariant values arriving from %entry with the values
+; forwarded from %loop_exit.
+define i32 @test6(i32* %var, i1 %cond1, i1 %cond2, i32 %x, i32 %y) {
+; CHECK-LABEL: @test6(
+entry:
+ br label %loop_begin
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 %{{.*}}, label %entry.split, label %loop_exit.split
+;
+; CHECK: entry.split:
+; CHECK-NEXT: br label %loop_begin
+
+loop_begin:
+ br i1 %cond1, label %continue, label %loop_exit
+; CHECK: loop_begin:
+; CHECK-NEXT: br label %continue
+
+continue:
+ %var_val = load i32, i32* %var
+ br i1 %cond2, label %latch, label %loop_exit
+; CHECK: continue:
+; CHECK-NEXT: load
+; CHECK-NEXT: br i1 %cond2, label %latch, label %loop_exit
+
+latch:
+ call void @some_func() noreturn nounwind
+ br label %loop_begin
+; CHECK: latch:
+; CHECK-NEXT: call
+; CHECK-NEXT: br label %loop_begin
+
+loop_exit:
+ %result1 = phi i32 [ %x, %loop_begin ], [ %var_val, %continue ]
+ %result2 = phi i32 [ %var_val, %continue ], [ %y, %loop_begin ]
+ %result = add i32 %result1, %result2
+ ret i32 %result
+; CHECK: loop_exit:
+; CHECK-NEXT: %[[R1:.*]] = phi i32 [ %var_val, %continue ]
+; CHECK-NEXT: %[[R2:.*]] = phi i32 [ %var_val, %continue ]
+; CHECK-NEXT: br label %loop_exit.split
+;
+; CHECK: loop_exit.split:
+; CHECK-NEXT: %[[R1S:.*]] = phi i32 [ %x, %entry ], [ %[[R1]], %loop_exit ]
+; CHECK-NEXT: %[[R2S:.*]] = phi i32 [ %y, %entry ], [ %[[R2]], %loop_exit ]
+; CHECK-NEXT: %[[R:.*]] = add i32 %[[R1S]], %[[R2S]]
+; CHECK-NEXT: ret i32 %[[R]]
+}
+
+; This test contains a trivially unswitchable switch with an LCSSA phi node in
+; a loop exit block.
+;
+; Both switch cases (0 and 1) target %loop_exit, so each exit phi carries two
+; entries for the same predecessor. The checks below expect the switch to be
+; placed in the entry block and both entries of each phi to name %entry.
+define i32 @test7(i32 %cond1, i32 %x, i32 %y) {
+; CHECK-LABEL: @test7(
+entry:
+ br label %loop_begin
+; CHECK-NEXT: entry:
+; CHECK-NEXT: switch i32 %cond1, label %entry.split [
+; CHECK-NEXT: i32 0, label %loop_exit
+; CHECK-NEXT: i32 1, label %loop_exit
+; CHECK-NEXT: ]
+;
+; CHECK: entry.split:
+; CHECK-NEXT: br label %loop_begin
+
+loop_begin:
+ switch i32 %cond1, label %latch [
+ i32 0, label %loop_exit
+ i32 1, label %loop_exit
+ ]
+; CHECK: loop_begin:
+; CHECK-NEXT: br label %latch
+
+latch:
+ call void @some_func() noreturn nounwind
+ br label %loop_begin
+; CHECK: latch:
+; CHECK-NEXT: call
+; CHECK-NEXT: br label %loop_begin
+
+loop_exit:
+ %result1 = phi i32 [ %x, %loop_begin ], [ %x, %loop_begin ]
+ %result2 = phi i32 [ %y, %loop_begin ], [ %y, %loop_begin ]
+ %result = add i32 %result1, %result2
+ ret i32 %result
+; CHECK: loop_exit:
+; CHECK-NEXT: %[[R1:.*]] = phi i32 [ %x, %entry ], [ %x, %entry ]
+; CHECK-NEXT: %[[R2:.*]] = phi i32 [ %y, %entry ], [ %y, %entry ]
+; CHECK-NEXT: %[[R:.*]] = add i32 %[[R1]], %[[R2]]
+; CHECK-NEXT: ret i32 %[[R]]
+}
+
+; This test contains a trivially unswitchable switch with a real phi node in
+; LCSSA position in a shared exit block where a different path through the loop
+; produces a non-invariant input to the PHI node.
+;
+; Cases 0 and 2 of the first switch share %loop_exit with the non-invariant
+; edge from %continue; case 1 has its own exit block %loop_exit2. The checks
+; below expect cases 0 and 2 to be redirected to %loop_exit.split while case 1
+; keeps targeting %loop_exit2 directly. The switch on %cond2 in %continue is
+; expected to stay in the loop.
+define i32 @test8(i32* %var, i32 %cond1, i32 %cond2, i32 %x, i32 %y) {
+; CHECK-LABEL: @test8(
+entry:
+ br label %loop_begin
+; CHECK-NEXT: entry:
+; CHECK-NEXT: switch i32 %cond1, label %entry.split [
+; CHECK-NEXT: i32 0, label %loop_exit.split
+; CHECK-NEXT: i32 1, label %loop_exit2
+; CHECK-NEXT: i32 2, label %loop_exit.split
+; CHECK-NEXT: ]
+;
+; CHECK: entry.split:
+; CHECK-NEXT: br label %loop_begin
+
+loop_begin:
+ switch i32 %cond1, label %continue [
+ i32 0, label %loop_exit
+ i32 1, label %loop_exit2
+ i32 2, label %loop_exit
+ ]
+; CHECK: loop_begin:
+; CHECK-NEXT: br label %continue
+
+continue:
+ %var_val = load i32, i32* %var
+ switch i32 %cond2, label %latch [
+ i32 0, label %loop_exit
+ ]
+; CHECK: continue:
+; CHECK-NEXT: load
+; CHECK-NEXT: switch i32 %cond2, label %latch [
+; CHECK-NEXT: i32 0, label %loop_exit
+; CHECK-NEXT: ]
+
+latch:
+ call void @some_func() noreturn nounwind
+ br label %loop_begin
+; CHECK: latch:
+; CHECK-NEXT: call
+; CHECK-NEXT: br label %loop_begin
+
+loop_exit:
+ %result1.1 = phi i32 [ %x, %loop_begin ], [ %x, %loop_begin ], [ %var_val, %continue ]
+ %result1.2 = phi i32 [ %var_val, %continue ], [ %y, %loop_begin ], [ %y, %loop_begin ]
+ %result1 = add i32 %result1.1, %result1.2
+ ret i32 %result1
+; CHECK: loop_exit:
+; CHECK-NEXT: %[[R1:.*]] = phi i32 [ %var_val, %continue ]
+; CHECK-NEXT: %[[R2:.*]] = phi i32 [ %var_val, %continue ]
+; CHECK-NEXT: br label %loop_exit.split
+;
+; CHECK: loop_exit.split:
+; CHECK-NEXT: %[[R1S:.*]] = phi i32 [ %x, %entry ], [ %x, %entry ], [ %[[R1]], %loop_exit ]
+; CHECK-NEXT: %[[R2S:.*]] = phi i32 [ %y, %entry ], [ %y, %entry ], [ %[[R2]], %loop_exit ]
+; CHECK-NEXT: %[[R:.*]] = add i32 %[[R1S]], %[[R2S]]
+; CHECK-NEXT: ret i32 %[[R]]
+
+loop_exit2:
+ %result2.1 = phi i32 [ %x, %loop_begin ]
+ %result2.2 = phi i32 [ %y, %loop_begin ]
+ %result2 = add i32 %result2.1, %result2.2
+ ret i32 %result2
+; CHECK: loop_exit2:
+; CHECK-NEXT: %[[R1:.*]] = phi i32 [ %x, %entry ]
+; CHECK-NEXT: %[[R2:.*]] = phi i32 [ %y, %entry ]
+; CHECK-NEXT: %[[R:.*]] = add i32 %[[R1]], %[[R2]]
+; CHECK-NEXT: ret i32 %[[R]]
+}
+
+; This test, extracted from the LLVM test suite, has an interesting dominator
+; tree to update as there are edges to sibling domtree nodes within child
+; domtree nodes of the unswitched node.
+;
+; The checks below expect the branch on %cond1 to move from %for.cond into
+; %for.cond.preheader (with a new %for.cond.preheader.split block), while the
+; branch on the volatile load %cond2 stays inside the loop.
+define void @xgets(i1 %cond1, i1* %cond2.ptr) {
+; CHECK-LABEL: @xgets(
+entry:
+ br label %for.cond.preheader
+; CHECK: entry:
+; CHECK-NEXT: br label %for.cond.preheader
+
+for.cond.preheader:
+ br label %for.cond
+; CHECK: for.cond.preheader:
+; CHECK-NEXT: br i1 %cond1, label %for.cond.preheader.split, label %if.end17.thread.loopexit
+;
+; CHECK: for.cond.preheader.split:
+; CHECK-NEXT: br label %for.cond
+
+for.cond:
+ br i1 %cond1, label %land.lhs.true, label %if.end17.thread.loopexit
+; CHECK: for.cond:
+; CHECK-NEXT: br label %land.lhs.true
+
+land.lhs.true:
+ br label %if.then20
+; CHECK: land.lhs.true:
+; CHECK-NEXT: br label %if.then20
+
+if.then20:
+ %cond2 = load volatile i1, i1* %cond2.ptr
+ br i1 %cond2, label %if.then23, label %if.else
+; CHECK: if.then20:
+; CHECK-NEXT: %[[COND2:.*]] = load volatile i1, i1* %cond2.ptr
+; CHECK-NEXT: br i1 %[[COND2]], label %if.then23, label %if.else
+
+if.else:
+ br label %for.cond
+; CHECK: if.else:
+; CHECK-NEXT: br label %for.cond
+
+if.end17.thread.loopexit:
+ br label %if.end17.thread
+; CHECK: if.end17.thread.loopexit:
+; CHECK-NEXT: br label %if.end17.thread
+
+if.end17.thread:
+ br label %cleanup
+; CHECK: if.end17.thread:
+; CHECK-NEXT: br label %cleanup
+
+if.then23:
+ br label %cleanup
+; CHECK: if.then23:
+; CHECK-NEXT: br label %cleanup
+
+cleanup:
+ ret void
+; CHECK: cleanup:
+; CHECK-NEXT: ret void
+}
+
+; The loop has a branch on the invariant %cond1 and a second branch on
+; (%var_cond and %cond2), where %var_cond comes from a load inside the loop
+; and only %cond2 is a function argument.
+;
+; The checks below expect both %cond1 and %cond2 to be tested before the loop
+; (in %entry and %entry.split respectively), and the %cond2 operand of the
+; in-loop `and` to be replaced by the constant true.
+define i32 @test_partial_condition_unswitch_and(i32* %var, i1 %cond1, i1 %cond2) {
+; CHECK-LABEL: @test_partial_condition_unswitch_and(
+entry:
+ br label %loop_begin
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 %cond1, label %entry.split, label %loop_exit.split
+;
+; CHECK: entry.split:
+; CHECK-NEXT: br i1 %cond2, label %entry.split.split, label %loop_exit
+;
+; CHECK: entry.split.split:
+; CHECK-NEXT: br label %loop_begin
+
+loop_begin:
+ br i1 %cond1, label %continue, label %loop_exit
+; CHECK: loop_begin:
+; CHECK-NEXT: br label %continue
+
+continue:
+ %var_val = load i32, i32* %var
+ %var_cond = trunc i32 %var_val to i1
+ %cond_and = and i1 %var_cond, %cond2
+ br i1 %cond_and, label %do_something, label %loop_exit
+; CHECK: continue:
+; CHECK-NEXT: %[[VAR:.*]] = load i32
+; CHECK-NEXT: %[[VAR_COND:.*]] = trunc i32 %[[VAR]] to i1
+; CHECK-NEXT: %[[COND_AND:.*]] = and i1 %[[VAR_COND]], true
+; CHECK-NEXT: br i1 %[[COND_AND]], label %do_something, label %loop_exit
+
+do_something:
+ call void @some_func() noreturn nounwind
+ br label %loop_begin
+; CHECK: do_something:
+; CHECK-NEXT: call
+; CHECK-NEXT: br label %loop_begin
+
+loop_exit:
+ ret i32 0
+; CHECK: loop_exit:
+; CHECK-NEXT: br label %loop_exit.split
+;
+; CHECK: loop_exit.split:
+; CHECK-NEXT: ret
+}
+
+; The loop's branch condition is a tree of `or` instructions that mixes the
+; loop-varying %var_cond with function arguments.
+;
+; %cond1 through %cond4 reach the branch condition only through `or`s; the
+; checks below expect them to be or'ed together in %entry and each replaced by
+; the constant false inside the loop. %cond5 and %cond6 are combined with
+; %var_cond through xor/and first, and are expected to remain as operands in
+; the loop.
+define i32 @test_partial_condition_unswitch_or(i32* %var, i1 %cond1, i1 %cond2, i1 %cond3, i1 %cond4, i1 %cond5, i1 %cond6) {
+; CHECK-LABEL: @test_partial_condition_unswitch_or(
+entry:
+ br label %loop_begin
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %[[INV_OR1:.*]] = or i1 %cond4, %cond2
+; CHECK-NEXT: %[[INV_OR2:.*]] = or i1 %[[INV_OR1]], %cond3
+; CHECK-NEXT: %[[INV_OR3:.*]] = or i1 %[[INV_OR2]], %cond1
+; CHECK-NEXT: br i1 %[[INV_OR3]], label %loop_exit.split, label %entry.split
+;
+; CHECK: entry.split:
+; CHECK-NEXT: br label %loop_begin
+
+loop_begin:
+ %var_val = load i32, i32* %var
+ %var_cond = trunc i32 %var_val to i1
+ %cond_or1 = or i1 %var_cond, %cond1
+ %cond_or2 = or i1 %cond2, %cond3
+ %cond_or3 = or i1 %cond_or1, %cond_or2
+ %cond_xor1 = xor i1 %cond5, %var_cond
+ %cond_and1 = and i1 %cond6, %var_cond
+ %cond_or4 = or i1 %cond_xor1, %cond_and1
+ %cond_or5 = or i1 %cond_or3, %cond_or4
+ %cond_or6 = or i1 %cond_or5, %cond4
+ br i1 %cond_or6, label %loop_exit, label %do_something
+; CHECK: loop_begin:
+; CHECK-NEXT: %[[VAR:.*]] = load i32
+; CHECK-NEXT: %[[VAR_COND:.*]] = trunc i32 %[[VAR]] to i1
+; CHECK-NEXT: %[[COND_OR1:.*]] = or i1 %[[VAR_COND]], false
+; CHECK-NEXT: %[[COND_OR2:.*]] = or i1 false, false
+; CHECK-NEXT: %[[COND_OR3:.*]] = or i1 %[[COND_OR1]], %[[COND_OR2]]
+; CHECK-NEXT: %[[COND_XOR:.*]] = xor i1 %cond5, %[[VAR_COND]]
+; CHECK-NEXT: %[[COND_AND:.*]] = and i1 %cond6, %[[VAR_COND]]
+; CHECK-NEXT: %[[COND_OR4:.*]] = or i1 %[[COND_XOR]], %[[COND_AND]]
+; CHECK-NEXT: %[[COND_OR5:.*]] = or i1 %[[COND_OR3]], %[[COND_OR4]]
+; CHECK-NEXT: %[[COND_OR6:.*]] = or i1 %[[COND_OR5]], false
+; CHECK-NEXT: br i1 %[[COND_OR6]], label %loop_exit, label %do_something
+
+do_something:
+ call void @some_func() noreturn nounwind
+ br label %loop_begin
+; CHECK: do_something:
+; CHECK-NEXT: call
+; CHECK-NEXT: br label %loop_begin
+
+loop_exit:
+ ret i32 0
+; Only the split exit block is matched here; the original %loop_exit block is
+; not checked in this test.
+; CHECK: loop_exit.split:
+; CHECK-NEXT: ret
+}
+
+; Partial unswitch of an `and` condition where the exit block holds an LCSSA
+; phi of the function argument %x.
+;
+; The checks below expect %loop_exit to keep its phi and branch to
+; %loop_exit.split, which gets a new phi merging %x arriving from %entry with
+; the original phi arriving from %loop_exit.
+define i32 @test_partial_condition_unswitch_with_lcssa_phi1(i32* %var, i1 %cond, i32 %x) {
+; CHECK-LABEL: @test_partial_condition_unswitch_with_lcssa_phi1(
+entry:
+ br label %loop_begin
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 %cond, label %entry.split, label %loop_exit.split
+;
+; CHECK: entry.split:
+; CHECK-NEXT: br label %loop_begin
+
+loop_begin:
+ %var_val = load i32, i32* %var
+ %var_cond = trunc i32 %var_val to i1
+ %cond_and = and i1 %var_cond, %cond
+ br i1 %cond_and, label %do_something, label %loop_exit
+; CHECK: loop_begin:
+; CHECK-NEXT: %[[VAR:.*]] = load i32
+; CHECK-NEXT: %[[VAR_COND:.*]] = trunc i32 %[[VAR]] to i1
+; CHECK-NEXT: %[[COND_AND:.*]] = and i1 %[[VAR_COND]], true
+; CHECK-NEXT: br i1 %[[COND_AND]], label %do_something, label %loop_exit
+
+do_something:
+ call void @some_func() noreturn nounwind
+ br label %loop_begin
+; CHECK: do_something:
+; CHECK-NEXT: call
+; CHECK-NEXT: br label %loop_begin
+
+loop_exit:
+ %x.lcssa = phi i32 [ %x, %loop_begin ]
+ ret i32 %x.lcssa
+; CHECK: loop_exit:
+; CHECK-NEXT: %[[LCSSA:.*]] = phi i32 [ %x, %loop_begin ]
+; CHECK-NEXT: br label %loop_exit.split
+;
+; CHECK: loop_exit.split:
+; CHECK-NEXT: %[[LCSSA_SPLIT:.*]] = phi i32 [ %x, %entry ], [ %[[LCSSA]], %loop_exit ]
+; CHECK-NEXT: ret i32 %[[LCSSA_SPLIT]]
+}
+
+; Like @test_partial_condition_unswitch_with_lcssa_phi1, but %loop_exit has a
+; second in-loop predecessor: %do_something branches on %var_cond and
+; contributes %y to the exit phi.
+;
+; The checks below expect the two-input phi to stay in %loop_exit and a new
+; phi in %loop_exit.split to merge it with %x arriving from %entry.
+define i32 @test_partial_condition_unswitch_with_lcssa_phi2(i32* %var, i1 %cond, i32 %x, i32 %y) {
+; CHECK-LABEL: @test_partial_condition_unswitch_with_lcssa_phi2(
+entry:
+ br label %loop_begin
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 %cond, label %entry.split, label %loop_exit.split
+;
+; CHECK: entry.split:
+; CHECK-NEXT: br label %loop_begin
+
+loop_begin:
+ %var_val = load i32, i32* %var
+ %var_cond = trunc i32 %var_val to i1
+ %cond_and = and i1 %var_cond, %cond
+ br i1 %cond_and, label %do_something, label %loop_exit
+; CHECK: loop_begin:
+; CHECK-NEXT: %[[VAR:.*]] = load i32
+; CHECK-NEXT: %[[VAR_COND:.*]] = trunc i32 %[[VAR]] to i1
+; CHECK-NEXT: %[[COND_AND:.*]] = and i1 %[[VAR_COND]], true
+; CHECK-NEXT: br i1 %[[COND_AND]], label %do_something, label %loop_exit
+
+do_something:
+ call void @some_func() noreturn nounwind
+ br i1 %var_cond, label %loop_begin, label %loop_exit
+; CHECK: do_something:
+; CHECK-NEXT: call
+; CHECK-NEXT: br i1 %[[VAR_COND]], label %loop_begin, label %loop_exit
+
+loop_exit:
+ %xy.lcssa = phi i32 [ %x, %loop_begin ], [ %y, %do_something ]
+ ret i32 %xy.lcssa
+; CHECK: loop_exit:
+; CHECK-NEXT: %[[LCSSA:.*]] = phi i32 [ %x, %loop_begin ], [ %y, %do_something ]
+; CHECK-NEXT: br label %loop_exit.split
+;
+; CHECK: loop_exit.split:
+; CHECK-NEXT: %[[LCSSA_SPLIT:.*]] = phi i32 [ %x, %entry ], [ %[[LCSSA]], %loop_exit ]
+; CHECK-NEXT: ret i32 %[[LCSSA_SPLIT]]
+}
+
+; Unswitch will not actually change the loop nest from:
+; A < B < C
+;
+; The branch on %v1 in %c.header uses a value defined in %b.header. The checks
+; below expect that branch to move into %b.header, with a new split block
+; leading to %c.header and %b.latch split so that the hoisted branch targets
+; the block holding the %v3 call.
+define void @hoist_inner_loop0() {
+; CHECK-LABEL: define void @hoist_inner_loop0(
+entry:
+ br label %a.header
+; CHECK: entry:
+; CHECK-NEXT: br label %a.header
+
+a.header:
+ br label %b.header
+; CHECK: a.header:
+; CHECK-NEXT: br label %b.header
+
+b.header:
+ %v1 = call i1 @cond()
+ br label %c.header
+; CHECK: b.header:
+; CHECK-NEXT: %v1 = call i1 @cond()
+; CHECK-NEXT: br i1 %v1, label %[[B_LATCH_SPLIT:.*]], label %[[B_HEADER_SPLIT:.*]]
+;
+; CHECK: [[B_HEADER_SPLIT]]:
+; CHECK-NEXT: br label %c.header
+
+c.header:
+ br i1 %v1, label %b.latch, label %c.latch
+; CHECK: c.header:
+; CHECK-NEXT: br label %c.latch
+
+c.latch:
+ %v2 = call i1 @cond()
+ br i1 %v2, label %c.header, label %b.latch
+; CHECK: c.latch:
+; CHECK-NEXT: %v2 = call i1 @cond()
+; CHECK-NEXT: br i1 %v2, label %c.header, label %b.latch
+
+b.latch:
+ %v3 = call i1 @cond()
+ br i1 %v3, label %b.header, label %a.latch
+; CHECK: b.latch:
+; CHECK-NEXT: br label %[[B_LATCH_SPLIT]]
+;
+; CHECK: [[B_LATCH_SPLIT]]:
+; CHECK-NEXT: %v3 = call i1 @cond()
+; CHECK-NEXT: br i1 %v3, label %b.header, label %a.latch
+
+a.latch:
+ br label %a.header
+; CHECK: a.latch:
+; CHECK-NEXT: br label %a.header
+
+; No branch in this function targets %exit.
+exit:
+ ret void
+; CHECK: exit:
+; CHECK-NEXT: ret void
+}
+
+; Unswitch will transform the loop nest from:
+; A < B < C
+; into
+; A < (B, C)
+;
+; The checks below expect the branch on %v1 to move from %c.header into
+; %b.header, and an LCSSA phi for %x.b to appear in the new split block. %x.a
+; is expected to still be used directly by the store in %c.latch.
+define void @hoist_inner_loop1(i32* %ptr) {
+; CHECK-LABEL: define void @hoist_inner_loop1(
+entry:
+ br label %a.header
+; CHECK: entry:
+; CHECK-NEXT: br label %a.header
+
+a.header:
+ %x.a = load i32, i32* %ptr
+ br label %b.header
+; CHECK: a.header:
+; CHECK-NEXT: %x.a = load i32, i32* %ptr
+; CHECK-NEXT: br label %b.header
+
+b.header:
+ %x.b = load i32, i32* %ptr
+ %v1 = call i1 @cond()
+ br label %c.header
+; CHECK: b.header:
+; CHECK-NEXT: %x.b = load i32, i32* %ptr
+; CHECK-NEXT: %v1 = call i1 @cond()
+; CHECK-NEXT: br i1 %v1, label %b.latch, label %[[B_HEADER_SPLIT:.*]]
+;
+; CHECK: [[B_HEADER_SPLIT]]:
+; CHECK-NEXT: %[[X_B_LCSSA:.*]] = phi i32 [ %x.b, %b.header ]
+; CHECK-NEXT: br label %c.header
+
+c.header:
+ br i1 %v1, label %b.latch, label %c.latch
+; CHECK: c.header:
+; CHECK-NEXT: br label %c.latch
+
+c.latch:
+ ; Use values from other loops to check LCSSA form.
+ store i32 %x.a, i32* %ptr
+ store i32 %x.b, i32* %ptr
+ %v2 = call i1 @cond()
+ br i1 %v2, label %c.header, label %a.exit.c
+; CHECK: c.latch:
+; CHECK-NEXT: store i32 %x.a, i32* %ptr
+; CHECK-NEXT: store i32 %[[X_B_LCSSA]], i32* %ptr
+; CHECK-NEXT: %v2 = call i1 @cond()
+; CHECK-NEXT: br i1 %v2, label %c.header, label %a.exit.c
+
+b.latch:
+ %v3 = call i1 @cond()
+ br i1 %v3, label %b.header, label %a.exit.b
+; CHECK: b.latch:
+; CHECK-NEXT: %v3 = call i1 @cond()
+; CHECK-NEXT: br i1 %v3, label %b.header, label %a.exit.b
+
+a.exit.c:
+ br label %a.latch
+; The trailing colon anchors this match to the block label, like the other
+; block checks in this function.
+; CHECK: a.exit.c:
+; CHECK-NEXT: br label %a.latch
+
+a.exit.b:
+ br label %a.latch
+; CHECK: a.exit.b:
+; CHECK-NEXT: br label %a.latch
+
+a.latch:
+ br label %a.header
+; CHECK: a.latch:
+; CHECK-NEXT: br label %a.header
+
+exit:
+ ret void
+; CHECK: exit:
+; CHECK-NEXT: ret void
+}
+
+; Unswitch will transform the loop nest from:
+; A < B < C
+; into
+; (A < B), C
+;
+; Here %c.latch exits to %exit. The checks below expect the branch on %v1 to
+; move into %b.header and LCSSA phis for both %x.a and %x.b to appear in the
+; new split block and feed the stores in %c.latch.
+define void @hoist_inner_loop2(i32* %ptr) {
+; CHECK-LABEL: define void @hoist_inner_loop2(
+entry:
+ br label %a.header
+; CHECK: entry:
+; CHECK-NEXT: br label %a.header
+
+a.header:
+ %x.a = load i32, i32* %ptr
+ br label %b.header
+; CHECK: a.header:
+; CHECK-NEXT: %x.a = load i32, i32* %ptr
+; CHECK-NEXT: br label %b.header
+
+b.header:
+ %x.b = load i32, i32* %ptr
+ %v1 = call i1 @cond()
+ br label %c.header
+; CHECK: b.header:
+; CHECK-NEXT: %x.b = load i32, i32* %ptr
+; CHECK-NEXT: %v1 = call i1 @cond()
+; CHECK-NEXT: br i1 %v1, label %b.latch, label %[[B_HEADER_SPLIT:.*]]
+;
+; CHECK: [[B_HEADER_SPLIT]]:
+; CHECK-NEXT: %[[X_A_LCSSA:.*]] = phi i32 [ %x.a, %b.header ]
+; CHECK-NEXT: %[[X_B_LCSSA:.*]] = phi i32 [ %x.b, %b.header ]
+; CHECK-NEXT: br label %c.header
+
+c.header:
+ br i1 %v1, label %b.latch, label %c.latch
+; CHECK: c.header:
+; CHECK-NEXT: br label %c.latch
+
+c.latch:
+ ; Use values from other loops to check LCSSA form.
+ store i32 %x.a, i32* %ptr
+ store i32 %x.b, i32* %ptr
+ %v2 = call i1 @cond()
+ br i1 %v2, label %c.header, label %exit
+; CHECK: c.latch:
+; CHECK-NEXT: store i32 %[[X_A_LCSSA]], i32* %ptr
+; CHECK-NEXT: store i32 %[[X_B_LCSSA]], i32* %ptr
+; CHECK-NEXT: %v2 = call i1 @cond()
+; CHECK-NEXT: br i1 %v2, label %c.header, label %exit
+
+b.latch:
+ %v3 = call i1 @cond()
+ br i1 %v3, label %b.header, label %a.latch
+; CHECK: b.latch:
+; CHECK-NEXT: %v3 = call i1 @cond()
+; CHECK-NEXT: br i1 %v3, label %b.header, label %a.latch
+
+a.latch:
+ br label %a.header
+; CHECK: a.latch:
+; CHECK-NEXT: br label %a.header
+
+exit:
+ ret void
+; CHECK: exit:
+; CHECK-NEXT: ret void
+}
+
+; Same as @hoist_inner_loop2 but with a nested loop inside the hoisted loop.
+; Unswitch will transform the loop nest from:
+; A < B < C < D
+; into
+; (A < B), (C < D)
+;
+; The checks below expect LCSSA phis for %x.a and %x.b in the new split block,
+; while %x.c (loaded in %c.body) is still used directly by the store in
+; %d.header.
+define void @hoist_inner_loop3(i32* %ptr) {
+; CHECK-LABEL: define void @hoist_inner_loop3(
+entry:
+ br label %a.header
+; CHECK: entry:
+; CHECK-NEXT: br label %a.header
+
+a.header:
+ %x.a = load i32, i32* %ptr
+ br label %b.header
+; CHECK: a.header:
+; CHECK-NEXT: %x.a = load i32, i32* %ptr
+; CHECK-NEXT: br label %b.header
+
+b.header:
+ %x.b = load i32, i32* %ptr
+ %v1 = call i1 @cond()
+ br label %c.header
+; CHECK: b.header:
+; CHECK-NEXT: %x.b = load i32, i32* %ptr
+; CHECK-NEXT: %v1 = call i1 @cond()
+; CHECK-NEXT: br i1 %v1, label %b.latch, label %[[B_HEADER_SPLIT:.*]]
+;
+; CHECK: [[B_HEADER_SPLIT]]:
+; CHECK-NEXT: %[[X_A_LCSSA:.*]] = phi i32 [ %x.a, %b.header ]
+; CHECK-NEXT: %[[X_B_LCSSA:.*]] = phi i32 [ %x.b, %b.header ]
+; CHECK-NEXT: br label %c.header
+
+c.header:
+ br i1 %v1, label %b.latch, label %c.body
+; CHECK: c.header:
+; CHECK-NEXT: br label %c.body
+
+c.body:
+ %x.c = load i32, i32* %ptr
+ br label %d.header
+; CHECK: c.body:
+; CHECK-NEXT: %x.c = load i32, i32* %ptr
+; CHECK-NEXT: br label %d.header
+
+d.header:
+ ; Use values from other loops to check LCSSA form.
+ store i32 %x.a, i32* %ptr
+ store i32 %x.b, i32* %ptr
+ store i32 %x.c, i32* %ptr
+ %v2 = call i1 @cond()
+ br i1 %v2, label %d.header, label %c.latch
+; CHECK: d.header:
+; CHECK-NEXT: store i32 %[[X_A_LCSSA]], i32* %ptr
+; CHECK-NEXT: store i32 %[[X_B_LCSSA]], i32* %ptr
+; CHECK-NEXT: store i32 %x.c, i32* %ptr
+; CHECK-NEXT: %v2 = call i1 @cond()
+; CHECK-NEXT: br i1 %v2, label %d.header, label %c.latch
+
+c.latch:
+ %v3 = call i1 @cond()
+ br i1 %v3, label %c.header, label %exit
+; CHECK: c.latch:
+; CHECK-NEXT: %v3 = call i1 @cond()
+; CHECK-NEXT: br i1 %v3, label %c.header, label %exit
+
+b.latch:
+ %v4 = call i1 @cond()
+ br i1 %v4, label %b.header, label %a.latch
+; CHECK: b.latch:
+; CHECK-NEXT: %v4 = call i1 @cond()
+; CHECK-NEXT: br i1 %v4, label %b.header, label %a.latch
+
+a.latch:
+ br label %a.header
+; CHECK: a.latch:
+; CHECK-NEXT: br label %a.header
+
+exit:
+ ret void
+; CHECK: exit:
+; CHECK-NEXT: ret void
+}
+
+; This test is designed to exercise checking multiple remaining exits from the
+; loop being unswitched.
+; Unswitch will transform the loop nest from:
+; A < B < C < D
+; into
+; A < B < (C, D)
+;
+; Besides the unswitched edge from %d.header to %c.latch, the D loop leaves
+; through %d.exiting1 (to %a.latch), %d.exiting2 (to %loopexit.d) and
+; %d.exiting3 (to %b.latch). The checks below expect the branch on %v1 to move
+; into %c.header and those three exiting branches to stay unchanged.
+define void @hoist_inner_loop4() {
+; CHECK-LABEL: define void @hoist_inner_loop4(
+entry:
+ br label %a.header
+; CHECK: entry:
+; CHECK-NEXT: br label %a.header
+
+a.header:
+ br label %b.header
+; CHECK: a.header:
+; CHECK-NEXT: br label %b.header
+
+b.header:
+ br label %c.header
+; CHECK: b.header:
+; CHECK-NEXT: br label %c.header
+
+c.header:
+ %v1 = call i1 @cond()
+ br label %d.header
+; CHECK: c.header:
+; CHECK-NEXT: %v1 = call i1 @cond()
+; CHECK-NEXT: br i1 %v1, label %[[C_HEADER_SPLIT:.*]], label %c.latch
+;
+; CHECK: [[C_HEADER_SPLIT]]:
+; CHECK-NEXT: br label %d.header
+
+d.header:
+ br i1 %v1, label %d.exiting1, label %c.latch
+; CHECK: d.header:
+; CHECK-NEXT: br label %d.exiting1
+
+d.exiting1:
+ %v2 = call i1 @cond()
+ br i1 %v2, label %d.exiting2, label %a.latch
+; CHECK: d.exiting1:
+; CHECK-NEXT: %v2 = call i1 @cond()
+; CHECK-NEXT: br i1 %v2, label %d.exiting2, label %a.latch
+
+d.exiting2:
+ %v3 = call i1 @cond()
+ br i1 %v3, label %d.exiting3, label %loopexit.d
+; CHECK: d.exiting2:
+; CHECK-NEXT: %v3 = call i1 @cond()
+; CHECK-NEXT: br i1 %v3, label %d.exiting3, label %loopexit.d
+
+d.exiting3:
+ %v4 = call i1 @cond()
+ br i1 %v4, label %d.latch, label %b.latch
+; CHECK: d.exiting3:
+; CHECK-NEXT: %v4 = call i1 @cond()
+; CHECK-NEXT: br i1 %v4, label %d.latch, label %b.latch
+
+d.latch:
+ br label %d.header
+; CHECK: d.latch:
+; CHECK-NEXT: br label %d.header
+
+c.latch:
+ %v5 = call i1 @cond()
+ br i1 %v5, label %c.header, label %loopexit.c
+; CHECK: c.latch:
+; CHECK-NEXT: %v5 = call i1 @cond()
+; CHECK-NEXT: br i1 %v5, label %c.header, label %loopexit.c
+
+b.latch:
+ br label %b.header
+; CHECK: b.latch:
+; CHECK-NEXT: br label %b.header
+
+a.latch:
+ br label %a.header
+; CHECK: a.latch:
+; CHECK-NEXT: br label %a.header
+
+loopexit.d:
+ br label %exit
+; CHECK: loopexit.d:
+; CHECK-NEXT: br label %exit
+
+loopexit.c:
+ br label %exit
+; CHECK: loopexit.c:
+; CHECK-NEXT: br label %exit
+
+exit:
+ ret void
+; CHECK: exit:
+; CHECK-NEXT: ret void
+}
+
+; Unswitch will transform the loop nest from:
+; A < B < C < D
+; into
+; A < ((B < C), D)
+;
+; The D loop exits to %a.latch. The checks below expect the branch on %v1 to
+; move into %c.header and LCSSA phis for %x.b and %x.c to appear in the new
+; split block; %x.a is expected to still be used directly by the store in
+; %d.latch.
+define void @hoist_inner_loop5(i32* %ptr) {
+; CHECK-LABEL: define void @hoist_inner_loop5(
+entry:
+ br label %a.header
+; CHECK: entry:
+; CHECK-NEXT: br label %a.header
+
+a.header:
+ %x.a = load i32, i32* %ptr
+ br label %b.header
+; CHECK: a.header:
+; CHECK-NEXT: %x.a = load i32, i32* %ptr
+; CHECK-NEXT: br label %b.header
+
+b.header:
+ %x.b = load i32, i32* %ptr
+ br label %c.header
+; CHECK: b.header:
+; CHECK-NEXT: %x.b = load i32, i32* %ptr
+; CHECK-NEXT: br label %c.header
+
+c.header:
+ %x.c = load i32, i32* %ptr
+ %v1 = call i1 @cond()
+ br label %d.header
+; CHECK: c.header:
+; CHECK-NEXT: %x.c = load i32, i32* %ptr
+; CHECK-NEXT: %v1 = call i1 @cond()
+; CHECK-NEXT: br i1 %v1, label %c.latch, label %[[C_HEADER_SPLIT:.*]]
+;
+; CHECK: [[C_HEADER_SPLIT]]:
+; CHECK-NEXT: %[[X_B_LCSSA:.*]] = phi i32 [ %x.b, %c.header ]
+; CHECK-NEXT: %[[X_C_LCSSA:.*]] = phi i32 [ %x.c, %c.header ]
+; CHECK-NEXT: br label %d.header
+
+d.header:
+ br i1 %v1, label %c.latch, label %d.latch
+; CHECK: d.header:
+; CHECK-NEXT: br label %d.latch
+
+d.latch:
+ ; Use values from other loops to check LCSSA form.
+ store i32 %x.a, i32* %ptr
+ store i32 %x.b, i32* %ptr
+ store i32 %x.c, i32* %ptr
+ %v2 = call i1 @cond()
+ br i1 %v2, label %d.header, label %a.latch
+; CHECK: d.latch:
+; CHECK-NEXT: store i32 %x.a, i32* %ptr
+; CHECK-NEXT: store i32 %[[X_B_LCSSA]], i32* %ptr
+; CHECK-NEXT: store i32 %[[X_C_LCSSA]], i32* %ptr
+; CHECK-NEXT: %v2 = call i1 @cond()
+; CHECK-NEXT: br i1 %v2, label %d.header, label %a.latch
+
+c.latch:
+ %v3 = call i1 @cond()
+ br i1 %v3, label %c.header, label %b.latch
+; CHECK: c.latch:
+; CHECK-NEXT: %v3 = call i1 @cond()
+; CHECK-NEXT: br i1 %v3, label %c.header, label %b.latch
+
+b.latch:
+ br label %b.header
+; CHECK: b.latch:
+; CHECK-NEXT: br label %b.header
+
+a.latch:
+ br label %a.header
+; CHECK: a.latch:
+; CHECK-NEXT: br label %a.header
+
+exit:
+ ret void
+; CHECK: exit:
+; CHECK-NEXT: ret void
+}
+
+; Same as `@hoist_inner_loop2` but using a switch.
+; Unswitch will transform the loop nest from:
+; A < B < C
+; into
+; (A < B), C
+;
+; The switch in %c.header sends cases 1, 2 and 3 to %b.latch and defaults to
+; %c.latch. The checks below expect the whole switch to move into %b.header
+; with the new split block as its default destination, and LCSSA phis for %x.a
+; and %x.b in that split block.
+define void @hoist_inner_loop_switch(i32* %ptr) {
+; CHECK-LABEL: define void @hoist_inner_loop_switch(
+entry:
+ br label %a.header
+; CHECK: entry:
+; CHECK-NEXT: br label %a.header
+
+a.header:
+ %x.a = load i32, i32* %ptr
+ br label %b.header
+; CHECK: a.header:
+; CHECK-NEXT: %x.a = load i32, i32* %ptr
+; CHECK-NEXT: br label %b.header
+
+b.header:
+ %x.b = load i32, i32* %ptr
+ %v1 = call i32 @cond.i32()
+ br label %c.header
+; CHECK: b.header:
+; CHECK-NEXT: %x.b = load i32, i32* %ptr
+; CHECK-NEXT: %v1 = call i32 @cond.i32()
+; CHECK-NEXT: switch i32 %v1, label %[[B_HEADER_SPLIT:.*]] [
+; CHECK-NEXT: i32 1, label %b.latch
+; CHECK-NEXT: i32 2, label %b.latch
+; CHECK-NEXT: i32 3, label %b.latch
+; CHECK-NEXT: ]
+;
+; CHECK: [[B_HEADER_SPLIT]]:
+; CHECK-NEXT: %[[X_A_LCSSA:.*]] = phi i32 [ %x.a, %b.header ]
+; CHECK-NEXT: %[[X_B_LCSSA:.*]] = phi i32 [ %x.b, %b.header ]
+; CHECK-NEXT: br label %c.header
+
+c.header:
+ switch i32 %v1, label %c.latch [
+ i32 1, label %b.latch
+ i32 2, label %b.latch
+ i32 3, label %b.latch
+ ]
+; CHECK: c.header:
+; CHECK-NEXT: br label %c.latch
+
+c.latch:
+ ; Use values from other loops to check LCSSA form.
+ store i32 %x.a, i32* %ptr
+ store i32 %x.b, i32* %ptr
+ %v2 = call i1 @cond()
+ br i1 %v2, label %c.header, label %exit
+; CHECK: c.latch:
+; CHECK-NEXT: store i32 %[[X_A_LCSSA]], i32* %ptr
+; CHECK-NEXT: store i32 %[[X_B_LCSSA]], i32* %ptr
+; CHECK-NEXT: %v2 = call i1 @cond()
+; CHECK-NEXT: br i1 %v2, label %c.header, label %exit
+
+b.latch:
+ %v3 = call i1 @cond()
+ br i1 %v3, label %b.header, label %a.latch
+; CHECK: b.latch:
+; CHECK-NEXT: %v3 = call i1 @cond()
+; CHECK-NEXT: br i1 %v3, label %b.header, label %a.latch
+
+a.latch:
+ br label %a.header
+; CHECK: a.latch:
+; CHECK-NEXT: br label %a.header
+
+exit:
+ ret void
+; CHECK: exit:
+; CHECK-NEXT: ret void
+}
+
+; The switch in %header sends cases 0 and 1 to the in-loop block %latch, and
+; the default and case 13 to two different exit blocks.
+;
+; The checks below expect the switch to move into %entry, with cases 0 and 1
+; targeting %entry.split, an unconditional branch left in %header, and the
+; two-entry phi in %latch reduced to a single entry.
+define void @test_unswitch_to_common_succ_with_phis(i32* %var, i32 %cond) {
+; CHECK-LABEL: @test_unswitch_to_common_succ_with_phis(
+entry:
+ br label %header
+; CHECK-NEXT: entry:
+; CHECK-NEXT: switch i32 %cond, label %loopexit1 [
+; CHECK-NEXT: i32 13, label %loopexit2
+; CHECK-NEXT: i32 0, label %entry.split
+; CHECK-NEXT: i32 1, label %entry.split
+; CHECK-NEXT: ]
+;
+; CHECK: entry.split:
+; CHECK-NEXT: br label %header
+
+header:
+ %var_val = load i32, i32* %var
+ switch i32 %cond, label %loopexit1 [
+ i32 0, label %latch
+ i32 1, label %latch
+ i32 13, label %loopexit2
+ ]
+; CHECK: header:
+; CHECK-NEXT: load
+; CHECK-NEXT: br label %latch
+
+latch:
+ ; No-op PHI node to exercise weird PHI update scenarios.
+ %phi = phi i32 [ %var_val, %header ], [ %var_val, %header ]
+ call void @sink(i32 %phi)
+ br label %header
+; CHECK: latch:
+; CHECK-NEXT: %[[PHI:.*]] = phi i32 [ %var_val, %header ]
+; CHECK-NEXT: call void @sink(i32 %[[PHI]])
+; CHECK-NEXT: br label %header
+
+loopexit1:
+ ret void
+; CHECK: loopexit1:
+; CHECK-NEXT: ret
+
+loopexit2:
+ ret void
+; CHECK: loopexit2:
+; CHECK-NEXT: ret
+}
+
+; Here the default destination and cases 0 and 1 of the switch in %header all
+; target the in-loop block %latch, so its phi starts with three entries for
+; %header; only case 13 leaves the loop.
+;
+; The checks below expect a switch in %entry that keeps only case 13 (to
+; %loopexit) and defaults to %entry.split, an unconditional branch left in
+; %header, and the phi in %latch reduced to a single entry.
+define void @test_unswitch_to_default_common_succ_with_phis(i32* %var, i32 %cond) {
+; CHECK-LABEL: @test_unswitch_to_default_common_succ_with_phis(
+entry:
+ br label %header
+; CHECK-NEXT: entry:
+; CHECK-NEXT: switch i32 %cond, label %entry.split [
+; CHECK-NEXT: i32 13, label %loopexit
+; CHECK-NEXT: ]
+;
+; CHECK: entry.split:
+; CHECK-NEXT: br label %header
+
+header:
+ %var_val = load i32, i32* %var
+ switch i32 %cond, label %latch [
+ i32 0, label %latch
+ i32 1, label %latch
+ i32 13, label %loopexit
+ ]
+; CHECK: header:
+; CHECK-NEXT: load
+; CHECK-NEXT: br label %latch
+
+latch:
+ ; No-op PHI node to exercise weird PHI update scenarios.
+ %phi = phi i32 [ %var_val, %header ], [ %var_val, %header ], [ %var_val, %header ]
+ call void @sink(i32 %phi)
+ br label %header
+; CHECK: latch:
+; CHECK-NEXT: %[[PHI:.*]] = phi i32 [ %var_val, %header ]
+; CHECK-NEXT: call void @sink(i32 %[[PHI]])
+; CHECK-NEXT: br label %header
+
+loopexit:
+ ret void
+; CHECK: loopexit:
+; CHECK-NEXT: ret
+}
diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/update-scev.ll b/llvm/test/Transforms/SimpleLoopUnswitch/update-scev.ll
new file mode 100644
index 00000000000..1f4c1421b4e
--- /dev/null
+++ b/llvm/test/Transforms/SimpleLoopUnswitch/update-scev.ll
@@ -0,0 +1,187 @@
+; RUN: opt -passes='print<scalar-evolution>,loop(unswitch,loop-instsimplify),print<scalar-evolution>' -enable-nontrivial-unswitch -S < %s 2>%t.scev | FileCheck %s
+; RUN: opt -enable-mssa-loop-dependency=true -verify-memoryssa -passes='print<scalar-evolution>,loop(unswitch,loop-instsimplify),print<scalar-evolution>' -enable-nontrivial-unswitch -S < %s 2>%t.scev | FileCheck %s
+; RUN: FileCheck %s --check-prefix=SCEV < %t.scev
+
+target triple = "x86_64-unknown-linux-gnu"
+
+declare void @f()
+
+; Check that trivially unswitching a branch in an inner loop resets both the
+; inner and outer loop trip counts.
+define void @test1(i32 %n, i32 %m, i1 %cond) {
+; Check that SCEV has no trip count before unswitching.
+; SCEV-LABEL: Determining loop execution counts for: @test1
+; SCEV: Loop %inner_loop_begin: <multiple exits> Unpredictable backedge-taken count.
+; SCEV: Loop %outer_loop_begin: Unpredictable backedge-taken count.
+;
+; Now check that after unswitching and simplifying instructions we get clean
+; backedge-taken counts.
+; SCEV-LABEL: Determining loop execution counts for: @test1
+; SCEV: Loop %inner_loop_begin: backedge-taken count is (-1 + (1 smax %m))<nsw>
+; SCEV: Loop %outer_loop_begin: backedge-taken count is (-1 + (1 smax %n))<nsw>
+;
+; And verify the code matches what we expect.
+; CHECK-LABEL: define void @test1(
+entry:
+ br label %outer_loop_begin
+; Ensure the outer loop didn't get unswitched.
+; CHECK: entry:
+; CHECK-NEXT: br label %outer_loop_begin
+
+outer_loop_begin:
+ %i = phi i32 [ %i.next, %outer_loop_latch ], [ 0, %entry ]
+ ; Block unswitching of the outer loop with a noduplicate call.
+ call void @f() noduplicate
+ br label %inner_loop_begin
+; Ensure the inner loop got unswitched into the outer loop.
+; CHECK: outer_loop_begin:
+; CHECK-NEXT: %{{.*}} = phi i32
+; CHECK-NEXT: call void @f()
+; CHECK-NEXT: br i1 %cond,
+
+inner_loop_begin:
+ %j = phi i32 [ %j.next, %inner_loop_latch ], [ 0, %outer_loop_begin ]
+ br i1 %cond, label %inner_loop_latch, label %inner_loop_early_exit
+
+inner_loop_latch:
+ %j.next = add nsw i32 %j, 1
+ %j.cmp = icmp slt i32 %j.next, %m
+ br i1 %j.cmp, label %inner_loop_begin, label %inner_loop_late_exit
+
+inner_loop_early_exit:
+ %j.lcssa = phi i32 [ %i, %inner_loop_begin ]
+ br label %outer_loop_latch
+
+inner_loop_late_exit:
+ br label %outer_loop_latch
+
+outer_loop_latch:
+ %i.phi = phi i32 [ %j.lcssa, %inner_loop_early_exit ], [ %i, %inner_loop_late_exit ]
+ %i.next = add nsw i32 %i.phi, 1
+ %i.cmp = icmp slt i32 %i.next, %n
+ br i1 %i.cmp, label %outer_loop_begin, label %exit
+
+exit:
+ ret void
+}
+
+; Check that trivially unswitching a switch in an inner loop resets both the
+; inner and outer loop trip counts.
+define void @test2(i32 %n, i32 %m, i32 %cond) {
+; Check that SCEV has no trip count before unswitching.
+; SCEV-LABEL: Determining loop execution counts for: @test2
+; SCEV: Loop %inner_loop_begin: <multiple exits> Unpredictable backedge-taken count.
+; SCEV: Loop %outer_loop_begin: Unpredictable backedge-taken count.
+;
+; Now check that after unswitching and simplifying instructions we get clean
+; backedge-taken counts.
+; SCEV-LABEL: Determining loop execution counts for: @test2
+; SCEV: Loop %inner_loop_begin: backedge-taken count is (-1 + (1 smax %m))<nsw>
+; SCEV: Loop %outer_loop_begin: backedge-taken count is (-1 + (1 smax %n))<nsw>
+;
+; CHECK-LABEL: define void @test2(
+entry:
+ br label %outer_loop_begin
+; Ensure the outer loop didn't get unswitched.
+; CHECK: entry:
+; CHECK-NEXT: br label %outer_loop_begin
+
+outer_loop_begin:
+ %i = phi i32 [ %i.next, %outer_loop_latch ], [ 0, %entry ]
+ ; Block unswitching of the outer loop with a noduplicate call.
+ call void @f() noduplicate
+ br label %inner_loop_begin
+; Ensure the inner loop got unswitched into the outer loop.
+; CHECK: outer_loop_begin:
+; CHECK-NEXT: %{{.*}} = phi i32
+; CHECK-NEXT: call void @f()
+; CHECK-NEXT: switch i32 %cond,
+
+inner_loop_begin:
+ %j = phi i32 [ %j.next, %inner_loop_latch ], [ 0, %outer_loop_begin ]
+ switch i32 %cond, label %inner_loop_early_exit [
+ i32 1, label %inner_loop_latch
+ i32 2, label %inner_loop_latch
+ ]
+
+inner_loop_latch:
+ %j.next = add nsw i32 %j, 1
+ %j.cmp = icmp slt i32 %j.next, %m
+ br i1 %j.cmp, label %inner_loop_begin, label %inner_loop_late_exit
+
+inner_loop_early_exit:
+ %j.lcssa = phi i32 [ %i, %inner_loop_begin ]
+ br label %outer_loop_latch
+
+inner_loop_late_exit:
+ br label %outer_loop_latch
+
+outer_loop_latch:
+ %i.phi = phi i32 [ %j.lcssa, %inner_loop_early_exit ], [ %i, %inner_loop_late_exit ]
+ %i.next = add nsw i32 %i.phi, 1
+ %i.cmp = icmp slt i32 %i.next, %n
+ br i1 %i.cmp, label %outer_loop_begin, label %exit
+
+exit:
+ ret void
+}
+
+; Check that non-trivial unswitching of a branch in an inner loop into the outer
+; loop invalidates the trip counts of both the inner and outer loops.
+define void @test3(i32 %n, i32 %m, i1 %cond) {
+; Check that SCEV has no trip count before unswitching.
+; SCEV-LABEL: Determining loop execution counts for: @test3
+; SCEV: Loop %inner_loop_begin: <multiple exits> Unpredictable backedge-taken count.
+; SCEV: Loop %outer_loop_begin: Unpredictable backedge-taken count.
+;
+; Now check that after unswitching and simplifying instructions we get clean
+; backedge-taken counts.
+; SCEV-LABEL: Determining loop execution counts for: @test3
+; SCEV: Loop %inner_loop_begin{{.*}}: backedge-taken count is (-1 + (1 smax %m))<nsw>
+; SCEV: Loop %outer_loop_begin: backedge-taken count is (-1 + (1 smax %n))<nsw>
+;
+; And verify the code matches what we expect.
+; CHECK-LABEL: define void @test3(
+entry:
+ br label %outer_loop_begin
+; Ensure the outer loop didn't get unswitched.
+; CHECK: entry:
+; CHECK-NEXT: br label %outer_loop_begin
+
+outer_loop_begin:
+ %i = phi i32 [ %i.next, %outer_loop_latch ], [ 0, %entry ]
+ ; Block unswitching of the outer loop with a noduplicate call.
+ call void @f() noduplicate
+ br label %inner_loop_begin
+; Ensure the inner loop got unswitched into the outer loop.
+; CHECK: outer_loop_begin:
+; CHECK-NEXT: %{{.*}} = phi i32
+; CHECK-NEXT: call void @f()
+; CHECK-NEXT: br i1 %cond,
+
+inner_loop_begin:
+ %j = phi i32 [ %j.next, %inner_loop_latch ], [ 0, %outer_loop_begin ]
+ %j.tmp = add nsw i32 %j, 1 ; feeds %j.lcssa in the early-exit block
+ br i1 %cond, label %inner_loop_latch, label %inner_loop_early_exit
+
+inner_loop_latch:
+ %j.next = add nsw i32 %j, 1
+ %j.cmp = icmp slt i32 %j.next, %m
+ br i1 %j.cmp, label %inner_loop_begin, label %inner_loop_late_exit
+
+inner_loop_early_exit:
+ %j.lcssa = phi i32 [ %j.tmp, %inner_loop_begin ]
+ br label %outer_loop_latch
+
+inner_loop_late_exit:
+ br label %outer_loop_latch
+
+outer_loop_latch:
+ %inc.phi = phi i32 [ %j.lcssa, %inner_loop_early_exit ], [ 1, %inner_loop_late_exit ]
+ %i.next = add nsw i32 %i, %inc.phi
+ %i.cmp = icmp slt i32 %i.next, %n
+ br i1 %i.cmp, label %outer_loop_begin, label %exit
+
+exit:
+ ret void
+}
OpenPOWER on IntegriCloud