17 files changed, 174 insertions, 1 deletions
diff --git a/llvm/test/Transforms/LoopUnroll/basic.ll b/llvm/test/Transforms/LoopUnroll/basic.ll
index 2bfd3e6de8f..e965f2a19c0 100644
--- a/llvm/test/Transforms/LoopUnroll/basic.ll
+++ b/llvm/test/Transforms/LoopUnroll/basic.ll
@@ -1,4 +1,5 @@
 ; RUN: opt < %s -loop-unroll -S | FileCheck %s
+; RUN: opt < %s -passes='require<opt-remark-emit>,loop(unroll)' -S | FileCheck %s
 
 
 ; This should not unroll since the address of the loop header is taken.
diff --git a/llvm/test/Transforms/LoopUnroll/full-unroll-bad-cost.ll b/llvm/test/Transforms/LoopUnroll/full-unroll-bad-cost.ll
index e5694fbeb0c..9b6da8b6203 100644
--- a/llvm/test/Transforms/LoopUnroll/full-unroll-bad-cost.ll
+++ b/llvm/test/Transforms/LoopUnroll/full-unroll-bad-cost.ll
@@ -1,4 +1,5 @@
 ; RUN: opt -S -loop-unroll < %s | FileCheck %s
+; RUN: opt < %s -passes='require<opt-remark-emit>,loop(unroll)' -S | FileCheck %s
 
 ; LLVM should not try to fully unroll this loop.
 
diff --git a/llvm/test/Transforms/LoopUnroll/full-unroll-crashers.ll b/llvm/test/Transforms/LoopUnroll/full-unroll-crashers.ll
index 9f1529139de..64a01a74359 100644
--- a/llvm/test/Transforms/LoopUnroll/full-unroll-crashers.ll
+++ b/llvm/test/Transforms/LoopUnroll/full-unroll-crashers.ll
@@ -1,5 +1,6 @@
 ; Check that we don't crash on corner cases.
 ; RUN: opt < %s -S -loop-unroll -unroll-max-iteration-count-to-analyze=1000 -unroll-threshold=1 -unroll-max-percent-threshold-boost=200 -o /dev/null
+; RUN: opt < %s -S -passes='require<opt-remark-emit>,loop(unroll)' -unroll-max-iteration-count-to-analyze=1000 -unroll-threshold=1 -unroll-max-percent-threshold-boost=200 -o /dev/null
 target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
 
 @known_constant = internal unnamed_addr constant [10 x i32] [i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1], align 16
diff --git a/llvm/test/Transforms/LoopUnroll/full-unroll-heuristics-2.ll b/llvm/test/Transforms/LoopUnroll/full-unroll-heuristics-2.ll
index 26124fb32ca..b83913b1818 100644
--- a/llvm/test/Transforms/LoopUnroll/full-unroll-heuristics-2.ll
+++ b/llvm/test/Transforms/LoopUnroll/full-unroll-heuristics-2.ll
@@ -1,4 +1,5 @@
 ; RUN: opt < %s -S -loop-unroll -unroll-max-iteration-count-to-analyze=1000 -unroll-threshold=10 -unroll-max-percent-threshold-boost=200 | FileCheck %s
+; RUN: opt < %s -S -passes='require<opt-remark-emit>,loop(unroll)' -unroll-max-iteration-count-to-analyze=1000 -unroll-threshold=10 -unroll-max-percent-threshold-boost=200 | FileCheck %s
 target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
 
 @unknown_global = internal unnamed_addr global [9 x i32] [i32 0, i32 -1, i32 0, i32 -1, i32 5, i32 -1, i32 0, i32 -1, i32 0], align 16
diff --git a/llvm/test/Transforms/LoopUnroll/full-unroll-heuristics-cmp.ll b/llvm/test/Transforms/LoopUnroll/full-unroll-heuristics-cmp.ll
index 8bddb1b225c..31b91972fdc 100644
--- a/llvm/test/Transforms/LoopUnroll/full-unroll-heuristics-cmp.ll
+++ b/llvm/test/Transforms/LoopUnroll/full-unroll-heuristics-cmp.ll
@@ -1,4 +1,5 @@
 ; RUN: opt < %s -S -loop-unroll -unroll-max-iteration-count-to-analyze=100 -unroll-threshold=10 -unroll-max-percent-threshold-boost=200 | FileCheck %s
+; RUN: opt < %s -S -passes='require<opt-remark-emit>,loop(unroll)' -unroll-max-iteration-count-to-analyze=100 -unroll-threshold=10 -unroll-max-percent-threshold-boost=200 | FileCheck %s
 target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
 
 @known_constant = internal unnamed_addr constant [10 x i32] [i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1], align 16
diff --git a/llvm/test/Transforms/LoopUnroll/full-unroll-heuristics-dce.ll b/llvm/test/Transforms/LoopUnroll/full-unroll-heuristics-dce.ll
index 83c105ca23f..5a3123df507 100644
--- a/llvm/test/Transforms/LoopUnroll/full-unroll-heuristics-dce.ll
+++ b/llvm/test/Transforms/LoopUnroll/full-unroll-heuristics-dce.ll
@@ -1,4 +1,5 @@
 ; RUN: opt < %s -S -loop-unroll -unroll-max-iteration-count-to-analyze=100 -unroll-threshold=12 -unroll-max-percent-threshold-boost=400 | FileCheck %s
+; RUN: opt < %s -S -passes='require<opt-remark-emit>,loop(unroll)' -unroll-max-iteration-count-to-analyze=100 -unroll-threshold=12 -unroll-max-percent-threshold-boost=400 | FileCheck %s
 target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
 
 @known_constant = internal unnamed_addr constant [10 x i32] [i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0, i32 0], align 16
diff --git a/llvm/test/Transforms/LoopUnroll/full-unroll-heuristics-geps.ll b/llvm/test/Transforms/LoopUnroll/full-unroll-heuristics-geps.ll
index 230912538d2..3e5fe1d889e 100644
--- a/llvm/test/Transforms/LoopUnroll/full-unroll-heuristics-geps.ll
+++ b/llvm/test/Transforms/LoopUnroll/full-unroll-heuristics-geps.ll
@@ -1,4 +1,5 @@
 ; RUN: opt < %s -S -loop-unroll -unroll-max-iteration-count-to-analyze=100 -unroll-threshold=10 -unroll-max-percent-threshold-boost=200 | FileCheck %s
+; RUN: opt < %s -S -passes='require<opt-remark-emit>,loop(unroll)' -unroll-max-iteration-count-to-analyze=100 -unroll-threshold=10 -unroll-max-percent-threshold-boost=200 | FileCheck %s
 target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
 
 ; When examining gep-instructions we shouldn't consider them simplified if the
diff --git a/llvm/test/Transforms/LoopUnroll/full-unroll-heuristics-phi-prop.ll b/llvm/test/Transforms/LoopUnroll/full-unroll-heuristics-phi-prop.ll
index a1fab3cc71e..bfcd9208d8c 100644
--- a/llvm/test/Transforms/LoopUnroll/full-unroll-heuristics-phi-prop.ll
+++ b/llvm/test/Transforms/LoopUnroll/full-unroll-heuristics-phi-prop.ll
@@ -1,4 +1,5 @@
 ; RUN: opt < %s -S -loop-unroll -unroll-max-iteration-count-to-analyze=100 -unroll-threshold=10 -unroll-max-percent-threshold-boost=200 | FileCheck %s
+; RUN: opt < %s -S -passes='require<opt-remark-emit>,loop(unroll)' -unroll-max-iteration-count-to-analyze=100 -unroll-threshold=10 -unroll-max-percent-threshold-boost=200 | FileCheck %s
 target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
 
 define i64 @propagate_loop_phis() {
diff --git a/llvm/test/Transforms/LoopUnroll/full-unroll-keep-first-exit.ll b/llvm/test/Transforms/LoopUnroll/full-unroll-keep-first-exit.ll
index e70ff4156d3..27cef367c37 100644
--- a/llvm/test/Transforms/LoopUnroll/full-unroll-keep-first-exit.ll
+++ b/llvm/test/Transforms/LoopUnroll/full-unroll-keep-first-exit.ll
@@ -1,4 +1,5 @@
 ; RUN: opt -S -loop-unroll < %s | FileCheck %s
+; RUN: opt -S -passes='require<opt-remark-emit>,loop(unroll)' < %s | FileCheck %s
 
 ; Unroll twice, with first loop exit kept
 ; CHECK-LABEL: @s32_max1
diff --git a/llvm/test/Transforms/LoopUnroll/revisit.ll b/llvm/test/Transforms/LoopUnroll/revisit.ll
new file mode 100644
index 00000000000..18ae3658385
--- /dev/null
+++ b/llvm/test/Transforms/LoopUnroll/revisit.ll
@@ -0,0 +1,150 @@
+; This test checks that nested loops are revisited in various scenarios when
+; unrolling. Note that if we ever start doing outer loop peeling, a test case
+; for that should be added here; it would look essentially like a hybrid of the
+; current two cases.
+;
+; RUN: opt < %s -disable-output -debug-pass-manager 2>&1 \
+; RUN:     -passes='require<opt-remark-emit>,loop(unroll)' \
+; RUN:     | FileCheck %s
+;
+; Also run with -unroll-revisit-child-loops, a mode that revisits child loops.
+; RUN: opt < %s -disable-output -debug-pass-manager -unroll-revisit-child-loops 2>&1 \
+; RUN:     -passes='require<opt-remark-emit>,loop(unroll)' \
+; RUN:     | FileCheck %s --check-prefixes=CHECK,CHECK-CHILDREN
+
+; Basic test: loop %l0.0 is fully unrolled, and we then revisit the new sibling
+; loops this creates, including the ones that used to be its child loops.
+define void @full_unroll(i1* %ptr) {
+; CHECK-LABEL: FunctionToLoopPassAdaptor{{.*}} on full_unroll
+; CHECK-NOT: LoopUnrollPass
+
+entry:
+  br label %l0
+
+l0:
+  %cond.0 = load volatile i1, i1* %ptr
+  br i1 %cond.0, label %l0.0.ph, label %exit
+
+l0.0.ph:
+  br label %l0.0
+
+l0.0:
+  %iv = phi i32 [ %iv.next, %l0.0.latch ], [ 0, %l0.0.ph ]
+  %iv.next = add i32 %iv, 1
+  br label %l0.0.0.ph
+
+l0.0.0.ph:
+  br label %l0.0.0
+
+l0.0.0:
+  %cond.0.0.0 = load volatile i1, i1* %ptr
+  br i1 %cond.0.0.0, label %l0.0.0, label %l0.0.1.ph
+; CHECK: LoopUnrollPass on Loop at depth 3 containing: %l0.0.0<header>
+; CHECK-NOT: LoopUnrollPass
+
+l0.0.1.ph:
+  br label %l0.0.1
+
+l0.0.1:
+  %cond.0.0.1 = load volatile i1, i1* %ptr
+  br i1 %cond.0.0.1, label %l0.0.1, label %l0.0.latch
+; CHECK: LoopUnrollPass on Loop at depth 3 containing: %l0.0.1<header>
+; CHECK-NOT: LoopUnrollPass
+
+l0.0.latch:
+  %cmp = icmp slt i32 %iv.next, 2
+  br i1 %cmp, label %l0.0, label %l0.latch
+; CHECK: LoopUnrollPass on Loop at depth 2 containing: %l0.0
+; CHECK-NOT: LoopUnrollPass
+;
+; Unrolling occurs, so what were the inner loops are now visited at depth 2,
+; twice over: first their clones, and then the re-parented original loops.
+; CHECK: LoopUnrollPass on Loop at depth 2 containing: %l0.0.1.1<header>
+; CHECK-NOT: LoopUnrollPass
+; CHECK: LoopUnrollPass on Loop at depth 2 containing: %l0.0.0.1<header>
+; CHECK-NOT: LoopUnrollPass
+; CHECK: LoopUnrollPass on Loop at depth 2 containing: %l0.0.1<header>
+; CHECK-NOT: LoopUnrollPass
+; CHECK: LoopUnrollPass on Loop at depth 2 containing: %l0.0.0<header>
+; CHECK-NOT: LoopUnrollPass
+
+l0.latch:
+  br label %l0
+; CHECK: LoopUnrollPass on Loop at depth 1 containing: %l0<header>
+; CHECK-NOT: LoopUnrollPass
+
+exit:
+  ret void
+}
+
+; Now we test forced runtime partial unrolling via !llvm.loop metadata. Here we
+; duplicate the child loops without changing their structure, so they are not
+; revisited by default, but are when -unroll-revisit-child-loops is passed.
+define void @partial_unroll(i32 %count, i1* %ptr) {
+; CHECK-LABEL: FunctionToLoopPassAdaptor{{.*}} on partial_unroll
+; CHECK-NOT: LoopUnrollPass
+
+entry:
+  br label %l0
+
+l0:
+  %cond.0 = load volatile i1, i1* %ptr
+  br i1 %cond.0, label %l0.0.ph, label %exit
+
+l0.0.ph:
+  br label %l0.0
+
+l0.0:
+  %iv = phi i32 [ %iv.next, %l0.0.latch ], [ 0, %l0.0.ph ]
+  %iv.next = add i32 %iv, 1
+  br label %l0.0.0.ph
+
+l0.0.0.ph:
+  br label %l0.0.0
+
+l0.0.0:
+  %cond.0.0.0 = load volatile i1, i1* %ptr
+  br i1 %cond.0.0.0, label %l0.0.0, label %l0.0.1.ph
+; CHECK: LoopUnrollPass on Loop at depth 3 containing: %l0.0.0<header>
+; CHECK-NOT: LoopUnrollPass
+
+l0.0.1.ph:
+  br label %l0.0.1
+
+l0.0.1:
+  %cond.0.0.1 = load volatile i1, i1* %ptr
+  br i1 %cond.0.0.1, label %l0.0.1, label %l0.0.latch
+; CHECK: LoopUnrollPass on Loop at depth 3 containing: %l0.0.1<header>
+; CHECK-NOT: LoopUnrollPass
+
+l0.0.latch:
+  %cmp = icmp slt i32 %iv.next, %count
+  br i1 %cmp, label %l0.0, label %l0.latch, !llvm.loop !1
+; CHECK: LoopUnrollPass on Loop at depth 2 containing: %l0.0
+; CHECK-NOT: LoopUnrollPass
+;
+; Partial unrolling occurs, which introduces new child loops but no new sibling
+; loops. The child loops are only revisited under the CHECK-CHILDREN run.
+; CHECK-CHILDREN: LoopUnrollPass on Loop at depth 3 containing: %l0.0.0<header>
+; CHECK-CHILDREN-NOT: LoopUnrollPass
+; CHECK-CHILDREN: LoopUnrollPass on Loop at depth 3 containing: %l0.0.1<header>
+; CHECK-CHILDREN-NOT: LoopUnrollPass
+; CHECK-CHILDREN: LoopUnrollPass on Loop at depth 3 containing: %l0.0.0.1<header>
+; CHECK-CHILDREN-NOT: LoopUnrollPass
+; CHECK-CHILDREN: LoopUnrollPass on Loop at depth 3 containing: %l0.0.1.1<header>
+; CHECK-CHILDREN-NOT: LoopUnrollPass
+;
+; When we revisit children, we also revisit the current loop.
+; CHECK-CHILDREN: LoopUnrollPass on Loop at depth 2 containing: %l0.0<header>
+; CHECK-CHILDREN-NOT: LoopUnrollPass
+
+l0.latch:
+  br label %l0
+; CHECK: LoopUnrollPass on Loop at depth 1 containing: %l0<header>
+; CHECK-NOT: LoopUnrollPass
+
+exit:
+  ret void
+}
+!1 = !{!1, !2}
+!2 = !{!"llvm.loop.unroll.count", i32 2}
diff --git a/llvm/test/Transforms/LoopUnroll/runtime-loop.ll b/llvm/test/Transforms/LoopUnroll/runtime-loop.ll
index b5299bb17f8..04661314eb1 100644
--- a/llvm/test/Transforms/LoopUnroll/runtime-loop.ll
+++ b/llvm/test/Transforms/LoopUnroll/runtime-loop.ll
@@ -1,6 +1,9 @@
 ; RUN: opt < %s -S -loop-unroll -unroll-runtime=true -unroll-runtime-epilog=true  | FileCheck %s -check-prefix=EPILOG
 ; RUN: opt < %s -S -loop-unroll -unroll-runtime=true -unroll-runtime-epilog=false | FileCheck %s -check-prefix=PROLOG
 
+; RUN: opt < %s -S -passes='require<opt-remark-emit>,loop(unroll)' -unroll-runtime=true -unroll-runtime-epilog=true  | FileCheck %s -check-prefix=EPILOG
+; RUN: opt < %s -S -passes='require<opt-remark-emit>,loop(unroll)' -unroll-runtime=true -unroll-runtime-epilog=false | FileCheck %s -check-prefix=PROLOG
+
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 
 ; Tests for unrolling loops with run-time trip counts
diff --git a/llvm/test/Transforms/LoopUnroll/runtime-loop1.ll b/llvm/test/Transforms/LoopUnroll/runtime-loop1.ll
index 5d7c6482478..d32c83571b5 100644
--- a/llvm/test/Transforms/LoopUnroll/runtime-loop1.ll
+++ b/llvm/test/Transforms/LoopUnroll/runtime-loop1.ll
@@ -1,6 +1,9 @@
 ; RUN: opt < %s -S -loop-unroll -unroll-runtime -unroll-count=2 -unroll-runtime-epilog=true | FileCheck %s -check-prefix=EPILOG
 ; RUN: opt < %s -S -loop-unroll -unroll-runtime -unroll-count=2 -unroll-runtime-epilog=false | FileCheck %s -check-prefix=PROLOG
 
+; RUN: opt < %s -S -passes='require<opt-remark-emit>,loop(unroll)' -unroll-runtime -unroll-count=2 -unroll-runtime-epilog=true | FileCheck %s -check-prefix=EPILOG
+; RUN: opt < %s -S -passes='require<opt-remark-emit>,loop(unroll)' -unroll-runtime -unroll-count=2 -unroll-runtime-epilog=false | FileCheck %s -check-prefix=PROLOG
+
 ; This tests that setting the unroll count works
 
 
diff --git a/llvm/test/Transforms/LoopUnroll/runtime-loop2.ll b/llvm/test/Transforms/LoopUnroll/runtime-loop2.ll
index 704ecca893a..7e7fb978713 100644
--- a/llvm/test/Transforms/LoopUnroll/runtime-loop2.ll
+++ b/llvm/test/Transforms/LoopUnroll/runtime-loop2.ll
@@ -1,6 +1,9 @@
 ; RUN: opt < %s -S -loop-unroll -unroll-threshold=25 -unroll-partial-threshold=25 -unroll-runtime -unroll-runtime-epilog=true  -unroll-count=8 | FileCheck %s  -check-prefix=EPILOG
 ; RUN: opt < %s -S -loop-unroll -unroll-threshold=25 -unroll-partial-threshold=25 -unroll-runtime -unroll-runtime-epilog=false | FileCheck %s -check-prefix=PROLOG
 
+; RUN: opt < %s -S -passes='require<opt-remark-emit>,loop(unroll)' -unroll-threshold=25 -unroll-partial-threshold=25 -unroll-runtime -unroll-runtime-epilog=true  -unroll-count=8 | FileCheck %s  -check-prefix=EPILOG
+; RUN: opt < %s -S -passes='require<opt-remark-emit>,loop(unroll)' -unroll-threshold=25 -unroll-partial-threshold=25 -unroll-runtime -unroll-runtime-epilog=false | FileCheck %s -check-prefix=PROLOG
+
 ; Choose a smaller, power-of-two, unroll count if the loop is too large.
 ; This test makes sure we're not unrolling 'odd' counts
 
diff --git a/llvm/test/Transforms/LoopUnroll/runtime-loop3.ll b/llvm/test/Transforms/LoopUnroll/runtime-loop3.ll
index fd13ebfa0b8..ef39a29fa89 100644
--- a/llvm/test/Transforms/LoopUnroll/runtime-loop3.ll
+++ b/llvm/test/Transforms/LoopUnroll/runtime-loop3.ll
@@ -1,5 +1,6 @@
 ; REQUIRES: asserts
 ; RUN: opt < %s -disable-output -stats -loop-unroll -unroll-runtime -unroll-threshold=400 -info-output-file - | FileCheck %s --check-prefix=STATS
+; RUN: opt < %s -disable-output -stats -passes='require<opt-remark-emit>,loop(unroll)' -unroll-runtime -unroll-threshold=400 -info-output-file - | FileCheck %s --check-prefix=STATS
 
 ; Test that nested loops can be unrolled.  We need to increase threshold to do it
 
diff --git a/llvm/test/Transforms/LoopUnroll/runtime-loop5.ll b/llvm/test/Transforms/LoopUnroll/runtime-loop5.ll
index e8d51775ce1..6340058411f 100644
--- a/llvm/test/Transforms/LoopUnroll/runtime-loop5.ll
+++ b/llvm/test/Transforms/LoopUnroll/runtime-loop5.ll
@@ -1,6 +1,9 @@
 ; RUN: opt < %s -S -loop-unroll -unroll-runtime=true -unroll-count=16 | FileCheck --check-prefix=UNROLL-16 %s
 ; RUN: opt < %s -S -loop-unroll -unroll-runtime=true -unroll-count=4 | FileCheck --check-prefix=UNROLL-4 %s
 
+; RUN: opt < %s -S -passes='require<opt-remark-emit>,loop(unroll)' -unroll-runtime=true -unroll-count=16 | FileCheck --check-prefix=UNROLL-16 %s
+; RUN: opt < %s -S -passes='require<opt-remark-emit>,loop(unroll)' -unroll-runtime=true -unroll-count=4 | FileCheck --check-prefix=UNROLL-4 %s
+
 ; Given that the trip-count of this loop is a 3-bit value, we cannot
 ; safely unroll it with a count of anything more than 8.
 
diff --git a/llvm/test/Transforms/LoopUnroll/unloop.ll b/llvm/test/Transforms/LoopUnroll/unloop.ll
index db7bad5322c..6af13a55d6b 100644
--- a/llvm/test/Transforms/LoopUnroll/unloop.ll
+++ b/llvm/test/Transforms/LoopUnroll/unloop.ll
@@ -1,5 +1,5 @@
 ; RUN: opt < %s -S -loop-unroll -verify-loop-info | FileCheck %s
-; RUN: opt < %s -S -passes='function(require<scalar-evolution>,require<targetir>,require<opt-remark-emit>,loop(unroll),verify<loops>)' | FileCheck %s
+; RUN: opt < %s -S -passes='require<opt-remark-emit>,loop(unroll),verify<loops>' | FileCheck %s
 ;
 ; Unit tests for LoopInfo::markAsRemoved.
 
diff --git a/llvm/test/Transforms/LoopUnroll/update-loop-info-in-subloops.ll b/llvm/test/Transforms/LoopUnroll/update-loop-info-in-subloops.ll
index adbf47defe8..6748ebefa52 100644
--- a/llvm/test/Transforms/LoopUnroll/update-loop-info-in-subloops.ll
+++ b/llvm/test/Transforms/LoopUnroll/update-loop-info-in-subloops.ll
@@ -1,4 +1,5 @@
 ; RUN: opt -S < %s -loop-unroll -block-freq | FileCheck %s
+; RUN: opt -S < %s -passes='require<opt-remark-emit>,loop(unroll),require<block-freq>' | FileCheck %s
 ; Crasher from PR20987.
 
 ; CHECK: define void @update_loop_info_in_subloops