2 files changed, 54 insertions, 2 deletions
diff --git a/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
index 59408ffe7a7..2b432426b24 100644
--- a/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
@@ -470,8 +470,9 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
                                       bool UseEpilogRemainder,
                                       LoopInfo *LI, ScalarEvolution *SE,
                                       DominatorTree *DT, bool PreserveLCSSA) {
-  // for now, only unroll loops that contain a single exit
-  if (!UnrollRuntimeMultiExit && !L->getExitingBlock())
+  bool hasMultipleExitingBlocks = !L->getExitingBlock();
+  // Support only single exiting block unless UnrollRuntimeMultiExit is true.
+  if (!UnrollRuntimeMultiExit && hasMultipleExitingBlocks)
     return false;
 
   // Make sure the loop is in canonical form.
@@ -516,6 +517,13 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
   // connectEpilog.
   if (!LatchExit->getSinglePredecessor())
     return false;
+  // FIXME: We bail out of multi-exit unrolling when an epilog loop is
+  // generated and L is an inner loop. With multiple exits the outer loop would
+  // be left incorrect: we do not add the EpilogPreheader and the epilog exit to
+  // the outer loop. The prolog case handles this automatically, so prolog
+  // generation does not have this bug.
+  if (hasMultipleExitingBlocks && UseEpilogRemainder && L->getParentLoop())
+    return false;
   // Use Scalar Evolution to compute the trip count. This allows more loops to
   // be unrolled than relying on induction var simplification.
   if (!SE)
diff --git a/llvm/test/Transforms/LoopUnroll/runtime-loop-multiple-exits.ll b/llvm/test/Transforms/LoopUnroll/runtime-loop-multiple-exits.ll
index 18548c8af73..73672e14f78 100644
--- a/llvm/test/Transforms/LoopUnroll/runtime-loop-multiple-exits.ll
+++ b/llvm/test/Transforms/LoopUnroll/runtime-loop-multiple-exits.ll
@@ -434,3 +434,47 @@ loopexit1:                                             ; preds = %header
   %sext3 = phi i32 [ %shft, %header ]
   ret i32 %sext3
 }
+
+; Nested loop where the inner loop is unrolled.
+; FIXME: we cannot unroll with an epilog remainder currently, because
+; the outer loop does not contain the epilog preheader and epilog exit (while
+; in fact it should). This causes us to choke on LCSSA form being incorrect in
+; the outer loop. However, the exit block where LCSSA fails is in fact still
+; within the outer loop. For now, we just bail out when there is an outer loop
+; and an epilog loop would be generated.
+; The outer loop header is the preheader for the inner loop and the inner header
+; branches back to the outer loop.
+define void @test8() {
+; EPILOG: test8(
+; EPILOG-NOT: niter
+
+; PROLOG: test8(
+; PROLOG: outerloop:
+; PROLOG-NEXT: phi i64 [ 3, %bb ], [ 0, %outerloop.loopexit ]
+; PROLOG:      %lcmp.mod = icmp eq i64
+; PROLOG-NEXT: br i1 %lcmp.mod, label %innerH.prol.loopexit, label %innerH.prol.preheader
+; PROLOG: latch.6:
+; PROLOG-NEXT: %tmp4.7 = add nsw i64 %tmp3, 8
+; PROLOG-NEXT: br i1 false, label %outerloop.loopexit.loopexit, label %latch.7
+; PROLOG: latch.7
+; PROLOG-NEXT: %tmp6.7 = icmp ult i64 %tmp4.7, 100
+; PROLOG-NEXT: br i1 %tmp6.7, label %innerH, label %exit.unr-lcssa
+bb:
+  br label %outerloop
+
+outerloop:                                              ; preds = %innerH, %bb
+  %tmp = phi i64 [ 3, %bb ], [ 0, %innerH ] ; 3 on entry from %bb, 0 when re-entered from %innerH
+  br label %innerH ; %outerloop doubles as the preheader of the inner loop
+
+innerH:                                              ; preds = %latch, %outerloop
+  %tmp3 = phi i64 [ %tmp4, %latch ], [ %tmp, %outerloop ] ; inner induction variable, seeded from %tmp
+  %tmp4 = add nuw nsw i64 %tmp3, 1
+  br i1 false, label %outerloop, label %latch ; non-latch exit of the inner loop; its target is the outer loop header
+
+latch:                                              ; preds = %innerH
+  %tmp6 = icmp ult i64 %tmp4, 100
+  br i1 %tmp6, label %innerH, label %exit ; back edge while %tmp4 < 100, otherwise leave both loops via %exit
+
+exit:                                              ; preds = %latch
+  ret void
+}