3 files changed, 104 insertions, 10 deletions
diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h
index a397046315b..26dd4dd5fcf 100644
--- a/llvm/include/llvm/Analysis/ScalarEvolution.h
+++ b/llvm/include/llvm/Analysis/ScalarEvolution.h
@@ -784,6 +784,13 @@ namespace llvm {
     /// disconnect it from a def-use chain linking it to a loop.
     void forgetValue(Value *V);
 
+    /// \brief Called when the client has changed the disposition of values in
+    /// this loop.
+    ///
+    /// We don't have a way to invalidate per-loop dispositions. Clear and
+    /// recompute is simpler.
+    void forgetLoopDispositions(const Loop *L) { LoopDispositions.clear(); }
+
     /// GetMinTrailingZeros - Determine the minimum number of zero bits that S
     /// is guaranteed to end in (at every loop iteration).  It is, at the same
     /// time, the minimum number of times S is divisible by 2.  For example,
diff --git a/llvm/lib/Transforms/Utils/LoopSimplify.cpp b/llvm/lib/Transforms/Utils/LoopSimplify.cpp
index 6d5f16ca333..1927d288d43 100644
--- a/llvm/lib/Transforms/Utils/LoopSimplify.cpp
+++ b/llvm/lib/Transforms/Utils/LoopSimplify.cpp
@@ -309,6 +309,7 @@ ReprocessLoop:
       // Attempt to hoist out all instructions except for the
       // comparison and the branch.
       bool AllInvariant = true;
+      bool AnyInvariant = false;
       for (BasicBlock::iterator I = ExitingBlock->begin(); &*I != BI; ) {
         Instruction *Inst = I++;
         // Skip debug info intrinsics.
@@ -316,12 +317,26 @@ ReprocessLoop:
           continue;
         if (Inst == CI)
           continue;
-        if (!L->makeLoopInvariant(Inst, Changed,
-                                  Preheader ? Preheader->getTerminator() : 0)) {
+        if (!L->makeLoopInvariant(Inst, AnyInvariant,
+                                 Preheader ? Preheader->getTerminator() : 0)) {
           AllInvariant = false;
           break;
         }
       }
+      if (AnyInvariant) {
+        Changed = true;
+        // If any reachable control flow within this loop has changed, notify
+        // ScalarEvolution. Currently assume the parent loop doesn't change
+        // (spliting edges doesn't count). If blocks, CFG edges, or other values
+        // in the parent loop change, then we need call to forgetLoop() for the
+        // parent instead.
+        if (SE) {
+          SE->forgetLoop(L);
+          // The loop disposition of all SCEV expressions that depend on any
+          // hoisted values have also changed.
+          SE->forgetLoopDispositions(L);
+        }
+      }
       if (!AllInvariant) continue;
 
       // The block has now been cleared of all instructions except for
@@ -334,14 +349,6 @@ ReprocessLoop:
       DEBUG(dbgs() << "LoopSimplify: Eliminating exiting block "
                    << ExitingBlock->getName() << "\n");
 
-      // If any reachable control flow within this loop has changed, notify
-      // ScalarEvolution. Currently assume the parent loop doesn't change
-      // (spliting edges doesn't count). If blocks, CFG edges, or other values
-      // in the parent loop change, then we need call to forgetLoop() for the
-      // parent instead.
-      if (SE)
-        SE->forgetLoop(L);
-
       assert(pred_begin(ExitingBlock) == pred_end(ExitingBlock));
       Changed = true;
       LI->removeBlock(ExitingBlock);
diff --git a/llvm/test/Transforms/LoopSimplify/ashr-crash.ll b/llvm/test/Transforms/LoopSimplify/ashr-crash.ll
new file mode 100644
index 00000000000..f736eefc9b3
--- /dev/null
+++ b/llvm/test/Transforms/LoopSimplify/ashr-crash.ll
@@ -0,0 +1,80 @@
+; RUN: opt -basicaa -loop-rotate -licm -instcombine -indvars -loop-unroll -S %s | FileCheck %s
+;
+; PR18361: ScalarEvolution::getAddRecExpr():
+;          Assertion `isLoopInvariant(Operands[i],...
+;
+; After a series of loop optimizations, SCEV's LoopDispositions grow stale.
+; In particular, LoopSimplify hoists %cmp4, resulting in this SCEV for %add:
+; {(zext i1 %cmp4 to i32),+,1}<nw><%for.cond1.preheader>
+;
+; When recomputing the SCEV for %ashr, we truncate the operands to get:
+; (zext i1 %cmp4 to i16)
+;
+; This SCEV was never mapped to a value so never invalidated. It's
+; loop disposition is still marked as non-loop-invariant, which is
+; inconsistent with the AddRec.
+
+target datalayout = "e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx"
+
+@d = common global i32 0, align 4
+@a = common global i32 0, align 4
+@c = common global i32 0, align 4
+@b = common global i32 0, align 4
+
+; Check that the def-use chain that leads to the bad SCEV is still
+; there, and part of it is hoisted to the entry block.
+;
+; CHECK-LABEL: @foo
+; CHECK-LABEL: entry:
+; CHECK: %cmp4
+; CHECK-LABEL: for.cond1.preheader:
+; CHECK-LABEL: for.body3:
+; CHECK: %1 = zext i1 %cmp4 to i32
+; CHECK: %xor = xor i32 %1, 1
+define void @foo() {
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc7, %entry
+  %storemerge = phi i32 [ 0, %entry ], [ %inc8, %for.inc7 ]
+  %f.0 = phi i32 [ undef, %entry ], [ %f.1, %for.inc7 ]
+  store i32 %storemerge, i32* @d, align 4
+  %cmp = icmp slt i32 %storemerge, 1
+  br i1 %cmp, label %for.cond1, label %for.end9
+
+for.cond1:                                        ; preds = %for.cond, %for.body3
+  %storemerge1 = phi i32 [ %inc, %for.body3 ], [ 0, %for.cond ]
+  %f.1 = phi i32 [ %xor, %for.body3 ], [ %f.0, %for.cond ]
+  store i32 %storemerge1, i32* @a, align 4
+  %cmp2 = icmp slt i32 %storemerge1, 1
+  br i1 %cmp2, label %for.body3, label %for.inc7
+
+for.body3:                                        ; preds = %for.cond1
+  %0 = load i32* @c, align 4
+  %cmp4 = icmp sge i32 %storemerge1, %0
+  %conv = zext i1 %cmp4 to i32
+  %1 = load i32* @d, align 4
+  %add = add nsw i32 %conv, %1
+  %sext = shl i32 %add, 16
+  %conv6 = ashr exact i32 %sext, 16
+  %xor = xor i32 %conv6, 1
+  %inc = add nsw i32 %storemerge1, 1
+  br label %for.cond1
+
+for.inc7:                                         ; preds = %for.cond1
+  %2 = load i32* @d, align 4
+  %inc8 = add nsw i32 %2, 1
+  br label %for.cond
+
+for.end9:                                         ; preds = %for.cond
+  %cmp10 = icmp sgt i32 %f.0, 0
+  br i1 %cmp10, label %if.then, label %if.end
+
+if.then:                                          ; preds = %for.end9
+  store i32 0, i32* @b, align 4
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %for.end9
+  ret void
+}