-rw-r--r--  llvm/lib/Transforms/Scalar/Reassociate.cpp                       67
-rw-r--r--  llvm/test/Transforms/Reassociate/prev_insts_canonicalized.ll     57
-rw-r--r--  llvm/test/Transforms/Reassociate/reassoc-intermediate-fnegs.ll    6
-rw-r--r--  llvm/test/Transforms/Reassociate/xor_reassoc.ll                    4
4 files changed, 101 insertions, 33 deletions
diff --git a/llvm/lib/Transforms/Scalar/Reassociate.cpp b/llvm/lib/Transforms/Scalar/Reassociate.cpp
index bcadd4e2bee..a6fe51cc872 100644
--- a/llvm/lib/Transforms/Scalar/Reassociate.cpp
+++ b/llvm/lib/Transforms/Scalar/Reassociate.cpp
@@ -163,7 +163,8 @@ namespace {
AU.addPreserved<GlobalsAAWrapperPass>();
}
private:
- void BuildRankMap(Function &F);
+ void BuildRankMap(Function &F, ReversePostOrderTraversal<Function *> &RPOT);
+
unsigned getRank(Value *V);
void canonicalizeOperands(Instruction *I);
void ReassociateExpression(BinaryOperator *I);
@@ -246,7 +247,8 @@ static BinaryOperator *isReassociableOp(Value *V, unsigned Opcode1,
return nullptr;
}
-void Reassociate::BuildRankMap(Function &F) {
+void Reassociate::BuildRankMap(Function &F,
+ ReversePostOrderTraversal<Function *> &RPOT) {
unsigned i = 2;
// Assign distinct ranks to function arguments.
@@ -255,7 +257,6 @@ void Reassociate::BuildRankMap(Function &F) {
DEBUG(dbgs() << "Calculated Rank[" << I->getName() << "] = " << i << "\n");
}
- ReversePostOrderTraversal<Function*> RPOT(&F);
for (ReversePostOrderTraversal<Function*>::rpo_iterator I = RPOT.begin(),
E = RPOT.end(); I != E; ++I) {
BasicBlock *BB = *I;
@@ -2259,13 +2260,28 @@ bool Reassociate::runOnFunction(Function &F) {
if (skipOptnoneFunction(F))
return false;
- // Calculate the rank map for F
- BuildRankMap(F);
+ // Reassociate needs each instruction's operands to have already been
+ // processed, so we first walk the basic blocks in reverse post-order:
+ // by the time a basic block is processed, all of its dominators have
+ // already been processed.
+ ReversePostOrderTraversal<Function *> RPOT(&F);
+ BuildRankMap(F, RPOT);
MadeChange = false;
- for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
+ for (BasicBlock *BI : RPOT) {
+ // Use a worklist to keep track of which instructions in this block have
+ // not yet been processed; when draining RedoInsts, only instructions
+ // that will not be visited later are optimized right away.
+ SmallSet<Instruction *, 8> Worklist;
+
+ // Insert all instructions of the BB into the worklist.
+ for (Instruction &I : *BI)
+ Worklist.insert(&I);
+
// Optimize every instruction in the basic block.
- for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE; )
+ for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE;) {
+ // This instruction has been processed.
+ Worklist.erase(&*II);
if (isInstructionTriviallyDead(&*II)) {
EraseInst(&*II++);
} else {
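
(The hunk above builds on a simple property of reverse post-order: every basic block is visited after all of the blocks that dominate it. Below is a minimal, standalone sketch of rank assignment under that ordering, assuming the usual LLVM headers; the names buildBlockRanks and BlockRank are illustrative and not part of this patch.)

// Sketch only: assign increasing ranks to basic blocks in reverse post-order,
// so a dominator always receives a smaller rank than the blocks it dominates.
// This mirrors the ordering BuildRankMap relies on above.
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Function.h"

using namespace llvm;

static void buildBlockRanks(Function &F,
                            DenseMap<BasicBlock *, unsigned> &BlockRank) {
  ReversePostOrderTraversal<Function *> RPOT(&F);
  unsigned Rank = 2; // Reassociate hands out the lowest ranks to arguments.
  for (BasicBlock *BB : RPOT)
    BlockRank[BB] = ++Rank;
}
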
@@ -2274,27 +2290,22 @@ bool Reassociate::runOnFunction(Function &F) {
++II;
}
- // Make a copy of all the instructions to be redone so we can remove dead
- // instructions.
- SetVector<AssertingVH<Instruction>> ToRedo(RedoInsts);
- // Iterate over all instructions to be reevaluated and remove trivially dead
- // instructions. If any operand of the trivially dead instruction becomes
- // dead mark it for deletion as well. Continue this process until all
- // trivially dead instructions have been removed.
- while (!ToRedo.empty()) {
- Instruction *I = ToRedo.pop_back_val();
- if (isInstructionTriviallyDead(I))
- RecursivelyEraseDeadInsts(I, ToRedo);
- }
-
- // Now that we have removed dead instructions, we can reoptimize the
- // remaining instructions.
- while (!RedoInsts.empty()) {
- Instruction *I = RedoInsts.pop_back_val();
- if (isInstructionTriviallyDead(I))
- EraseInst(I);
- else
- OptimizeInst(I);
+ // If the above optimizations produced new instructions to optimize or
+ // made modifications which need to be redone, do them now if they won't
+ // be handled later.
+ while (!RedoInsts.empty()) {
+ Instruction *I = RedoInsts.pop_back_val();
+ // Only process an instruction here if it will not be visited again:
+ // i.e. it is not still pending in the current block, and it is not in
+ // a basic block of higher rank that the RPO walk has yet to reach.
+ if ((I->getParent() != BI || !Worklist.count(I)) &&
+ RankMap[I->getParent()] <= RankMap[BI]) {
+ if (isInstructionTriviallyDead(I))
+ EraseInst(I);
+ else
+ OptimizeInst(I);
+ }
+ }
}
}
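
(The condition guarding the RedoInsts drain in the last hunk is easier to read as a standalone predicate. A hedged restatement follows, assuming the same containers the patch uses; the helper name shouldProcessNow and its parameters are illustrative only.)

// Sketch only: a redo candidate is handled immediately only when no later
// step of the RPO walk will reach it anyway.
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instruction.h"

using namespace llvm;

static bool shouldProcessNow(Instruction *I, BasicBlock *CurBB,
                             const SmallSet<Instruction *, 8> &Pending,
                             const DenseMap<BasicBlock *, unsigned> &BlockRank) {
  // Still pending in the current block: the per-block scan will get to it.
  if (I->getParent() == CurBB && Pending.count(I))
    return false;
  // Blocks of higher rank have not been visited yet; defer to that later visit.
  return BlockRank.lookup(I->getParent()) <= BlockRank.lookup(CurBB);
}
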
diff --git a/llvm/test/Transforms/Reassociate/prev_insts_canonicalized.ll b/llvm/test/Transforms/Reassociate/prev_insts_canonicalized.ll
new file mode 100644
index 00000000000..649761e57c9
--- /dev/null
+++ b/llvm/test/Transforms/Reassociate/prev_insts_canonicalized.ll
@@ -0,0 +1,57 @@
+; RUN: opt < %s -reassociate -S | FileCheck %s
+
+; These tests make sure that, before an instruction is processed,
+; the instructions preceding it have already been canonicalized.
+define i32 @foo(i32 %in) {
+; CHECK-LABEL: @foo
+; CHECK-NEXT: %factor = mul i32 %in, -4
+; CHECK-NEXT: %factor1 = mul i32 %in, 2
+; CHECK-NEXT: %_3 = add i32 %factor, 1
+; CHECK-NEXT: %_5 = add i32 %_3, %factor1
+; CHECK-NEXT: ret i32 %_5
+ %_0 = add i32 %in, 1
+ %_1 = mul i32 %in, -2
+ %_2 = add i32 %_0, %_1
+ %_3 = add i32 %_1, %_2
+ %_4 = add i32 %_3, 1
+ %_5 = add i32 %in, %_3
+ ret i32 %_5
+}
+
+; CHECK-LABEL: @foo1
+define void @foo1(float %in, i1 %cmp) {
+wrapper_entry:
+ br label %foo1
+
+for.body:
+ %0 = fadd float %in1, %in1
+ br label %foo1
+
+foo1:
+ %_0 = fmul fast float %in, -3.000000e+00
+ %_1 = fmul fast float %_0, 3.000000e+00
+ %in1 = fadd fast float -3.000000e+00, %_1
+ %in1use = fadd fast float %in1, %in1
+ br label %for.body
+
+
+}
+
+; CHECK-LABEL: @foo2
+define void @foo2(float %in, i1 %cmp) {
+wrapper_entry:
+ br label %for.body
+
+for.body:
+; If the operands of the phi are scheduled for processing before
+; foo1 is processed, the invariants of reassociate are not preserved.
+ %unused = phi float [%in1, %foo1], [undef, %wrapper_entry]
+ br label %foo1
+
+foo1:
+ %_0 = fmul fast float %in, -3.000000e+00
+ %_1 = fmul fast float %_0, 3.000000e+00
+ %in1 = fadd fast float -3.000000e+00, %_1
+ %in1use = fadd fast float %in1, %in1
+ br label %for.body
+}
diff --git a/llvm/test/Transforms/Reassociate/reassoc-intermediate-fnegs.ll b/llvm/test/Transforms/Reassociate/reassoc-intermediate-fnegs.ll
index c2cdffce61e..7d82ef7e7a2 100644
--- a/llvm/test/Transforms/Reassociate/reassoc-intermediate-fnegs.ll
+++ b/llvm/test/Transforms/Reassociate/reassoc-intermediate-fnegs.ll
@@ -1,8 +1,8 @@
; RUN: opt < %s -reassociate -S | FileCheck %s
; CHECK-LABEL: faddsubAssoc1
-; CHECK: [[TMP1:%tmp.*]] = fmul fast half %a, 0xH4500
-; CHECK: [[TMP2:%tmp.*]] = fmul fast half %b, 0xH4500
-; CHECK: fsub fast half [[TMP2]], [[TMP1]]
+; CHECK: [[TMP1:%.*]] = fsub fast half 0xH8000, %a
+; CHECK: [[TMP2:%.*]] = fadd fast half %b, [[TMP1]]
+; CHECK: fmul fast half [[TMP2]], 0xH4500
; CHECK: ret
; Input is A op (B op C)
define half @faddsubAssoc1(half %a, half %b) {
diff --git a/llvm/test/Transforms/Reassociate/xor_reassoc.ll b/llvm/test/Transforms/Reassociate/xor_reassoc.ll
index 0bed6f35880..a22689805fb 100644
--- a/llvm/test/Transforms/Reassociate/xor_reassoc.ll
+++ b/llvm/test/Transforms/Reassociate/xor_reassoc.ll
@@ -88,8 +88,8 @@ define i32 @xor_special2(i32 %x, i32 %y) {
%xor1 = xor i32 %xor, %and
ret i32 %xor1
; CHECK-LABEL: @xor_special2(
-; CHECK: %xor = xor i32 %x, 123
-; CHECK: %xor1 = xor i32 %xor, %y
+; CHECK: %xor = xor i32 %y, 123
+; CHECK: %xor1 = xor i32 %xor, %x
; CHECK: ret i32 %xor1
}