3 files changed, 950 insertions, 127 deletions
diff --git a/llvm/lib/Transforms/Scalar/NewGVN.cpp b/llvm/lib/Transforms/Scalar/NewGVN.cpp
index 4cca61003b6..133049004ab 100644
--- a/llvm/lib/Transforms/Scalar/NewGVN.cpp
+++ b/llvm/lib/Transforms/Scalar/NewGVN.cpp
@@ -30,9 +30,19 @@
 /// tracks what operations have a given value number (IE it also tracks the
 /// reverse mapping from value number -> operations with that value number), so
 /// that it only needs to reprocess the instructions that are affected when
-/// something's value number changes.  The rest of the algorithm is devoted to
-/// performing symbolic evaluation, forward propagation, and simplification of
-/// operations based on the value numbers deduced so far.
+/// something's value number changes.  The vast majority of complexity and code
+/// in this file is devoted to tracking what value numbers could change for what
+/// instructions when various things happen.  The rest of the algorithm is
+/// devoted to performing symbolic evaluation, forward propagation, and
+/// simplification of operations based on the value numbers deduced so far
+///
+/// In order to make the GVN mostly-complete, we use a technique derived from
+/// "Detection of Redundant Expressions: A Complete and Polynomial-time
+/// Algorithm in SSA" by R.R. Pai.  The source of incompleteness in most SSA
+/// based GVN algorithms is related to their inability to detect equivalence
+/// between phi of ops (IE phi(a+b, c+d)) and op of phis (phi(a,c) + phi(b, d)).
+/// We resolve this issue by generating the equivalent "phi of ops" form for
+/// each op of phis we see, in a way that only takes polynomial time to resolve.
 ///
 /// We also do not perform elimination by using any published algorithm.  All
 /// published algorithms are O(Instructions). Instead, we use a technique that
@@ -105,9 +115,13 @@ STATISTIC(NumGVNSortedLeaderChanges, "Number of sorted leader changes");
 STATISTIC(NumGVNAvoidedSortedLeaderChanges,
           "Number of avoided sorted leader changes");
 STATISTIC(NumGVNDeadStores, "Number of redundant/dead stores eliminated");
+STATISTIC(NumGVNPHIOfOpsCreated, "Number of PHI of ops created");
+STATISTIC(NumGVNPHIOfOpsEliminations,
+          "Number of things eliminated using PHI of ops");
 DEBUG_COUNTER(VNCounter, "newgvn-vn",
               "Controls which instructions are value numbered")
-
+DEBUG_COUNTER(PHIOfOpsCounter, "newgvn-phi",
+              "Controls which instructions we create phi of ops for")
 // Currently store defining access refinement is too slow due to basicaa being
 // egregiously slow.  This flag lets us keep it working while we work on this
 // issue.
@@ -170,10 +184,9 @@ private:
       }
     }
     // See if we really were the root of a component, by seeing if we still have
-    // our DFSNumber.
-    // If we do, we are the root of the component, and we have completed a
-    // component. If we do not,
-    // we are not the root of a component, and belong on the component stack.
+    // our DFSNumber.  If we do, we are the root of the component, and we have
+    // completed a component. If we do not, we are not the root of a component,
+    // and belong on the component stack.
     if (Root.lookup(I) == OurDFS) {
       unsigned ComponentID = Components.size();
       Components.resize(Components.size() + 1);
@@ -365,6 +378,7 @@ private:
   int StoreCount = 0;
 };
 
+struct HashedExpression;
 namespace llvm {
 template <> struct DenseMapInfo<const Expression *> {
   static const Expression *getEmptyKey() {
@@ -377,9 +391,11 @@ template <> struct DenseMapInfo<const Expression *> {
     Val <<= PointerLikeTypeTraits<const Expression *>::NumLowBitsAvailable;
     return reinterpret_cast<const Expression *>(Val);
   }
-  static unsigned getHashValue(const Expression *V) {
-    return static_cast<unsigned>(V->getHashValue());
+  static unsigned getHashValue(const Expression *E) {
+    return static_cast<unsigned>(E->getHashValue());
   }
+  static unsigned getHashValue(const HashedExpression &HE);
+  static bool isEqual(const HashedExpression &LHS, const Expression *RHS);
   static bool isEqual(const Expression *LHS, const Expression *RHS) {
     if (LHS == RHS)
       return true;
@@ -391,6 +407,26 @@ template <> struct DenseMapInfo<const Expression *> {
 };
 } // end namespace llvm
 
+// This is just a wrapper around Expression that computes the hash value once at
+// creation time.  Hash values for an Expression can't change once they are
+// inserted into the DenseMap (it breaks DenseMap), so they must be immutable at
+// that point anyway.
+struct HashedExpression {
+  const Expression *E;
+  unsigned HashVal;
+  HashedExpression(const Expression *E)
+      : E(E), HashVal(DenseMapInfo<const Expression *>::getHashValue(E)) {}
+};
+
+unsigned
+DenseMapInfo<const Expression *>::getHashValue(const HashedExpression &HE) {
+  return HE.HashVal;
+}
+bool DenseMapInfo<const Expression *>::isEqual(const HashedExpression &LHS,
+                                               const Expression *RHS) {
+  return isEqual(LHS.E, RHS);
+}
+
 namespace {
 class NewGVN {
   Function &F;
@@ -428,6 +464,33 @@ class NewGVN {
   // Value Mappings.
   DenseMap<Value *, CongruenceClass *> ValueToClass;
   DenseMap<Value *, const Expression *> ValueToExpression;
+  // Value PHI handling, used to make equivalence between phi(op, op) and
+  // op(phi, phi).
+  // These mappings just store various data that would normally be part of the
+  // IR.
+  DenseSet<const Instruction *> PHINodeUses;
+  // Map a temporary instruction we created to a parent block.
+  DenseMap<const Value *, BasicBlock *> TempToBlock;
+  // Map between the temporary phis we created and the real instructions they
+  // are known equivalent to.
+  DenseMap<const Value *, PHINode *> RealToTemp;
+  // In order to know when we should re-process instructions that have
+  // phi-of-ops, we track the set of expressions that they needed as
+  // leaders. When we discover new leaders for those expressions, we process the
+  // associated phi-of-op instructions again in case they have changed.  The
+  // other way they may change is if they had leaders, and those leaders
+  // disappear.  However, at the point they have leaders, there are uses of the
+  // relevant operands in the created phi node, and so they will get reprocessed
+  // through the normal user marking we perform.
+  mutable DenseMap<const Value *, SmallPtrSet<Value *, 2>> AdditionalUsers;
+  DenseMap<const Expression *, SmallPtrSet<Instruction *, 2>>
+      ExpressionToPhiOfOps;
+  // Map from basic block to the temporary operations we created
+  DenseMap<const BasicBlock *, SmallVector<Instruction *, 8>> PHIOfOpsPHIs;
+  // Map from temporary operation to MemoryAccess.
+  DenseMap<const Instruction *, MemoryUseOrDef *> TempToMemory;
+  // Set of all temporary instructions we created.
+  DenseSet<Instruction *> AllTempInstructions;
 
   // Mapping from predicate info we used to the instructions we used it with.
   // In order to correctly ensure propagation, we must keep track of what
@@ -460,8 +523,8 @@ class NewGVN {
   enum MemoryPhiState { MPS_Invalid, MPS_TOP, MPS_Equivalent, MPS_Unique };
   DenseMap<const MemoryPhi *, MemoryPhiState> MemoryPhiState;
 
-  enum PhiCycleState { PCS_Unknown, PCS_CycleFree, PCS_Cycle };
-  mutable DenseMap<const PHINode *, PhiCycleState> PhiCycleState;
+  enum InstCycleState { ICS_Unknown, ICS_CycleFree, ICS_Cycle };
+  mutable DenseMap<const Instruction *, InstCycleState> InstCycleState;
   // Expression to class mapping.
   using ExpressionClassMap = DenseMap<const Expression *, CongruenceClass *>;
   ExpressionClassMap ExpressionToClass;
@@ -519,7 +582,7 @@ private:
   const Expression *createBinaryExpression(unsigned, Type *, Value *,
                                            Value *) const;
   PHIExpression *createPHIExpression(Instruction *, bool &HasBackEdge,
-                                     bool &AllConstant) const;
+                                     bool &OriginalOpsConstant) const;
   const VariableExpression *createVariableExpression(Value *) const;
   const ConstantExpression *createConstantExpression(Constant *) const;
   const Expression *createVariableOrConstant(Value *V) const;
@@ -560,6 +623,9 @@ private:
     return CClass;
   }
   void initializeCongruenceClasses(Function &F);
+  const Expression *makePossiblePhiOfOps(Instruction *, bool,
+                                         SmallPtrSetImpl<Value *> &);
+  void addPhiOfOps(PHINode *Op, BasicBlock *BB, Instruction *ExistingValue);
 
   // Value number an Instruction or MemoryPhi.
   void valueNumberMemoryPhi(MemoryPhi *);
@@ -568,7 +634,8 @@ private:
   // Symbolic evaluation.
   const Expression *checkSimplificationResults(Expression *, Instruction *,
                                                Value *) const;
-  const Expression *performSymbolicEvaluation(Value *) const;
+  const Expression *performSymbolicEvaluation(Value *,
+                                              SmallPtrSetImpl<Value *> &) const;
   const Expression *performSymbolicLoadCoercion(Type *, Value *, LoadInst *,
                                                 Instruction *,
                                                 MemoryAccess *) const;
@@ -593,7 +660,7 @@ private:
   bool setMemoryClass(const MemoryAccess *From, CongruenceClass *To);
   CongruenceClass *getMemoryClass(const MemoryAccess *MA) const;
   const MemoryAccess *lookupMemoryLeader(const MemoryAccess *) const;
-  bool isMemoryAccessTop(const MemoryAccess *) const;
+  bool isMemoryAccessTOP(const MemoryAccess *) const;
 
   // Ranking
   unsigned int getRank(const Value *) const;
@@ -617,6 +684,7 @@ private:
   void replaceInstruction(Instruction *, Value *);
   void markInstructionForDeletion(Instruction *);
   void deleteInstructionsInBlock(BasicBlock *);
+  Value *findPhiOfOpsLeader(const Expression *E, const BasicBlock *BB) const;
 
   // New instruction creation.
   void handleNewInstruction(Instruction *){};
@@ -628,8 +696,10 @@ private:
   void markPredicateUsersTouched(Instruction *);
   void markValueLeaderChangeTouched(CongruenceClass *CC);
   void markMemoryLeaderChangeTouched(CongruenceClass *CC);
+  void markPhiOfOpsChanged(const HashedExpression &HE);
   void addPredicateUsers(const PredicateBase *, Instruction *) const;
   void addMemoryUsers(const MemoryAccess *To, MemoryAccess *U) const;
+  void addAdditionalUsers(Value *To, Value *User) const;
 
   // Main loop of value numbering
   void iterateTouchedInstructions();
@@ -637,7 +707,7 @@ private:
   // Utilities.
   void cleanupTables();
   std::pair<unsigned, unsigned> assignDFSNumbers(BasicBlock *, unsigned);
-  void updateProcessedCount(Value *V);
+  void updateProcessedCount(const Value *V);
   void verifyMemoryCongruency() const;
   void verifyIterationSettled(Function &F);
   void verifyStoreExpressions() const;
@@ -645,6 +715,10 @@ private:
                               const MemoryAccess *, const MemoryAccess *) const;
   BasicBlock *getBlockForValue(Value *V) const;
   void deleteExpression(const Expression *E) const;
+  MemoryUseOrDef *getMemoryAccess(const Instruction *) const;
+  MemoryAccess *getDefiningAccess(const MemoryAccess *) const;
+  MemoryPhi *getMemoryAccess(const BasicBlock *) const;
+  template <class T, class Range> T *getMinDFSOfRange(const Range &) const;
   unsigned InstrToDFSNum(const Value *V) const {
     assert(isa<Instruction>(V) && "This should not be used for MemoryAccesses");
     return InstrDFS.lookup(V);
@@ -664,8 +738,8 @@ private:
                ? InstrToDFSNum(cast<MemoryUseOrDef>(MA)->getMemoryInst())
                : InstrDFS.lookup(MA);
   }
-  bool isCycleFree(const PHINode *PN) const;
-  template <class T, class Range> T *getMinDFSOfRange(const Range &) const;
+  bool isCycleFree(const Instruction *) const;
+  bool isBackedge(BasicBlock *From, BasicBlock *To) const;
   // Debug counter info.  When verifying, we have to reset the value numbering
   // debug counter to the same state it started in to get the same results.
   std::pair<int, int> StartingVNCounter;
@@ -693,20 +767,46 @@ bool StoreExpression::equals(const Expression &Other) const {
   return true;
 }
 
+// Determine if the edge From->To is a backedge
+bool NewGVN::isBackedge(BasicBlock *From, BasicBlock *To) const {
+  if (From == To)
+    return true;
+  auto *FromDTN = DT->getNode(From);
+  auto *ToDTN = DT->getNode(To);
+  return RPOOrdering.lookup(FromDTN) >= RPOOrdering.lookup(ToDTN);
+}
+
 #ifndef NDEBUG
 static std::string getBlockName(const BasicBlock *B) {
   return DOTGraphTraits<const Function *>::getSimpleNodeLabel(B, nullptr);
 }
 #endif
 
+// Get a MemoryAccess for an instruction, fake or real.
+MemoryUseOrDef *NewGVN::getMemoryAccess(const Instruction *I) const {
+  auto *Result = MSSA->getMemoryAccess(I);
+  return Result ? Result : TempToMemory.lookup(I);
+}
+
+// Get a MemoryPhi for a basic block. These are all real.
+MemoryPhi *NewGVN::getMemoryAccess(const BasicBlock *BB) const {
+  return MSSA->getMemoryAccess(BB);
+}
+
 // Get the basic block from an instruction/memory value.
 BasicBlock *NewGVN::getBlockForValue(Value *V) const {
-  if (auto *I = dyn_cast<Instruction>(V))
-    return I->getParent();
-  else if (auto *MP = dyn_cast<MemoryPhi>(V))
-    return MP->getBlock();
-  llvm_unreachable("Should have been able to figure out a block for our value");
-  return nullptr;
+  if (auto *I = dyn_cast<Instruction>(V)) {
+    auto *Parent = I->getParent();
+    if (Parent)
+      return Parent;
+    Parent = TempToBlock.lookup(V);
+    assert(Parent && "Every fake instruction should have a block");
+    return Parent;
+  }
+
+  auto *MP = dyn_cast<MemoryPhi>(V);
+  assert(MP && "Should have been an instruction or a MemoryPhi");
+  return MP->getBlock();
 }
 
 // Delete a definitely dead expression, so it can be reused by the expression
@@ -718,10 +818,9 @@ void NewGVN::deleteExpression(const Expression *E) const {
   const_cast<BasicExpression *>(BE)->deallocateOperands(ArgRecycler);
   ExpressionAllocator.Deallocate(E);
 }
-
 PHIExpression *NewGVN::createPHIExpression(Instruction *I, bool &HasBackedge,
-                                           bool &AllConstant) const {
-  BasicBlock *PHIBlock = I->getParent();
+                                           bool &OriginalOpsConstant) const {
+  BasicBlock *PHIBlock = getBlockForValue(I);
   auto *PN = cast<PHINode>(I);
   auto *E =
       new (ExpressionAllocator) PHIExpression(PN->getNumOperands(), PHIBlock);
@@ -730,8 +829,6 @@ PHIExpression *NewGVN::createPHIExpression(Instruction *I, bool &HasBackedge,
   E->setType(I->getType());
   E->setOpcode(I->getOpcode());
 
-  unsigned PHIRPO = RPOOrdering.lookup(DT->getNode(PHIBlock));
-
   // NewGVN assumes the operands of a PHI node are in a consistent order across
   // PHIs. LLVM doesn't seem to always guarantee this. While we need to fix
   // this in LLVM at some point we don't want GVN to find wrong congruences.
@@ -752,14 +849,12 @@ PHIExpression *NewGVN::createPHIExpression(Instruction *I, bool &HasBackedge,
   auto Filtered = make_filter_range(PHIOperands, [&](const Use *U) {
     return ReachableEdges.count({PN->getIncomingBlock(*U), PHIBlock});
   });
-
   std::transform(Filtered.begin(), Filtered.end(), op_inserter(E),
                  [&](const Use *U) -> Value * {
                    auto *BB = PN->getIncomingBlock(*U);
-                   auto *DTN = DT->getNode(BB);
-                   if (RPOOrdering.lookup(DTN) >= PHIRPO)
-                     HasBackedge = true;
-                   AllConstant &= isa<UndefValue>(*U) || isa<Constant>(*U);
+                   HasBackedge = HasBackedge || isBackedge(BB, PHIBlock);
+                   OriginalOpsConstant =
+                       OriginalOpsConstant && isa<Constant>(*U);
 
                    // Don't try to transform self-defined phis.
                    if (*U == PN)
@@ -784,7 +879,7 @@ bool NewGVN::setBasicExpressionInfo(Instruction *I, BasicExpression *E) const {
   // whether all members are constant.
   std::transform(I->op_begin(), I->op_end(), op_inserter(E), [&](Value *O) {
     auto Operand = lookupOperandLeader(O);
-    AllConstant &= isa<Constant>(Operand);
+    AllConstant = AllConstant && isa<Constant>(Operand);
     return Operand;
   });
 
@@ -1053,7 +1148,7 @@ const MemoryAccess *NewGVN::lookupMemoryLeader(const MemoryAccess *MA) const {
 // Return true if the MemoryAccess is really equivalent to everything. This is
 // equivalent to the lattice value "TOP" in most lattices.  This is the initial
 // state of all MemoryAccesses.
-bool NewGVN::isMemoryAccessTop(const MemoryAccess *MA) const {
+bool NewGVN::isMemoryAccessTOP(const MemoryAccess *MA) const {
   return getMemoryClass(MA) == TOPClass;
 }
 
@@ -1099,7 +1194,7 @@ const Expression *NewGVN::performSymbolicStoreEvaluation(Instruction *I) const {
   // Unlike loads, we never try to eliminate stores, so we do not check if they
   // are simple and avoid value numbering them.
   auto *SI = cast<StoreInst>(I);
-  auto *StoreAccess = MSSA->getMemoryAccess(SI);
+  auto *StoreAccess = getMemoryAccess(SI);
   // Get the expression, if any, for the RHS of the MemoryDef.
   const MemoryAccess *StoreRHS = StoreAccess->getDefiningAccess();
   if (EnableStoreRefinement)
@@ -1136,7 +1231,7 @@ const Expression *NewGVN::performSymbolicStoreEvaluation(Instruction *I) const {
             dyn_cast<LoadInst>(lookupOperandLeader(SI->getValueOperand()))) {
       if ((lookupOperandLeader(LI->getPointerOperand()) ==
            lookupOperandLeader(SI->getPointerOperand())) &&
-          (lookupMemoryLeader(MSSA->getMemoryAccess(LI)->getDefiningAccess()) ==
+          (lookupMemoryLeader(getMemoryAccess(LI)->getDefiningAccess()) ==
            StoreRHS))
         return createVariableExpression(LI);
     }
@@ -1240,8 +1335,9 @@ const Expression *NewGVN::performSymbolicLoadEvaluation(Instruction *I) const {
   // Load of undef is undef.
   if (isa<UndefValue>(LoadAddressLeader))
     return createConstantExpression(UndefValue::get(LI->getType()));
-
-  MemoryAccess *DefiningAccess = MSSAWalker->getClobberingMemoryAccess(I);
+  MemoryAccess *OriginalAccess = getMemoryAccess(I);
+  MemoryAccess *DefiningAccess =
+      MSSAWalker->getClobberingMemoryAccess(OriginalAccess);
 
   if (!MSSA->isLiveOnEntryDef(DefiningAccess)) {
     if (auto *MD = dyn_cast<MemoryDef>(DefiningAccess)) {
@@ -1330,6 +1426,7 @@ NewGVN::performSymbolicPredicateInfoEvaluation(Instruction *I) const {
     // operands are equal, because assumes must always be true.
     if (CmpInst::isTrueWhenEqual(Predicate)) {
       addPredicateUsers(PI, I);
+      addAdditionalUsers(Cmp->getOperand(0), I);
       return createVariableOrConstant(FirstOp);
     }
   }
@@ -1342,6 +1439,7 @@ NewGVN::performSymbolicPredicateInfoEvaluation(Instruction *I) const {
     if ((PBranch->TrueEdge && Predicate == CmpInst::ICMP_EQ) ||
         (!PBranch->TrueEdge && Predicate == CmpInst::ICMP_NE)) {
       addPredicateUsers(PI, I);
+      addAdditionalUsers(Cmp->getOperand(0), I);
       return createVariableOrConstant(FirstOp);
     }
     // Handle the special case of floating point.
@@ -1349,6 +1447,7 @@ NewGVN::performSymbolicPredicateInfoEvaluation(Instruction *I) const {
          (!PBranch->TrueEdge && Predicate == CmpInst::FCMP_UNE)) &&
         isa<ConstantFP>(FirstOp) && !cast<ConstantFP>(FirstOp)->isZero()) {
       addPredicateUsers(PI, I);
+      addAdditionalUsers(Cmp->getOperand(0), I);
       return createConstantExpression(cast<Constant>(FirstOp));
     }
   }
@@ -1429,34 +1528,33 @@ bool NewGVN::setMemoryClass(const MemoryAccess *From,
   return Changed;
 }
 
-// Determine if a phi is cycle-free.  That means the values in the phi don't
-// depend on any expressions that can change value as a result of the phi.
-// For example, a non-cycle free phi would be  v = phi(0, v+1).
-bool NewGVN::isCycleFree(const PHINode *PN) const {
-  // In order to compute cycle-freeness, we do SCC finding on the phi, and see
-  // what kind of SCC it ends up in.  If it is a singleton, it is cycle-free.
-  // If it is not in a singleton, it is only cycle free if the other members are
-  // all phi nodes (as they do not compute anything, they are copies).  TODO:
-  // There are likely a few other intrinsics or expressions that could be
-  // included here, but this happens so infrequently already that it is not
-  // likely to be worth it.
-  auto PCS = PhiCycleState.lookup(PN);
-  if (PCS == PCS_Unknown) {
-    SCCFinder.Start(PN);
-    auto &SCC = SCCFinder.getComponentFor(PN);
+// Determine if a instruction is cycle-free.  That means the values in the
+// instruction don't depend on any expressions that can change value as a result
+// of the instruction.  For example, a non-cycle free instruction would be v =
+// phi(0, v+1).
+bool NewGVN::isCycleFree(const Instruction *I) const {
+  // In order to compute cycle-freeness, we do SCC finding on the instruction,
+  // and see what kind of SCC it ends up in.  If it is a singleton, it is
+  // cycle-free.  If it is not in a singleton, it is only cycle free if the
+  // other members are all phi nodes (as they do not compute anything, they are
+  // copies).
+  auto ICS = InstCycleState.lookup(I);
+  if (ICS == ICS_Unknown) {
+    SCCFinder.Start(I);
+    auto &SCC = SCCFinder.getComponentFor(I);
     // It's cycle free if it's size 1 or or the SCC is *only* phi nodes.
     if (SCC.size() == 1)
-      PhiCycleState.insert({PN, PCS_CycleFree});
+      InstCycleState.insert({I, ICS_CycleFree});
     else {
       bool AllPhis =
           llvm::all_of(SCC, [](const Value *V) { return isa<PHINode>(V); });
-      PCS = AllPhis ? PCS_CycleFree : PCS_Cycle;
+      ICS = AllPhis ? ICS_CycleFree : ICS_Cycle;
       for (auto *Member : SCC)
         if (auto *MemberPhi = dyn_cast<PHINode>(Member))
-          PhiCycleState.insert({MemberPhi, PCS});
+          InstCycleState.insert({MemberPhi, ICS});
     }
   }
-  if (PCS == PCS_Cycle)
+  if (ICS == ICS_Cycle)
     return false;
   return true;
 }
@@ -1518,7 +1616,7 @@ const Expression *NewGVN::performSymbolicPHIEvaluation(Instruction *I) const {
       // constants, or all operands are ignored but the undef, it also must be
       // cycle free.
       if (!AllConstant && HasBackedge && NumOps > 0 &&
-          !isa<UndefValue>(AllSameValue) && !isCycleFree(cast<PHINode>(I)))
+          !isa<UndefValue>(AllSameValue) && !isCycleFree(I))
         return E;
 
       // Only have to check for instructions
@@ -1689,8 +1787,18 @@ const Expression *NewGVN::performSymbolicCmpEvaluation(Instruction *I) const {
   return createExpression(I);
 }
 
+// Return true if V is a value that will always be available (IE can
+// be placed anywhere) in the function.  We don't do globals here
+// because they are often worse to put in place.
+// TODO: Separate cost from availability
+static bool alwaysAvailable(Value *V) {
+  return isa<Constant>(V) || isa<Argument>(V);
+}
+
 // Substitute and symbolize the value before value numbering.
-const Expression *NewGVN::performSymbolicEvaluation(Value *V) const {
+const Expression *
+NewGVN::performSymbolicEvaluation(Value *V,
+                                  SmallPtrSetImpl<Value *> &Visited) const {
   const Expression *E = nullptr;
   if (auto *C = dyn_cast<Constant>(V))
     E = createConstantExpression(C);
@@ -1768,12 +1876,22 @@ const Expression *NewGVN::performSymbolicEvaluation(Value *V) const {
   return E;
 }
 
+void NewGVN::addAdditionalUsers(Value *To, Value *User) const {
+  AdditionalUsers[To].insert(User);
+}
+
 void NewGVN::markUsersTouched(Value *V) {
   // Now mark the users as touched.
   for (auto *User : V->users()) {
     assert(isa<Instruction>(User) && "Use of value not within an instruction?");
     TouchedInstructions.set(InstrToDFSNum(User));
   }
+  const auto Result = AdditionalUsers.find(V);
+  if (Result != AdditionalUsers.end()) {
+    for (auto *User : Result->second)
+      TouchedInstructions.set(InstrToDFSNum(User));
+    AdditionalUsers.erase(Result);
+  }
 }
 
 void NewGVN::addMemoryUsers(const MemoryAccess *To, MemoryAccess *U) const {
@@ -1800,6 +1918,10 @@ void NewGVN::markMemoryUsersTouched(const MemoryAccess *MA) {
 
 // Add I to the set of users of a given predicate.
 void NewGVN::addPredicateUsers(const PredicateBase *PB, Instruction *I) const {
+  // Don't add temporary instructions to the user lists.
+  if (AllTempInstructions.count(I))
+    return;
+
   if (auto *PBranch = dyn_cast<PredicateBranch>(PB))
     PredicateToUsers[PBranch->Condition].insert(I);
   else if (auto *PAssume = dyn_cast<PredicateBranch>(PB))
@@ -1855,11 +1977,11 @@ const MemoryAccess *NewGVN::getNextMemoryLeader(CongruenceClass *CC) const {
   assert(!CC->definesNoMemory() && "Can't get next leader if there is none");
   if (CC->getStoreCount() > 0) {
     if (auto *NL = dyn_cast_or_null<StoreInst>(CC->getNextLeader().first))
-      return MSSA->getMemoryAccess(NL);
+      return getMemoryAccess(NL);
     // Find the store with the minimum DFS number.
     auto *V = getMinDFSOfRange<Value>(make_filter_range(
         *CC, [&](const Value *V) { return isa<StoreInst>(V); }));
-    return MSSA->getMemoryAccess(cast<StoreInst>(V));
+    return getMemoryAccess(cast<StoreInst>(V));
   }
   assert(CC->getStoreCount() == 0);
 
@@ -1982,7 +2104,7 @@ void NewGVN::moveValueToNewCongruenceClass(Instruction *I, const Expression *E,
   // instructions before.
 
   // If it's not a memory use, set the MemoryAccess equivalence
-  auto *InstMA = dyn_cast_or_null<MemoryDef>(MSSA->getMemoryAccess(I));
+  auto *InstMA = dyn_cast_or_null<MemoryDef>(getMemoryAccess(I));
   if (InstMA)
     moveMemoryToNewCongruenceClass(I, InstMA, OldClass, NewClass);
   ValueToClass[I] = NewClass;
@@ -2014,21 +2136,31 @@ void NewGVN::moveValueToNewCongruenceClass(Instruction *I, const Expression *E,
   }
 }
 
+// For a given expression, mark the phi of ops instructions that could have
+// changed as a result.
+void NewGVN::markPhiOfOpsChanged(const HashedExpression &HE) {
+  auto PhiOfOpsSet = ExpressionToPhiOfOps.find_as(HE);
+  if (PhiOfOpsSet != ExpressionToPhiOfOps.end()) {
+    for (auto I : PhiOfOpsSet->second)
+      TouchedInstructions.set(InstrToDFSNum(I));
+    ExpressionToPhiOfOps.erase(PhiOfOpsSet);
+  }
+}
 // Perform congruence finding on a given value numbering expression.
 void NewGVN::performCongruenceFinding(Instruction *I, const Expression *E) {
   // This is guaranteed to return something, since it will at least find
   // TOP.
-
   CongruenceClass *IClass = ValueToClass[I];
   assert(IClass && "Should have found a IClass");
   // Dead classes should have been eliminated from the mapping.
   assert(!IClass->isDead() && "Found a dead class");
 
-  CongruenceClass *EClass;
+  CongruenceClass *EClass = nullptr;
+  HashedExpression HE(E);
   if (const auto *VE = dyn_cast<VariableExpression>(E)) {
-    EClass = ValueToClass[VE->getVariableValue()];
+    EClass = ValueToClass.lookup(VE->getVariableValue());
   } else {
-    auto lookupResult = ExpressionToClass.insert({E, nullptr});
+    auto lookupResult = ExpressionToClass.insert_as({E, nullptr}, HE);
 
     // If it's not in the value table, create a new congruence class.
     if (lookupResult.second) {
@@ -2077,10 +2209,13 @@ void NewGVN::performCongruenceFinding(Instruction *I, const Expression *E) {
   if (ClassChanged || LeaderChanged) {
     DEBUG(dbgs() << "New class " << EClass->getID() << " for expression " << *E
                  << "\n");
-    if (ClassChanged)
+    if (ClassChanged) {
       moveValueToNewCongruenceClass(I, E, IClass, EClass);
+      markPhiOfOpsChanged(HE);
+    }
+
     markUsersTouched(I);
-    if (MemoryAccess *MA = MSSA->getMemoryAccess(I))
+    if (MemoryAccess *MA = getMemoryAccess(I))
       markMemoryUsersTouched(MA);
     if (auto *CI = dyn_cast<CmpInst>(I))
       markPredicateUsersTouched(CI);
@@ -2118,7 +2253,7 @@ void NewGVN::updateReachableEdge(BasicBlock *From, BasicBlock *To) {
       // impact predicates. Otherwise, only mark the phi nodes as touched, as
       // they are the only thing that depend on new edges. Anything using their
       // values will get propagated to if necessary.
-      if (MemoryAccess *MemPhi = MSSA->getMemoryAccess(To))
+      if (MemoryAccess *MemPhi = getMemoryAccess(To))
         TouchedInstructions.set(InstrToDFSNum(MemPhi));
 
       auto BI = To->begin();
@@ -2126,6 +2261,12 @@ void NewGVN::updateReachableEdge(BasicBlock *From, BasicBlock *To) {
         TouchedInstructions.set(InstrToDFSNum(&*BI));
         ++BI;
       }
+      const auto PHIResult = PHIOfOpsPHIs.find(To);
+      if (PHIResult != PHIOfOpsPHIs.end()) {
+        const auto &PHIs = PHIResult->second;
+        for (auto I : PHIs)
+          TouchedInstructions.set(InstrToDFSNum(I));
+      }
     }
   }
 }
@@ -2215,7 +2356,7 @@ void NewGVN::processOutgoingEdges(TerminatorInst *TI, BasicBlock *B) {
     // This also may be a memory defining terminator, in which case, set it
     // equivalent only to itself.
     //
-    auto *MA = MSSA->getMemoryAccess(TI);
+    auto *MA = getMemoryAccess(TI);
     if (MA && !isa<MemoryUse>(MA)) {
       auto *CC = ensureLeaderOfMemoryClass(MA);
       if (setMemoryClass(MA, CC))
@@ -2224,6 +2365,158 @@ void NewGVN::processOutgoingEdges(TerminatorInst *TI, BasicBlock *B) {
   }
 }
 
+void NewGVN::addPhiOfOps(PHINode *Op, BasicBlock *BB,
+                         Instruction *ExistingValue) {
+  InstrDFS[Op] = InstrToDFSNum(ExistingValue);
+  AllTempInstructions.insert(Op);
+  PHIOfOpsPHIs[BB].push_back(Op);
+  TempToBlock[Op] = BB;
+  if (ExistingValue)
+    RealToTemp[ExistingValue] = Op;
+}
+
+static bool okayForPHIOfOps(const Instruction *I) {
+  return isa<BinaryOperator>(I) || isa<SelectInst>(I) || isa<CmpInst>(I) ||
+         isa<LoadInst>(I);
+}
+
+// When we see an instruction that is an op of phis, generate the equivalent phi
+// of ops form.
+const Expression *
+NewGVN::makePossiblePhiOfOps(Instruction *I, bool HasBackedge,
+                             SmallPtrSetImpl<Value *> &Visited) {
+  if (!okayForPHIOfOps(I))
+    return nullptr;
+
+  if (!Visited.insert(I).second)
+    return nullptr;
+  // For now, we require the instruction be cycle free because we don't
+  // *always* create a phi of ops for instructions that could be done as phi
+  // of ops, we only do it if we think it is useful.  If we did do it all the
+  // time, we could remove the cycle free check.
+  if (!isCycleFree(I))
+    return nullptr;
+
+  unsigned IDFSNum = InstrToDFSNum(I);
+  // Pretty much all of the instructions we can convert to phi of ops over a
+  // backedge that are adds, are really induction variables, and those are
+  // pretty much pointless to convert.  This is very coarse-grained for a
+  // test, so if we do find some value, we can change it later.
+  // But otherwise, what can happen is we convert the induction variable from
+  //
+  // i = phi (0, tmp)
+  // tmp = i + 1
+  //
+  // to
+  // i = phi (0, tmpphi)
+  // tmpphi = phi(1, tmpphi+1)
+  //
+  // Which we don't want to happen.  We could just avoid this for all non-cycle
+  // free phis, and we made go that route.
+  if (HasBackedge && I->getOpcode() == Instruction::Add)
+    return nullptr;
+
+  SmallPtrSet<const Value *, 8> ProcessedPHIs;
+  // TODO: We don't do phi translation on memory accesses because it's
+  // complicated. For a load, we'd need to be able to simulate a new memoryuse,
+  // which we don't have a good way of doing ATM.
+  auto *MemAccess = getMemoryAccess(I);
+  // If the memory operation is defined by a memory operation this block that
+  // isn't a MemoryPhi, transforming the pointer backwards through a scalar phi
+  // can't help, as it would still be killed by that memory operation.
+  if (MemAccess && !isa<MemoryPhi>(MemAccess->getDefiningAccess()) &&
+      MemAccess->getDefiningAccess()->getBlock() == I->getParent())
+    return nullptr;
+
+  // Convert op of phis to phi of ops
+  for (auto &Op : I->operands()) {
+    if (!isa<PHINode>(Op))
+      continue;
+    auto *OpPHI = cast<PHINode>(Op);
+    // No point in doing this for one-operand phis.
+    if (OpPHI->getNumOperands() == 1)
+      continue;
+    if (!DebugCounter::shouldExecute(PHIOfOpsCounter))
+      return nullptr;
+    SmallVector<std::pair<Value *, BasicBlock *>, 4> Ops;
+    auto *PHIBlock = getBlockForValue(OpPHI);
+    for (auto PredBB : OpPHI->blocks()) {
+      Value *FoundVal = nullptr;
+      // We could just skip unreachable edges entirely but it's tricky to do
+      // with rewriting existing phi nodes.
+      if (ReachableEdges.count({PredBB, PHIBlock})) {
+        // Clone the instruction, create an expression from it, and see if we
+        // have a leader.
+        Instruction *ValueOp = I->clone();
+        auto Iter = TempToMemory.end();
+        if (MemAccess)
+          Iter = TempToMemory.insert({ValueOp, MemAccess}).first;
+
+        for (auto &Op : ValueOp->operands()) {
+          Op = Op->DoPHITranslation(PHIBlock, PredBB);
+          // When this operand changes, it could change whether there is a
+          // leader for us or not.
+          addAdditionalUsers(Op, I);
+        }
+        // Make sure it's marked as a temporary instruction.
+        AllTempInstructions.insert(ValueOp);
+        // and make sure anything that tries to add it's DFS number is
+        // redirected to the instruction we are making a phi of ops
+        // for.
+        InstrDFS.insert({ValueOp, IDFSNum});
+        const Expression *E = performSymbolicEvaluation(ValueOp, Visited);
+        InstrDFS.erase(ValueOp);
+        AllTempInstructions.erase(ValueOp);
+        ValueOp->deleteValue();
+        if (MemAccess)
+          TempToMemory.erase(Iter);
+        if (!E)
+          return nullptr;
+        FoundVal = findPhiOfOpsLeader(E, PredBB);
+        if (!FoundVal) {
+          ExpressionToPhiOfOps[E].insert(I);
+          return nullptr;
+        }
+        if (auto *SI = dyn_cast<StoreInst>(FoundVal))
+          FoundVal = SI->getValueOperand();
+      } else {
+        DEBUG(dbgs() << "Skipping phi of ops operand for incoming block "
+                     << getBlockName(PredBB)
+                     << " because the block is unreachable\n");
+        FoundVal = UndefValue::get(I->getType());
+      }
+
+      Ops.push_back({FoundVal, PredBB});
+      DEBUG(dbgs() << "Found phi of ops operand " << *FoundVal << " in "
+                   << getBlockName(PredBB) << "\n");
+    }
+    auto *ValuePHI = RealToTemp.lookup(I);
+    bool NewPHI = false;
+    if (!ValuePHI) {
+      ValuePHI = PHINode::Create(I->getType(), OpPHI->getNumOperands());
+      addPhiOfOps(ValuePHI, PHIBlock, I);
+      NewPHI = true;
+      NumGVNPHIOfOpsCreated++;
+    }
+    if (NewPHI) {
+      for (auto PHIOp : Ops)
+        ValuePHI->addIncoming(PHIOp.first, PHIOp.second);
+    } else {
+      unsigned int i = 0;
+      for (auto PHIOp : Ops) {
+        ValuePHI->setIncomingValue(i, PHIOp.first);
+        ValuePHI->setIncomingBlock(i, PHIOp.second);
+        ++i;
+      }
+    }
+
+    DEBUG(dbgs() << "Created phi of ops " << *ValuePHI << " for " << *I
+                 << "\n");
+    return performSymbolicEvaluation(ValuePHI, Visited);
+  }
+  return nullptr;
+}
+
 // The algorithm initially places the values of the routine in the TOP
 // congruence class. The leader of TOP is the undetermined value `undef`.
 // When the algorithm has finished, values still in TOP are unreachable.
@@ -2266,6 +2559,12 @@ void NewGVN::initializeCongruenceClasses(Function &F) {
           TOPClass->incStoreCount();
       }
     for (auto &I : *BB) {
+      // TODO: Move to helper
+      if (isa<PHINode>(&I))
+        for (auto *U : I.users())
+          if (auto *UInst = dyn_cast<Instruction>(U))
+            if (InstrToDFSNum(UInst) != 0 && okayForPHIOfOps(UInst))
+              PHINodeUses.insert(UInst);
       // Don't insert void terminators into the class. We don't value number
       // them, and they just end up sitting in TOP.
       if (isa<TerminatorInst>(I) && I.getType()->isVoidTy())
@@ -2290,12 +2589,35 @@ void NewGVN::cleanupTables() {
     CongruenceClasses[i] = nullptr;
   }
 
+  // Destroy the value expressions
+  SmallVector<Instruction *, 8> TempInst(AllTempInstructions.begin(),
+                                         AllTempInstructions.end());
+  AllTempInstructions.clear();
+
+  // We have to drop all references for everything first, so there are no uses
+  // left as we delete them.
+  for (auto *I : TempInst) {
+    I->dropAllReferences();
+  }
+
+  while (!TempInst.empty()) {
+    auto *I = TempInst.back();
+    TempInst.pop_back();
+    I->deleteValue();
+  }
+
   ValueToClass.clear();
   ArgRecycler.clear(ExpressionAllocator);
   ExpressionAllocator.Reset();
   CongruenceClasses.clear();
   ExpressionToClass.clear();
   ValueToExpression.clear();
+  RealToTemp.clear();
+  AdditionalUsers.clear();
+  ExpressionToPhiOfOps.clear();
+  TempToBlock.clear();
+  TempToMemory.clear();
+  PHIOfOpsPHIs.clear();
   ReachableBlocks.clear();
   ReachableEdges.clear();
 #ifndef NDEBUG
@@ -2311,14 +2633,17 @@ void NewGVN::cleanupTables() {
   MemoryToUsers.clear();
 }
 
+// Assign local DFS number mapping to instructions, and leave space for Value
+// PHI's.
 std::pair<unsigned, unsigned> NewGVN::assignDFSNumbers(BasicBlock *B,
                                                        unsigned Start) {
   unsigned End = Start;
-  if (MemoryAccess *MemPhi = MSSA->getMemoryAccess(B)) {
+  if (MemoryAccess *MemPhi = getMemoryAccess(B)) {
     InstrDFS[MemPhi] = End++;
     DFSToInstr.emplace_back(MemPhi);
   }
 
+  // Then the real block goes next.
   for (auto &I : *B) {
     // There's no need to call isInstructionTriviallyDead more than once on
     // an instruction. Therefore, once we know that an instruction is dead
@@ -2329,7 +2654,6 @@ std::pair<unsigned, unsigned> NewGVN::assignDFSNumbers(BasicBlock *B,
       markInstructionForDeletion(&I);
       continue;
     }
-
     InstrDFS[&I] = End++;
     DFSToInstr.emplace_back(&I);
   }
@@ -2340,7 +2664,7 @@ std::pair<unsigned, unsigned> NewGVN::assignDFSNumbers(BasicBlock *B,
   return std::make_pair(Start, End);
 }
 
-void NewGVN::updateProcessedCount(Value *V) {
+void NewGVN::updateProcessedCount(const Value *V) {
 #ifndef NDEBUG
   if (ProcessedCount.count(V) == 0) {
     ProcessedCount.insert({V, 1});
@@ -2359,7 +2683,7 @@ void NewGVN::valueNumberMemoryPhi(MemoryPhi *MP) {
   const BasicBlock *PHIBlock = MP->getBlock();
   auto Filtered = make_filter_range(MP->operands(), [&](const Use &U) {
     return lookupMemoryLeader(cast<MemoryAccess>(U)) != MP &&
-           !isMemoryAccessTop(cast<MemoryAccess>(U)) &&
+           !isMemoryAccessTOP(cast<MemoryAccess>(U)) &&
            ReachableEdges.count({MP->getIncomingBlock(U), PHIBlock});
   });
   // If all that is left is nothing, our memoryphi is undef. We keep it as
@@ -2412,18 +2736,26 @@ void NewGVN::valueNumberInstruction(Instruction *I) {
   DEBUG(dbgs() << "Processing instruction " << *I << "\n");
   if (!I->isTerminator()) {
     const Expression *Symbolized = nullptr;
+    SmallPtrSet<Value *, 2> Visited;
     if (DebugCounter::shouldExecute(VNCounter)) {
-      Symbolized = performSymbolicEvaluation(I);
+      Symbolized = performSymbolicEvaluation(I, Visited);
+      // Make a phi of ops if necessary
+      if (Symbolized && !isa<ConstantExpression>(Symbolized) &&
+          !isa<VariableExpression>(Symbolized) && PHINodeUses.count(I)) {
+        // FIXME: Backedge argument
+        auto *PHIE = makePossiblePhiOfOps(I, false, Visited);
+        if (PHIE)
+          Symbolized = PHIE;
+      }
+
     } else {
       // Mark the instruction as unused so we don't value number it again.
       InstrDFS[I] = 0;
     }
     // If we couldn't come up with a symbolic expression, use the unknown
     // expression
-    if (Symbolized == nullptr) {
+    if (Symbolized == nullptr)
       Symbolized = createUnknownExpression(I);
-    }
-
     performCongruenceFinding(I, Symbolized);
   } else {
     // Handle terminators that return values. All of them produce values we
@@ -2542,8 +2874,6 @@ void NewGVN::verifyMemoryCongruency() const {
 
   auto Filtered = make_filter_range(MemoryAccessToClass, ReachableAccessPred);
   for (auto KV : Filtered) {
-    assert(KV.second != TOPClass &&
-           "Memory not unreachable but ended up in TOP");
     if (auto *FirstMUD = dyn_cast<MemoryUseOrDef>(KV.first)) {
       auto *SecondMUD = dyn_cast<MemoryUseOrDef>(KV.second->getMemoryLeader());
       if (FirstMUD && SecondMUD) {
@@ -2663,7 +2993,7 @@ void NewGVN::iterateTouchedInstructions() {
   // Nothing set, nothing to iterate, just return.
   if (FirstInstr == -1)
     return;
-  BasicBlock *LastBlock = getBlockForValue(InstrFromDFSNum(FirstInstr));
+  const BasicBlock *LastBlock = getBlockForValue(InstrFromDFSNum(FirstInstr));
   while (TouchedInstructions.any()) {
     ++Iterations;
     // Walk through all the instructions in all the blocks in RPO.
@@ -2680,7 +3010,7 @@ void NewGVN::iterateTouchedInstructions() {
       }
 
       Value *V = InstrFromDFSNum(InstrNum);
-      BasicBlock *CurrBlock = getBlockForValue(V);
+      const BasicBlock *CurrBlock = getBlockForValue(V);
 
       // If we hit a new block, do reachability processing.
       if (CurrBlock != LastBlock) {
@@ -2761,6 +3091,7 @@ bool NewGVN::runGVN() {
     BlockInstRange.insert({B, BlockRange});
     ICount += BlockRange.second - BlockRange.first;
   }
+  initializeCongruenceClasses(F);
 
   TouchedInstructions.resize(ICount);
   // Ensure we don't end up resizing the expressionToClass map, as
@@ -2771,9 +3102,10 @@ bool NewGVN::runGVN() {
   // Initialize the touched instructions to include the entry block.
   const auto &InstRange = BlockInstRange.lookup(&F.getEntryBlock());
   TouchedInstructions.set(InstRange.first, InstRange.second);
+  DEBUG(dbgs() << "Block " << getBlockName(&F.getEntryBlock())
+               << " marked reachable\n");
   ReachableBlocks.insert(&F.getEntryBlock());
 
-  initializeCongruenceClasses(F);
   iterateTouchedInstructions();
   verifyMemoryCongruency();
   verifyIterationSettled(F);
@@ -2786,7 +3118,8 @@ bool NewGVN::runGVN() {
     if (!ToErase->use_empty())
       ToErase->replaceAllUsesWith(UndefValue::get(ToErase->getType()));
 
-    ToErase->eraseFromParent();
+    if (ToErase->getParent())
+      ToErase->eraseFromParent();
   }
 
   // Delete all unreachable blocks.
@@ -2805,14 +3138,6 @@ bool NewGVN::runGVN() {
   return Changed;
 }
 
-// Return true if V is a value that will always be available (IE can
-// be placed anywhere) in the function.  We don't do globals here
-// because they are often worse to put in place.
-// TODO: Separate cost from availability
-static bool alwaysAvailable(Value *V) {
-  return isa<Constant>(V) || isa<Argument>(V);
-}
-
 struct NewGVN::ValueDFS {
   int DFSIn = 0;
   int DFSOut = 0;
@@ -2902,9 +3227,21 @@ void NewGVN::convertClassToDFSOrdered(
     }
     assert(isa<Instruction>(D) &&
            "The dense set member should always be an instruction");
-    VDDef.LocalNum = InstrToDFSNum(D);
-    DFSOrderedSet.emplace_back(VDDef);
     Instruction *Def = cast<Instruction>(D);
+    VDDef.LocalNum = InstrToDFSNum(D);
+    DFSOrderedSet.push_back(VDDef);
+    // If there is a phi node equivalent, add it
+    if (auto *PN = RealToTemp.lookup(Def)) {
+      auto *PHIE =
+          dyn_cast_or_null<PHIExpression>(ValueToExpression.lookup(Def));
+      if (PHIE) {
+        VDDef.Def.setInt(false);
+        VDDef.Def.setPointer(PN);
+        VDDef.LocalNum = 0;
+        DFSOrderedSet.push_back(VDDef);
+      }
+    }
+
     unsigned int UseCount = 0;
     // Now add the uses.
     for (auto &U : Def->uses()) {
@@ -2921,7 +3258,7 @@ void NewGVN::convertClassToDFSOrdered(
           // they are from.
           VDUse.LocalNum = InstrDFS.size() + 1;
         } else {
-          IBlock = I->getParent();
+          IBlock = getBlockForValue(I);
           VDUse.LocalNum = InstrToDFSNum(I);
         }
 
@@ -3091,6 +3428,37 @@ private:
 };
 }
 
+// Given a value and a basic block we are trying to see if it is available in,
+// see if the value has a leader available in that block.
+Value *NewGVN::findPhiOfOpsLeader(const Expression *E,
+                                  const BasicBlock *BB) const {
+  // It would already be constant if we could make it constant
+  if (auto *CE = dyn_cast<ConstantExpression>(E))
+    return CE->getConstantValue();
+  if (auto *VE = dyn_cast<VariableExpression>(E))
+    return VE->getVariableValue();
+
+  auto *CC = ExpressionToClass.lookup(E);
+  if (!CC)
+    return nullptr;
+  if (alwaysAvailable(CC->getLeader()))
+    return CC->getLeader();
+
+  for (auto Member : *CC) {
+    auto *MemberInst = dyn_cast<Instruction>(Member);
+    // Anything that isn't an instruction is always available.
+    if (!MemberInst)
+      return Member;
+    // If we are looking for something in the same block as the member, it must
+    // be a leader because this function is looking for operands for a phi node.
+    if (MemberInst->getParent() == BB ||
+        DT->dominates(MemberInst->getParent(), BB)) {
+      return Member;
+    }
+  }
+  return nullptr;
+}
+
 bool NewGVN::eliminateInstructions(Function &F) {
   // This is a non-standard eliminator. The normal way to eliminate is
   // to walk the dominator tree in order, keeping track of available
@@ -3121,25 +3489,46 @@ bool NewGVN::eliminateInstructions(Function &F) {
   // DFS numbers are updated, we compute some ourselves.
   DT->updateDFSNumbers();
 
-  for (auto &B : F) {
-    if (!ReachableBlocks.count(&B)) {
-      for (const auto S : successors(&B)) {
-        for (auto II = S->begin(); isa<PHINode>(II); ++II) {
-          auto &Phi = cast<PHINode>(*II);
-          DEBUG(dbgs() << "Replacing incoming value of " << *II << " for block "
-                       << getBlockName(&B)
-                       << " with undef due to it being unreachable\n");
-          for (auto &Operand : Phi.incoming_values())
-            if (Phi.getIncomingBlock(Operand) == &B)
-              Operand.set(UndefValue::get(Phi.getType()));
+  // Go through all of our phi nodes, and kill the arguments associated with unreachable edges.
+  auto ReplaceUnreachablePHIArgs = [&](PHINode &PHI, BasicBlock *BB) {
+    for (auto &Operand : PHI.incoming_values())
+      if (!ReachableEdges.count({PHI.getIncomingBlock(Operand), BB})) {
+        DEBUG(dbgs() << "Replacing incoming value of " << PHI << " for block "
+                     << getBlockName(PHI.getIncomingBlock(Operand))
+                     << " with undef due to it being unreachable\n");
+        Operand.set(UndefValue::get(PHI.getType()));
+      }
+  };
+  SmallPtrSet<BasicBlock *, 8> BlocksWithPhis;
+  for (auto &B : F)
+    if ((!B.empty() && isa<PHINode>(*B.begin())) ||
+        (PHIOfOpsPHIs.find(&B) != PHIOfOpsPHIs.end()))
+      BlocksWithPhis.insert(&B);
+  DenseMap<const BasicBlock *, unsigned> ReachablePredCount;
+  for (auto KV : ReachableEdges)
+    ReachablePredCount[KV.getEnd()]++;
+  for (auto *BB : BlocksWithPhis)
+    // TODO: It would be faster to use getNumIncomingBlocks() on a phi node in
+    // the block and subtract the pred count, but it's more complicated.
+    if (ReachablePredCount.lookup(BB) !=
+        std::distance(pred_begin(BB), pred_end(BB))) {
+      for (auto II = BB->begin(); isa<PHINode>(II); ++II) {
+        auto &PHI = cast<PHINode>(*II);
+        ReplaceUnreachablePHIArgs(PHI, BB);
+      }
+      auto PHIResult = PHIOfOpsPHIs.find(BB);
+      if (PHIResult != PHIOfOpsPHIs.end()) {
+        auto &PHIs = PHIResult->second;
+        for (auto I : PHIs) {
+          auto *PHI = dyn_cast<PHINode>(I);
+          ReplaceUnreachablePHIArgs(*PHI, BB);
         }
       }
     }
-  }
 
   // Map to store the use counts
   DenseMap<const Value *, unsigned int> UseCounts;
-  for (CongruenceClass *CC : reverse(CongruenceClasses)) {
+  for (auto *CC : reverse(CongruenceClasses)) {
     // Track the equivalent store info so we can decide whether to try
     // dead store elimination.
     SmallVector<ValueDFS, 8> PossibleDeadStores;
@@ -3187,7 +3576,7 @@ bool NewGVN::eliminateInstructions(Function &F) {
       DEBUG(dbgs() << "Eliminating in congruence class " << CC->getID()
                    << "\n");
       // If this is a singleton, we can skip it.
-      if (CC->size() != 1) {
+      if (CC->size() != 1 || RealToTemp.lookup(Leader)) {
         // This is a stack because equality replacement/etc may place
         // constants in the middle of the member list, and we want to use
         // those constant values in preference to the current leader, over
@@ -3209,6 +3598,22 @@ bool NewGVN::eliminateInstructions(Function &F) {
           // We ignore void things because we can't get a value from them.
           if (Def && Def->getType()->isVoidTy())
             continue;
+          auto *DefInst = dyn_cast_or_null<Instruction>(Def);
+          if (DefInst && AllTempInstructions.count(DefInst)) {
+            auto *PN = cast<PHINode>(DefInst);
+
+            // If this is a value phi and that's the expression we used, insert
+            // it into the program
+            // remove from temp instruction list.
+            AllTempInstructions.erase(PN);
+            auto *DefBlock = getBlockForValue(Def);
+            DEBUG(dbgs() << "Inserting fully real phi of ops" << *Def
+                         << " into block "
+                         << getBlockName(getBlockForValue(Def)) << "\n");
+            PN->insertBefore(&DefBlock->front());
+            Def = PN;
+            NumGVNPHIOfOpsEliminations++;
+          }
 
           if (EliminationStack.empty()) {
             DEBUG(dbgs() << "Elimination Stack is empty\n");
@@ -3373,7 +3778,6 @@ bool NewGVN::eliminateInstructions(Function &F) {
       }
     }
   }
-
   return AnythingReplaced;
 }
 
@@ -3383,19 +3787,23 @@ bool NewGVN::eliminateInstructions(Function &F) {
 // we will simplify an operation with all constants so that it doesn't matter
 // what order they appear in.
 unsigned int NewGVN::getRank(const Value *V) const {
-  // Prefer undef to anything else
+  // Prefer constants to undef to anything else
+  // Undef is a constant, have to check it first.
+  // Prefer smaller constants to constantexprs
+  if (isa<ConstantExpr>(V))
+    return 2;
   if (isa<UndefValue>(V))
-    return 0;
-  if (isa<Constant>(V))
     return 1;
+  if (isa<Constant>(V))
+    return 0;
   else if (auto *A = dyn_cast<Argument>(V))
-    return 2 + A->getArgNo();
+    return 3 + A->getArgNo();
 
   // Need to shift the instruction DFS by number of arguments + 3 to account for
   // the constant and argument ranking above.
   unsigned Result = InstrToDFSNum(V);
   if (Result > 0)
-    return 3 + NumFuncArgs + Result;
+    return 4 + NumFuncArgs + Result;
   // Unreachable or something else, just return a really large number.
   return ~0;
 }
diff --git a/llvm/test/Transforms/NewGVN/completeness.ll b/llvm/test/Transforms/NewGVN/completeness.ll
new file mode 100644
index 00000000000..bafe5f966d2
--- /dev/null
+++ b/llvm/test/Transforms/NewGVN/completeness.ll
@@ -0,0 +1,415 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -basicaa -newgvn -S | FileCheck %s
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+define i32 @test1(i32, i8**) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP0:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP5:%.*]]
+; CHECK:         br label [[TMP6:%.*]]
+; CHECK:         br label [[TMP6]]
+; CHECK:         [[TMP7:%.*]] = phi i32 [ 75, [[TMP4]] ], [ 105, [[TMP5]] ]
+; CHECK-NEXT:    [[DOT0:%.*]] = phi i32 [ 5, [[TMP4]] ], [ 7, [[TMP5]] ]
+; CHECK-NEXT:    ret i32 [[TMP7]]
+;
+  %3 = icmp ne i32 %0, 0
+  br i1 %3, label %4, label %5
+
+; <label>:4:                                      ; preds = %2
+  br label %6
+
+; <label>:5:                                      ; preds = %2
+  br label %6
+
+; <label>:6:                                      ; preds = %5, %4
+  %.0 = phi i32 [ 5, %4 ], [ 7, %5 ]
+  %7 = mul nsw i32 %.0, 15
+  ret i32 %7
+}
+
+define i32 @test2(i32) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP0:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP4:%.*]]
+; CHECK:         br label [[TMP5:%.*]]
+; CHECK:         br label [[TMP5]]
+; CHECK:         [[DOT01:%.*]] = phi i32 [ 3, [[TMP3]] ], [ 2, [[TMP4]] ]
+; CHECK-NEXT:    [[DOT0:%.*]] = phi i32 [ 2, [[TMP3]] ], [ 3, [[TMP4]] ]
+; CHECK-NEXT:    ret i32 5
+;
+  %2 = icmp ne i32 %0, 0
+  br i1 %2, label %3, label %4
+
+; <label>:3:                                      ; preds = %1
+  br label %5
+
+; <label>:4:                                      ; preds = %1
+  br label %5
+
+; <label>:5:                                      ; preds = %4, %3
+  %.01 = phi i32 [ 3, %3 ], [ 2, %4 ]
+  %.0 = phi i32 [ 2, %3 ], [ 3, %4 ]
+  %6 = add nsw i32 %.01, %.0
+  ret i32 %6
+}
+define i32 @test3(i1 %which) {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[WHICH:%.*]], label [[FINAL:%.*]], label [[DELAY:%.*]]
+; CHECK:       delay:
+; CHECK-NEXT:    br label [[FINAL]]
+; CHECK:       final:
+; CHECK-NEXT:    [[TMP0:%.*]] = phi i32 [ -877, [[ENTRY:%.*]] ], [ 113, [[DELAY]] ]
+; CHECK-NEXT:    [[A:%.*]] = phi i32 [ 1000, [[ENTRY]] ], [ 10, [[DELAY]] ]
+; CHECK-NEXT:    ret i32 [[TMP0]]
+;
+
+entry:
+  br i1 %which, label %final, label %delay
+
+delay:
+  br label %final
+
+final:
+  %A = phi i32 [ 1000, %entry ], [ 10, %delay ]
+  %value = sub i32 123, %A
+  ret i32 %value
+}
+
+define <2 x i32> @test3vec(i1 %which) {
+; CHECK-LABEL: @test3vec(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[WHICH:%.*]], label [[FINAL:%.*]], label [[DELAY:%.*]]
+; CHECK:       delay:
+; CHECK-NEXT:    br label [[FINAL]]
+; CHECK:       final:
+; CHECK-NEXT:    [[TMP0:%.*]] = phi <2 x i32> [ <i32 -877, i32 -877>, [[ENTRY:%.*]] ], [ <i32 113, i32 113>, [[DELAY]] ]
+; CHECK-NEXT:    [[A:%.*]] = phi <2 x i32> [ <i32 1000, i32 1000>, [[ENTRY]] ], [ <i32 10, i32 10>, [[DELAY]] ]
+; CHECK-NEXT:    ret <2 x i32> [[TMP0]]
+;
+
+entry:
+  br i1 %which, label %final, label %delay
+
+delay:
+  br label %final
+
+final:
+  %A = phi <2 x i32> [ <i32 1000, i32 1000>, %entry ], [ <i32 10, i32 10>, %delay ]
+  %value = sub <2 x i32> <i32 123, i32 123>, %A
+  ret <2 x i32> %value
+}
+
+define <2 x i32> @test3vec2(i1 %which) {
+; CHECK-LABEL: @test3vec2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[WHICH:%.*]], label [[FINAL:%.*]], label [[DELAY:%.*]]
+; CHECK:       delay:
+; CHECK-NEXT:    br label [[FINAL]]
+; CHECK:       final:
+; CHECK-NEXT:    [[TMP0:%.*]] = phi <2 x i32> [ <i32 -877, i32 -2167>, [[ENTRY:%.*]] ], [ <i32 113, i32 303>, [[DELAY]] ]
+; CHECK-NEXT:    [[A:%.*]] = phi <2 x i32> [ <i32 1000, i32 2500>, [[ENTRY]] ], [ <i32 10, i32 30>, [[DELAY]] ]
+; CHECK-NEXT:    ret <2 x i32> [[TMP0]]
+;
+
+entry:
+  br i1 %which, label %final, label %delay
+
+delay:
+  br label %final
+
+final:
+  %A = phi <2 x i32> [ <i32 1000, i32 2500>, %entry ], [ <i32 10, i32 30>, %delay ]
+  %value = sub <2 x i32> <i32 123, i32 333>, %A
+  ret <2 x i32> %value
+}
+
+;; This example is a bit contrived because we can't create fake memoryuses, so we use two loads in the if blocks
+define i32 @test4(i32, i8**, i32* noalias, i32* noalias) {
+; CHECK-LABEL: @test4(
+; CHECK-NEXT:    store i32 5, i32* [[TMP2:%.*]], align 4
+; CHECK-NEXT:    store i32 7, i32* [[TMP3:%.*]], align 4
+; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne i32 [[TMP0:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP7:%.*]]
+; CHECK:         br label [[TMP8:%.*]]
+; CHECK:         br label [[TMP8]]
+; CHECK:         [[DOT01:%.*]] = phi i32 [ 5, [[TMP6]] ], [ 7, [[TMP7]] ]
+; CHECK-NEXT:    [[DOT0:%.*]] = phi i32* [ [[TMP2]], [[TMP6]] ], [ [[TMP3]], [[TMP7]] ]
+; CHECK-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOT0]], align 4
+; CHECK-NEXT:    [[TMP10:%.*]] = mul nsw i32 [[TMP9]], 15
+; CHECK-NEXT:    [[TMP11:%.*]] = mul nsw i32 [[TMP10]], [[DOT01]]
+; CHECK-NEXT:    ret i32 [[TMP11]]
+;
+  store i32 5, i32* %2, align 4
+  store i32 7, i32* %3, align 4
+  %5 = icmp ne i32 %0, 0
+  br i1 %5, label %6, label %8
+
+; <label>:6:                                      ; preds = %4
+  %7 = load i32, i32* %2, align 4
+  br label %10
+
+; <label>:8:                                      ; preds = %4
+  %9 = load i32, i32* %3, align 4
+  br label %10
+
+; <label>:10:                                     ; preds = %8, %6
+  %.01 = phi i32 [ %7, %6 ], [ %9, %8 ]
+  %.0 = phi i32* [ %2, %6 ], [ %3, %8 ]
+  %11 = load i32, i32* %.0, align 4
+  %12 = mul nsw i32 %11, 15
+  %13 = mul nsw i32 %12, %.01
+  ret i32 %13
+}
+
+@global = common global [100 x i64] zeroinitializer, align 16
+@global.1 = common global [100 x i64] zeroinitializer, align 16
+define i64 @test5(i64 %arg) {
+; CHECK-LABEL: @test5(
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    [[TMP:%.*]] = alloca i64, align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i64 [[ARG:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP1]], label [[BB28:%.*]], label [[BB2:%.*]]
+; CHECK:       bb2:
+; CHECK-NEXT:    br label [[BB7:%.*]]
+; CHECK:       bb4:
+; CHECK-NEXT:    br label [[BB5:%.*]]
+; CHECK:       bb5:
+; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[TMP9:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP6]], label [[BB27:%.*]], label [[BB7]]
+; CHECK:       bb7:
+; CHECK-NEXT:    [[TMP8:%.*]] = phi i64 [ [[ARG]], [[BB2]] ], [ [[TMP9]], [[BB5]] ]
+; CHECK-NEXT:    [[TMP9]] = add nsw i64 [[TMP8]], -1
+; CHECK-NEXT:    [[TMP10:%.*]] = load i64, i64* getelementptr inbounds ([100 x i64], [100 x i64]* @global, i64 0, i64 0), align 16
+; CHECK-NEXT:    [[TMP11:%.*]] = load i64, i64* getelementptr inbounds ([100 x i64], [100 x i64]* @global.1, i64 0, i64 0), align 16
+; CHECK-NEXT:    [[TMP12:%.*]] = mul nsw i64 [[TMP11]], [[TMP10]]
+; CHECK-NEXT:    [[TMP13:%.*]] = icmp eq i64 [[TMP12]], 0
+; CHECK-NEXT:    br i1 [[TMP13]], label [[BB5]], label [[BB14:%.*]]
+; CHECK:       bb14:
+; CHECK-NEXT:    br label [[BB15:%.*]]
+; CHECK:       bb15:
+; CHECK-NEXT:    [[TMP0:%.*]] = phi i64 [ [[TMP25:%.*]], [[BB15]] ], [ [[TMP12]], [[BB14]] ]
+; CHECK-NEXT:    [[TMP16:%.*]] = phi i64 [ [[TMP24:%.*]], [[BB15]] ], [ [[TMP11]], [[BB14]] ]
+; CHECK-NEXT:    [[TMP17:%.*]] = phi i64 [ [[TMP22:%.*]], [[BB15]] ], [ [[TMP10]], [[BB14]] ]
+; CHECK-NEXT:    [[TMP18:%.*]] = phi i64 [ [[TMP20:%.*]], [[BB15]] ], [ 0, [[BB14]] ]
+; CHECK-NEXT:    store i64 [[TMP0]], i64* [[TMP]], align 8
+; CHECK-NEXT:    [[TMP20]] = add nuw nsw i64 [[TMP18]], 1
+; CHECK-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [100 x i64], [100 x i64]* @global, i64 0, i64 [[TMP20]]
+; CHECK-NEXT:    [[TMP22]] = load i64, i64* [[TMP21]], align 8
+; CHECK-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [100 x i64], [100 x i64]* @global.1, i64 0, i64 [[TMP20]]
+; CHECK-NEXT:    [[TMP24]] = load i64, i64* [[TMP23]], align 8
+; CHECK-NEXT:    [[TMP25]] = mul nsw i64 [[TMP24]], [[TMP22]]
+; CHECK-NEXT:    [[TMP26:%.*]] = icmp eq i64 [[TMP20]], [[TMP25]]
+; CHECK-NEXT:    br i1 [[TMP26]], label [[BB4:%.*]], label [[BB15]]
+; CHECK:       bb27:
+; CHECK-NEXT:    br label [[BB28]]
+; CHECK:       bb28:
+; CHECK-NEXT:    ret i64 0
+;
+bb:
+  %tmp = alloca i64, align 8
+  %tmp1 = icmp eq i64 %arg, 0
+  br i1 %tmp1, label %bb28, label %bb2
+
+bb2:                                              ; preds = %bb
+  %tmp3 = bitcast i64* %tmp to i8*
+  br label %bb7
+
+bb4:                                              ; preds = %bb15
+  br label %bb5
+
+bb5:                                              ; preds = %bb7, %bb4
+  %tmp6 = icmp eq i64 %tmp9, 0
+  br i1 %tmp6, label %bb27, label %bb7
+
+bb7:                                              ; preds = %bb5, %bb2
+  %tmp8 = phi i64 [ %arg, %bb2 ], [ %tmp9, %bb5 ]
+  %tmp9 = add nsw i64 %tmp8, -1
+  %tmp10 = load i64, i64* getelementptr inbounds ([100 x i64], [100 x i64]* @global, i64 0, i64 0), align 16
+  %tmp11 = load i64, i64* getelementptr inbounds ([100 x i64], [100 x i64]* @global.1, i64 0, i64 0), align 16
+  %tmp12 = mul nsw i64 %tmp11, %tmp10
+  %tmp13 = icmp eq i64 %tmp12, 0
+  br i1 %tmp13, label %bb5, label %bb14
+
+bb14:                                             ; preds = %bb7
+  br label %bb15
+
+bb15:                                             ; preds = %bb15, %bb14
+  %tmp16 = phi i64 [ %tmp24, %bb15 ], [ %tmp11, %bb14 ]
+  %tmp17 = phi i64 [ %tmp22, %bb15 ], [ %tmp10, %bb14 ]
+  %tmp18 = phi i64 [ %tmp20, %bb15 ], [ 0, %bb14 ]
+;; This multiply is an op of phis which is really equivalent to phi(tmp25, tmp12)
+  %tmp19 = mul nsw i64 %tmp16, %tmp17
+  store i64 %tmp19, i64* %tmp, align 8
+  %tmp20 = add nuw nsw i64 %tmp18, 1
+  %tmp21 = getelementptr inbounds [100 x i64], [100 x i64]* @global, i64 0, i64 %tmp20
+  %tmp22 = load i64, i64* %tmp21, align 8
+  %tmp23 = getelementptr inbounds [100 x i64], [100 x i64]* @global.1, i64 0, i64 %tmp20
+  %tmp24 = load i64, i64* %tmp23, align 8
+  %tmp25 = mul nsw i64 %tmp24, %tmp22
+  %tmp26 = icmp eq i64 %tmp20, %tmp25
+  br i1 %tmp26, label %bb4, label %bb15
+
+bb27:                                             ; preds = %bb5
+  br label %bb28
+
+bb28:                                             ; preds = %bb27, %bb
+  ret i64 0
+}
+
+;; These icmps are all equivalent to phis of constants
+define i8 @test6(i8* %addr) {
+; CHECK-LABEL: @test6(
+; CHECK-NEXT:  entry-block:
+; CHECK-NEXT:    br label %main-loop
+; CHECK:       main-loop:
+; CHECK-NEXT:    [[TMP0:%.*]] = phi i1 [ true, %entry-block ], [ false, [[CORE:%.*]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = phi i1 [ false, %entry-block ], [ true, [[CORE]] ]
+; CHECK-NEXT:    [[PHI:%.*]] = phi i8 [ 0, %entry-block ], [ 1, [[CORE]] ]
+; CHECK-NEXT:    store volatile i8 0, i8* [[ADDR:%.*]]
+; CHECK-NEXT:    br i1 [[TMP0]], label %busy-wait-phi-0, label [[EXIT:%.*]]
+; CHECK:       busy-wait-phi-0:
+; CHECK-NEXT:    [[LOAD:%.*]] = load volatile i8, i8* [[ADDR]]
+; CHECK-NEXT:    [[ICMP:%.*]] = icmp eq i8 [[LOAD]], 0
+; CHECK-NEXT:    br i1 [[ICMP]], label %busy-wait-phi-0, label [[CORE]]
+; CHECK:       core:
+; CHECK-NEXT:    br i1 [[TMP1]], label [[TRAP:%.*]], label %main-loop
+; CHECK:       trap:
+; CHECK-NEXT:    ret i8 1
+; CHECK:       exit:
+; CHECK-NEXT:    ret i8 0
+;
+entry-block:
+  br label %main-loop
+
+main-loop:
+  %phi = phi i8 [ 0, %entry-block ], [ 1, %core ]
+  %switch_0 = icmp eq i8 %phi, 0
+  store volatile i8 0, i8* %addr
+  br i1 %switch_0, label %busy-wait-phi-0, label %exit
+
+busy-wait-phi-0:
+  %load = load volatile i8, i8* %addr
+  %icmp = icmp eq i8 %load, 0
+  br i1 %icmp, label %busy-wait-phi-0, label %core
+
+core:
+  %switch_1 = icmp eq i8 %phi, 1
+  br i1 %switch_1, label %trap, label %main-loop
+
+trap:
+  ret i8 1
+
+exit:
+  ret i8 0
+}
+
+; Test that we don't infinite loop simplifying
+; an undefined value that can go both ways.
+define void @test7() {
+; CHECK-LABEL: @test7(
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    br label [[BB1:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    br label [[BB1]]
+;
+bb:
+  br label %bb1
+
+bb1:                                              ; preds = %bb1, %bb
+  %tmp = phi i32 [ undef, %bb ], [ %tmp3, %bb1 ]
+  %tmp2 = icmp eq i32 %tmp, 0
+  %tmp3 = select i1 %tmp2, i32 1, i32 %tmp
+  br label %bb1
+}
+
+
+
+; Test that we get a consistent answer about what the
+; value of this undefined select is.
+define void @test8() {
+; CHECK-LABEL: @test8(
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    br label [[BB1:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    br label [[BB1]]
+;
+bb:
+  %tmp = select i1 undef, i8 0, i8 1
+  br label %bb1
+
+bb1:                                              ; preds = %bb1, %bb
+  %tmp2 = phi i8 [ %tmp4, %bb1 ], [ %tmp, %bb ]
+  %tmp3 = icmp eq i8 %tmp2, 0
+  %tmp4 = select i1 %tmp3, i8 1, i8 %tmp2
+  br label %bb1
+}
+
+
+;; Make sure we handle the case where we later come up with an expression that we need
+;; for a phi of ops.
+define void @test9() {
+; CHECK-LABEL: @test9(
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    br label [[BB1:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    br i1 undef, label [[BB1]], label [[BB2:%.*]]
+; CHECK:       bb2:
+; CHECK-NEXT:    br label [[BB6:%.*]]
+; CHECK:       bb6:
+; CHECK-NEXT:    [[TMP0:%.*]] = phi i32 [ -13, [[BB2]] ], [ [[TMP11:%.*]], [[BB6]] ]
+; CHECK-NEXT:    [[TMP7:%.*]] = phi i32 [ 1, [[BB2]] ], [ [[TMP8:%.*]], [[BB6]] ]
+; CHECK-NEXT:    [[TMP8]] = add nuw nsw i32 [[TMP7]], 1
+; CHECK-NEXT:    [[TMP11]] = add i32 -14, [[TMP8]]
+; CHECK-NEXT:    br label [[BB6]]
+;
+bb:
+  br label %bb1
+
+bb1:                                              ; preds = %bb1, %bb
+  br i1 undef, label %bb1, label %bb2
+
+bb2:                                              ; preds = %bb1
+  %tmp = select i1 true, i32 -14, i32 -10
+  %tmp3 = add i32 %tmp, 0
+  %tmp4 = select i1 true, i32 -14, i32 -10
+  %tmp5 = add i32 %tmp4, 0
+  br label %bb6
+
+bb6:                                              ; preds = %bb6, %bb2
+  %tmp7 = phi i32 [ 1, %bb2 ], [ %tmp13, %bb6 ]
+  %tmp8 = add nuw nsw i32 %tmp7, 1
+  %tmp9 = add i32 %tmp3, %tmp7
+  %tmp10 = select i1 false, i32 undef, i32 %tmp9
+  %tmp11 = add i32 %tmp5, %tmp8
+  %tmp12 = select i1 undef, i32 undef, i32 %tmp11
+  %tmp13 = add nuw nsw i32 %tmp7, 1
+  br label %bb6
+}
+
+;; Ensure that we revisit predicateinfo operands at the right points in time.
+define void @test10() {
+b:
+  %m = getelementptr i32, i32* null, i64 8
+  br label %g
+
+g:                                                ; preds = %i, %b
+  %n = phi i32* [ %h, %i ], [ null, %b ]
+  %h = getelementptr i32, i32* %n, i64 1
+  %j = icmp eq i32* %h, %m
+  br i1 %j, label %c, label %i
+
+i:                                                ; preds = %g
+  br i1 undef, label %k, label %g
+
+k:                                                ; preds = %i
+  %l = icmp eq i32* %n, %m
+  br i1 %l, label %c, label %o
+
+o:                                                ; preds = %k
+  br label %c
+
+c:                                                ; preds = %o, %k, %g
+  %0 = phi i32* [ undef, %o ], [ %m, %k ], [ %m, %g ]
+  ret void
+}
diff --git a/llvm/test/Transforms/NewGVN/storeoverstore.ll b/llvm/test/Transforms/NewGVN/storeoverstore.ll
index 49b55d430dc..28f5eea03ce 100644
--- a/llvm/test/Transforms/NewGVN/storeoverstore.ll
+++ b/llvm/test/Transforms/NewGVN/storeoverstore.ll
@@ -13,11 +13,11 @@ define i32 @foo(i32*, i32)  {
 ; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP1:%.*]], 0
 ; CHECK-NEXT:    br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP5:%.*]]
 ; CHECK:         br label [[TMP5]]
-; CHECK:         [[DOT0:%.*]] = phi i32 [ 10, [[TMP4]] ], [ 5, [[TMP2:%.*]] ]
-; CHECK-NEXT:    br i1 [[TMP3]], label [[TMP6:%.*]], label [[TMP8:%.*]]
-; CHECK:         [[TMP7:%.*]] = add nsw i32 [[DOT0]], 5
-; CHECK-NEXT:    br label [[TMP8]]
-; CHECK:         [[DOT1:%.*]] = phi i32 [ [[TMP7]], [[TMP6]] ], [ [[DOT0]], [[TMP5]] ]
+; CHECK:         [[TMP6:%.*]] = phi i32 [ 15, [[TMP4]] ], [ 10, [[TMP2:%.*]] ]
+; CHECK-NEXT:    [[DOT0:%.*]] = phi i32 [ 10, [[TMP4]] ], [ 5, [[TMP2]] ]
+; CHECK-NEXT:    br i1 [[TMP3]], label [[TMP7:%.*]], label [[TMP8:%.*]]
+; CHECK:         br label [[TMP8]]
+; CHECK:         [[DOT1:%.*]] = phi i32 [ [[TMP6]], [[TMP7]] ], [ [[DOT0]], [[TMP5]] ]
 ; CHECK-NEXT:    ret i32 [[DOT1]]
 ;
   store i32 5, i32* %0, align 4
@@ -54,11 +54,11 @@ define i32 @foo2(i32*, i32)  {
 ; CHECK-NEXT:    br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP5:%.*]]
 ; CHECK:         br label [[TMP6:%.*]]
 ; CHECK:         br label [[TMP6]]
-; CHECK:         [[DOT0:%.*]] = phi i32 [ 10, [[TMP4]] ], [ 5, [[TMP5]] ]
-; CHECK-NEXT:    br i1 [[TMP3]], label [[TMP7:%.*]], label [[TMP9:%.*]]
-; CHECK:         [[TMP8:%.*]] = add nsw i32 [[DOT0]], 5
-; CHECK-NEXT:    br label [[TMP9]]
-; CHECK:         [[DOT1:%.*]] = phi i32 [ [[TMP8]], [[TMP7]] ], [ [[DOT0]], [[TMP6]] ]
+; CHECK:         [[TMP7:%.*]] = phi i32 [ 15, [[TMP4]] ], [ 10, [[TMP5]] ]
+; CHECK-NEXT:    [[DOT0:%.*]] = phi i32 [ 10, [[TMP4]] ], [ 5, [[TMP5]] ]
+; CHECK-NEXT:    br i1 [[TMP3]], label [[TMP8:%.*]], label [[TMP9:%.*]]
+; CHECK:         br label [[TMP9]]
+; CHECK:         [[DOT1:%.*]] = phi i32 [ [[TMP7]], [[TMP8]] ], [ [[DOT0]], [[TMP6]] ]
 ; CHECK-NEXT:    ret i32 [[DOT1]]
 ;
   store i32 5, i32* %0, align 4