diff options
Diffstat (limited to 'llvm')
-rw-r--r-- | llvm/lib/Transforms/Scalar/NewGVN.cpp | 642 | ||||
-rw-r--r-- | llvm/test/Transforms/NewGVN/completeness.ll | 415 | ||||
-rw-r--r-- | llvm/test/Transforms/NewGVN/storeoverstore.ll | 20 |
3 files changed, 950 insertions, 127 deletions
diff --git a/llvm/lib/Transforms/Scalar/NewGVN.cpp b/llvm/lib/Transforms/Scalar/NewGVN.cpp index 4cca61003b6..133049004ab 100644 --- a/llvm/lib/Transforms/Scalar/NewGVN.cpp +++ b/llvm/lib/Transforms/Scalar/NewGVN.cpp @@ -30,9 +30,19 @@ /// tracks what operations have a given value number (IE it also tracks the /// reverse mapping from value number -> operations with that value number), so /// that it only needs to reprocess the instructions that are affected when -/// something's value number changes. The rest of the algorithm is devoted to -/// performing symbolic evaluation, forward propagation, and simplification of -/// operations based on the value numbers deduced so far. +/// something's value number changes. The vast majority of complexity and code +/// in this file is devoted to tracking what value numbers could change for what +/// instructions when various things happen. The rest of the algorithm is +/// devoted to performing symbolic evaluation, forward propagation, and +/// simplification of operations based on the value numbers deduced so far +/// +/// In order to make the GVN mostly-complete, we use a technique derived from +/// "Detection of Redundant Expressions: A Complete and Polynomial-time +/// Algorithm in SSA" by R.R. Pai. The source of incompleteness in most SSA +/// based GVN algorithms is related to their inability to detect equivalence +/// between phi of ops (IE phi(a+b, c+d)) and op of phis (phi(a,c) + phi(b, d)). +/// We resolve this issue by generating the equivalent "phi of ops" form for +/// each op of phis we see, in a way that only takes polynomial time to resolve. /// /// We also do not perform elimination by using any published algorithm. All /// published algorithms are O(Instructions). Instead, we use a technique that @@ -105,9 +115,13 @@ STATISTIC(NumGVNSortedLeaderChanges, "Number of sorted leader changes"); STATISTIC(NumGVNAvoidedSortedLeaderChanges, "Number of avoided sorted leader changes"); STATISTIC(NumGVNDeadStores, "Number of redundant/dead stores eliminated"); +STATISTIC(NumGVNPHIOfOpsCreated, "Number of PHI of ops created"); +STATISTIC(NumGVNPHIOfOpsEliminations, + "Number of things eliminated using PHI of ops"); DEBUG_COUNTER(VNCounter, "newgvn-vn", "Controls which instructions are value numbered") - +DEBUG_COUNTER(PHIOfOpsCounter, "newgvn-phi", + "Controls which instructions we create phi of ops for") // Currently store defining access refinement is too slow due to basicaa being // egregiously slow. This flag lets us keep it working while we work on this // issue. @@ -170,10 +184,9 @@ private: } } // See if we really were the root of a component, by seeing if we still have - // our DFSNumber. - // If we do, we are the root of the component, and we have completed a - // component. If we do not, - // we are not the root of a component, and belong on the component stack. + // our DFSNumber. If we do, we are the root of the component, and we have + // completed a component. If we do not, we are not the root of a component, + // and belong on the component stack. if (Root.lookup(I) == OurDFS) { unsigned ComponentID = Components.size(); Components.resize(Components.size() + 1); @@ -365,6 +378,7 @@ private: int StoreCount = 0; }; +struct HashedExpression; namespace llvm { template <> struct DenseMapInfo<const Expression *> { static const Expression *getEmptyKey() { @@ -377,9 +391,11 @@ template <> struct DenseMapInfo<const Expression *> { Val <<= PointerLikeTypeTraits<const Expression *>::NumLowBitsAvailable; return reinterpret_cast<const Expression *>(Val); } - static unsigned getHashValue(const Expression *V) { - return static_cast<unsigned>(V->getHashValue()); + static unsigned getHashValue(const Expression *E) { + return static_cast<unsigned>(E->getHashValue()); } + static unsigned getHashValue(const HashedExpression &HE); + static bool isEqual(const HashedExpression &LHS, const Expression *RHS); static bool isEqual(const Expression *LHS, const Expression *RHS) { if (LHS == RHS) return true; @@ -391,6 +407,26 @@ template <> struct DenseMapInfo<const Expression *> { }; } // end namespace llvm +// This is just a wrapper around Expression that computes the hash value once at +// creation time. Hash values for an Expression can't change once they are +// inserted into the DenseMap (it breaks DenseMap), so they must be immutable at +// that point anyway. +struct HashedExpression { + const Expression *E; + unsigned HashVal; + HashedExpression(const Expression *E) + : E(E), HashVal(DenseMapInfo<const Expression *>::getHashValue(E)) {} +}; + +unsigned +DenseMapInfo<const Expression *>::getHashValue(const HashedExpression &HE) { + return HE.HashVal; +} +bool DenseMapInfo<const Expression *>::isEqual(const HashedExpression &LHS, + const Expression *RHS) { + return isEqual(LHS.E, RHS); +} + namespace { class NewGVN { Function &F; @@ -428,6 +464,33 @@ class NewGVN { // Value Mappings. DenseMap<Value *, CongruenceClass *> ValueToClass; DenseMap<Value *, const Expression *> ValueToExpression; + // Value PHI handling, used to make equivalence between phi(op, op) and + // op(phi, phi). + // These mappings just store various data that would normally be part of the + // IR. + DenseSet<const Instruction *> PHINodeUses; + // Map a temporary instruction we created to a parent block. + DenseMap<const Value *, BasicBlock *> TempToBlock; + // Map between the temporary phis we created and the real instructions they + // are known equivalent to. + DenseMap<const Value *, PHINode *> RealToTemp; + // In order to know when we should re-process instructions that have + // phi-of-ops, we track the set of expressions that they needed as + // leaders. When we discover new leaders for those expressions, we process the + // associated phi-of-op instructions again in case they have changed. The + // other way they may change is if they had leaders, and those leaders + // disappear. However, at the point they have leaders, there are uses of the + // relevant operands in the created phi node, and so they will get reprocessed + // through the normal user marking we perform. + mutable DenseMap<const Value *, SmallPtrSet<Value *, 2>> AdditionalUsers; + DenseMap<const Expression *, SmallPtrSet<Instruction *, 2>> + ExpressionToPhiOfOps; + // Map from basic block to the temporary operations we created + DenseMap<const BasicBlock *, SmallVector<Instruction *, 8>> PHIOfOpsPHIs; + // Map from temporary operation to MemoryAccess. + DenseMap<const Instruction *, MemoryUseOrDef *> TempToMemory; + // Set of all temporary instructions we created. + DenseSet<Instruction *> AllTempInstructions; // Mapping from predicate info we used to the instructions we used it with. // In order to correctly ensure propagation, we must keep track of what @@ -460,8 +523,8 @@ class NewGVN { enum MemoryPhiState { MPS_Invalid, MPS_TOP, MPS_Equivalent, MPS_Unique }; DenseMap<const MemoryPhi *, MemoryPhiState> MemoryPhiState; - enum PhiCycleState { PCS_Unknown, PCS_CycleFree, PCS_Cycle }; - mutable DenseMap<const PHINode *, PhiCycleState> PhiCycleState; + enum InstCycleState { ICS_Unknown, ICS_CycleFree, ICS_Cycle }; + mutable DenseMap<const Instruction *, InstCycleState> InstCycleState; // Expression to class mapping. using ExpressionClassMap = DenseMap<const Expression *, CongruenceClass *>; ExpressionClassMap ExpressionToClass; @@ -519,7 +582,7 @@ private: const Expression *createBinaryExpression(unsigned, Type *, Value *, Value *) const; PHIExpression *createPHIExpression(Instruction *, bool &HasBackEdge, - bool &AllConstant) const; + bool &OriginalOpsConstant) const; const VariableExpression *createVariableExpression(Value *) const; const ConstantExpression *createConstantExpression(Constant *) const; const Expression *createVariableOrConstant(Value *V) const; @@ -560,6 +623,9 @@ private: return CClass; } void initializeCongruenceClasses(Function &F); + const Expression *makePossiblePhiOfOps(Instruction *, bool, + SmallPtrSetImpl<Value *> &); + void addPhiOfOps(PHINode *Op, BasicBlock *BB, Instruction *ExistingValue); // Value number an Instruction or MemoryPhi. void valueNumberMemoryPhi(MemoryPhi *); @@ -568,7 +634,8 @@ private: // Symbolic evaluation. const Expression *checkSimplificationResults(Expression *, Instruction *, Value *) const; - const Expression *performSymbolicEvaluation(Value *) const; + const Expression *performSymbolicEvaluation(Value *, + SmallPtrSetImpl<Value *> &) const; const Expression *performSymbolicLoadCoercion(Type *, Value *, LoadInst *, Instruction *, MemoryAccess *) const; @@ -593,7 +660,7 @@ private: bool setMemoryClass(const MemoryAccess *From, CongruenceClass *To); CongruenceClass *getMemoryClass(const MemoryAccess *MA) const; const MemoryAccess *lookupMemoryLeader(const MemoryAccess *) const; - bool isMemoryAccessTop(const MemoryAccess *) const; + bool isMemoryAccessTOP(const MemoryAccess *) const; // Ranking unsigned int getRank(const Value *) const; @@ -617,6 +684,7 @@ private: void replaceInstruction(Instruction *, Value *); void markInstructionForDeletion(Instruction *); void deleteInstructionsInBlock(BasicBlock *); + Value *findPhiOfOpsLeader(const Expression *E, const BasicBlock *BB) const; // New instruction creation. void handleNewInstruction(Instruction *){}; @@ -628,8 +696,10 @@ private: void markPredicateUsersTouched(Instruction *); void markValueLeaderChangeTouched(CongruenceClass *CC); void markMemoryLeaderChangeTouched(CongruenceClass *CC); + void markPhiOfOpsChanged(const HashedExpression &HE); void addPredicateUsers(const PredicateBase *, Instruction *) const; void addMemoryUsers(const MemoryAccess *To, MemoryAccess *U) const; + void addAdditionalUsers(Value *To, Value *User) const; // Main loop of value numbering void iterateTouchedInstructions(); @@ -637,7 +707,7 @@ private: // Utilities. void cleanupTables(); std::pair<unsigned, unsigned> assignDFSNumbers(BasicBlock *, unsigned); - void updateProcessedCount(Value *V); + void updateProcessedCount(const Value *V); void verifyMemoryCongruency() const; void verifyIterationSettled(Function &F); void verifyStoreExpressions() const; @@ -645,6 +715,10 @@ private: const MemoryAccess *, const MemoryAccess *) const; BasicBlock *getBlockForValue(Value *V) const; void deleteExpression(const Expression *E) const; + MemoryUseOrDef *getMemoryAccess(const Instruction *) const; + MemoryAccess *getDefiningAccess(const MemoryAccess *) const; + MemoryPhi *getMemoryAccess(const BasicBlock *) const; + template <class T, class Range> T *getMinDFSOfRange(const Range &) const; unsigned InstrToDFSNum(const Value *V) const { assert(isa<Instruction>(V) && "This should not be used for MemoryAccesses"); return InstrDFS.lookup(V); @@ -664,8 +738,8 @@ private: ? InstrToDFSNum(cast<MemoryUseOrDef>(MA)->getMemoryInst()) : InstrDFS.lookup(MA); } - bool isCycleFree(const PHINode *PN) const; - template <class T, class Range> T *getMinDFSOfRange(const Range &) const; + bool isCycleFree(const Instruction *) const; + bool isBackedge(BasicBlock *From, BasicBlock *To) const; // Debug counter info. When verifying, we have to reset the value numbering // debug counter to the same state it started in to get the same results. std::pair<int, int> StartingVNCounter; @@ -693,20 +767,46 @@ bool StoreExpression::equals(const Expression &Other) const { return true; } +// Determine if the edge From->To is a backedge +bool NewGVN::isBackedge(BasicBlock *From, BasicBlock *To) const { + if (From == To) + return true; + auto *FromDTN = DT->getNode(From); + auto *ToDTN = DT->getNode(To); + return RPOOrdering.lookup(FromDTN) >= RPOOrdering.lookup(ToDTN); +} + #ifndef NDEBUG static std::string getBlockName(const BasicBlock *B) { return DOTGraphTraits<const Function *>::getSimpleNodeLabel(B, nullptr); } #endif +// Get a MemoryAccess for an instruction, fake or real. +MemoryUseOrDef *NewGVN::getMemoryAccess(const Instruction *I) const { + auto *Result = MSSA->getMemoryAccess(I); + return Result ? Result : TempToMemory.lookup(I); +} + +// Get a MemoryPhi for a basic block. These are all real. +MemoryPhi *NewGVN::getMemoryAccess(const BasicBlock *BB) const { + return MSSA->getMemoryAccess(BB); +} + // Get the basic block from an instruction/memory value. BasicBlock *NewGVN::getBlockForValue(Value *V) const { - if (auto *I = dyn_cast<Instruction>(V)) - return I->getParent(); - else if (auto *MP = dyn_cast<MemoryPhi>(V)) - return MP->getBlock(); - llvm_unreachable("Should have been able to figure out a block for our value"); - return nullptr; + if (auto *I = dyn_cast<Instruction>(V)) { + auto *Parent = I->getParent(); + if (Parent) + return Parent; + Parent = TempToBlock.lookup(V); + assert(Parent && "Every fake instruction should have a block"); + return Parent; + } + + auto *MP = dyn_cast<MemoryPhi>(V); + assert(MP && "Should have been an instruction or a MemoryPhi"); + return MP->getBlock(); } // Delete a definitely dead expression, so it can be reused by the expression @@ -718,10 +818,9 @@ void NewGVN::deleteExpression(const Expression *E) const { const_cast<BasicExpression *>(BE)->deallocateOperands(ArgRecycler); ExpressionAllocator.Deallocate(E); } - PHIExpression *NewGVN::createPHIExpression(Instruction *I, bool &HasBackedge, - bool &AllConstant) const { - BasicBlock *PHIBlock = I->getParent(); + bool &OriginalOpsConstant) const { + BasicBlock *PHIBlock = getBlockForValue(I); auto *PN = cast<PHINode>(I); auto *E = new (ExpressionAllocator) PHIExpression(PN->getNumOperands(), PHIBlock); @@ -730,8 +829,6 @@ PHIExpression *NewGVN::createPHIExpression(Instruction *I, bool &HasBackedge, E->setType(I->getType()); E->setOpcode(I->getOpcode()); - unsigned PHIRPO = RPOOrdering.lookup(DT->getNode(PHIBlock)); - // NewGVN assumes the operands of a PHI node are in a consistent order across // PHIs. LLVM doesn't seem to always guarantee this. While we need to fix // this in LLVM at some point we don't want GVN to find wrong congruences. @@ -752,14 +849,12 @@ PHIExpression *NewGVN::createPHIExpression(Instruction *I, bool &HasBackedge, auto Filtered = make_filter_range(PHIOperands, [&](const Use *U) { return ReachableEdges.count({PN->getIncomingBlock(*U), PHIBlock}); }); - std::transform(Filtered.begin(), Filtered.end(), op_inserter(E), [&](const Use *U) -> Value * { auto *BB = PN->getIncomingBlock(*U); - auto *DTN = DT->getNode(BB); - if (RPOOrdering.lookup(DTN) >= PHIRPO) - HasBackedge = true; - AllConstant &= isa<UndefValue>(*U) || isa<Constant>(*U); + HasBackedge = HasBackedge || isBackedge(BB, PHIBlock); + OriginalOpsConstant = + OriginalOpsConstant && isa<Constant>(*U); // Don't try to transform self-defined phis. if (*U == PN) @@ -784,7 +879,7 @@ bool NewGVN::setBasicExpressionInfo(Instruction *I, BasicExpression *E) const { // whether all members are constant. std::transform(I->op_begin(), I->op_end(), op_inserter(E), [&](Value *O) { auto Operand = lookupOperandLeader(O); - AllConstant &= isa<Constant>(Operand); + AllConstant = AllConstant && isa<Constant>(Operand); return Operand; }); @@ -1053,7 +1148,7 @@ const MemoryAccess *NewGVN::lookupMemoryLeader(const MemoryAccess *MA) const { // Return true if the MemoryAccess is really equivalent to everything. This is // equivalent to the lattice value "TOP" in most lattices. This is the initial // state of all MemoryAccesses. -bool NewGVN::isMemoryAccessTop(const MemoryAccess *MA) const { +bool NewGVN::isMemoryAccessTOP(const MemoryAccess *MA) const { return getMemoryClass(MA) == TOPClass; } @@ -1099,7 +1194,7 @@ const Expression *NewGVN::performSymbolicStoreEvaluation(Instruction *I) const { // Unlike loads, we never try to eliminate stores, so we do not check if they // are simple and avoid value numbering them. auto *SI = cast<StoreInst>(I); - auto *StoreAccess = MSSA->getMemoryAccess(SI); + auto *StoreAccess = getMemoryAccess(SI); // Get the expression, if any, for the RHS of the MemoryDef. const MemoryAccess *StoreRHS = StoreAccess->getDefiningAccess(); if (EnableStoreRefinement) @@ -1136,7 +1231,7 @@ const Expression *NewGVN::performSymbolicStoreEvaluation(Instruction *I) const { dyn_cast<LoadInst>(lookupOperandLeader(SI->getValueOperand()))) { if ((lookupOperandLeader(LI->getPointerOperand()) == lookupOperandLeader(SI->getPointerOperand())) && - (lookupMemoryLeader(MSSA->getMemoryAccess(LI)->getDefiningAccess()) == + (lookupMemoryLeader(getMemoryAccess(LI)->getDefiningAccess()) == StoreRHS)) return createVariableExpression(LI); } @@ -1240,8 +1335,9 @@ const Expression *NewGVN::performSymbolicLoadEvaluation(Instruction *I) const { // Load of undef is undef. if (isa<UndefValue>(LoadAddressLeader)) return createConstantExpression(UndefValue::get(LI->getType())); - - MemoryAccess *DefiningAccess = MSSAWalker->getClobberingMemoryAccess(I); + MemoryAccess *OriginalAccess = getMemoryAccess(I); + MemoryAccess *DefiningAccess = + MSSAWalker->getClobberingMemoryAccess(OriginalAccess); if (!MSSA->isLiveOnEntryDef(DefiningAccess)) { if (auto *MD = dyn_cast<MemoryDef>(DefiningAccess)) { @@ -1330,6 +1426,7 @@ NewGVN::performSymbolicPredicateInfoEvaluation(Instruction *I) const { // operands are equal, because assumes must always be true. if (CmpInst::isTrueWhenEqual(Predicate)) { addPredicateUsers(PI, I); + addAdditionalUsers(Cmp->getOperand(0), I); return createVariableOrConstant(FirstOp); } } @@ -1342,6 +1439,7 @@ NewGVN::performSymbolicPredicateInfoEvaluation(Instruction *I) const { if ((PBranch->TrueEdge && Predicate == CmpInst::ICMP_EQ) || (!PBranch->TrueEdge && Predicate == CmpInst::ICMP_NE)) { addPredicateUsers(PI, I); + addAdditionalUsers(Cmp->getOperand(0), I); return createVariableOrConstant(FirstOp); } // Handle the special case of floating point. @@ -1349,6 +1447,7 @@ NewGVN::performSymbolicPredicateInfoEvaluation(Instruction *I) const { (!PBranch->TrueEdge && Predicate == CmpInst::FCMP_UNE)) && isa<ConstantFP>(FirstOp) && !cast<ConstantFP>(FirstOp)->isZero()) { addPredicateUsers(PI, I); + addAdditionalUsers(Cmp->getOperand(0), I); return createConstantExpression(cast<Constant>(FirstOp)); } } @@ -1429,34 +1528,33 @@ bool NewGVN::setMemoryClass(const MemoryAccess *From, return Changed; } -// Determine if a phi is cycle-free. That means the values in the phi don't -// depend on any expressions that can change value as a result of the phi. -// For example, a non-cycle free phi would be v = phi(0, v+1). -bool NewGVN::isCycleFree(const PHINode *PN) const { - // In order to compute cycle-freeness, we do SCC finding on the phi, and see - // what kind of SCC it ends up in. If it is a singleton, it is cycle-free. - // If it is not in a singleton, it is only cycle free if the other members are - // all phi nodes (as they do not compute anything, they are copies). TODO: - // There are likely a few other intrinsics or expressions that could be - // included here, but this happens so infrequently already that it is not - // likely to be worth it. - auto PCS = PhiCycleState.lookup(PN); - if (PCS == PCS_Unknown) { - SCCFinder.Start(PN); - auto &SCC = SCCFinder.getComponentFor(PN); +// Determine if a instruction is cycle-free. That means the values in the +// instruction don't depend on any expressions that can change value as a result +// of the instruction. For example, a non-cycle free instruction would be v = +// phi(0, v+1). +bool NewGVN::isCycleFree(const Instruction *I) const { + // In order to compute cycle-freeness, we do SCC finding on the instruction, + // and see what kind of SCC it ends up in. If it is a singleton, it is + // cycle-free. If it is not in a singleton, it is only cycle free if the + // other members are all phi nodes (as they do not compute anything, they are + // copies). + auto ICS = InstCycleState.lookup(I); + if (ICS == ICS_Unknown) { + SCCFinder.Start(I); + auto &SCC = SCCFinder.getComponentFor(I); // It's cycle free if it's size 1 or or the SCC is *only* phi nodes. if (SCC.size() == 1) - PhiCycleState.insert({PN, PCS_CycleFree}); + InstCycleState.insert({I, ICS_CycleFree}); else { bool AllPhis = llvm::all_of(SCC, [](const Value *V) { return isa<PHINode>(V); }); - PCS = AllPhis ? PCS_CycleFree : PCS_Cycle; + ICS = AllPhis ? ICS_CycleFree : ICS_Cycle; for (auto *Member : SCC) if (auto *MemberPhi = dyn_cast<PHINode>(Member)) - PhiCycleState.insert({MemberPhi, PCS}); + InstCycleState.insert({MemberPhi, ICS}); } } - if (PCS == PCS_Cycle) + if (ICS == ICS_Cycle) return false; return true; } @@ -1518,7 +1616,7 @@ const Expression *NewGVN::performSymbolicPHIEvaluation(Instruction *I) const { // constants, or all operands are ignored but the undef, it also must be // cycle free. if (!AllConstant && HasBackedge && NumOps > 0 && - !isa<UndefValue>(AllSameValue) && !isCycleFree(cast<PHINode>(I))) + !isa<UndefValue>(AllSameValue) && !isCycleFree(I)) return E; // Only have to check for instructions @@ -1689,8 +1787,18 @@ const Expression *NewGVN::performSymbolicCmpEvaluation(Instruction *I) const { return createExpression(I); } +// Return true if V is a value that will always be available (IE can +// be placed anywhere) in the function. We don't do globals here +// because they are often worse to put in place. +// TODO: Separate cost from availability +static bool alwaysAvailable(Value *V) { + return isa<Constant>(V) || isa<Argument>(V); +} + // Substitute and symbolize the value before value numbering. -const Expression *NewGVN::performSymbolicEvaluation(Value *V) const { +const Expression * +NewGVN::performSymbolicEvaluation(Value *V, + SmallPtrSetImpl<Value *> &Visited) const { const Expression *E = nullptr; if (auto *C = dyn_cast<Constant>(V)) E = createConstantExpression(C); @@ -1768,12 +1876,22 @@ const Expression *NewGVN::performSymbolicEvaluation(Value *V) const { return E; } +void NewGVN::addAdditionalUsers(Value *To, Value *User) const { + AdditionalUsers[To].insert(User); +} + void NewGVN::markUsersTouched(Value *V) { // Now mark the users as touched. for (auto *User : V->users()) { assert(isa<Instruction>(User) && "Use of value not within an instruction?"); TouchedInstructions.set(InstrToDFSNum(User)); } + const auto Result = AdditionalUsers.find(V); + if (Result != AdditionalUsers.end()) { + for (auto *User : Result->second) + TouchedInstructions.set(InstrToDFSNum(User)); + AdditionalUsers.erase(Result); + } } void NewGVN::addMemoryUsers(const MemoryAccess *To, MemoryAccess *U) const { @@ -1800,6 +1918,10 @@ void NewGVN::markMemoryUsersTouched(const MemoryAccess *MA) { // Add I to the set of users of a given predicate. void NewGVN::addPredicateUsers(const PredicateBase *PB, Instruction *I) const { + // Don't add temporary instructions to the user lists. + if (AllTempInstructions.count(I)) + return; + if (auto *PBranch = dyn_cast<PredicateBranch>(PB)) PredicateToUsers[PBranch->Condition].insert(I); else if (auto *PAssume = dyn_cast<PredicateBranch>(PB)) @@ -1855,11 +1977,11 @@ const MemoryAccess *NewGVN::getNextMemoryLeader(CongruenceClass *CC) const { assert(!CC->definesNoMemory() && "Can't get next leader if there is none"); if (CC->getStoreCount() > 0) { if (auto *NL = dyn_cast_or_null<StoreInst>(CC->getNextLeader().first)) - return MSSA->getMemoryAccess(NL); + return getMemoryAccess(NL); // Find the store with the minimum DFS number. auto *V = getMinDFSOfRange<Value>(make_filter_range( *CC, [&](const Value *V) { return isa<StoreInst>(V); })); - return MSSA->getMemoryAccess(cast<StoreInst>(V)); + return getMemoryAccess(cast<StoreInst>(V)); } assert(CC->getStoreCount() == 0); @@ -1982,7 +2104,7 @@ void NewGVN::moveValueToNewCongruenceClass(Instruction *I, const Expression *E, // instructions before. // If it's not a memory use, set the MemoryAccess equivalence - auto *InstMA = dyn_cast_or_null<MemoryDef>(MSSA->getMemoryAccess(I)); + auto *InstMA = dyn_cast_or_null<MemoryDef>(getMemoryAccess(I)); if (InstMA) moveMemoryToNewCongruenceClass(I, InstMA, OldClass, NewClass); ValueToClass[I] = NewClass; @@ -2014,21 +2136,31 @@ void NewGVN::moveValueToNewCongruenceClass(Instruction *I, const Expression *E, } } +// For a given expression, mark the phi of ops instructions that could have +// changed as a result. +void NewGVN::markPhiOfOpsChanged(const HashedExpression &HE) { + auto PhiOfOpsSet = ExpressionToPhiOfOps.find_as(HE); + if (PhiOfOpsSet != ExpressionToPhiOfOps.end()) { + for (auto I : PhiOfOpsSet->second) + TouchedInstructions.set(InstrToDFSNum(I)); + ExpressionToPhiOfOps.erase(PhiOfOpsSet); + } +} // Perform congruence finding on a given value numbering expression. void NewGVN::performCongruenceFinding(Instruction *I, const Expression *E) { // This is guaranteed to return something, since it will at least find // TOP. - CongruenceClass *IClass = ValueToClass[I]; assert(IClass && "Should have found a IClass"); // Dead classes should have been eliminated from the mapping. assert(!IClass->isDead() && "Found a dead class"); - CongruenceClass *EClass; + CongruenceClass *EClass = nullptr; + HashedExpression HE(E); if (const auto *VE = dyn_cast<VariableExpression>(E)) { - EClass = ValueToClass[VE->getVariableValue()]; + EClass = ValueToClass.lookup(VE->getVariableValue()); } else { - auto lookupResult = ExpressionToClass.insert({E, nullptr}); + auto lookupResult = ExpressionToClass.insert_as({E, nullptr}, HE); // If it's not in the value table, create a new congruence class. if (lookupResult.second) { @@ -2077,10 +2209,13 @@ void NewGVN::performCongruenceFinding(Instruction *I, const Expression *E) { if (ClassChanged || LeaderChanged) { DEBUG(dbgs() << "New class " << EClass->getID() << " for expression " << *E << "\n"); - if (ClassChanged) + if (ClassChanged) { moveValueToNewCongruenceClass(I, E, IClass, EClass); + markPhiOfOpsChanged(HE); + } + markUsersTouched(I); - if (MemoryAccess *MA = MSSA->getMemoryAccess(I)) + if (MemoryAccess *MA = getMemoryAccess(I)) markMemoryUsersTouched(MA); if (auto *CI = dyn_cast<CmpInst>(I)) markPredicateUsersTouched(CI); @@ -2118,7 +2253,7 @@ void NewGVN::updateReachableEdge(BasicBlock *From, BasicBlock *To) { // impact predicates. Otherwise, only mark the phi nodes as touched, as // they are the only thing that depend on new edges. Anything using their // values will get propagated to if necessary. - if (MemoryAccess *MemPhi = MSSA->getMemoryAccess(To)) + if (MemoryAccess *MemPhi = getMemoryAccess(To)) TouchedInstructions.set(InstrToDFSNum(MemPhi)); auto BI = To->begin(); @@ -2126,6 +2261,12 @@ void NewGVN::updateReachableEdge(BasicBlock *From, BasicBlock *To) { TouchedInstructions.set(InstrToDFSNum(&*BI)); ++BI; } + const auto PHIResult = PHIOfOpsPHIs.find(To); + if (PHIResult != PHIOfOpsPHIs.end()) { + const auto &PHIs = PHIResult->second; + for (auto I : PHIs) + TouchedInstructions.set(InstrToDFSNum(I)); + } } } } @@ -2215,7 +2356,7 @@ void NewGVN::processOutgoingEdges(TerminatorInst *TI, BasicBlock *B) { // This also may be a memory defining terminator, in which case, set it // equivalent only to itself. // - auto *MA = MSSA->getMemoryAccess(TI); + auto *MA = getMemoryAccess(TI); if (MA && !isa<MemoryUse>(MA)) { auto *CC = ensureLeaderOfMemoryClass(MA); if (setMemoryClass(MA, CC)) @@ -2224,6 +2365,158 @@ void NewGVN::processOutgoingEdges(TerminatorInst *TI, BasicBlock *B) { } } +void NewGVN::addPhiOfOps(PHINode *Op, BasicBlock *BB, + Instruction *ExistingValue) { + InstrDFS[Op] = InstrToDFSNum(ExistingValue); + AllTempInstructions.insert(Op); + PHIOfOpsPHIs[BB].push_back(Op); + TempToBlock[Op] = BB; + if (ExistingValue) + RealToTemp[ExistingValue] = Op; +} + +static bool okayForPHIOfOps(const Instruction *I) { + return isa<BinaryOperator>(I) || isa<SelectInst>(I) || isa<CmpInst>(I) || + isa<LoadInst>(I); +} + +// When we see an instruction that is an op of phis, generate the equivalent phi +// of ops form. +const Expression * +NewGVN::makePossiblePhiOfOps(Instruction *I, bool HasBackedge, + SmallPtrSetImpl<Value *> &Visited) { + if (!okayForPHIOfOps(I)) + return nullptr; + + if (!Visited.insert(I).second) + return nullptr; + // For now, we require the instruction be cycle free because we don't + // *always* create a phi of ops for instructions that could be done as phi + // of ops, we only do it if we think it is useful. If we did do it all the + // time, we could remove the cycle free check. + if (!isCycleFree(I)) + return nullptr; + + unsigned IDFSNum = InstrToDFSNum(I); + // Pretty much all of the instructions we can convert to phi of ops over a + // backedge that are adds, are really induction variables, and those are + // pretty much pointless to convert. This is very coarse-grained for a + // test, so if we do find some value, we can change it later. + // But otherwise, what can happen is we convert the induction variable from + // + // i = phi (0, tmp) + // tmp = i + 1 + // + // to + // i = phi (0, tmpphi) + // tmpphi = phi(1, tmpphi+1) + // + // Which we don't want to happen. We could just avoid this for all non-cycle + // free phis, and we made go that route. + if (HasBackedge && I->getOpcode() == Instruction::Add) + return nullptr; + + SmallPtrSet<const Value *, 8> ProcessedPHIs; + // TODO: We don't do phi translation on memory accesses because it's + // complicated. For a load, we'd need to be able to simulate a new memoryuse, + // which we don't have a good way of doing ATM. + auto *MemAccess = getMemoryAccess(I); + // If the memory operation is defined by a memory operation this block that + // isn't a MemoryPhi, transforming the pointer backwards through a scalar phi + // can't help, as it would still be killed by that memory operation. + if (MemAccess && !isa<MemoryPhi>(MemAccess->getDefiningAccess()) && + MemAccess->getDefiningAccess()->getBlock() == I->getParent()) + return nullptr; + + // Convert op of phis to phi of ops + for (auto &Op : I->operands()) { + if (!isa<PHINode>(Op)) + continue; + auto *OpPHI = cast<PHINode>(Op); + // No point in doing this for one-operand phis. + if (OpPHI->getNumOperands() == 1) + continue; + if (!DebugCounter::shouldExecute(PHIOfOpsCounter)) + return nullptr; + SmallVector<std::pair<Value *, BasicBlock *>, 4> Ops; + auto *PHIBlock = getBlockForValue(OpPHI); + for (auto PredBB : OpPHI->blocks()) { + Value *FoundVal = nullptr; + // We could just skip unreachable edges entirely but it's tricky to do + // with rewriting existing phi nodes. + if (ReachableEdges.count({PredBB, PHIBlock})) { + // Clone the instruction, create an expression from it, and see if we + // have a leader. + Instruction *ValueOp = I->clone(); + auto Iter = TempToMemory.end(); + if (MemAccess) + Iter = TempToMemory.insert({ValueOp, MemAccess}).first; + + for (auto &Op : ValueOp->operands()) { + Op = Op->DoPHITranslation(PHIBlock, PredBB); + // When this operand changes, it could change whether there is a + // leader for us or not. + addAdditionalUsers(Op, I); + } + // Make sure it's marked as a temporary instruction. + AllTempInstructions.insert(ValueOp); + // and make sure anything that tries to add it's DFS number is + // redirected to the instruction we are making a phi of ops + // for. + InstrDFS.insert({ValueOp, IDFSNum}); + const Expression *E = performSymbolicEvaluation(ValueOp, Visited); + InstrDFS.erase(ValueOp); + AllTempInstructions.erase(ValueOp); + ValueOp->deleteValue(); + if (MemAccess) + TempToMemory.erase(Iter); + if (!E) + return nullptr; + FoundVal = findPhiOfOpsLeader(E, PredBB); + if (!FoundVal) { + ExpressionToPhiOfOps[E].insert(I); + return nullptr; + } + if (auto *SI = dyn_cast<StoreInst>(FoundVal)) + FoundVal = SI->getValueOperand(); + } else { + DEBUG(dbgs() << "Skipping phi of ops operand for incoming block " + << getBlockName(PredBB) + << " because the block is unreachable\n"); + FoundVal = UndefValue::get(I->getType()); + } + + Ops.push_back({FoundVal, PredBB}); + DEBUG(dbgs() << "Found phi of ops operand " << *FoundVal << " in " + << getBlockName(PredBB) << "\n"); + } + auto *ValuePHI = RealToTemp.lookup(I); + bool NewPHI = false; + if (!ValuePHI) { + ValuePHI = PHINode::Create(I->getType(), OpPHI->getNumOperands()); + addPhiOfOps(ValuePHI, PHIBlock, I); + NewPHI = true; + NumGVNPHIOfOpsCreated++; + } + if (NewPHI) { + for (auto PHIOp : Ops) + ValuePHI->addIncoming(PHIOp.first, PHIOp.second); + } else { + unsigned int i = 0; + for (auto PHIOp : Ops) { + ValuePHI->setIncomingValue(i, PHIOp.first); + ValuePHI->setIncomingBlock(i, PHIOp.second); + ++i; + } + } + + DEBUG(dbgs() << "Created phi of ops " << *ValuePHI << " for " << *I + << "\n"); + return performSymbolicEvaluation(ValuePHI, Visited); + } + return nullptr; +} + // The algorithm initially places the values of the routine in the TOP // congruence class. The leader of TOP is the undetermined value `undef`. // When the algorithm has finished, values still in TOP are unreachable. @@ -2266,6 +2559,12 @@ void NewGVN::initializeCongruenceClasses(Function &F) { TOPClass->incStoreCount(); } for (auto &I : *BB) { + // TODO: Move to helper + if (isa<PHINode>(&I)) + for (auto *U : I.users()) + if (auto *UInst = dyn_cast<Instruction>(U)) + if (InstrToDFSNum(UInst) != 0 && okayForPHIOfOps(UInst)) + PHINodeUses.insert(UInst); // Don't insert void terminators into the class. We don't value number // them, and they just end up sitting in TOP. if (isa<TerminatorInst>(I) && I.getType()->isVoidTy()) @@ -2290,12 +2589,35 @@ void NewGVN::cleanupTables() { CongruenceClasses[i] = nullptr; } + // Destroy the value expressions + SmallVector<Instruction *, 8> TempInst(AllTempInstructions.begin(), + AllTempInstructions.end()); + AllTempInstructions.clear(); + + // We have to drop all references for everything first, so there are no uses + // left as we delete them. + for (auto *I : TempInst) { + I->dropAllReferences(); + } + + while (!TempInst.empty()) { + auto *I = TempInst.back(); + TempInst.pop_back(); + I->deleteValue(); + } + ValueToClass.clear(); ArgRecycler.clear(ExpressionAllocator); ExpressionAllocator.Reset(); CongruenceClasses.clear(); ExpressionToClass.clear(); ValueToExpression.clear(); + RealToTemp.clear(); + AdditionalUsers.clear(); + ExpressionToPhiOfOps.clear(); + TempToBlock.clear(); + TempToMemory.clear(); + PHIOfOpsPHIs.clear(); ReachableBlocks.clear(); ReachableEdges.clear(); #ifndef NDEBUG @@ -2311,14 +2633,17 @@ void NewGVN::cleanupTables() { MemoryToUsers.clear(); } +// Assign local DFS number mapping to instructions, and leave space for Value +// PHI's. std::pair<unsigned, unsigned> NewGVN::assignDFSNumbers(BasicBlock *B, unsigned Start) { unsigned End = Start; - if (MemoryAccess *MemPhi = MSSA->getMemoryAccess(B)) { + if (MemoryAccess *MemPhi = getMemoryAccess(B)) { InstrDFS[MemPhi] = End++; DFSToInstr.emplace_back(MemPhi); } + // Then the real block goes next. for (auto &I : *B) { // There's no need to call isInstructionTriviallyDead more than once on // an instruction. Therefore, once we know that an instruction is dead @@ -2329,7 +2654,6 @@ std::pair<unsigned, unsigned> NewGVN::assignDFSNumbers(BasicBlock *B, markInstructionForDeletion(&I); continue; } - InstrDFS[&I] = End++; DFSToInstr.emplace_back(&I); } @@ -2340,7 +2664,7 @@ std::pair<unsigned, unsigned> NewGVN::assignDFSNumbers(BasicBlock *B, return std::make_pair(Start, End); } -void NewGVN::updateProcessedCount(Value *V) { +void NewGVN::updateProcessedCount(const Value *V) { #ifndef NDEBUG if (ProcessedCount.count(V) == 0) { ProcessedCount.insert({V, 1}); @@ -2359,7 +2683,7 @@ void NewGVN::valueNumberMemoryPhi(MemoryPhi *MP) { const BasicBlock *PHIBlock = MP->getBlock(); auto Filtered = make_filter_range(MP->operands(), [&](const Use &U) { return lookupMemoryLeader(cast<MemoryAccess>(U)) != MP && - !isMemoryAccessTop(cast<MemoryAccess>(U)) && + !isMemoryAccessTOP(cast<MemoryAccess>(U)) && ReachableEdges.count({MP->getIncomingBlock(U), PHIBlock}); }); // If all that is left is nothing, our memoryphi is undef. We keep it as @@ -2412,18 +2736,26 @@ void NewGVN::valueNumberInstruction(Instruction *I) { DEBUG(dbgs() << "Processing instruction " << *I << "\n"); if (!I->isTerminator()) { const Expression *Symbolized = nullptr; + SmallPtrSet<Value *, 2> Visited; if (DebugCounter::shouldExecute(VNCounter)) { - Symbolized = performSymbolicEvaluation(I); + Symbolized = performSymbolicEvaluation(I, Visited); + // Make a phi of ops if necessary + if (Symbolized && !isa<ConstantExpression>(Symbolized) && + !isa<VariableExpression>(Symbolized) && PHINodeUses.count(I)) { + // FIXME: Backedge argument + auto *PHIE = makePossiblePhiOfOps(I, false, Visited); + if (PHIE) + Symbolized = PHIE; + } + } else { // Mark the instruction as unused so we don't value number it again. InstrDFS[I] = 0; } // If we couldn't come up with a symbolic expression, use the unknown // expression - if (Symbolized == nullptr) { + if (Symbolized == nullptr) Symbolized = createUnknownExpression(I); - } - performCongruenceFinding(I, Symbolized); } else { // Handle terminators that return values. All of them produce values we @@ -2542,8 +2874,6 @@ void NewGVN::verifyMemoryCongruency() const { auto Filtered = make_filter_range(MemoryAccessToClass, ReachableAccessPred); for (auto KV : Filtered) { - assert(KV.second != TOPClass && - "Memory not unreachable but ended up in TOP"); if (auto *FirstMUD = dyn_cast<MemoryUseOrDef>(KV.first)) { auto *SecondMUD = dyn_cast<MemoryUseOrDef>(KV.second->getMemoryLeader()); if (FirstMUD && SecondMUD) { @@ -2663,7 +2993,7 @@ void NewGVN::iterateTouchedInstructions() { // Nothing set, nothing to iterate, just return. if (FirstInstr == -1) return; - BasicBlock *LastBlock = getBlockForValue(InstrFromDFSNum(FirstInstr)); + const BasicBlock *LastBlock = getBlockForValue(InstrFromDFSNum(FirstInstr)); while (TouchedInstructions.any()) { ++Iterations; // Walk through all the instructions in all the blocks in RPO. @@ -2680,7 +3010,7 @@ void NewGVN::iterateTouchedInstructions() { } Value *V = InstrFromDFSNum(InstrNum); - BasicBlock *CurrBlock = getBlockForValue(V); + const BasicBlock *CurrBlock = getBlockForValue(V); // If we hit a new block, do reachability processing. if (CurrBlock != LastBlock) { @@ -2761,6 +3091,7 @@ bool NewGVN::runGVN() { BlockInstRange.insert({B, BlockRange}); ICount += BlockRange.second - BlockRange.first; } + initializeCongruenceClasses(F); TouchedInstructions.resize(ICount); // Ensure we don't end up resizing the expressionToClass map, as @@ -2771,9 +3102,10 @@ bool NewGVN::runGVN() { // Initialize the touched instructions to include the entry block. const auto &InstRange = BlockInstRange.lookup(&F.getEntryBlock()); TouchedInstructions.set(InstRange.first, InstRange.second); + DEBUG(dbgs() << "Block " << getBlockName(&F.getEntryBlock()) + << " marked reachable\n"); ReachableBlocks.insert(&F.getEntryBlock()); - initializeCongruenceClasses(F); iterateTouchedInstructions(); verifyMemoryCongruency(); verifyIterationSettled(F); @@ -2786,7 +3118,8 @@ bool NewGVN::runGVN() { if (!ToErase->use_empty()) ToErase->replaceAllUsesWith(UndefValue::get(ToErase->getType())); - ToErase->eraseFromParent(); + if (ToErase->getParent()) + ToErase->eraseFromParent(); } // Delete all unreachable blocks. @@ -2805,14 +3138,6 @@ bool NewGVN::runGVN() { return Changed; } -// Return true if V is a value that will always be available (IE can -// be placed anywhere) in the function. We don't do globals here -// because they are often worse to put in place. -// TODO: Separate cost from availability -static bool alwaysAvailable(Value *V) { - return isa<Constant>(V) || isa<Argument>(V); -} - struct NewGVN::ValueDFS { int DFSIn = 0; int DFSOut = 0; @@ -2902,9 +3227,21 @@ void NewGVN::convertClassToDFSOrdered( } assert(isa<Instruction>(D) && "The dense set member should always be an instruction"); - VDDef.LocalNum = InstrToDFSNum(D); - DFSOrderedSet.emplace_back(VDDef); Instruction *Def = cast<Instruction>(D); + VDDef.LocalNum = InstrToDFSNum(D); + DFSOrderedSet.push_back(VDDef); + // If there is a phi node equivalent, add it + if (auto *PN = RealToTemp.lookup(Def)) { + auto *PHIE = + dyn_cast_or_null<PHIExpression>(ValueToExpression.lookup(Def)); + if (PHIE) { + VDDef.Def.setInt(false); + VDDef.Def.setPointer(PN); + VDDef.LocalNum = 0; + DFSOrderedSet.push_back(VDDef); + } + } + unsigned int UseCount = 0; // Now add the uses. for (auto &U : Def->uses()) { @@ -2921,7 +3258,7 @@ void NewGVN::convertClassToDFSOrdered( // they are from. VDUse.LocalNum = InstrDFS.size() + 1; } else { - IBlock = I->getParent(); + IBlock = getBlockForValue(I); VDUse.LocalNum = InstrToDFSNum(I); } @@ -3091,6 +3428,37 @@ private: }; } +// Given a value and a basic block we are trying to see if it is available in, +// see if the value has a leader available in that block. +Value *NewGVN::findPhiOfOpsLeader(const Expression *E, + const BasicBlock *BB) const { + // It would already be constant if we could make it constant + if (auto *CE = dyn_cast<ConstantExpression>(E)) + return CE->getConstantValue(); + if (auto *VE = dyn_cast<VariableExpression>(E)) + return VE->getVariableValue(); + + auto *CC = ExpressionToClass.lookup(E); + if (!CC) + return nullptr; + if (alwaysAvailable(CC->getLeader())) + return CC->getLeader(); + + for (auto Member : *CC) { + auto *MemberInst = dyn_cast<Instruction>(Member); + // Anything that isn't an instruction is always available. + if (!MemberInst) + return Member; + // If we are looking for something in the same block as the member, it must + // be a leader because this function is looking for operands for a phi node. + if (MemberInst->getParent() == BB || + DT->dominates(MemberInst->getParent(), BB)) { + return Member; + } + } + return nullptr; +} + bool NewGVN::eliminateInstructions(Function &F) { // This is a non-standard eliminator. The normal way to eliminate is // to walk the dominator tree in order, keeping track of available @@ -3121,25 +3489,46 @@ bool NewGVN::eliminateInstructions(Function &F) { // DFS numbers are updated, we compute some ourselves. DT->updateDFSNumbers(); - for (auto &B : F) { - if (!ReachableBlocks.count(&B)) { - for (const auto S : successors(&B)) { - for (auto II = S->begin(); isa<PHINode>(II); ++II) { - auto &Phi = cast<PHINode>(*II); - DEBUG(dbgs() << "Replacing incoming value of " << *II << " for block " - << getBlockName(&B) - << " with undef due to it being unreachable\n"); - for (auto &Operand : Phi.incoming_values()) - if (Phi.getIncomingBlock(Operand) == &B) - Operand.set(UndefValue::get(Phi.getType())); + // Go through all of our phi nodes, and kill the arguments associated with unreachable edges. + auto ReplaceUnreachablePHIArgs = [&](PHINode &PHI, BasicBlock *BB) { + for (auto &Operand : PHI.incoming_values()) + if (!ReachableEdges.count({PHI.getIncomingBlock(Operand), BB})) { + DEBUG(dbgs() << "Replacing incoming value of " << PHI << " for block " + << getBlockName(PHI.getIncomingBlock(Operand)) + << " with undef due to it being unreachable\n"); + Operand.set(UndefValue::get(PHI.getType())); + } + }; + SmallPtrSet<BasicBlock *, 8> BlocksWithPhis; + for (auto &B : F) + if ((!B.empty() && isa<PHINode>(*B.begin())) || + (PHIOfOpsPHIs.find(&B) != PHIOfOpsPHIs.end())) + BlocksWithPhis.insert(&B); + DenseMap<const BasicBlock *, unsigned> ReachablePredCount; + for (auto KV : ReachableEdges) + ReachablePredCount[KV.getEnd()]++; + for (auto *BB : BlocksWithPhis) + // TODO: It would be faster to use getNumIncomingBlocks() on a phi node in + // the block and subtract the pred count, but it's more complicated. + if (ReachablePredCount.lookup(BB) != + std::distance(pred_begin(BB), pred_end(BB))) { + for (auto II = BB->begin(); isa<PHINode>(II); ++II) { + auto &PHI = cast<PHINode>(*II); + ReplaceUnreachablePHIArgs(PHI, BB); + } + auto PHIResult = PHIOfOpsPHIs.find(BB); + if (PHIResult != PHIOfOpsPHIs.end()) { + auto &PHIs = PHIResult->second; + for (auto I : PHIs) { + auto *PHI = dyn_cast<PHINode>(I); + ReplaceUnreachablePHIArgs(*PHI, BB); } } } - } // Map to store the use counts DenseMap<const Value *, unsigned int> UseCounts; - for (CongruenceClass *CC : reverse(CongruenceClasses)) { + for (auto *CC : reverse(CongruenceClasses)) { // Track the equivalent store info so we can decide whether to try // dead store elimination. SmallVector<ValueDFS, 8> PossibleDeadStores; @@ -3187,7 +3576,7 @@ bool NewGVN::eliminateInstructions(Function &F) { DEBUG(dbgs() << "Eliminating in congruence class " << CC->getID() << "\n"); // If this is a singleton, we can skip it. - if (CC->size() != 1) { + if (CC->size() != 1 || RealToTemp.lookup(Leader)) { // This is a stack because equality replacement/etc may place // constants in the middle of the member list, and we want to use // those constant values in preference to the current leader, over @@ -3209,6 +3598,22 @@ bool NewGVN::eliminateInstructions(Function &F) { // We ignore void things because we can't get a value from them. if (Def && Def->getType()->isVoidTy()) continue; + auto *DefInst = dyn_cast_or_null<Instruction>(Def); + if (DefInst && AllTempInstructions.count(DefInst)) { + auto *PN = cast<PHINode>(DefInst); + + // If this is a value phi and that's the expression we used, insert + // it into the program + // remove from temp instruction list. + AllTempInstructions.erase(PN); + auto *DefBlock = getBlockForValue(Def); + DEBUG(dbgs() << "Inserting fully real phi of ops" << *Def + << " into block " + << getBlockName(getBlockForValue(Def)) << "\n"); + PN->insertBefore(&DefBlock->front()); + Def = PN; + NumGVNPHIOfOpsEliminations++; + } if (EliminationStack.empty()) { DEBUG(dbgs() << "Elimination Stack is empty\n"); @@ -3373,7 +3778,6 @@ bool NewGVN::eliminateInstructions(Function &F) { } } } - return AnythingReplaced; } @@ -3383,19 +3787,23 @@ bool NewGVN::eliminateInstructions(Function &F) { // we will simplify an operation with all constants so that it doesn't matter // what order they appear in. unsigned int NewGVN::getRank(const Value *V) const { - // Prefer undef to anything else + // Prefer constants to undef to anything else + // Undef is a constant, have to check it first. + // Prefer smaller constants to constantexprs + if (isa<ConstantExpr>(V)) + return 2; if (isa<UndefValue>(V)) - return 0; - if (isa<Constant>(V)) return 1; + if (isa<Constant>(V)) + return 0; else if (auto *A = dyn_cast<Argument>(V)) - return 2 + A->getArgNo(); + return 3 + A->getArgNo(); // Need to shift the instruction DFS by number of arguments + 3 to account for // the constant and argument ranking above. unsigned Result = InstrToDFSNum(V); if (Result > 0) - return 3 + NumFuncArgs + Result; + return 4 + NumFuncArgs + Result; // Unreachable or something else, just return a really large number. return ~0; } diff --git a/llvm/test/Transforms/NewGVN/completeness.ll b/llvm/test/Transforms/NewGVN/completeness.ll new file mode 100644 index 00000000000..bafe5f966d2 --- /dev/null +++ b/llvm/test/Transforms/NewGVN/completeness.ll @@ -0,0 +1,415 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -basicaa -newgvn -S | FileCheck %s +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" + +define i32 @test1(i32, i8**) { +; CHECK-LABEL: @test1( +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP0:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP5:%.*]] +; CHECK: br label [[TMP6:%.*]] +; CHECK: br label [[TMP6]] +; CHECK: [[TMP7:%.*]] = phi i32 [ 75, [[TMP4]] ], [ 105, [[TMP5]] ] +; CHECK-NEXT: [[DOT0:%.*]] = phi i32 [ 5, [[TMP4]] ], [ 7, [[TMP5]] ] +; CHECK-NEXT: ret i32 [[TMP7]] +; + %3 = icmp ne i32 %0, 0 + br i1 %3, label %4, label %5 + +; <label>:4: ; preds = %2 + br label %6 + +; <label>:5: ; preds = %2 + br label %6 + +; <label>:6: ; preds = %5, %4 + %.0 = phi i32 [ 5, %4 ], [ 7, %5 ] + %7 = mul nsw i32 %.0, 15 + ret i32 %7 +} + +define i32 @test2(i32) { +; CHECK-LABEL: @test2( +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP0:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP4:%.*]] +; CHECK: br label [[TMP5:%.*]] +; CHECK: br label [[TMP5]] +; CHECK: [[DOT01:%.*]] = phi i32 [ 3, [[TMP3]] ], [ 2, [[TMP4]] ] +; CHECK-NEXT: [[DOT0:%.*]] = phi i32 [ 2, [[TMP3]] ], [ 3, [[TMP4]] ] +; CHECK-NEXT: ret i32 5 +; + %2 = icmp ne i32 %0, 0 + br i1 %2, label %3, label %4 + +; <label>:3: ; preds = %1 + br label %5 + +; <label>:4: ; preds = %1 + br label %5 + +; <label>:5: ; preds = %4, %3 + %.01 = phi i32 [ 3, %3 ], [ 2, %4 ] + %.0 = phi i32 [ 2, %3 ], [ 3, %4 ] + %6 = add nsw i32 %.01, %.0 + ret i32 %6 +} +define i32 @test3(i1 %which) { +; CHECK-LABEL: @test3( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[WHICH:%.*]], label [[FINAL:%.*]], label [[DELAY:%.*]] +; CHECK: delay: +; CHECK-NEXT: br label [[FINAL]] +; CHECK: final: +; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ -877, [[ENTRY:%.*]] ], [ 113, [[DELAY]] ] +; CHECK-NEXT: [[A:%.*]] = phi i32 [ 1000, [[ENTRY]] ], [ 10, [[DELAY]] ] +; CHECK-NEXT: ret i32 [[TMP0]] +; + +entry: + br i1 %which, label %final, label %delay + +delay: + br label %final + +final: + %A = phi i32 [ 1000, %entry ], [ 10, %delay ] + %value = sub i32 123, %A + ret i32 %value +} + +define <2 x i32> @test3vec(i1 %which) { +; CHECK-LABEL: @test3vec( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[WHICH:%.*]], label [[FINAL:%.*]], label [[DELAY:%.*]] +; CHECK: delay: +; CHECK-NEXT: br label [[FINAL]] +; CHECK: final: +; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x i32> [ <i32 -877, i32 -877>, [[ENTRY:%.*]] ], [ <i32 113, i32 113>, [[DELAY]] ] +; CHECK-NEXT: [[A:%.*]] = phi <2 x i32> [ <i32 1000, i32 1000>, [[ENTRY]] ], [ <i32 10, i32 10>, [[DELAY]] ] +; CHECK-NEXT: ret <2 x i32> [[TMP0]] +; + +entry: + br i1 %which, label %final, label %delay + +delay: + br label %final + +final: + %A = phi <2 x i32> [ <i32 1000, i32 1000>, %entry ], [ <i32 10, i32 10>, %delay ] + %value = sub <2 x i32> <i32 123, i32 123>, %A + ret <2 x i32> %value +} + +define <2 x i32> @test3vec2(i1 %which) { +; CHECK-LABEL: @test3vec2( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[WHICH:%.*]], label [[FINAL:%.*]], label [[DELAY:%.*]] +; CHECK: delay: +; CHECK-NEXT: br label [[FINAL]] +; CHECK: final: +; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x i32> [ <i32 -877, i32 -2167>, [[ENTRY:%.*]] ], [ <i32 113, i32 303>, [[DELAY]] ] +; CHECK-NEXT: [[A:%.*]] = phi <2 x i32> [ <i32 1000, i32 2500>, [[ENTRY]] ], [ <i32 10, i32 30>, [[DELAY]] ] +; CHECK-NEXT: ret <2 x i32> [[TMP0]] +; + +entry: + br i1 %which, label %final, label %delay + +delay: + br label %final + +final: + %A = phi <2 x i32> [ <i32 1000, i32 2500>, %entry ], [ <i32 10, i32 30>, %delay ] + %value = sub <2 x i32> <i32 123, i32 333>, %A + ret <2 x i32> %value +} + +;; This example is a bit contrived because we can't create fake memoryuses, so we use two loads in the if blocks +define i32 @test4(i32, i8**, i32* noalias, i32* noalias) { +; CHECK-LABEL: @test4( +; CHECK-NEXT: store i32 5, i32* [[TMP2:%.*]], align 4 +; CHECK-NEXT: store i32 7, i32* [[TMP3:%.*]], align 4 +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP0:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP7:%.*]] +; CHECK: br label [[TMP8:%.*]] +; CHECK: br label [[TMP8]] +; CHECK: [[DOT01:%.*]] = phi i32 [ 5, [[TMP6]] ], [ 7, [[TMP7]] ] +; CHECK-NEXT: [[DOT0:%.*]] = phi i32* [ [[TMP2]], [[TMP6]] ], [ [[TMP3]], [[TMP7]] ] +; CHECK-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOT0]], align 4 +; CHECK-NEXT: [[TMP10:%.*]] = mul nsw i32 [[TMP9]], 15 +; CHECK-NEXT: [[TMP11:%.*]] = mul nsw i32 [[TMP10]], [[DOT01]] +; CHECK-NEXT: ret i32 [[TMP11]] +; + store i32 5, i32* %2, align 4 + store i32 7, i32* %3, align 4 + %5 = icmp ne i32 %0, 0 + br i1 %5, label %6, label %8 + +; <label>:6: ; preds = %4 + %7 = load i32, i32* %2, align 4 + br label %10 + +; <label>:8: ; preds = %4 + %9 = load i32, i32* %3, align 4 + br label %10 + +; <label>:10: ; preds = %8, %6 + %.01 = phi i32 [ %7, %6 ], [ %9, %8 ] + %.0 = phi i32* [ %2, %6 ], [ %3, %8 ] + %11 = load i32, i32* %.0, align 4 + %12 = mul nsw i32 %11, 15 + %13 = mul nsw i32 %12, %.01 + ret i32 %13 +} + +@global = common global [100 x i64] zeroinitializer, align 16 +@global.1 = common global [100 x i64] zeroinitializer, align 16 +define i64 @test5(i64 %arg) { +; CHECK-LABEL: @test5( +; CHECK-NEXT: bb: +; CHECK-NEXT: [[TMP:%.*]] = alloca i64, align 8 +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 [[ARG:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP1]], label [[BB28:%.*]], label [[BB2:%.*]] +; CHECK: bb2: +; CHECK-NEXT: br label [[BB7:%.*]] +; CHECK: bb4: +; CHECK-NEXT: br label [[BB5:%.*]] +; CHECK: bb5: +; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP9:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP6]], label [[BB27:%.*]], label [[BB7]] +; CHECK: bb7: +; CHECK-NEXT: [[TMP8:%.*]] = phi i64 [ [[ARG]], [[BB2]] ], [ [[TMP9]], [[BB5]] ] +; CHECK-NEXT: [[TMP9]] = add nsw i64 [[TMP8]], -1 +; CHECK-NEXT: [[TMP10:%.*]] = load i64, i64* getelementptr inbounds ([100 x i64], [100 x i64]* @global, i64 0, i64 0), align 16 +; CHECK-NEXT: [[TMP11:%.*]] = load i64, i64* getelementptr inbounds ([100 x i64], [100 x i64]* @global.1, i64 0, i64 0), align 16 +; CHECK-NEXT: [[TMP12:%.*]] = mul nsw i64 [[TMP11]], [[TMP10]] +; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[TMP12]], 0 +; CHECK-NEXT: br i1 [[TMP13]], label [[BB5]], label [[BB14:%.*]] +; CHECK: bb14: +; CHECK-NEXT: br label [[BB15:%.*]] +; CHECK: bb15: +; CHECK-NEXT: [[TMP0:%.*]] = phi i64 [ [[TMP25:%.*]], [[BB15]] ], [ [[TMP12]], [[BB14]] ] +; CHECK-NEXT: [[TMP16:%.*]] = phi i64 [ [[TMP24:%.*]], [[BB15]] ], [ [[TMP11]], [[BB14]] ] +; CHECK-NEXT: [[TMP17:%.*]] = phi i64 [ [[TMP22:%.*]], [[BB15]] ], [ [[TMP10]], [[BB14]] ] +; CHECK-NEXT: [[TMP18:%.*]] = phi i64 [ [[TMP20:%.*]], [[BB15]] ], [ 0, [[BB14]] ] +; CHECK-NEXT: store i64 [[TMP0]], i64* [[TMP]], align 8 +; CHECK-NEXT: [[TMP20]] = add nuw nsw i64 [[TMP18]], 1 +; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds [100 x i64], [100 x i64]* @global, i64 0, i64 [[TMP20]] +; CHECK-NEXT: [[TMP22]] = load i64, i64* [[TMP21]], align 8 +; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds [100 x i64], [100 x i64]* @global.1, i64 0, i64 [[TMP20]] +; CHECK-NEXT: [[TMP24]] = load i64, i64* [[TMP23]], align 8 +; CHECK-NEXT: [[TMP25]] = mul nsw i64 [[TMP24]], [[TMP22]] +; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i64 [[TMP20]], [[TMP25]] +; CHECK-NEXT: br i1 [[TMP26]], label [[BB4:%.*]], label [[BB15]] +; CHECK: bb27: +; CHECK-NEXT: br label [[BB28]] +; CHECK: bb28: +; CHECK-NEXT: ret i64 0 +; +bb: + %tmp = alloca i64, align 8 + %tmp1 = icmp eq i64 %arg, 0 + br i1 %tmp1, label %bb28, label %bb2 + +bb2: ; preds = %bb + %tmp3 = bitcast i64* %tmp to i8* + br label %bb7 + +bb4: ; preds = %bb15 + br label %bb5 + +bb5: ; preds = %bb7, %bb4 + %tmp6 = icmp eq i64 %tmp9, 0 + br i1 %tmp6, label %bb27, label %bb7 + +bb7: ; preds = %bb5, %bb2 + %tmp8 = phi i64 [ %arg, %bb2 ], [ %tmp9, %bb5 ] + %tmp9 = add nsw i64 %tmp8, -1 + %tmp10 = load i64, i64* getelementptr inbounds ([100 x i64], [100 x i64]* @global, i64 0, i64 0), align 16 + %tmp11 = load i64, i64* getelementptr inbounds ([100 x i64], [100 x i64]* @global.1, i64 0, i64 0), align 16 + %tmp12 = mul nsw i64 %tmp11, %tmp10 + %tmp13 = icmp eq i64 %tmp12, 0 + br i1 %tmp13, label %bb5, label %bb14 + +bb14: ; preds = %bb7 + br label %bb15 + +bb15: ; preds = %bb15, %bb14 + %tmp16 = phi i64 [ %tmp24, %bb15 ], [ %tmp11, %bb14 ] + %tmp17 = phi i64 [ %tmp22, %bb15 ], [ %tmp10, %bb14 ] + %tmp18 = phi i64 [ %tmp20, %bb15 ], [ 0, %bb14 ] +;; This multiply is an op of phis which is really equivalent to phi(tmp25, tmp12) + %tmp19 = mul nsw i64 %tmp16, %tmp17 + store i64 %tmp19, i64* %tmp, align 8 + %tmp20 = add nuw nsw i64 %tmp18, 1 + %tmp21 = getelementptr inbounds [100 x i64], [100 x i64]* @global, i64 0, i64 %tmp20 + %tmp22 = load i64, i64* %tmp21, align 8 + %tmp23 = getelementptr inbounds [100 x i64], [100 x i64]* @global.1, i64 0, i64 %tmp20 + %tmp24 = load i64, i64* %tmp23, align 8 + %tmp25 = mul nsw i64 %tmp24, %tmp22 + %tmp26 = icmp eq i64 %tmp20, %tmp25 + br i1 %tmp26, label %bb4, label %bb15 + +bb27: ; preds = %bb5 + br label %bb28 + +bb28: ; preds = %bb27, %bb + ret i64 0 +} + +;; These icmps are all equivalent to phis of constants +define i8 @test6(i8* %addr) { +; CHECK-LABEL: @test6( +; CHECK-NEXT: entry-block: +; CHECK-NEXT: br label %main-loop +; CHECK: main-loop: +; CHECK-NEXT: [[TMP0:%.*]] = phi i1 [ true, %entry-block ], [ false, [[CORE:%.*]] ] +; CHECK-NEXT: [[TMP1:%.*]] = phi i1 [ false, %entry-block ], [ true, [[CORE]] ] +; CHECK-NEXT: [[PHI:%.*]] = phi i8 [ 0, %entry-block ], [ 1, [[CORE]] ] +; CHECK-NEXT: store volatile i8 0, i8* [[ADDR:%.*]] +; CHECK-NEXT: br i1 [[TMP0]], label %busy-wait-phi-0, label [[EXIT:%.*]] +; CHECK: busy-wait-phi-0: +; CHECK-NEXT: [[LOAD:%.*]] = load volatile i8, i8* [[ADDR]] +; CHECK-NEXT: [[ICMP:%.*]] = icmp eq i8 [[LOAD]], 0 +; CHECK-NEXT: br i1 [[ICMP]], label %busy-wait-phi-0, label [[CORE]] +; CHECK: core: +; CHECK-NEXT: br i1 [[TMP1]], label [[TRAP:%.*]], label %main-loop +; CHECK: trap: +; CHECK-NEXT: ret i8 1 +; CHECK: exit: +; CHECK-NEXT: ret i8 0 +; +entry-block: + br label %main-loop + +main-loop: + %phi = phi i8 [ 0, %entry-block ], [ 1, %core ] + %switch_0 = icmp eq i8 %phi, 0 + store volatile i8 0, i8* %addr + br i1 %switch_0, label %busy-wait-phi-0, label %exit + +busy-wait-phi-0: + %load = load volatile i8, i8* %addr + %icmp = icmp eq i8 %load, 0 + br i1 %icmp, label %busy-wait-phi-0, label %core + +core: + %switch_1 = icmp eq i8 %phi, 1 + br i1 %switch_1, label %trap, label %main-loop + +trap: + ret i8 1 + +exit: + ret i8 0 +} + +; Test that we don't infinite loop simplifying +; an undefined value that can go both ways. +define void @test7() { +; CHECK-LABEL: @test7( +; CHECK-NEXT: bb: +; CHECK-NEXT: br label [[BB1:%.*]] +; CHECK: bb1: +; CHECK-NEXT: br label [[BB1]] +; +bb: + br label %bb1 + +bb1: ; preds = %bb1, %bb + %tmp = phi i32 [ undef, %bb ], [ %tmp3, %bb1 ] + %tmp2 = icmp eq i32 %tmp, 0 + %tmp3 = select i1 %tmp2, i32 1, i32 %tmp + br label %bb1 +} + + + +; Test that we get a consistent answer about what the +; value of this undefined select is. +define void @test8() { +; CHECK-LABEL: @test8( +; CHECK-NEXT: bb: +; CHECK-NEXT: br label [[BB1:%.*]] +; CHECK: bb1: +; CHECK-NEXT: br label [[BB1]] +; +bb: + %tmp = select i1 undef, i8 0, i8 1 + br label %bb1 + +bb1: ; preds = %bb1, %bb + %tmp2 = phi i8 [ %tmp4, %bb1 ], [ %tmp, %bb ] + %tmp3 = icmp eq i8 %tmp2, 0 + %tmp4 = select i1 %tmp3, i8 1, i8 %tmp2 + br label %bb1 +} + + +;; Make sure we handle the case where we later come up with an expression that we need +;; for a phi of ops. +define void @test9() { +; CHECK-LABEL: @test9( +; CHECK-NEXT: bb: +; CHECK-NEXT: br label [[BB1:%.*]] +; CHECK: bb1: +; CHECK-NEXT: br i1 undef, label [[BB1]], label [[BB2:%.*]] +; CHECK: bb2: +; CHECK-NEXT: br label [[BB6:%.*]] +; CHECK: bb6: +; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ -13, [[BB2]] ], [ [[TMP11:%.*]], [[BB6]] ] +; CHECK-NEXT: [[TMP7:%.*]] = phi i32 [ 1, [[BB2]] ], [ [[TMP8:%.*]], [[BB6]] ] +; CHECK-NEXT: [[TMP8]] = add nuw nsw i32 [[TMP7]], 1 +; CHECK-NEXT: [[TMP11]] = add i32 -14, [[TMP8]] +; CHECK-NEXT: br label [[BB6]] +; +bb: + br label %bb1 + +bb1: ; preds = %bb1, %bb + br i1 undef, label %bb1, label %bb2 + +bb2: ; preds = %bb1 + %tmp = select i1 true, i32 -14, i32 -10 + %tmp3 = add i32 %tmp, 0 + %tmp4 = select i1 true, i32 -14, i32 -10 + %tmp5 = add i32 %tmp4, 0 + br label %bb6 + +bb6: ; preds = %bb6, %bb2 + %tmp7 = phi i32 [ 1, %bb2 ], [ %tmp13, %bb6 ] + %tmp8 = add nuw nsw i32 %tmp7, 1 + %tmp9 = add i32 %tmp3, %tmp7 + %tmp10 = select i1 false, i32 undef, i32 %tmp9 + %tmp11 = add i32 %tmp5, %tmp8 + %tmp12 = select i1 undef, i32 undef, i32 %tmp11 + %tmp13 = add nuw nsw i32 %tmp7, 1 + br label %bb6 +} + +;; Ensure that we revisit predicateinfo operands at the right points in time. +define void @test10() { +b: + %m = getelementptr i32, i32* null, i64 8 + br label %g + +g: ; preds = %i, %b + %n = phi i32* [ %h, %i ], [ null, %b ] + %h = getelementptr i32, i32* %n, i64 1 + %j = icmp eq i32* %h, %m + br i1 %j, label %c, label %i + +i: ; preds = %g + br i1 undef, label %k, label %g + +k: ; preds = %i + %l = icmp eq i32* %n, %m + br i1 %l, label %c, label %o + +o: ; preds = %k + br label %c + +c: ; preds = %o, %k, %g + %0 = phi i32* [ undef, %o ], [ %m, %k ], [ %m, %g ] + ret void +} diff --git a/llvm/test/Transforms/NewGVN/storeoverstore.ll b/llvm/test/Transforms/NewGVN/storeoverstore.ll index 49b55d430dc..28f5eea03ce 100644 --- a/llvm/test/Transforms/NewGVN/storeoverstore.ll +++ b/llvm/test/Transforms/NewGVN/storeoverstore.ll @@ -13,11 +13,11 @@ define i32 @foo(i32*, i32) { ; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP1:%.*]], 0 ; CHECK-NEXT: br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP5:%.*]] ; CHECK: br label [[TMP5]] -; CHECK: [[DOT0:%.*]] = phi i32 [ 10, [[TMP4]] ], [ 5, [[TMP2:%.*]] ] -; CHECK-NEXT: br i1 [[TMP3]], label [[TMP6:%.*]], label [[TMP8:%.*]] -; CHECK: [[TMP7:%.*]] = add nsw i32 [[DOT0]], 5 -; CHECK-NEXT: br label [[TMP8]] -; CHECK: [[DOT1:%.*]] = phi i32 [ [[TMP7]], [[TMP6]] ], [ [[DOT0]], [[TMP5]] ] +; CHECK: [[TMP6:%.*]] = phi i32 [ 15, [[TMP4]] ], [ 10, [[TMP2:%.*]] ] +; CHECK-NEXT: [[DOT0:%.*]] = phi i32 [ 10, [[TMP4]] ], [ 5, [[TMP2]] ] +; CHECK-NEXT: br i1 [[TMP3]], label [[TMP7:%.*]], label [[TMP8:%.*]] +; CHECK: br label [[TMP8]] +; CHECK: [[DOT1:%.*]] = phi i32 [ [[TMP6]], [[TMP7]] ], [ [[DOT0]], [[TMP5]] ] ; CHECK-NEXT: ret i32 [[DOT1]] ; store i32 5, i32* %0, align 4 @@ -54,11 +54,11 @@ define i32 @foo2(i32*, i32) { ; CHECK-NEXT: br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP5:%.*]] ; CHECK: br label [[TMP6:%.*]] ; CHECK: br label [[TMP6]] -; CHECK: [[DOT0:%.*]] = phi i32 [ 10, [[TMP4]] ], [ 5, [[TMP5]] ] -; CHECK-NEXT: br i1 [[TMP3]], label [[TMP7:%.*]], label [[TMP9:%.*]] -; CHECK: [[TMP8:%.*]] = add nsw i32 [[DOT0]], 5 -; CHECK-NEXT: br label [[TMP9]] -; CHECK: [[DOT1:%.*]] = phi i32 [ [[TMP8]], [[TMP7]] ], [ [[DOT0]], [[TMP6]] ] +; CHECK: [[TMP7:%.*]] = phi i32 [ 15, [[TMP4]] ], [ 10, [[TMP5]] ] +; CHECK-NEXT: [[DOT0:%.*]] = phi i32 [ 10, [[TMP4]] ], [ 5, [[TMP5]] ] +; CHECK-NEXT: br i1 [[TMP3]], label [[TMP8:%.*]], label [[TMP9:%.*]] +; CHECK: br label [[TMP9]] +; CHECK: [[DOT1:%.*]] = phi i32 [ [[TMP7]], [[TMP8]] ], [ [[DOT0]], [[TMP6]] ] ; CHECK-NEXT: ret i32 [[DOT1]] ; store i32 5, i32* %0, align 4 |