diff options
Diffstat (limited to 'llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp')
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 760 |
1 files changed, 390 insertions, 370 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index d539bbeb653..30330e4588c 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -53,10 +53,6 @@ STATISTIC(SlicedLoads, "Number of load sliced"); namespace { static cl::opt<bool> - CombinerAA("combiner-alias-analysis", cl::Hidden, - cl::desc("Enable DAG combiner alias-analysis heuristics")); - - static cl::opt<bool> CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden, cl::desc("Enable DAG combiner's use of IR alias analysis")); @@ -133,6 +129,9 @@ namespace { /// Add to the worklist making sure its instance is at the back (next to be /// processed.) void AddToWorklist(SDNode *N) { + assert(N->getOpcode() != ISD::DELETED_NODE && + "Deleted Node added to Worklist"); + // Skip handle nodes as they can't usefully be combined and confuse the // zero-use deletion strategy. if (N->getOpcode() == ISD::HANDLENODE) @@ -177,6 +176,7 @@ namespace { void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO); private: + unsigned MaximumLegalStoreInBits; /// Check the specified integer node value to see if it can be simplified or /// if things it uses can be simplified by bit propagation. @@ -422,15 +422,12 @@ namespace { /// Holds a pointer to an LSBaseSDNode as well as information on where it /// is located in a sequence of memory operations connected by a chain. struct MemOpLink { - MemOpLink (LSBaseSDNode *N, int64_t Offset, unsigned Seq): - MemNode(N), OffsetFromBase(Offset), SequenceNum(Seq) { } + MemOpLink(LSBaseSDNode *N, int64_t Offset) + : MemNode(N), OffsetFromBase(Offset) {} // Ptr to the mem node. LSBaseSDNode *MemNode; // Offset from the base ptr. int64_t OffsetFromBase; - // What is the sequence number of this mem node. - // Lowest mem operand in the DAG starts at zero. - unsigned SequenceNum; }; /// This is a helper function for visitMUL to check the profitability @@ -441,12 +438,6 @@ namespace { SDValue &AddNode, SDValue &ConstNode); - /// This is a helper function for MergeStoresOfConstantsOrVecElts. Returns a - /// constant build_vector of the stored constant values in Stores. - SDValue getMergedConstantVectorStore(SelectionDAG &DAG, const SDLoc &SL, - ArrayRef<MemOpLink> Stores, - SmallVectorImpl<SDValue> &Chains, - EVT Ty) const; /// This is a helper function for visitAND and visitZERO_EXTEND. Returns /// true if the (and (load x) c) pattern matches an extload. ExtVT returns @@ -460,18 +451,15 @@ namespace { /// This is a helper function for MergeConsecutiveStores. When the source /// elements of the consecutive stores are all constants or all extracted /// vector elements, try to merge them into one larger store. - /// \return number of stores that were merged into a merged store (always - /// a prefix of \p StoreNode). - bool MergeStoresOfConstantsOrVecElts( - SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, unsigned NumStores, - bool IsConstantSrc, bool UseVector); + /// \return True if a merged store was created. + bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes, + EVT MemVT, unsigned NumStores, + bool IsConstantSrc, bool UseVector); /// This is a helper function for MergeConsecutiveStores. /// Stores that may be merged are placed in StoreNodes. - /// Loads that may alias with those stores are placed in AliasLoadNodes. - void getStoreMergeAndAliasCandidates( - StoreSDNode* St, SmallVectorImpl<MemOpLink> &StoreNodes, - SmallVectorImpl<LSBaseSDNode*> &AliasLoadNodes); + void getStoreMergeCandidates(StoreSDNode *St, + SmallVectorImpl<MemOpLink> &StoreNodes); /// Helper function for MergeConsecutiveStores. Checks if /// Candidate stores have indirect dependency through their @@ -483,8 +471,7 @@ namespace { /// This optimization uses wide integers or vectors when possible. /// \return number of stores that were merged into a merged store (the /// affected nodes are stored as a prefix in \p StoreNodes). - bool MergeConsecutiveStores(StoreSDNode *N, - SmallVectorImpl<MemOpLink> &StoreNodes); + bool MergeConsecutiveStores(StoreSDNode *N); /// \brief Try to transform a truncation where C is a constant: /// (trunc (and X, C)) -> (and (trunc X), (trunc C)) @@ -499,6 +486,13 @@ namespace { : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes), OptLevel(OL), LegalOperations(false), LegalTypes(false), AA(A) { ForCodeSize = DAG.getMachineFunction().getFunction()->optForSize(); + + MaximumLegalStoreInBits = 0; + for (MVT VT : MVT::all_valuetypes()) + if (EVT(VT).isSimple() && VT != MVT::Other && + TLI.isTypeLegal(EVT(VT)) && + VT.getSizeInBits() >= MaximumLegalStoreInBits) + MaximumLegalStoreInBits = VT.getSizeInBits(); } /// Runs the dag combiner on all nodes in the work list @@ -1589,7 +1583,7 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) { } SmallVector<SDNode *, 8> TFs; // List of token factors to visit. - SmallVector<SDValue, 8> Ops; // Ops for replacing token factor. + SmallVector<SDValue, 8> Ops; // Ops for replacing token factor. SmallPtrSet<SDNode*, 16> SeenOps; bool Changed = false; // If we should replace this token factor. @@ -1633,6 +1627,86 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) { } } + // Remove Nodes that are chained to another node in the list. Do so + // by walking up chains breath-first stopping when we've seen + // another operand. In general we must climb to the EntryNode, but we can exit + // early if we find all remaining work is associated with just one operand as + // no further pruning is possible. + + // List of nodes to search through and original Ops from which they originate. + SmallVector<std::pair<SDNode *, unsigned>, 8> Worklist; + SmallVector<unsigned, 8> OpWorkCount; // Count of work for each Op. + SmallPtrSet<SDNode *, 16> SeenChains; + bool DidPruneOps = false; + + unsigned NumLeftToConsider = 0; + for (const SDValue &Op : Ops) { + Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++)); + OpWorkCount.push_back(1); + } + + auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) { + // If this is an Op, we can remove the op from the list. Remark any + // search associated with it as from the current OpNumber. + if (SeenOps.count(Op) != 0) { + Changed = true; + DidPruneOps = true; + unsigned OrigOpNumber = 0; + while (Ops[OrigOpNumber].getNode() != Op && OrigOpNumber < Ops.size()) + OrigOpNumber++; + assert((OrigOpNumber != Ops.size()) && + "expected to find TokenFactor Operand"); + // Re-mark worklist from OrigOpNumber to OpNumber + for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) { + if (Worklist[i].second == OrigOpNumber) { + Worklist[i].second = OpNumber; + } + } + OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber]; + OpWorkCount[OrigOpNumber] = 0; + NumLeftToConsider--; + } + // Add if it's a new chain + if (SeenChains.insert(Op).second) { + OpWorkCount[OpNumber]++; + Worklist.push_back(std::make_pair(Op, OpNumber)); + } + }; + + for (unsigned i = 0; i < Worklist.size() && i < 1024; ++i) { + // We need at least be consider at least 2 Ops to prune. + if (NumLeftToConsider <= 1) + break; + auto CurNode = Worklist[i].first; + auto CurOpNumber = Worklist[i].second; + assert((OpWorkCount[CurOpNumber] > 0) && + "Node should not appear in worklist"); + switch (CurNode->getOpcode()) { + case ISD::EntryToken: + // Hitting EntryToken is the only way for the search to terminate without + // hitting + // another operand's search. Prevent us from marking this operand + // considered. + NumLeftToConsider++; + break; + case ISD::TokenFactor: + for (const SDValue &Op : CurNode->op_values()) + AddToWorklist(i, Op.getNode(), CurOpNumber); + break; + case ISD::CopyFromReg: + case ISD::CopyToReg: + AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber); + break; + default: + if (auto *MemNode = dyn_cast<MemSDNode>(CurNode)) + AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber); + break; + } + OpWorkCount[CurOpNumber]--; + if (OpWorkCount[CurOpNumber] == 0) + NumLeftToConsider--; + } + SDValue Result; // If we've changed things around then replace token factor. @@ -1641,15 +1715,22 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) { // The entry token is the only possible outcome. Result = DAG.getEntryNode(); } else { - // New and improved token factor. - Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Ops); + if (DidPruneOps) { + SmallVector<SDValue, 8> PrunedOps; + // + for (const SDValue &Op : Ops) { + if (SeenChains.count(Op.getNode()) == 0) + PrunedOps.push_back(Op); + } + Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, PrunedOps); + } else { + Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Ops); + } } - // Add users to worklist if AA is enabled, since it may introduce - // a lot of new chained token factors while removing memory deps. - bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA - : DAG.getSubtarget().useAA(); - return CombineTo(N, Result, UseAA /*add to worklist*/); + // Add users to worklist, since we may introduce a lot of new + // chained token factors while removing memory deps. + return CombineTo(N, Result, true /*add to worklist*/); } return Result; @@ -6792,6 +6873,9 @@ SDValue DAGCombiner::CombineExtLoad(SDNode *N) { SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains); SDValue NewValue = DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Loads); + // Simplify TF. + AddToWorklist(NewChain.getNode()); + CombineTo(N, NewValue); // Replace uses of the original load (before extension) @@ -10947,7 +11031,7 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { dbgs() << "\n"); WorklistRemover DeadNodes(*this); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain); - + AddUsersToWorklist(Chain.getNode()); if (N->use_empty()) deleteAndRecombine(N); @@ -11000,7 +11084,7 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { StoreSDNode *PrevST = cast<StoreSDNode>(Chain); if (PrevST->getBasePtr() == Ptr && PrevST->getValue().getValueType() == N->getValueType(0)) - return CombineTo(N, Chain.getOperand(1), Chain); + return CombineTo(N, PrevST->getOperand(1), Chain); } } @@ -11018,14 +11102,7 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { } } - bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA - : DAG.getSubtarget().useAA(); -#ifndef NDEBUG - if (CombinerAAOnlyFunc.getNumOccurrences() && - CombinerAAOnlyFunc != DAG.getMachineFunction().getName()) - UseAA = false; -#endif - if (UseAA && LD->isUnindexed()) { + if (LD->isUnindexed()) { // Walk up chain skipping non-aliasing memory nodes. SDValue BetterChain = FindBetterChain(N, Chain); @@ -11607,6 +11684,7 @@ bool DAGCombiner::SliceUpLoad(SDNode *N) { SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other, ArgChains); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain); + AddToWorklist(Chain.getNode()); return true; } @@ -12000,20 +12078,6 @@ bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode, return false; } -SDValue DAGCombiner::getMergedConstantVectorStore( - SelectionDAG &DAG, const SDLoc &SL, ArrayRef<MemOpLink> Stores, - SmallVectorImpl<SDValue> &Chains, EVT Ty) const { - SmallVector<SDValue, 8> BuildVector; - - for (unsigned I = 0, E = Ty.getVectorNumElements(); I != E; ++I) { - StoreSDNode *St = cast<StoreSDNode>(Stores[I].MemNode); - Chains.push_back(St->getChain()); - BuildVector.push_back(St->getValue()); - } - - return DAG.getBuildVector(Ty, SL, BuildVector); -} - bool DAGCombiner::MergeStoresOfConstantsOrVecElts( SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, unsigned NumStores, bool IsConstantSrc, bool UseVector) { @@ -12022,22 +12086,8 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts( return false; int64_t ElementSizeBytes = MemVT.getSizeInBits() / 8; - LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; - unsigned LatestNodeUsed = 0; - - for (unsigned i=0; i < NumStores; ++i) { - // Find a chain for the new wide-store operand. Notice that some - // of the store nodes that we found may not be selected for inclusion - // in the wide store. The chain we use needs to be the chain of the - // latest store node which is *used* and replaced by the wide store. - if (StoreNodes[i].SequenceNum < StoreNodes[LatestNodeUsed].SequenceNum) - LatestNodeUsed = i; - } - - SmallVector<SDValue, 8> Chains; // The latest Node in the DAG. - LSBaseSDNode *LatestOp = StoreNodes[LatestNodeUsed].MemNode; SDLoc DL(StoreNodes[0].MemNode); SDValue StoredVal; @@ -12053,7 +12103,18 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts( assert(TLI.isTypeLegal(Ty) && "Illegal vector store"); if (IsConstantSrc) { - StoredVal = getMergedConstantVectorStore(DAG, DL, StoreNodes, Chains, Ty); + SmallVector<SDValue, 8> BuildVector; + for (unsigned I = 0, E = Ty.getVectorNumElements(); I != E; ++I) { + StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode); + SDValue Val = St->getValue(); + if (MemVT.getScalarType().isInteger()) + if (auto *CFP = dyn_cast<ConstantFPSDNode>(St->getValue())) + Val = DAG.getConstant( + (uint32_t)CFP->getValueAPF().bitcastToAPInt().getZExtValue(), + SDLoc(CFP), MemVT); + BuildVector.push_back(Val); + } + StoredVal = DAG.getBuildVector(Ty, DL, BuildVector); } else { SmallVector<SDValue, 8> Ops; for (unsigned i = 0; i < NumStores; ++i) { @@ -12063,7 +12124,6 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts( if (Val.getValueType() != MemVT) return false; Ops.push_back(Val); - Chains.push_back(St->getChain()); } // Build the extracted vector elements back into a vector. @@ -12083,7 +12143,6 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts( for (unsigned i = 0; i < NumStores; ++i) { unsigned Idx = IsLE ? (NumStores - 1 - i) : i; StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode); - Chains.push_back(St->getChain()); SDValue Val = St->getValue(); StoreInt <<= ElementSizeBytes * 8; @@ -12101,54 +12160,36 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts( StoredVal = DAG.getConstant(StoreInt, DL, StoreTy); } - assert(!Chains.empty()); + SmallVector<SDValue, 8> Chains; + + // Gather all Chains we're inheriting. As generally all chains are + // equal, do minor check to remove obvious redundancies. + Chains.push_back(StoreNodes[0].MemNode->getChain()); + for (unsigned i = 1; i < NumStores; ++i) + if (StoreNodes[0].MemNode->getChain() != StoreNodes[i].MemNode->getChain()) + Chains.push_back(StoreNodes[i].MemNode->getChain()); + LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains); SDValue NewStore = DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(), FirstInChain->getPointerInfo(), FirstInChain->getAlignment()); - bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA - : DAG.getSubtarget().useAA(); - if (UseAA) { - // Replace all merged stores with the new store. - for (unsigned i = 0; i < NumStores; ++i) - CombineTo(StoreNodes[i].MemNode, NewStore); - } else { - // Replace the last store with the new store. - CombineTo(LatestOp, NewStore); - // Erase all other stores. - for (unsigned i = 0; i < NumStores; ++i) { - if (StoreNodes[i].MemNode == LatestOp) - continue; - StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode); - // ReplaceAllUsesWith will replace all uses that existed when it was - // called, but graph optimizations may cause new ones to appear. For - // example, the case in pr14333 looks like - // - // St's chain -> St -> another store -> X - // - // And the only difference from St to the other store is the chain. - // When we change it's chain to be St's chain they become identical, - // get CSEed and the net result is that X is now a use of St. - // Since we know that St is redundant, just iterate. - while (!St->use_empty()) - DAG.ReplaceAllUsesWith(SDValue(St, 0), St->getChain()); - deleteAndRecombine(St); - } - } + // Replace all merged stores with the new store. + for (unsigned i = 0; i < NumStores; ++i) + CombineTo(StoreNodes[i].MemNode, NewStore); - StoreNodes.erase(StoreNodes.begin() + NumStores, StoreNodes.end()); + AddToWorklist(NewChain.getNode()); return true; } -void DAGCombiner::getStoreMergeAndAliasCandidates( - StoreSDNode* St, SmallVectorImpl<MemOpLink> &StoreNodes, - SmallVectorImpl<LSBaseSDNode*> &AliasLoadNodes) { +void DAGCombiner::getStoreMergeCandidates( + StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes) { // This holds the base pointer, index, and the offset in bytes from the base // pointer. BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr(), DAG); + EVT MemVT = St->getMemoryVT(); // We must have a base and an offset. if (!BasePtr.Base.getNode()) @@ -12158,104 +12199,70 @@ void DAGCombiner::getStoreMergeAndAliasCandidates( if (BasePtr.Base.isUndef()) return; - // Walk up the chain and look for nodes with offsets from the same - // base pointer. Stop when reaching an instruction with a different kind - // or instruction which has a different base pointer. - EVT MemVT = St->getMemoryVT(); - unsigned Seq = 0; - StoreSDNode *Index = St; - - - bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA - : DAG.getSubtarget().useAA(); - - if (UseAA) { - // Look at other users of the same chain. Stores on the same chain do not - // alias. If combiner-aa is enabled, non-aliasing stores are canonicalized - // to be on the same chain, so don't bother looking at adjacent chains. - - SDValue Chain = St->getChain(); - for (auto I = Chain->use_begin(), E = Chain->use_end(); I != E; ++I) { - if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I)) { - if (I.getOperandNo() != 0) - continue; - - if (OtherST->isVolatile() || OtherST->isIndexed()) - continue; - - if (OtherST->getMemoryVT() != MemVT) - continue; - - BaseIndexOffset Ptr = BaseIndexOffset::match(OtherST->getBasePtr(), DAG); - - if (Ptr.equalBaseIndex(BasePtr)) - StoreNodes.push_back(MemOpLink(OtherST, Ptr.Offset, Seq++)); - } - } - - return; - } - - while (Index) { - // If the chain has more than one use, then we can't reorder the mem ops. - if (Index != St && !SDValue(Index, 0)->hasOneUse()) - break; - - // Find the base pointer and offset for this memory node. - BaseIndexOffset Ptr = BaseIndexOffset::match(Index->getBasePtr(), DAG); - - // Check that the base pointer is the same as the original one. - if (!Ptr.equalBaseIndex(BasePtr)) - break; - - // The memory operands must not be volatile. - if (Index->isVolatile() || Index->isIndexed()) - break; - - // No truncation. - if (Index->isTruncatingStore()) - break; + // We looking for a root node which is an ancestor to all mergable + // stores. We search up through a load, to our root and then down + // through all children. For instance we will find Store{1,2,3} if + // St is Store1, Store2. or Store3 where the root is not a load + // which always true for nonvolatile ops. TODO: Expand + // the search to find all valid candidates through multiple layers of loads. + // + // Root + // |-------|-------| + // Load Load Store3 + // | | + // Store1 Store2 + // + // FIXME: We should be able to climb and + // descend TokenFactors to find candidates as well. - // The stored memory type must be the same. - if (Index->getMemoryVT() != MemVT) - break; + SDNode *RootNode = (St->getChain()).getNode(); - // We do not allow under-aligned stores in order to prevent - // overriding stores. NOTE: this is a bad hack. Alignment SHOULD - // be irrelevant here; what MATTERS is that we not move memory - // operations that potentially overlap past each-other. - if (Index->getAlignment() < MemVT.getStoreSize()) - break; + // Set of Parents of Candidates + std::set<SDNode *> CandidateParents; - // We found a potential memory operand to merge. - StoreNodes.push_back(MemOpLink(Index, Ptr.Offset, Seq++)); + if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(RootNode)) { + RootNode = Ldn->getChain().getNode(); + for (auto I = RootNode->use_begin(), E = RootNode->use_end(); I != E; ++I) + if (I.getOperandNo() == 0 && isa<LoadSDNode>(*I)) // walk down chain + CandidateParents.insert(*I); + } else + CandidateParents.insert(RootNode); + + bool IsLoadSrc = isa<LoadSDNode>(St->getValue()); + bool IsConstantSrc = isa<ConstantSDNode>(St->getValue()) || + isa<ConstantFPSDNode>(St->getValue()); + bool IsExtractVecSrc = + (St->getValue().getOpcode() == ISD::EXTRACT_VECTOR_ELT || + St->getValue().getOpcode() == ISD::EXTRACT_SUBVECTOR); + auto CorrectValueKind = [&](StoreSDNode *Other) -> bool { + if (IsLoadSrc) + return isa<LoadSDNode>(Other->getValue()); + if (IsConstantSrc) + return (isa<ConstantSDNode>(Other->getValue()) || + isa<ConstantFPSDNode>(Other->getValue())); + if (IsExtractVecSrc) + return (Other->getValue().getOpcode() == ISD::EXTRACT_VECTOR_ELT || + Other->getValue().getOpcode() == ISD::EXTRACT_SUBVECTOR); + return false; + }; - // Find the next memory operand in the chain. If the next operand in the - // chain is a store then move up and continue the scan with the next - // memory operand. If the next operand is a load save it and use alias - // information to check if it interferes with anything. - SDNode *NextInChain = Index->getChain().getNode(); - while (1) { - if (StoreSDNode *STn = dyn_cast<StoreSDNode>(NextInChain)) { - // We found a store node. Use it for the next iteration. - Index = STn; - break; - } else if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(NextInChain)) { - if (Ldn->isVolatile()) { - Index = nullptr; - break; + // check all parents of mergable children + for (auto P = CandidateParents.begin(); P != CandidateParents.end(); ++P) + for (auto I = (*P)->use_begin(), E = (*P)->use_end(); I != E; ++I) + if (I.getOperandNo() == 0) + if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I)) { + if (OtherST->isVolatile() || OtherST->isIndexed()) + continue; + // We can merge constant floats to equivalent integers + if (OtherST->getMemoryVT() != MemVT) + if (!(MemVT.isInteger() && MemVT.bitsEq(OtherST->getMemoryVT()) && + isa<ConstantFPSDNode>(OtherST->getValue()))) + continue; + BaseIndexOffset Ptr = + BaseIndexOffset::match(OtherST->getBasePtr(), DAG); + if (Ptr.equalBaseIndex(BasePtr) && CorrectValueKind(OtherST)) + StoreNodes.push_back(MemOpLink(OtherST, Ptr.Offset)); } - - // Save the load node for later. Continue the scan. - AliasLoadNodes.push_back(Ldn); - NextInChain = Ldn->getChain().getNode(); - continue; - } else { - Index = nullptr; - break; - } - } - } } // We need to check that merging these stores does not cause a loop @@ -12282,13 +12289,16 @@ bool DAGCombiner::checkMergeStoreCandidatesForDependencies( return true; } -bool DAGCombiner::MergeConsecutiveStores( - StoreSDNode* St, SmallVectorImpl<MemOpLink> &StoreNodes) { +bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { if (OptLevel == CodeGenOpt::None) return false; EVT MemVT = St->getMemoryVT(); int64_t ElementSizeBytes = MemVT.getSizeInBits() / 8; + + if (MemVT.getSizeInBits() * 2 > MaximumLegalStoreInBits) + return false; + bool NoVectors = DAG.getMachineFunction().getFunction()->hasFnAttribute( Attribute::NoImplicitFloat); @@ -12317,145 +12327,136 @@ bool DAGCombiner::MergeConsecutiveStores( if (MemVT.isVector() && IsLoadSrc) return false; - // Only look at ends of store sequences. - SDValue Chain = SDValue(St, 0); - if (Chain->hasOneUse() && Chain->use_begin()->getOpcode() == ISD::STORE) - return false; - - // Save the LoadSDNodes that we find in the chain. - // We need to make sure that these nodes do not interfere with - // any of the store nodes. - SmallVector<LSBaseSDNode*, 8> AliasLoadNodes; - - getStoreMergeAndAliasCandidates(St, StoreNodes, AliasLoadNodes); + SmallVector<MemOpLink, 8> StoreNodes; + // Find potential store merge candidates by searching through chain sub-DAG + getStoreMergeCandidates(St, StoreNodes); // Check if there is anything to merge. if (StoreNodes.size() < 2) return false; - // only do dependence check in AA case - bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA - : DAG.getSubtarget().useAA(); - if (UseAA && !checkMergeStoreCandidatesForDependencies(StoreNodes)) + // Check that we can merge these candidates without causing a cycle + if (!checkMergeStoreCandidatesForDependencies(StoreNodes)) return false; // Sort the memory operands according to their distance from the - // base pointer. As a secondary criteria: make sure stores coming - // later in the code come first in the list. This is important for - // the non-UseAA case, because we're merging stores into the FINAL - // store along a chain which potentially contains aliasing stores. - // Thus, if there are multiple stores to the same address, the last - // one can be considered for merging but not the others. + // base pointer. std::sort(StoreNodes.begin(), StoreNodes.end(), [](MemOpLink LHS, MemOpLink RHS) { - return LHS.OffsetFromBase < RHS.OffsetFromBase || - (LHS.OffsetFromBase == RHS.OffsetFromBase && - LHS.SequenceNum < RHS.SequenceNum); - }); + return LHS.OffsetFromBase < RHS.OffsetFromBase; + }); // Scan the memory operations on the chain and find the first non-consecutive // store memory address. - unsigned LastConsecutiveStore = 0; + unsigned NumConsecutiveStores = 0; int64_t StartAddress = StoreNodes[0].OffsetFromBase; - for (unsigned i = 0, e = StoreNodes.size(); i < e; ++i) { - - // Check that the addresses are consecutive starting from the second - // element in the list of stores. - if (i > 0) { - int64_t CurrAddress = StoreNodes[i].OffsetFromBase; - if (CurrAddress - StartAddress != (ElementSizeBytes * i)) - break; - } - // Check if this store interferes with any of the loads that we found. - // If we find a load that alias with this store. Stop the sequence. - if (any_of(AliasLoadNodes, [&](LSBaseSDNode *Ldn) { - return isAlias(Ldn, StoreNodes[i].MemNode); - })) + // Check that the addresses are consecutive starting from the second + // element in the list of stores. + for (unsigned i = 1, e = StoreNodes.size(); i < e; ++i) { + int64_t CurrAddress = StoreNodes[i].OffsetFromBase; + if (CurrAddress - StartAddress != (ElementSizeBytes * i)) break; - - // Mark this node as useful. - LastConsecutiveStore = i; + NumConsecutiveStores = i + 1; } + if (NumConsecutiveStores < 2) + return false; + // The node with the lowest store address. - LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; - unsigned FirstStoreAS = FirstInChain->getAddressSpace(); - unsigned FirstStoreAlign = FirstInChain->getAlignment(); LLVMContext &Context = *DAG.getContext(); const DataLayout &DL = DAG.getDataLayout(); // Store the constants into memory as one consecutive store. if (IsConstantSrc) { - unsigned LastLegalType = 0; - unsigned LastLegalVectorType = 0; - bool NonZero = false; - for (unsigned i=0; i<LastConsecutiveStore+1; ++i) { - StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode); - SDValue StoredVal = St->getValue(); - - if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal)) { - NonZero |= !C->isNullValue(); - } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal)) { - NonZero |= !C->getConstantFPValue()->isNullValue(); - } else { - // Non-constant. - break; - } + bool RV = false; + while (NumConsecutiveStores > 1) { + LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; + unsigned FirstStoreAS = FirstInChain->getAddressSpace(); + unsigned FirstStoreAlign = FirstInChain->getAlignment(); + unsigned LastLegalType = 0; + unsigned LastLegalVectorType = 0; + bool NonZero = false; + for (unsigned i = 0; i < NumConsecutiveStores; ++i) { + StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode); + SDValue StoredVal = ST->getValue(); + + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal)) { + NonZero |= !C->isNullValue(); + } else if (ConstantFPSDNode *C = + dyn_cast<ConstantFPSDNode>(StoredVal)) { + NonZero |= !C->getConstantFPValue()->isNullValue(); + } else { + // Non-constant. + break; + } - // Find a legal type for the constant store. - unsigned SizeInBits = (i+1) * ElementSizeBytes * 8; - EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits); - bool IsFast; - if (TLI.isTypeLegal(StoreTy) && - TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS, - FirstStoreAlign, &IsFast) && IsFast) { - LastLegalType = i+1; - // Or check whether a truncstore is legal. - } else if (TLI.getTypeAction(Context, StoreTy) == - TargetLowering::TypePromoteInteger) { - EVT LegalizedStoredValueTy = - TLI.getTypeToTransformTo(Context, StoredVal.getValueType()); - if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) && - TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy, - FirstStoreAS, FirstStoreAlign, &IsFast) && + // Find a legal type for the constant store. + unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8; + EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits); + bool IsFast = false; + if (TLI.isTypeLegal(StoreTy) && + TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS, + FirstStoreAlign, &IsFast) && IsFast) { LastLegalType = i + 1; + // Or check whether a truncstore is legal. + } else if (TLI.getTypeAction(Context, StoreTy) == + TargetLowering::TypePromoteInteger) { + EVT LegalizedStoredValueTy = + TLI.getTypeToTransformTo(Context, StoredVal.getValueType()); + if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) && + TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy, + FirstStoreAS, FirstStoreAlign, &IsFast) && + IsFast) { + LastLegalType = i + 1; + } } - } - // We only use vectors if the constant is known to be zero or the target - // allows it and the function is not marked with the noimplicitfloat - // attribute. - if ((!NonZero || TLI.storeOfVectorConstantIsCheap(MemVT, i+1, - FirstStoreAS)) && - !NoVectors) { - // Find a legal type for the vector store. - EVT Ty = EVT::getVectorVT(Context, MemVT, i+1); - if (TLI.isTypeLegal(Ty) && - TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS, - FirstStoreAlign, &IsFast) && IsFast) - LastLegalVectorType = i + 1; + // We only use vectors if the constant is known to be zero or the target + // allows it and the function is not marked with the noimplicitfloat + // attribute. + if ((!NonZero || + TLI.storeOfVectorConstantIsCheap(MemVT, i + 1, FirstStoreAS)) && + !NoVectors) { + // Find a legal type for the vector store. + EVT Ty = EVT::getVectorVT(Context, MemVT, i + 1); + if (TLI.isTypeLegal(Ty) && TLI.canMergeStoresTo(Ty) && + TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS, + FirstStoreAlign, &IsFast) && + IsFast) + LastLegalVectorType = i + 1; + } } - } - // Check if we found a legal integer type to store. - if (LastLegalType == 0 && LastLegalVectorType == 0) - return false; + // Check if we found a legal integer type that creates a meaningful merge. + if (LastLegalType < 2 && LastLegalVectorType < 2) + break; - bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors; - unsigned NumElem = UseVector ? LastLegalVectorType : LastLegalType; + bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors; + unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType; - return MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem, - true, UseVector); + bool Merged = MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem, + true, UseVector); + if (!Merged) + break; + // Remove merged stores for next iteration. + StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem); + RV = true; + NumConsecutiveStores -= NumElem; + } + return RV; } // When extracting multiple vector elements, try to store them // in one vector store rather than a sequence of scalar stores. if (IsExtractVecSrc) { + LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; + unsigned FirstStoreAS = FirstInChain->getAddressSpace(); + unsigned FirstStoreAlign = FirstInChain->getAlignment(); unsigned NumStoresToMerge = 0; bool IsVec = MemVT.isVector(); - for (unsigned i = 0; i < LastConsecutiveStore + 1; ++i) { + for (unsigned i = 0; i < NumConsecutiveStores; ++i) { StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode); unsigned StoreValOpcode = St->getValue().getOpcode(); // This restriction could be loosened. @@ -12495,7 +12496,7 @@ bool DAGCombiner::MergeConsecutiveStores( // Find acceptable loads. Loads need to have the same chain (token factor), // must not be zext, volatile, indexed, and they must be consecutive. BaseIndexOffset LdBasePtr; - for (unsigned i=0; i<LastConsecutiveStore+1; ++i) { + for (unsigned i = 0; i < NumConsecutiveStores; ++i) { StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode); LoadSDNode *Ld = dyn_cast<LoadSDNode>(St->getValue()); if (!Ld) break; @@ -12528,7 +12529,7 @@ bool DAGCombiner::MergeConsecutiveStores( } // We found a potential memory operand to merge. - LoadNodes.push_back(MemOpLink(Ld, LdPtr.Offset, 0)); + LoadNodes.push_back(MemOpLink(Ld, LdPtr.Offset)); } if (LoadNodes.size() < 2) @@ -12540,7 +12541,9 @@ bool DAGCombiner::MergeConsecutiveStores( if (LoadNodes.size() == 2 && TLI.hasPairedLoad(MemVT, RequiredAlignment) && St->getAlignment() >= RequiredAlignment) return false; - + LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; + unsigned FirstStoreAS = FirstInChain->getAddressSpace(); + unsigned FirstStoreAlign = FirstInChain->getAlignment(); LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode); unsigned FirstLoadAS = FirstLoad->getAddressSpace(); unsigned FirstLoadAlign = FirstLoad->getAlignment(); @@ -12609,30 +12612,19 @@ bool DAGCombiner::MergeConsecutiveStores( // We add +1 here because the LastXXX variables refer to location while // the NumElem refers to array/index size. - unsigned NumElem = std::min(LastConsecutiveStore, LastConsecutiveLoad) + 1; + unsigned NumElem = std::min(NumConsecutiveStores, LastConsecutiveLoad + 1); NumElem = std::min(LastLegalType, NumElem); if (NumElem < 2) return false; - // Collect the chains from all merged stores. + // Collect the chains from all merged stores. Because the common case + // all chains are the same, check if we match the first Chain. SmallVector<SDValue, 8> MergeStoreChains; MergeStoreChains.push_back(StoreNodes[0].MemNode->getChain()); - - // The latest Node in the DAG. - unsigned LatestNodeUsed = 0; - for (unsigned i=1; i<NumElem; ++i) { - // Find a chain for the new wide-store operand. Notice that some - // of the store nodes that we found may not be selected for inclusion - // in the wide store. The chain we use needs to be the chain of the - // latest store node which is *used* and replaced by the wide store. - if (StoreNodes[i].SequenceNum < StoreNodes[LatestNodeUsed].SequenceNum) - LatestNodeUsed = i; - - MergeStoreChains.push_back(StoreNodes[i].MemNode->getChain()); - } - - LSBaseSDNode *LatestOp = StoreNodes[LatestNodeUsed].MemNode; + for (unsigned i = 1; i < NumElem; ++i) + if (StoreNodes[0].MemNode->getChain() != StoreNodes[i].MemNode->getChain()) + MergeStoreChains.push_back(StoreNodes[i].MemNode->getChain()); // Find if it is better to use vectors or integers to load and store // to memory. @@ -12656,6 +12648,8 @@ bool DAGCombiner::MergeConsecutiveStores( SDValue NewStoreChain = DAG.getNode(ISD::TokenFactor, StoreDL, MVT::Other, MergeStoreChains); + AddToWorklist(NewStoreChain.getNode()); + SDValue NewStore = DAG.getStore(NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(), FirstInChain->getPointerInfo(), FirstStoreAlign); @@ -12667,25 +12661,9 @@ bool DAGCombiner::MergeConsecutiveStores( SDValue(NewLoad.getNode(), 1)); } - if (UseAA) { - // Replace the all stores with the new store. - for (unsigned i = 0; i < NumElem; ++i) - CombineTo(StoreNodes[i].MemNode, NewStore); - } else { - // Replace the last store with the new store. - CombineTo(LatestOp, NewStore); - // Erase all other stores. - for (unsigned i = 0; i < NumElem; ++i) { - // Remove all Store nodes. - if (StoreNodes[i].MemNode == LatestOp) - continue; - StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode); - DAG.ReplaceAllUsesOfValueWith(SDValue(St, 0), St->getChain()); - deleteAndRecombine(St); - } - } - - StoreNodes.erase(StoreNodes.begin() + NumElem, StoreNodes.end()); + // Replace the all stores with the new store. + for (unsigned i = 0; i < NumElem; ++i) + CombineTo(StoreNodes[i].MemNode, NewStore); return true; } @@ -12842,19 +12820,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { if (SDValue NewST = TransformFPLoadStorePair(N)) return NewST; - bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA - : DAG.getSubtarget().useAA(); -#ifndef NDEBUG - if (CombinerAAOnlyFunc.getNumOccurrences() && - CombinerAAOnlyFunc != DAG.getMachineFunction().getName()) - UseAA = false; -#endif - if (UseAA && ST->isUnindexed()) { - // FIXME: We should do this even without AA enabled. AA will just allow - // FindBetterChain to work in more situations. The problem with this is that - // any combine that expects memory operations to be on consecutive chains - // first needs to be updated to look for users of the same chain. - + if (ST->isUnindexed()) { // Walk up chain skipping non-aliasing memory nodes, on this store and any // adjacent stores. if (findBetterNeighborChains(ST)) { @@ -12888,8 +12854,15 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { if (SimplifyDemandedBits( Value, APInt::getLowBitsSet(Value.getScalarValueSizeInBits(), - ST->getMemoryVT().getScalarSizeInBits()))) + ST->getMemoryVT().getScalarSizeInBits()))) { + // Re-visit the store if anything changed and the store hasn't been merged + // with another node (N is deleted) SimplifyDemandedBits will add Value's + // node back to the worklist if necessary, but we also need to re-visit + // the Store node itself. + if (N->getOpcode() != ISD::DELETED_NODE) + AddToWorklist(N); return SDValue(N, 0); + } } // If this is a load followed by a store to the same location, then the store @@ -12933,15 +12906,12 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { // There can be multiple store sequences on the same chain. // Keep trying to merge store sequences until we are unable to do so // or until we merge the last store on the chain. - SmallVector<MemOpLink, 8> StoreNodes; - bool Changed = MergeConsecutiveStores(ST, StoreNodes); + bool Changed = MergeConsecutiveStores(ST); if (!Changed) break; - - if (any_of(StoreNodes, - [ST](const MemOpLink &Link) { return Link.MemNode == ST; })) { - // ST has been merged and no longer exists. + // Return N as merge only uses CombineTo and no worklist clean + // up is necessary. + if (N->getOpcode() == ISD::DELETED_NODE || !isa<StoreSDNode>(N)) return SDValue(N, 0); - } } } @@ -12950,7 +12920,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { // Make sure to do this only after attempting to merge stores in order to // avoid changing the types of some subset of stores due to visit order, // preventing their merging. - if (isa<ConstantFPSDNode>(Value)) { + if (isa<ConstantFPSDNode>(ST->getValue())) { if (SDValue NewSt = replaceStoreOfFPConstant(ST)) return NewSt; } @@ -13887,6 +13857,35 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { if (ISD::allOperandsUndef(N)) return DAG.getUNDEF(VT); + // Check if we can express BUILD VECTOR via subvector extract. + if (!LegalTypes && (N->getNumOperands() > 1)) { + SDValue Op0 = N->getOperand(0); + auto checkElem = [&](SDValue Op) -> uint64_t { + if ((Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT) && + (Op0.getOperand(0) == Op.getOperand(0))) + if (auto CNode = dyn_cast<ConstantSDNode>(Op.getOperand(1))) + return CNode->getZExtValue(); + return -1; + }; + + int Offset = checkElem(Op0); + for (unsigned i = 0; i < N->getNumOperands(); ++i) { + if (Offset + i != checkElem(N->getOperand(i))) { + Offset = -1; + break; + } + } + + if ((Offset == 0) && + (Op0.getOperand(0).getValueType() == N->getValueType(0))) + return Op0.getOperand(0); + if ((Offset != -1) && + ((Offset % N->getValueType(0).getVectorNumElements()) == + 0)) // IDX must be multiple of output size. + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), N->getValueType(0), + Op0.getOperand(0), Op0.getOperand(1)); + } + if (SDValue V = reduceBuildVecExtToExtBuildVec(N)) return V; @@ -15983,7 +15982,7 @@ static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset, if (Base.getOpcode() == ISD::ADD) { if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Base.getOperand(1))) { Base = Base.getOperand(0); - Offset += C->getZExtValue(); + Offset += C->getSExtValue(); } } @@ -16180,6 +16179,12 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain, ++Depth; break; + case ISD::CopyFromReg: + // Forward past CopyFromReg. + Chains.push_back(Chain.getOperand(0)); + ++Depth; + break; + default: // For all other instructions we will just have to take what we can get. Aliases.push_back(Chain); @@ -16208,6 +16213,18 @@ SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) { return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Aliases); } +// This function tries to collect a bunch of potentially interesting +// nodes to improve the chains of, all at once. This might seem +// redundant, as this function gets called when visiting every store +// node, so why not let the work be done on each store as it's visited? +// +// I believe this is mainly important because MergeConsecutiveStores +// is unable to deal with merging stores of different sizes, so unless +// we improve the chains of all the potential candidates up-front +// before running MergeConsecutiveStores, it might only see some of +// the nodes that will eventually be candidates, and then not be able +// to go from a partially-merged state to the desired final +// fully-merged state. bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) { // This holds the base pointer, index, and the offset in bytes from the base // pointer. @@ -16243,10 +16260,8 @@ bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) { if (!Ptr.equalBaseIndex(BasePtr)) break; - // Find the next memory operand in the chain. If the next operand in the - // chain is a store then move up and continue the scan with the next - // memory operand. If the next operand is a load save it and use alias - // information to check if it interferes with anything. + // Walk up the chain to find the next store node, ignoring any + // intermediate loads. Any other kind of node will halt the loop. SDNode *NextInChain = Index->getChain().getNode(); while (true) { if (StoreSDNode *STn = dyn_cast<StoreSDNode>(NextInChain)) { @@ -16265,9 +16280,14 @@ bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) { Index = nullptr; break; } - } + } // end while } + // At this point, ChainedStores lists all of the Store nodes + // reachable by iterating up through chain nodes matching the above + // conditions. For each such store identified, try to find an + // earlier chain to attach the store to which won't violate the + // required ordering. bool MadeChangeToSt = false; SmallVector<std::pair<StoreSDNode *, SDValue>, 8> BetterChains; |