diff options
author | Nirav Dave <niravd@google.com> | 2018-11-08 19:14:20 +0000 |
---|---|---|
committer | Nirav Dave <niravd@google.com> | 2018-11-08 19:14:20 +0000 |
commit | 6ce9f72f76e3c1c9c5b1cd5a65ba2b0bb319294f (patch) | |
tree | bcfc45bd36ac8be46d6c84a2b7e5e3625ee985a9 /llvm/lib | |
parent | f3dc9649ced6d3e5a2574bf939e2c8cfcfb9c465 (diff) | |
download | bcm5719-llvm-6ce9f72f76e3c1c9c5b1cd5a65ba2b0bb319294f.tar.gz bcm5719-llvm-6ce9f72f76e3c1c9c5b1cd5a65ba2b0bb319294f.zip |
[DAGCombine] Improve alias analysis for chain of independent stores.
FindBetterNeighborChains simulateanously improves the chain
dependencies of a chain of related stores avoiding the generation of
extra token factors. For chains longer than the GatherAllAliasDepths,
stores further down in the chain will necessarily fail, a potentially
significant waste and preventing otherwise trivial parallelization.
This patch directly parallelize the chains of stores before improving
each store. This generally improves DAG-level parallelism.
Reviewers: courbet, spatel, RKSimon, bogner, efriedma, craig.topper, rnk
Subscribers: sdardis, javed.absar, hiraditya, jrtc27, atanasyan, llvm-commits
Differential Revision: https://reviews.llvm.org/D53552
llvm-svn: 346432
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 175 |
1 files changed, 116 insertions, 59 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index fc0e8efebdc..bb0c3fb4c2f 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -20,6 +20,7 @@ #include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/IntervalMap.h" #include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" @@ -490,6 +491,10 @@ namespace { /// returns false. bool findBetterNeighborChains(StoreSDNode *St); + // Helper for findBetterNeighborChains. Walk up store chain add additional + // chained stores that do not overlap and can be parallelized. + bool parallelizeChainedStores(StoreSDNode *St); + /// Holds a pointer to an LSBaseSDNode as well as information on where it /// is located in a sequence of memory operations connected by a chain. struct MemOpLink { @@ -18905,6 +18910,11 @@ SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) { return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Aliases); } +// TODO: Replace with with std::monostate when we move to C++17. +struct UnitT { } Unit; +bool operator==(const UnitT &, const UnitT &) { return true; } +bool operator!=(const UnitT &, const UnitT &) { return false; } + // This function tries to collect a bunch of potentially interesting // nodes to improve the chains of, all at once. This might seem // redundant, as this function gets called when visiting every store @@ -18917,13 +18927,22 @@ SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) { // the nodes that will eventually be candidates, and then not be able // to go from a partially-merged state to the desired final // fully-merged state. -bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) { - if (OptLevel == CodeGenOpt::None) - return false; + +bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) { + SmallVector<StoreSDNode *, 8> ChainedStores; + StoreSDNode *STChain = St; + // Intervals records which offsets from BaseIndex have been covered. In + // the common case, every store writes to the immediately previous address + // space and thus merged with the previous interval at insertion time. + + using IMap = + llvm::IntervalMap<int64_t, UnitT, 8, IntervalMapHalfOpenInfo<int64_t>>; + IMap::Allocator A; + IMap Intervals(A); // This holds the base pointer, index, and the offset in bytes from the base // pointer. - BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG); + const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG); // We must have a base and an offset. if (!BasePtr.getBase().getNode()) @@ -18933,76 +18952,114 @@ bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) { if (BasePtr.getBase().isUndef()) return false; - SmallVector<StoreSDNode *, 8> ChainedStores; - ChainedStores.push_back(St); + // Add ST's interval. + Intervals.insert(0, (St->getMemoryVT().getSizeInBits() + 7) / 8, Unit); - // Walk up the chain and look for nodes with offsets from the same - // base pointer. Stop when reaching an instruction with a different kind - // or instruction which has a different base pointer. - StoreSDNode *Index = St; - while (Index) { + while (StoreSDNode *Chain = dyn_cast<StoreSDNode>(STChain->getChain())) { // If the chain has more than one use, then we can't reorder the mem ops. - if (Index != St && !SDValue(Index, 0)->hasOneUse()) + if (!SDValue(Chain, 0)->hasOneUse()) break; - - if (Index->isVolatile() || Index->isIndexed()) + if (Chain->isVolatile() || Chain->isIndexed()) break; // Find the base pointer and offset for this memory node. - BaseIndexOffset Ptr = BaseIndexOffset::match(Index, DAG); - + const BaseIndexOffset Ptr = BaseIndexOffset::match(Chain, DAG); // Check that the base pointer is the same as the original one. - if (!BasePtr.equalBaseIndex(Ptr, DAG)) + int64_t Offset; + if (!BasePtr.equalBaseIndex(Ptr, DAG, Offset)) break; + int64_t Length = (Chain->getMemoryVT().getSizeInBits() + 7) / 8; + // Make sure we don't overlap with other intervals by checking the ones to + // the left or right before inserting. + auto I = Intervals.find(Offset); + // If there's a next interval, we should end before it. + if (I != Intervals.end() && I.start() < (Offset + Length)) + break; + // If there's a previous interval, we should start after it. + if (I != Intervals.begin() && (--I).stop() <= Offset) + break; + Intervals.insert(Offset, Offset + Length, Unit); - // Walk up the chain to find the next store node, ignoring any - // intermediate loads. Any other kind of node will halt the loop. - SDNode *NextInChain = Index->getChain().getNode(); - while (true) { - if (StoreSDNode *STn = dyn_cast<StoreSDNode>(NextInChain)) { - // We found a store node. Use it for the next iteration. - if (STn->isVolatile() || STn->isIndexed()) { - Index = nullptr; - break; - } - ChainedStores.push_back(STn); - Index = STn; - break; - } else if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(NextInChain)) { - NextInChain = Ldn->getChain().getNode(); - continue; - } else { - Index = nullptr; - break; - } - }// end while + ChainedStores.push_back(Chain); + STChain = Chain; } - // At this point, ChainedStores lists all of the Store nodes - // reachable by iterating up through chain nodes matching the above - // conditions. For each such store identified, try to find an - // earlier chain to attach the store to which won't violate the - // required ordering. - bool MadeChangeToSt = false; - SmallVector<std::pair<StoreSDNode *, SDValue>, 8> BetterChains; + // If we didn't find a chained store, exit. + if (ChainedStores.size() == 0) + return false; + + // Improve all chained stores (St and ChainedStores members) starting from + // where the store chain ended and return single TokenFactor. + SDValue NewChain = STChain->getChain(); + SmallVector<SDValue, 8> TFOps; + for (unsigned I = ChainedStores.size(); I;) { + StoreSDNode *S = ChainedStores[--I]; + SDValue BetterChain = FindBetterChain(S, NewChain); + S = cast<StoreSDNode>(DAG.UpdateNodeOperands( + S, BetterChain, S->getOperand(1), S->getOperand(2), S->getOperand(3))); + TFOps.push_back(SDValue(S, 0)); + ChainedStores[I] = S; + } + + // Improve St's chain. Use a new node to avoid creating a loop from CombineTo. + SDValue BetterChain = FindBetterChain(St, NewChain); + SDValue NewST; + if (St->isTruncatingStore()) + NewST = DAG.getTruncStore(BetterChain, SDLoc(St), St->getValue(), + St->getBasePtr(), St->getMemoryVT(), + St->getMemOperand()); + else + NewST = DAG.getStore(BetterChain, SDLoc(St), St->getValue(), + St->getBasePtr(), St->getMemOperand()); - for (StoreSDNode *ChainedStore : ChainedStores) { - SDValue Chain = ChainedStore->getChain(); - SDValue BetterChain = FindBetterChain(ChainedStore, Chain); + TFOps.push_back(NewST); - if (Chain != BetterChain) { - if (ChainedStore == St) - MadeChangeToSt = true; - BetterChains.push_back(std::make_pair(ChainedStore, BetterChain)); - } - } + // If we improved every element of TFOps, then we've lost the dependence on + // NewChain to successors of St and we need to add it back to TFOps. Do so at + // the beginning to keep relative order consistent with FindBetterChains. + auto hasImprovedChain = [&](SDValue ST) -> bool { + return ST->getOperand(0) != NewChain; + }; + bool AddNewChain = llvm::all_of(TFOps, hasImprovedChain); + if (AddNewChain) + TFOps.insert(TFOps.begin(), NewChain); + + SDValue TF = DAG.getNode(ISD::TokenFactor, SDLoc(STChain), MVT::Other, TFOps); + CombineTo(St, TF); + + AddToWorklist(STChain); + // Add TF operands worklist in reverse order. + for (auto I = TF->getNumOperands(); I;) + AddToWorklist(TF->getOperand(--I).getNode()); + AddToWorklist(TF.getNode()); + return true; +} + +bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) { + if (OptLevel == CodeGenOpt::None) + return false; + + const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG); - // Do all replacements after finding the replacements to make to avoid making - // the chains more complicated by introducing new TokenFactors. - for (auto Replacement : BetterChains) - replaceStoreChain(Replacement.first, Replacement.second); + // We must have a base and an offset. + if (!BasePtr.getBase().getNode()) + return false; + + // Do not handle stores to undef base pointers. + if (BasePtr.getBase().isUndef()) + return false; + + // Directly improve a chain of disjoint stores starting at St. + if (parallelizeChainedStores(St)) + return true; - return MadeChangeToSt; + // Improve St's Chain.. + SDValue BetterChain = FindBetterChain(St, St->getChain()); + if (St->getChain() != BetterChain) { + replaceStoreChain(St, BetterChain); + return true; + } + return false; } /// This is the entry point for the file. |