summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
authorNirav Dave <niravd@google.com>2018-11-08 19:14:20 +0000
committerNirav Dave <niravd@google.com>2018-11-08 19:14:20 +0000
commit6ce9f72f76e3c1c9c5b1cd5a65ba2b0bb319294f (patch)
treebcfc45bd36ac8be46d6c84a2b7e5e3625ee985a9 /llvm/lib
parentf3dc9649ced6d3e5a2574bf939e2c8cfcfb9c465 (diff)
downloadbcm5719-llvm-6ce9f72f76e3c1c9c5b1cd5a65ba2b0bb319294f.tar.gz
bcm5719-llvm-6ce9f72f76e3c1c9c5b1cd5a65ba2b0bb319294f.zip
[DAGCombine] Improve alias analysis for chain of independent stores.
FindBetterNeighborChains simulateanously improves the chain dependencies of a chain of related stores avoiding the generation of extra token factors. For chains longer than the GatherAllAliasDepths, stores further down in the chain will necessarily fail, a potentially significant waste and preventing otherwise trivial parallelization. This patch directly parallelize the chains of stores before improving each store. This generally improves DAG-level parallelism. Reviewers: courbet, spatel, RKSimon, bogner, efriedma, craig.topper, rnk Subscribers: sdardis, javed.absar, hiraditya, jrtc27, atanasyan, llvm-commits Differential Revision: https://reviews.llvm.org/D53552 llvm-svn: 346432
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp175
1 files changed, 116 insertions, 59 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index fc0e8efebdc..bb0c3fb4c2f 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -20,6 +20,7 @@
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/IntervalMap.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
@@ -490,6 +491,10 @@ namespace {
/// returns false.
bool findBetterNeighborChains(StoreSDNode *St);
+ // Helper for findBetterNeighborChains. Walk up store chain add additional
+ // chained stores that do not overlap and can be parallelized.
+ bool parallelizeChainedStores(StoreSDNode *St);
+
/// Holds a pointer to an LSBaseSDNode as well as information on where it
/// is located in a sequence of memory operations connected by a chain.
struct MemOpLink {
@@ -18905,6 +18910,11 @@ SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Aliases);
}
+// TODO: Replace with with std::monostate when we move to C++17.
+struct UnitT { } Unit;
+bool operator==(const UnitT &, const UnitT &) { return true; }
+bool operator!=(const UnitT &, const UnitT &) { return false; }
+
// This function tries to collect a bunch of potentially interesting
// nodes to improve the chains of, all at once. This might seem
// redundant, as this function gets called when visiting every store
@@ -18917,13 +18927,22 @@ SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
// the nodes that will eventually be candidates, and then not be able
// to go from a partially-merged state to the desired final
// fully-merged state.
-bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
- if (OptLevel == CodeGenOpt::None)
- return false;
+
+bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) {
+ SmallVector<StoreSDNode *, 8> ChainedStores;
+ StoreSDNode *STChain = St;
+ // Intervals records which offsets from BaseIndex have been covered. In
+ // the common case, every store writes to the immediately previous address
+ // space and thus merged with the previous interval at insertion time.
+
+ using IMap =
+ llvm::IntervalMap<int64_t, UnitT, 8, IntervalMapHalfOpenInfo<int64_t>>;
+ IMap::Allocator A;
+ IMap Intervals(A);
// This holds the base pointer, index, and the offset in bytes from the base
// pointer.
- BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
+ const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
// We must have a base and an offset.
if (!BasePtr.getBase().getNode())
@@ -18933,76 +18952,114 @@ bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
if (BasePtr.getBase().isUndef())
return false;
- SmallVector<StoreSDNode *, 8> ChainedStores;
- ChainedStores.push_back(St);
+ // Add ST's interval.
+ Intervals.insert(0, (St->getMemoryVT().getSizeInBits() + 7) / 8, Unit);
- // Walk up the chain and look for nodes with offsets from the same
- // base pointer. Stop when reaching an instruction with a different kind
- // or instruction which has a different base pointer.
- StoreSDNode *Index = St;
- while (Index) {
+ while (StoreSDNode *Chain = dyn_cast<StoreSDNode>(STChain->getChain())) {
// If the chain has more than one use, then we can't reorder the mem ops.
- if (Index != St && !SDValue(Index, 0)->hasOneUse())
+ if (!SDValue(Chain, 0)->hasOneUse())
break;
-
- if (Index->isVolatile() || Index->isIndexed())
+ if (Chain->isVolatile() || Chain->isIndexed())
break;
// Find the base pointer and offset for this memory node.
- BaseIndexOffset Ptr = BaseIndexOffset::match(Index, DAG);
-
+ const BaseIndexOffset Ptr = BaseIndexOffset::match(Chain, DAG);
// Check that the base pointer is the same as the original one.
- if (!BasePtr.equalBaseIndex(Ptr, DAG))
+ int64_t Offset;
+ if (!BasePtr.equalBaseIndex(Ptr, DAG, Offset))
break;
+ int64_t Length = (Chain->getMemoryVT().getSizeInBits() + 7) / 8;
+ // Make sure we don't overlap with other intervals by checking the ones to
+ // the left or right before inserting.
+ auto I = Intervals.find(Offset);
+ // If there's a next interval, we should end before it.
+ if (I != Intervals.end() && I.start() < (Offset + Length))
+ break;
+ // If there's a previous interval, we should start after it.
+ if (I != Intervals.begin() && (--I).stop() <= Offset)
+ break;
+ Intervals.insert(Offset, Offset + Length, Unit);
- // Walk up the chain to find the next store node, ignoring any
- // intermediate loads. Any other kind of node will halt the loop.
- SDNode *NextInChain = Index->getChain().getNode();
- while (true) {
- if (StoreSDNode *STn = dyn_cast<StoreSDNode>(NextInChain)) {
- // We found a store node. Use it for the next iteration.
- if (STn->isVolatile() || STn->isIndexed()) {
- Index = nullptr;
- break;
- }
- ChainedStores.push_back(STn);
- Index = STn;
- break;
- } else if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(NextInChain)) {
- NextInChain = Ldn->getChain().getNode();
- continue;
- } else {
- Index = nullptr;
- break;
- }
- }// end while
+ ChainedStores.push_back(Chain);
+ STChain = Chain;
}
- // At this point, ChainedStores lists all of the Store nodes
- // reachable by iterating up through chain nodes matching the above
- // conditions. For each such store identified, try to find an
- // earlier chain to attach the store to which won't violate the
- // required ordering.
- bool MadeChangeToSt = false;
- SmallVector<std::pair<StoreSDNode *, SDValue>, 8> BetterChains;
+ // If we didn't find a chained store, exit.
+ if (ChainedStores.size() == 0)
+ return false;
+
+ // Improve all chained stores (St and ChainedStores members) starting from
+ // where the store chain ended and return single TokenFactor.
+ SDValue NewChain = STChain->getChain();
+ SmallVector<SDValue, 8> TFOps;
+ for (unsigned I = ChainedStores.size(); I;) {
+ StoreSDNode *S = ChainedStores[--I];
+ SDValue BetterChain = FindBetterChain(S, NewChain);
+ S = cast<StoreSDNode>(DAG.UpdateNodeOperands(
+ S, BetterChain, S->getOperand(1), S->getOperand(2), S->getOperand(3)));
+ TFOps.push_back(SDValue(S, 0));
+ ChainedStores[I] = S;
+ }
+
+ // Improve St's chain. Use a new node to avoid creating a loop from CombineTo.
+ SDValue BetterChain = FindBetterChain(St, NewChain);
+ SDValue NewST;
+ if (St->isTruncatingStore())
+ NewST = DAG.getTruncStore(BetterChain, SDLoc(St), St->getValue(),
+ St->getBasePtr(), St->getMemoryVT(),
+ St->getMemOperand());
+ else
+ NewST = DAG.getStore(BetterChain, SDLoc(St), St->getValue(),
+ St->getBasePtr(), St->getMemOperand());
- for (StoreSDNode *ChainedStore : ChainedStores) {
- SDValue Chain = ChainedStore->getChain();
- SDValue BetterChain = FindBetterChain(ChainedStore, Chain);
+ TFOps.push_back(NewST);
- if (Chain != BetterChain) {
- if (ChainedStore == St)
- MadeChangeToSt = true;
- BetterChains.push_back(std::make_pair(ChainedStore, BetterChain));
- }
- }
+ // If we improved every element of TFOps, then we've lost the dependence on
+ // NewChain to successors of St and we need to add it back to TFOps. Do so at
+ // the beginning to keep relative order consistent with FindBetterChains.
+ auto hasImprovedChain = [&](SDValue ST) -> bool {
+ return ST->getOperand(0) != NewChain;
+ };
+ bool AddNewChain = llvm::all_of(TFOps, hasImprovedChain);
+ if (AddNewChain)
+ TFOps.insert(TFOps.begin(), NewChain);
+
+ SDValue TF = DAG.getNode(ISD::TokenFactor, SDLoc(STChain), MVT::Other, TFOps);
+ CombineTo(St, TF);
+
+ AddToWorklist(STChain);
+ // Add TF operands worklist in reverse order.
+ for (auto I = TF->getNumOperands(); I;)
+ AddToWorklist(TF->getOperand(--I).getNode());
+ AddToWorklist(TF.getNode());
+ return true;
+}
+
+bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
+ if (OptLevel == CodeGenOpt::None)
+ return false;
+
+ const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
- // Do all replacements after finding the replacements to make to avoid making
- // the chains more complicated by introducing new TokenFactors.
- for (auto Replacement : BetterChains)
- replaceStoreChain(Replacement.first, Replacement.second);
+ // We must have a base and an offset.
+ if (!BasePtr.getBase().getNode())
+ return false;
+
+ // Do not handle stores to undef base pointers.
+ if (BasePtr.getBase().isUndef())
+ return false;
+
+ // Directly improve a chain of disjoint stores starting at St.
+ if (parallelizeChainedStores(St))
+ return true;
- return MadeChangeToSt;
+ // Improve St's Chain..
+ SDValue BetterChain = FindBetterChain(St, St->getChain());
+ if (St->getChain() != BetterChain) {
+ replaceStoreChain(St, BetterChain);
+ return true;
+ }
+ return false;
}
/// This is the entry point for the file.
OpenPOWER on IntegriCloud