summaryrefslogtreecommitdiffstats
path: root/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
diff options
context:
space:
mode:
authorSirish Pande <sirishrp@gmail.com>2018-05-16 15:36:52 +0000
committerSirish Pande <sirishrp@gmail.com>2018-05-16 15:36:52 +0000
commitcabe50a308736a0f5c72fc7bf034b4ee3dac5760 (patch)
treeafa9272c2d9b6a6bc38bcf97d2d1f9ca85f1e5d5 /llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
parentefdb9be29fd24e86a7a6f6610b760aec88512f1e (diff)
downloadbcm5719-llvm-cabe50a308736a0f5c72fc7bf034b4ee3dac5760.tar.gz
bcm5719-llvm-cabe50a308736a0f5c72fc7bf034b4ee3dac5760.zip
[AArch64] Gangup loads and stores for pairing.
Keep loads and stores together (target defines how many loads and stores to gang up), such that it will help in pairing and vectorization. Differential Revision https://reviews.llvm.org/D46477 llvm-svn: 332482
Diffstat (limited to 'llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp')
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp89
1 files changed, 85 insertions, 4 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index f486a90380e..8f74d6a2a71 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -89,6 +89,14 @@ void SelectionDAG::DAGUpdateListener::NodeUpdated(SDNode*) {}
#define DEBUG_TYPE "selectiondag"
+static cl::opt<bool> EnableMemCpyDAGOpt("enable-memcpy-dag-opt",
+ cl::Hidden, cl::init(true),
+ cl::desc("Gang up loads and stores generated by inlining of memcpy"));
+
+static cl::opt<int> MaxLdStGlue("ldstmemcpy-glue-max",
+ cl::desc("Number limit for gluing ld/st of memcpy."),
+ cl::Hidden, cl::init(0));
+
static void NewSDValueDbgMsg(SDValue V, StringRef Msg, SelectionDAG *G) {
LLVM_DEBUG(dbgs() << Msg; V.getNode()->dump(G););
}
@@ -5218,6 +5226,31 @@ static bool shouldLowerMemFuncForSize(const MachineFunction &MF) {
return MF.getFunction().optForSize();
}
+static void chainLoadsAndStoresForMemcpy(SelectionDAG &DAG, const SDLoc &dl,
+ SmallVector<SDValue, 32> &OutChains, unsigned From,
+ unsigned To, SmallVector<SDValue, 16> &OutLoadChains,
+ SmallVector<SDValue, 16> &OutStoreChains) {
+ assert(OutLoadChains.size() && "Missing loads in memcpy inlining");
+ assert(OutStoreChains.size() && "Missing stores in memcpy inlining");
+ SmallVector<SDValue, 16> GluedLoadChains;
+ for (unsigned i = From; i < To; ++i) {
+ OutChains.push_back(OutLoadChains[i]);
+ GluedLoadChains.push_back(OutLoadChains[i]);
+ }
+
+ // Chain for all loads.
+ SDValue LoadToken = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ GluedLoadChains);
+
+ for (unsigned i = From; i < To; ++i) {
+ StoreSDNode *ST = dyn_cast<StoreSDNode>(OutStoreChains[i]);
+ SDValue NewStore = DAG.getTruncStore(LoadToken, dl, ST->getValue(),
+ ST->getBasePtr(), ST->getMemoryVT(),
+ ST->getMemOperand());
+ OutChains.push_back(NewStore);
+ }
+}
+
static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
SDValue Chain, SDValue Dst, SDValue Src,
uint64_t Size, unsigned Align,
@@ -5282,7 +5315,9 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
MachineMemOperand::Flags MMOFlags =
isVol ? MachineMemOperand::MOVolatile : MachineMemOperand::MONone;
- SmallVector<SDValue, 8> OutChains;
+ SmallVector<SDValue, 16> OutLoadChains;
+ SmallVector<SDValue, 16> OutStoreChains;
+ SmallVector<SDValue, 32> OutChains;
unsigned NumMemOps = MemOps.size();
uint64_t SrcOff = 0, DstOff = 0;
for (unsigned i = 0; i != NumMemOps; ++i) {
@@ -5316,11 +5351,13 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
SubSlice.Length = VTSize;
}
Value = getMemsetStringVal(VT, dl, DAG, TLI, SubSlice);
- if (Value.getNode())
+ if (Value.getNode()) {
Store = DAG.getStore(Chain, dl, Value,
DAG.getMemBasePlusOffset(Dst, DstOff, dl),
DstPtrInfo.getWithOffset(DstOff), Align,
MMOFlags);
+ OutChains.push_back(Store);
+ }
}
if (!Store.getNode()) {
@@ -5342,17 +5379,61 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
DAG.getMemBasePlusOffset(Src, SrcOff, dl),
SrcPtrInfo.getWithOffset(SrcOff), VT,
MinAlign(SrcAlign, SrcOff), SrcMMOFlags);
- OutChains.push_back(Value.getValue(1));
+ OutLoadChains.push_back(Value.getValue(1));
+
Store = DAG.getTruncStore(
Chain, dl, Value, DAG.getMemBasePlusOffset(Dst, DstOff, dl),
DstPtrInfo.getWithOffset(DstOff), VT, Align, MMOFlags);
+ OutStoreChains.push_back(Store);
}
- OutChains.push_back(Store);
SrcOff += VTSize;
DstOff += VTSize;
Size -= VTSize;
}
+ unsigned GluedLdStLimit = MaxLdStGlue == 0 ?
+ TLI.getMaxGluedStoresPerMemcpy() : MaxLdStGlue;
+ unsigned NumLdStInMemcpy = OutStoreChains.size();
+
+ if (NumLdStInMemcpy) {
+ // It may be that memcpy might be converted to memset if it's memcpy
+ // of constants. In such a case, we won't have loads and stores, but
+ // just stores. In the absence of loads, there is nothing to gang up.
+ if ((GluedLdStLimit <= 1) || !EnableMemCpyDAGOpt) {
+ // If target does not care, just leave as it.
+ for (unsigned i = 0; i < NumLdStInMemcpy; ++i) {
+ OutChains.push_back(OutLoadChains[i]);
+ OutChains.push_back(OutStoreChains[i]);
+ }
+ } else {
+ // Ld/St less than/equal limit set by target.
+ if (NumLdStInMemcpy <= GluedLdStLimit) {
+ chainLoadsAndStoresForMemcpy(DAG, dl, OutChains, 0,
+ NumLdStInMemcpy, OutLoadChains,
+ OutStoreChains);
+ } else {
+ unsigned NumberLdChain = NumLdStInMemcpy / GluedLdStLimit;
+ unsigned RemainingLdStInMemcpy = NumLdStInMemcpy % GluedLdStLimit;
+ unsigned GlueIter = 0;
+
+ for (unsigned cnt = 0; cnt < NumberLdChain; ++cnt) {
+ unsigned IndexFrom = NumLdStInMemcpy - GlueIter - GluedLdStLimit;
+ unsigned IndexTo = NumLdStInMemcpy - GlueIter;
+
+ chainLoadsAndStoresForMemcpy(DAG, dl, OutChains, IndexFrom, IndexTo,
+ OutLoadChains, OutStoreChains);
+ GlueIter += GluedLdStLimit;
+ }
+
+ // Residual ld/st.
+ if (RemainingLdStInMemcpy) {
+ chainLoadsAndStoresForMemcpy(DAG, dl, OutChains, 0,
+ RemainingLdStInMemcpy, OutLoadChains,
+ OutStoreChains);
+ }
+ }
+ }
+ }
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
}
OpenPOWER on IntegriCloud