diff options
author | Sanjay Patel <spatel@rotateright.com> | 2017-05-27 14:07:03 +0000 |
---|---|---|
committer | Sanjay Patel <spatel@rotateright.com> | 2017-05-27 14:07:03 +0000 |
commit | 33f4a9728741b1305496ec89f5b472a57e21a32b (patch) | |
tree | 647340b9f5eb29c43204f874206ff6a7cae58647 /llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | |
parent | 7aa22859b66223c59a5dacc45e8bd7535659057b (diff) | |
download | bcm5719-llvm-33f4a9728741b1305496ec89f5b472a57e21a32b.tar.gz bcm5719-llvm-33f4a9728741b1305496ec89f5b472a57e21a32b.zip |
[DAGCombiner] use narrow load to avoid vector extract
If we have (extract_subvector(load wide vector)) with no other users,
that can just be (load narrow vector). This is intentionally conservative.
Follow-ups may loosen the one-use constraint to account for the extract cost
or just remove the one-use check.
The memop chain updating is based on code that already exists multiple times
in x86 lowering, so that should be pulled into a helper function as a follow-up.
Background: this is a potential improvement noticed via regressions caused by
making x86's peekThroughBitcasts() not loop on consecutive bitcasts (see
comments in D33137).
Differential Revision: https://reviews.llvm.org/D33578
llvm-svn: 304072
Diffstat (limited to 'llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp')
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 50 |
1 files changed, 50 insertions, 0 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 6928ad6bef0..23a302f3e56 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -14555,6 +14555,52 @@ static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG) { return DAG.getBitcast(VT, NarrowBinOp); } +/// If we are extracting a subvector from a wide vector load, convert to a +/// narrow load to eliminate the extraction: +/// (extract_subvector (load wide vector)) --> (load narrow vector) +static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) { + // TODO: Add support for big-endian. The offset calculation must be adjusted. + if (DAG.getDataLayout().isBigEndian()) + return SDValue(); + + // TODO: The one-use check is overly conservative. Check the cost of the + // extract instead or remove that condition entirely. + auto *Ld = dyn_cast<LoadSDNode>(Extract->getOperand(0)); + auto *ExtIdx = dyn_cast<ConstantSDNode>(Extract->getOperand(1)); + if (!Ld || !Ld->hasOneUse() || Ld->isVolatile() || !ExtIdx) + return SDValue(); + + // The narrow load will be offset from the base address of the old load if + // we are extracting from something besides index 0 (little-endian). + EVT VT = Extract->getValueType(0); + SDLoc DL(Extract); + SDValue BaseAddr = Ld->getOperand(1); + unsigned Offset = ExtIdx->getZExtValue() * VT.getScalarType().getStoreSize(); + + // TODO: Use "BaseIndexOffset" to make this more effective. + SDValue NewAddr = DAG.getMemBasePlusOffset(BaseAddr, Offset, DL); + MachineFunction &MF = DAG.getMachineFunction(); + MachineMemOperand *MMO = MF.getMachineMemOperand(Ld->getMemOperand(), Offset, + VT.getStoreSize()); + SDValue NewLd = DAG.getLoad(VT, DL, Ld->getChain(), NewAddr, MMO); + + // The new load must have the same position as the old load in terms of memory + // dependency. Create a TokenFactor for Ld and NewLd and update uses of Ld's + // output chain to use that TokenFactor. + // TODO: This code is based on a similar sequence in x86 lowering. It should + // be moved to a helper function, so it can be shared and reused. + if (Ld->hasAnyUseOfValue(1)) { + SDValue OldChain = SDValue(Ld, 1); + SDValue NewChain = SDValue(NewLd.getNode(), 1); + SDValue TokenFactor = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, + OldChain, NewChain); + DAG.ReplaceAllUsesOfValueWith(OldChain, TokenFactor); + DAG.UpdateNodeOperands(TokenFactor.getNode(), OldChain, NewChain); + } + + return NewLd; +} + SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) { EVT NVT = N->getValueType(0); SDValue V = N->getOperand(0); @@ -14563,6 +14609,10 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) { if (V.isUndef()) return DAG.getUNDEF(NVT); + if (TLI.isOperationLegalOrCustomOrPromote(ISD::LOAD, NVT)) + if (SDValue NarrowLoad = narrowExtractedVectorLoad(N, DAG)) + return NarrowLoad; + // Combine: // (extract_subvec (concat V1, V2, ...), i) // Into: |