diff options
Diffstat (limited to 'llvm/lib/CodeGen')
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 50 |
1 files changed, 50 insertions, 0 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 6928ad6bef0..23a302f3e56 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -14555,6 +14555,52 @@ static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG) { return DAG.getBitcast(VT, NarrowBinOp); } +/// If we are extracting a subvector from a wide vector load, convert to a +/// narrow load to eliminate the extraction: +/// (extract_subvector (load wide vector)) --> (load narrow vector) +static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) { + // TODO: Add support for big-endian. The offset calculation must be adjusted. + if (DAG.getDataLayout().isBigEndian()) + return SDValue(); + + // TODO: The one-use check is overly conservative. Check the cost of the + // extract instead or remove that condition entirely. + auto *Ld = dyn_cast<LoadSDNode>(Extract->getOperand(0)); + auto *ExtIdx = dyn_cast<ConstantSDNode>(Extract->getOperand(1)); + if (!Ld || !Ld->hasOneUse() || Ld->isVolatile() || !ExtIdx) + return SDValue(); + + // The narrow load will be offset from the base address of the old load if + // we are extracting from something besides index 0 (little-endian). + EVT VT = Extract->getValueType(0); + SDLoc DL(Extract); + SDValue BaseAddr = Ld->getOperand(1); + unsigned Offset = ExtIdx->getZExtValue() * VT.getScalarType().getStoreSize(); + + // TODO: Use "BaseIndexOffset" to make this more effective. + SDValue NewAddr = DAG.getMemBasePlusOffset(BaseAddr, Offset, DL); + MachineFunction &MF = DAG.getMachineFunction(); + MachineMemOperand *MMO = MF.getMachineMemOperand(Ld->getMemOperand(), Offset, + VT.getStoreSize()); + SDValue NewLd = DAG.getLoad(VT, DL, Ld->getChain(), NewAddr, MMO); + + // The new load must have the same position as the old load in terms of memory + // dependency. Create a TokenFactor for Ld and NewLd and update uses of Ld's + // output chain to use that TokenFactor. + // TODO: This code is based on a similar sequence in x86 lowering. It should + // be moved to a helper function, so it can be shared and reused. + if (Ld->hasAnyUseOfValue(1)) { + SDValue OldChain = SDValue(Ld, 1); + SDValue NewChain = SDValue(NewLd.getNode(), 1); + SDValue TokenFactor = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, + OldChain, NewChain); + DAG.ReplaceAllUsesOfValueWith(OldChain, TokenFactor); + DAG.UpdateNodeOperands(TokenFactor.getNode(), OldChain, NewChain); + } + + return NewLd; +} + SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) { EVT NVT = N->getValueType(0); SDValue V = N->getOperand(0); @@ -14563,6 +14609,10 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) { if (V.isUndef()) return DAG.getUNDEF(NVT); + if (TLI.isOperationLegalOrCustomOrPromote(ISD::LOAD, NVT)) + if (SDValue NarrowLoad = narrowExtractedVectorLoad(N, DAG)) + return NarrowLoad; + // Combine: // (extract_subvec (concat V1, V2, ...), i) // Into: |