| author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2019-06-17 18:20:04 +0000 |
|---|---|---|
| committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2019-06-17 18:20:04 +0000 |
| commit | 835999e48aa05ade2adf86cbe76d78743d90aa66 | |
| tree | c6100a712a98b6329f13bfe8fa07edbfed5db871 /llvm/lib | |
| parent | 5d942d5a95c48526c66ac7843f9b385bdb716b30 | |
[X86][SSE] Scalarize under-aligned XMM vector nt-stores (PR42026)
If an XMM non-temporal store has less than natural alignment, scalarize the vector: with SSE4A we can stay in the XMM domain and use MOVNTSD (f64); otherwise we must move the elements to GPRs and use MOVNTI (i32/i64).
llvm-svn: 363592
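
As a rough user-level illustration of the non-SSE4A fallback described above (this sketch is not part of the patch; the function name and the choice of intrinsics are assumptions): on x86-64 with SSE2, each 64-bit lane can be moved to a GPR and stored with MOVNTI, which is what the scalarized lowering amounts to.

```cpp
#include <emmintrin.h> // SSE2 intrinsics

// Hypothetical sketch (not from the patch): a 16-byte non-temporal store
// through a pointer with only 8-byte alignment. MOVNTPS/MOVNTDQ would
// require 16-byte alignment, so each 64-bit lane is extracted to a GPR
// and stored with MOVNTI instead (x86-64 only).
void nt_store_underaligned(long long *p, __m128i v) {
  _mm_stream_si64(p + 0, _mm_cvtsi128_si64(v));                        // MOVNTI, low lane
  _mm_stream_si64(p + 1, _mm_cvtsi128_si64(_mm_unpackhi_epi64(v, v))); // MOVNTI, high lane
}
```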
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 45 |
1 file changed, 45 insertions, 0 deletions
```diff
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index c9b8e5fa2c0..42fcb5e92e9 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -21110,6 +21110,42 @@ static SDValue splitVectorStore(StoreSDNode *Store, SelectionDAG &DAG) {
   return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Ch0, Ch1);
 }
 
+/// Scalarize a vector store, bitcasting to TargetVT to determine the scalar
+/// type.
+static SDValue scalarizeVectorStore(StoreSDNode *Store, MVT StoreVT,
+                                    SelectionDAG &DAG) {
+  SDValue StoredVal = Store->getValue();
+  assert(StoreVT.is128BitVector() &&
+         StoredVal.getValueType().is128BitVector() && "Expecting 128-bit op");
+  StoredVal = DAG.getBitcast(StoreVT, StoredVal);
+
+  // Splitting volatile memory ops is not allowed unless the operation was not
+  // legal to begin with. We are assuming the input op is legal (this transform
+  // is only used for targets with AVX).
+  if (Store->isVolatile())
+    return SDValue();
+
+  MVT StoreSVT = StoreVT.getScalarType();
+  unsigned NumElems = StoreVT.getVectorNumElements();
+  unsigned ScalarSize = StoreSVT.getStoreSize();
+  unsigned Alignment = Store->getAlignment();
+
+  SDLoc DL(Store);
+  SmallVector<SDValue, 4> Stores;
+  for (unsigned i = 0; i != NumElems; ++i) {
+    unsigned Offset = i * ScalarSize;
+    SDValue Ptr = DAG.getMemBasePlusOffset(Store->getBasePtr(), Offset, DL);
+    SDValue Scl = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, StoreSVT, StoredVal,
+                              DAG.getIntPtrConstant(i, DL));
+    SDValue Ch = DAG.getStore(Store->getChain(), DL, Scl, Ptr,
+                              Store->getPointerInfo().getWithOffset(Offset),
+                              MinAlign(Alignment, Offset),
+                              Store->getMemOperand()->getFlags());
+    Stores.push_back(Ch);
+  }
+  return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores);
+}
+
 static SDValue LowerStore(SDValue Op, const X86Subtarget &Subtarget,
                           SelectionDAG &DAG) {
   StoreSDNode *St = cast<StoreSDNode>(Op.getNode());
@@ -39640,6 +39676,15 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
         return SDValue();
       return splitVectorStore(St, DAG);
     }
+
+    // XMM nt-stores - scalarize this to f64 nt-stores on SSE4A, else i32/i64
+    // to use MOVNTI.
+    if (VT.is128BitVector() && Subtarget.hasSSE2()) {
+      MVT NTVT = Subtarget.hasSSE4A()
+                     ? MVT::v2f64
+                     : (TLI.isTypeLegal(MVT::i64) ? MVT::v2i64 : MVT::v4i32);
+      return scalarizeVectorStore(St, NTVT, DAG);
+    }
   }
 
   // Optimize trunc store (of multiple scalars) to shuffle and store.
```
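
Similarly, a hedged sketch of the SSE4A path selected above (NTVT == MVT::v2f64), assuming a toolchain that ships ammintrin.h and is invoked with -msse4a; the function name is again hypothetical. MOVNTSD stores the low f64 lane non-temporally straight from an XMM register, so the data never has to leave the vector domain.

```cpp
#include <ammintrin.h> // SSE4A intrinsics (AMD extension, compile with -msse4a)

// Hypothetical sketch (not from the patch): scalarize the under-aligned
// 16-byte nt-store into two f64 MOVNTSD stores, avoiding the GPR round-trip.
void nt_store_underaligned_sse4a(double *p, __m128d v) {
  _mm_stream_sd(p + 0, v);                     // MOVNTSD, low lane
  _mm_stream_sd(p + 1, _mm_unpackhi_pd(v, v)); // MOVNTSD, high lane
}
```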

