summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
authorSimon Pilgrim <llvm-dev@redking.me.uk>2019-06-17 17:22:38 +0000
committerSimon Pilgrim <llvm-dev@redking.me.uk>2019-06-17 17:22:38 +0000
commitbb9adfdb4e862ca9f43342b129d6151a23869865 (patch)
treea5f1da06ccd1f6d944947d3e352f10376cc7c014 /llvm/lib
parent6452bdd29b5a0bd0f902c06046add459f910d335 (diff)
downloadbcm5719-llvm-bb9adfdb4e862ca9f43342b129d6151a23869865.tar.gz
bcm5719-llvm-bb9adfdb4e862ca9f43342b129d6151a23869865.zip
[X86][AVX] Split under-aligned vector nt-stores.
If a YMM/ZMM non-temporal store has less than natural alignment, split the vector. The resulting pieces will either be satisfactorily aligned or will continue to be split until they are XMMs, at which point the legalizer will scalarize them.
llvm-svn: 363582
Diffstat (limited to 'llvm/lib')
-rw-r--r-- llvm/lib/Target/X86/X86ISelLowering.cpp 15
1 files changed, 13 insertions, 2 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 8827ff3de3f..c9b8e5fa2c0 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -39545,6 +39545,7 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
EVT VT = St->getValue().getValueType();
EVT StVT = St->getMemoryVT();
SDLoc dl(St);
+ unsigned Alignment = St->getAlignment();
SDValue StoredVal = St->getOperand(1);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
@@ -39595,8 +39596,6 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
StoredVal->ops().slice(32, 32));
Hi = combinevXi1ConstantToInteger(Hi, DAG);
- unsigned Alignment = St->getAlignment();
-
SDValue Ptr0 = St->getBasePtr();
SDValue Ptr1 = DAG.getMemBasePlusOffset(Ptr0, 4, dl);
@@ -39631,6 +39630,18 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
return splitVectorStore(St, DAG);
}
+ // Split under-aligned vector non-temporal stores.
+ if (St->isNonTemporal() && StVT == VT && Alignment < VT.getStoreSize()) {
+ // ZMM/YMM nt-stores - either it can be stored as a series of shorter
+ // vectors or the legalizer can scalarize it to use MOVNTI.
+ if (VT.is256BitVector() || VT.is512BitVector()) {
+ unsigned NumElems = VT.getVectorNumElements();
+ if (NumElems < 2)
+ return SDValue();
+ return splitVectorStore(St, DAG);
+ }
+ }
+
// Optimize trunc store (of multiple scalars) to shuffle and store.
// First, pack all of the elements in one place. Next, store to memory
// in fewer chunks.
OpenPOWER on IntegriCloud