diff options
| author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2019-06-17 17:22:38 +0000 |
|---|---|---|
| committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2019-06-17 17:22:38 +0000 |
| commit | bb9adfdb4e862ca9f43342b129d6151a23869865 (patch) | |
| tree | a5f1da06ccd1f6d944947d3e352f10376cc7c014 /llvm/lib | |
| parent | 6452bdd29b5a0bd0f902c06046add459f910d335 (diff) | |
| download | bcm5719-llvm-bb9adfdb4e862ca9f43342b129d6151a23869865.tar.gz bcm5719-llvm-bb9adfdb4e862ca9f43342b129d6151a23869865.zip | |
[X86][AVX] Split under-aligned vector nt-stores.
If a YMM/ZMM non-temporal store has less than natural alignment, split the vector in half. Each half will either be satisfactorily aligned or will continue to be split until it is an XMM, at which point the legalizer will scalarize it.
llvm-svn: 363582
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 15 |
1 file changed, 13 insertions, 2 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 8827ff3de3f..c9b8e5fa2c0 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -39545,6 +39545,7 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG, EVT VT = St->getValue().getValueType(); EVT StVT = St->getMemoryVT(); SDLoc dl(St); + unsigned Alignment = St->getAlignment(); SDValue StoredVal = St->getOperand(1); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); @@ -39595,8 +39596,6 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG, StoredVal->ops().slice(32, 32)); Hi = combinevXi1ConstantToInteger(Hi, DAG); - unsigned Alignment = St->getAlignment(); - SDValue Ptr0 = St->getBasePtr(); SDValue Ptr1 = DAG.getMemBasePlusOffset(Ptr0, 4, dl); @@ -39631,6 +39630,18 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG, return splitVectorStore(St, DAG); } + // Split under-aligned vector non-temporal stores. + if (St->isNonTemporal() && StVT == VT && Alignment < VT.getStoreSize()) { + // ZMM/YMM nt-stores - either it can be stored as a series of shorter + // vectors or the legalizer can scalarize it to use MOVNTI. + if (VT.is256BitVector() || VT.is512BitVector()) { + unsigned NumElems = VT.getVectorNumElements(); + if (NumElems < 2) + return SDValue(); + return splitVectorStore(St, DAG); + } + } + // Optimize trunc store (of multiple scalars) to shuffle and store. // First, pack all of the elements in one place. Next, store to memory // in fewer chunks. |

