diff options
| author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2016-02-24 14:53:27 +0000 |
|---|---|---|
| committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2016-02-24 14:53:27 +0000 |
| commit | dd01f700850f5615f0ac6b3e37c738db0ed19411 (patch) | |
| tree | 491e87566aaf0716c99d2158590ee77c46fd5e5f /llvm/lib | |
| parent | 4f073ca7faba0ae155ca2d33be53928203bf3f34 (diff) | |
| download | bcm5719-llvm-dd01f700850f5615f0ac6b3e37c738db0ed19411.tar.gz bcm5719-llvm-dd01f700850f5615f0ac6b3e37c738db0ed19411.zip | |
[X86][SSE41] Combine insertion of zero scalars into vector blends with zero
Part 1 of 2
This patch attempts to replace the insertion of zero scalars with a vector blend with zero, avoiding the use of the integer insertion instructions (which are particularly slow on many targets).
(Part 2 will add support for combining multiple blends-with-zero).
Differential Revision: http://reviews.llvm.org/D17483
llvm-svn: 261743
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 14 |
1 file changed, 14 insertions, 0 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 36e3e5a9016..c7b4fa5ec6a 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -12301,6 +12301,7 @@ SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const { MVT VT = Op.getSimpleValueType(); MVT EltVT = VT.getVectorElementType(); + unsigned NumElts = VT.getVectorNumElements(); if (EltVT == MVT::i1) return InsertBitToMaskVector(Op, DAG); @@ -12314,6 +12315,19 @@ SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, auto *N2C = cast<ConstantSDNode>(N2); unsigned IdxVal = N2C->getZExtValue(); + // If we are clearing out an element, we do this more efficiently with a + // blend shuffle than a costly integer insertion. + // TODO: would other rematerializable values (e.g. allbits) benefit as well? + // TODO: pre-SSE41 targets will tend to use bit masking - this could still + // be beneficial if we are inserting several zeros and can combine the masks. + if (X86::isZeroNode(N1) && Subtarget.hasSSE41() && NumElts <= 8) { + SmallVector<int, 8> ClearMask; + for (unsigned i = 0; i != NumElts; ++i) + ClearMask.push_back(i == IdxVal ? i + NumElts : i); + SDValue ZeroVector = getZeroVector(VT, Subtarget, DAG, dl); + return DAG.getVectorShuffle(VT, dl, N0, ZeroVector, ClearMask); + } + // If the vector is wider than 128 bits, extract the 128-bit subvector, insert // into that, and then insert the subvector back into the result. if (VT.is256BitVector() || VT.is512BitVector()) { |

