diff options
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 14 |
1 file changed, 14 insertions, 0 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 36e3e5a9016..c7b4fa5ec6a 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -12301,6 +12301,7 @@ SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const { MVT VT = Op.getSimpleValueType(); MVT EltVT = VT.getVectorElementType(); + unsigned NumElts = VT.getVectorNumElements(); if (EltVT == MVT::i1) return InsertBitToMaskVector(Op, DAG); @@ -12314,6 +12315,19 @@ SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, auto *N2C = cast<ConstantSDNode>(N2); unsigned IdxVal = N2C->getZExtValue(); + // If we are clearing out an element, we do this more efficiently with a + // blend shuffle than a costly integer insertion. + // TODO: would other rematerializable values (e.g. allbits) benefit as well? + // TODO: pre-SSE41 targets will tend to use bit masking - this could still + // be beneficial if we are inserting several zeros and can combine the masks. + if (X86::isZeroNode(N1) && Subtarget.hasSSE41() && NumElts <= 8) { + SmallVector<int, 8> ClearMask; + for (unsigned i = 0; i != NumElts; ++i) + ClearMask.push_back(i == IdxVal ? i + NumElts : i); + SDValue ZeroVector = getZeroVector(VT, Subtarget, DAG, dl); + return DAG.getVectorShuffle(VT, dl, N0, ZeroVector, ClearMask); + } + // If the vector is wider than 128 bits, extract the 128-bit subvector, insert // into that, and then insert the subvector back into the result. if (VT.is256BitVector() || VT.is512BitVector()) { |

