[X86][SSE41] Combine insertion of zero scalars into vector blends with zero

Part 1 of 2. This patch attempts to replace the insertion of zero scalars with a vector blend with zero, avoiding the use of the integer insertion instructions (which are particularly slow on many targets). (Part 2 will add support for combining multiple blends-with-zero.) Differential Revision: http://reviews.llvm.org/D17483 llvm-svn: 261743
author: Simon Pilgrim <llvm-dev@redking.me.uk> 2016-02-24 14:53:27 +0000
committer: Simon Pilgrim <llvm-dev@redking.me.uk> 2016-02-24 14:53:27 +0000
commit: dd01f700850f5615f0ac6b3e37c738db0ed19411 (patch)
tree: 491e87566aaf0716c99d2158590ee77c46fd5e5f /llvm/lib
parent: 4f073ca7faba0ae155ca2d33be53928203bf3f34 (diff)
download: bcm5719-llvm-dd01f700850f5615f0ac6b3e37c738db0ed19411.tar.gz
bcm5719-llvm-dd01f700850f5615f0ac6b3e37c738db0ed19411.zip
1 files changed, 14 insertions, 0 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 36e3e5a9016..c7b4fa5ec6a 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -12301,6 +12301,7 @@ SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
                                                   SelectionDAG &DAG) const {
   MVT VT = Op.getSimpleValueType();
   MVT EltVT = VT.getVectorElementType();
+  unsigned NumElts = VT.getVectorNumElements();
 
   if (EltVT == MVT::i1)
     return InsertBitToMaskVector(Op, DAG);
@@ -12314,6 +12315,19 @@ SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
   auto *N2C = cast<ConstantSDNode>(N2);
   unsigned IdxVal = N2C->getZExtValue();
 
+  // If we are clearing out an element, we do this more efficiently with a
+  // blend shuffle than a costly integer insertion.
+  // TODO: would other rematerializable values (e.g. allbits) benefit as well?
+  // TODO: pre-SSE41 targets will tend to use bit masking - this could still
+  // be beneficial if we are inserting several zeros and can combine the masks.
+  if (X86::isZeroNode(N1) && Subtarget.hasSSE41() && NumElts <= 8) {
+    SmallVector<int, 8> ClearMask;
+    for (unsigned i = 0; i != NumElts; ++i)
+      ClearMask.push_back(i == IdxVal ? i + NumElts : i);
+    SDValue ZeroVector = getZeroVector(VT, Subtarget, DAG, dl);
+    return DAG.getVectorShuffle(VT, dl, N0, ZeroVector, ClearMask);
+  }
+
   // If the vector is wider than 128 bits, extract the 128-bit subvector, insert
   // into that, and then insert the subvector back into the result.
   if (VT.is256BitVector() || VT.is512BitVector()) {
author	Simon Pilgrim <llvm-dev@redking.me.uk>	2016-02-24 14:53:27 +0000
committer	Simon Pilgrim <llvm-dev@redking.me.uk>	2016-02-24 14:53:27 +0000
commit	dd01f700850f5615f0ac6b3e37c738db0ed19411 (patch)
tree	491e87566aaf0716c99d2158590ee77c46fd5e5f /llvm/lib
parent	4f073ca7faba0ae155ca2d33be53928203bf3f34 (diff)
download	bcm5719-llvm-dd01f700850f5615f0ac6b3e37c738db0ed19411.tar.gz bcm5719-llvm-dd01f700850f5615f0ac6b3e37c738db0ed19411.zip