[X86] Genericize the handling of INSERT_SUBVECTOR from an EXTRACT_SUBVECTOR to support 512-bit vectors with 128-bit or 256-bit subvectors.

We now detect that both the extract and insert indices are non-zero and convert to a shuffle. This will be lowered as a blend for 256-bit vectors or as a vshuf operation for 512-bit vectors. llvm-svn: 294931
author: Craig Topper <craig.topper@gmail.com> 2017-02-13 04:53:29 +0000
committer: Craig Topper <craig.topper@gmail.com> 2017-02-13 04:53:29 +0000
commit: 680c73e7ab086ea7d5844f711186f12d590dab13 (patch)
tree: 9a8cf5826e4f087ff0e19a85ed7625e176a42d09 /llvm/lib
parent: aa46204ed9f36077c4bf138288a391ac395cfcce (diff)
download: bcm5719-llvm-680c73e7ab086ea7d5844f711186f12d590dab13.tar.gz
bcm5719-llvm-680c73e7ab086ea7d5844f711186f12d590dab13.zip
1 files changed, 18 insertions, 21 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 9d2b3fcd039..bc1b7bc1894 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -34160,28 +34160,25 @@ static SDValue combineInsertSubvector(SDNode *N, SelectionDAG &DAG,
   MVT OpVT = N->getSimpleValueType(0);
   MVT SubVecVT = SubVec.getSimpleValueType();
 
-  // If we're inserting into the upper half of a 256-bit vector with a vector
-  // that was extracted from the upper half of a 256-bit vector, we should
-  // use a blend instead.
-  if (SubVec.getOpcode() == ISD::EXTRACT_SUBVECTOR && OpVT.is256BitVector() &&
+  // If this is an insert of an extract, combine to a shuffle. Don't do this
+  // if the insert or extract can be represented with a subvector operation.
+  if (SubVec.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
       SubVec.getOperand(0).getSimpleValueType() == OpVT &&
-      Idx == SubVec.getOperand(1) && IdxVal == OpVT.getVectorNumElements()/2) {
-
-    // Integers must be cast to 32-bit because there is only vpblendd;
-    // vpblendw can't be used for this because it has a handicapped mask.
-    // If we don't have AVX2, then cast to float. Using a wrong domain blend
-    // is still more efficient than using the wrong domain vinsertf128 that
-    // will be created by InsertSubVector().
-    MVT CastVT = OpVT;
-    if (OpVT.isInteger())
-      CastVT = Subtarget.hasAVX2() ? MVT::v8i32 : MVT::v8f32;
-
-    // The blend instruction, and therefore its mask, depend on the data type.
-    unsigned MaskVal = CastVT.getScalarSizeInBits() == 64 ? 0x0c : 0xf0;
-    SDValue Mask = DAG.getConstant(MaskVal, dl, MVT::i8);
-    Vec = DAG.getNode(X86ISD::BLENDI, dl, CastVT, DAG.getBitcast(CastVT, Vec),
-                      DAG.getBitcast(CastVT, SubVec.getOperand(0)), Mask);
-    return DAG.getBitcast(OpVT, Vec);
+      (IdxVal != 0 || !Vec.isUndef())) {
+    int ExtIdxVal = cast<ConstantSDNode>(SubVec.getOperand(1))->getZExtValue();
+    if (ExtIdxVal != 0) {
+      int VecNumElts = OpVT.getVectorNumElements();
+      int SubVecNumElts = SubVecVT.getVectorNumElements();
+      SmallVector<int, 64> Mask(VecNumElts);
+      // First create an identity shuffle mask.
+      for (int i = 0; i != VecNumElts; ++i)
+        Mask[i] = i;
+      // Now insert the extracted portion.
+      for (int i = 0; i != SubVecNumElts; ++i)
+        Mask[i + IdxVal] = i + ExtIdxVal + VecNumElts;
+
+      return DAG.getVectorShuffle(OpVT, dl, Vec, SubVec.getOperand(0), Mask);
+    }
   }
 
   // Fold two 16-byte or 32-byte subvector loads into one 32-byte or 64-byte
author	Craig Topper <craig.topper@gmail.com>	2017-02-13 04:53:29 +0000
committer	Craig Topper <craig.topper@gmail.com>	2017-02-13 04:53:29 +0000
commit	680c73e7ab086ea7d5844f711186f12d590dab13 (patch)
tree	9a8cf5826e4f087ff0e19a85ed7625e176a42d09 /llvm/lib
parent	aa46204ed9f36077c4bf138288a391ac395cfcce (diff)
download	bcm5719-llvm-680c73e7ab086ea7d5844f711186f12d590dab13.tar.gz bcm5719-llvm-680c73e7ab086ea7d5844f711186f12d590dab13.zip