1 files changed, 156 insertions, 123 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 8783bb24dae..73faff2f582 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -378,6 +378,7 @@ namespace {
     SDValue TransformFPLoadStorePair(SDNode *N);
     SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
     SDValue reduceBuildVecConvertToConvertBuildVec(SDNode *N);
+    SDValue reduceBuildVecToShuffle(SDNode *N);
 
     SDValue GetDemandedBits(SDValue V, const APInt &Mask);
 
@@ -12865,24 +12866,27 @@ SDValue DAGCombiner::reduceBuildVecConvertToConvertBuildVec(SDNode *N) {
   return DAG.getNode(Opcode, dl, VT, BV);
 }
 
-SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
-  unsigned NumInScalars = N->getNumOperands();
-  SDLoc dl(N);
-  EVT VT = N->getValueType(0);
+// If Vec holds a reference to a non-null node, return Vec.
+// Otherwise, return either a zero or an undef node of the appropriate type.
+static SDValue getRightHandValue(SelectionDAG &DAG, SDLoc DL, SDValue Vec,
+                                 EVT VT, bool Zero) {
+  if (Vec.getNode())
+    return Vec;
 
-  // A vector built entirely of undefs is undef.
-  if (ISD::allOperandsUndef(N))
-    return DAG.getUNDEF(VT);
+  if (Zero)
+    return VT.isInteger() ? DAG.getConstant(0, DL, VT)
+                          : DAG.getConstantFP(0.0, DL, VT);
 
-  if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
-    return V;
-
-  if (SDValue V = reduceBuildVecConvertToConvertBuildVec(N))
-    return V;
+  return DAG.getUNDEF(VT);
+}
 
-  // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
-  // operations.  If so, and if the EXTRACT_VECTOR_ELT vector inputs come from
-  // at most two distinct vectors, turn this into a shuffle node.
+// Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
+// operations.  If so, and if the EXTRACT_VECTOR_ELT vector inputs come from
+// at most two distinct vectors, turn this into a shuffle node.
+// TODO: Support more than two inputs by constructing a tree of shuffles.
+SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
+  SDLoc dl(N);
+  EVT VT = N->getValueType(0);
 
   // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
   if (!isTypeLegal(VT))
@@ -12894,148 +12898,177 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
 
   SDValue VecIn1, VecIn2;
   bool UsesZeroVector = false;
-  for (unsigned i = 0; i != NumInScalars; ++i) {
+  unsigned NumElems = N->getNumOperands();
+
+  // Record, for each element of newly built vector, which input it uses.
+  // 0 stands for the zero vector, 1 and 2 for the two input vectors, and -1
+  // for undef.
+  SmallVector<int, 8> VectorMask;
+  for (unsigned i = 0; i != NumElems; ++i) {
     SDValue Op = N->getOperand(i);
-    // Ignore undef inputs.
-    if (Op.isUndef()) continue;
 
-    // See if we can combine this build_vector into a blend with a zero vector.
+    if (Op.isUndef()) {
+      VectorMask.push_back(-1);
+      continue;
+    }
+
+    // See if we can combine this into a blend with a zero vector.
     if (!VecIn2.getNode() && (isNullConstant(Op) || isNullFPConstant(Op))) {
       UsesZeroVector = true;
+      VectorMask.push_back(0);
       continue;
     }
 
-    // If this input is something other than a EXTRACT_VECTOR_ELT with a
-    // constant index, bail out.
+    // Not an undef or zero. If the input is something other than an
+    // EXTRACT_VECTOR_ELT with a constant index, bail out.
     if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
-        !isa<ConstantSDNode>(Op.getOperand(1))) {
-      VecIn1 = VecIn2 = SDValue(nullptr, 0);
-      break;
-    }
+        !isa<ConstantSDNode>(Op.getOperand(1)))
+      return SDValue();
 
-    // We allow up to two distinct input vectors.
+    // We only allow up to two distinct input vectors.
     SDValue ExtractedFromVec = Op.getOperand(0);
-    if (ExtractedFromVec == VecIn1 || ExtractedFromVec == VecIn2)
+    if (ExtractedFromVec == VecIn1) {
+      VectorMask.push_back(1);
+      continue;
+    }
+    if (ExtractedFromVec == VecIn2) {
+      VectorMask.push_back(2);
       continue;
+    }
 
     if (!VecIn1.getNode()) {
       VecIn1 = ExtractedFromVec;
+      VectorMask.push_back(1);
     } else if (!VecIn2.getNode() && !UsesZeroVector) {
       VecIn2 = ExtractedFromVec;
+      VectorMask.push_back(2);
     } else {
-      // Too many inputs.
-      VecIn1 = VecIn2 = SDValue(nullptr, 0);
-      break;
+      return SDValue();
     }
   }
 
-  // If everything is good, we can make a shuffle operation.
-  if (VecIn1.getNode()) {
-    unsigned InNumElements = VecIn1.getValueType().getVectorNumElements();
-    SmallVector<int, 8> Mask;
-    for (unsigned i = 0; i != NumInScalars; ++i) {
-      unsigned Opcode = N->getOperand(i).getOpcode();
-      if (Opcode == ISD::UNDEF) {
-        Mask.push_back(-1);
-        continue;
-      }
+  // If we didn't find at least one input vector, bail out.
+  if (!VecIn1.getNode())
+    return SDValue();
 
-      // Operands can also be zero.
-      if (Opcode != ISD::EXTRACT_VECTOR_ELT) {
-        assert(UsesZeroVector &&
-               (Opcode == ISD::Constant || Opcode == ISD::ConstantFP) &&
-               "Unexpected node found!");
-        Mask.push_back(NumInScalars+i);
-        continue;
-      }
+  EVT InVT1 = VecIn1.getValueType();
+  EVT InVT2 = VecIn2.getNode() ? VecIn2.getValueType() : InVT1;
+  unsigned Vec2Offset = InVT1.getVectorNumElements();
+
+  // We can't generate a shuffle node with mismatched input and output types.
+  // Try to make the types match.
+  if (InVT1 != VT || InVT2 != VT) {
+    // Both inputs and the output must have the same base element type.
+    EVT ElemType = VT.getVectorElementType();
+    if (ElemType != InVT1.getVectorElementType() ||
+        ElemType != InVT2.getVectorElementType())
+      return SDValue();
 
-      // If extracting from the first vector, just use the index directly.
-      SDValue Extract = N->getOperand(i);
-      SDValue ExtVal = Extract.getOperand(1);
-      unsigned ExtIndex = cast<ConstantSDNode>(ExtVal)->getZExtValue();
-      if (Extract.getOperand(0) == VecIn1) {
-        Mask.push_back(ExtIndex);
-        continue;
-      }
+    // The element types match, now figure out the lengths.
+    if (InVT1.getSizeInBits() * 2 == VT.getSizeInBits() && InVT1 == InVT2) {
+      // If both input vectors are exactly half the size of the output, concat
+      // them. If we have only one (non-zero) input, concat it with undef.
+      VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, VecIn1,
+                           getRightHandValue(DAG, dl, VecIn2, InVT1, false));
+      VecIn2 = SDValue();
+      // If we have one "real" input and are blending with zero, we need the
+      // zero elements to come from the second input, not the undef part of the
+      // first input.
+      if (UsesZeroVector)
+        Vec2Offset = NumElems;
+    } else if (InVT1.getSizeInBits() == VT.getSizeInBits() * 2) {
+      // If we only have one input vector, and it's twice the size of the
+      // output, split it in two.
+      if (!TLI.isExtractSubvectorCheap(VT, NumElems))
+        return SDValue();
 
-      // Otherwise, use InIdx + InputVecSize
-      Mask.push_back(InNumElements + ExtIndex);
-    }
+      // TODO: Support the case where we have one input that's too wide, and
+      // another input which is wide/"correct"/narrow. We can do this by
+      // widening the narrow input, shuffling the wide vectors, and then
+      // extracting the low subvector.
+      if (UsesZeroVector || VecIn2.getNode())
+        return SDValue();
 
-    // Avoid introducing illegal shuffles with zero.
-    if (UsesZeroVector && !TLI.isVectorClearMaskLegal(Mask, VT))
+      MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
+      VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, VecIn1,
+                           DAG.getConstant(NumElems, dl, IdxTy));
+      VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, VecIn1,
+                           DAG.getConstant(0, dl, IdxTy));
+      // Since we now have shorter input vectors, adjust the offset of the
+      // second vector's start.
+      Vec2Offset = NumElems;
+    } else {
+      // TODO: Support cases where the length mismatch isn't exactly by a
+      // factor of 2.
       return SDValue();
+    }
+  }
 
-    // We can't generate a shuffle node with mismatched input and output types.
-    // Attempt to transform a single input vector to the correct type.
-    if ((VT != VecIn1.getValueType())) {
-      // If the input vector type has a different base type to the output
-      // vector type, bail out.
-      EVT VTElemType = VT.getVectorElementType();
-      if ((VecIn1.getValueType().getVectorElementType() != VTElemType) ||
-          (VecIn2.getNode() &&
-           (VecIn2.getValueType().getVectorElementType() != VTElemType)))
-        return SDValue();
-
-      // If the input vector is too small, widen it.
-      // We only support widening of vectors which are half the size of the
-      // output registers. For example XMM->YMM widening on X86 with AVX.
-      EVT VecInT = VecIn1.getValueType();
-      if (VecInT.getSizeInBits() * 2 == VT.getSizeInBits()) {
-        // If we only have one small input, widen it by adding undef values.
-        if (!VecIn2.getNode())
-          VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, VecIn1,
-                               DAG.getUNDEF(VecIn1.getValueType()));
-        else if (VecIn1.getValueType() == VecIn2.getValueType()) {
-          // If we have two small inputs of the same type, try to concat them.
-          VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, VecIn1, VecIn2);
-          VecIn2 = SDValue(nullptr, 0);
-        } else
-          return SDValue();
-      } else if (VecInT.getSizeInBits() == VT.getSizeInBits() * 2) {
-        // If the input vector is too large, try to split it.
-        // We don't support having two input vectors that are too large.
-        // If the zero vector was used, we can not split the vector,
-        // since we'd need 3 inputs.
-        if (UsesZeroVector || VecIn2.getNode())
-          return SDValue();
+  SmallVector<int, 8> Mask;
 
-        if (!TLI.isExtractSubvectorCheap(VT, VT.getVectorNumElements()))
-          return SDValue();
+  for (unsigned i = 0; i != NumElems; ++i) {
+    if (VectorMask[i] == -1) {
+      Mask.push_back(-1);
+      continue;
+    }
 
-        // Try to replace VecIn1 with two extract_subvectors
-        // No need to update the masks, they should still be correct.
-        VecIn2 = DAG.getNode(
-            ISD::EXTRACT_SUBVECTOR, dl, VT, VecIn1,
-            DAG.getConstant(VT.getVectorNumElements(), dl,
-                            TLI.getVectorIdxTy(DAG.getDataLayout())));
-        VecIn1 = DAG.getNode(
-            ISD::EXTRACT_SUBVECTOR, dl, VT, VecIn1,
-            DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
-      } else
-        return SDValue();
+    // If we are trying to blend with zero, we need to take a zero from the
+    // correct position in the second input.
+    if (VectorMask[i] == 0) {
+      Mask.push_back(Vec2Offset + i);
+      continue;
     }
 
-    if (UsesZeroVector)
-      VecIn2 = VT.isInteger() ? DAG.getConstant(0, dl, VT) :
-                                DAG.getConstantFP(0.0, dl, VT);
-    else
-      // If VecIn2 is unused then change it to undef.
-      VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT);
+    SDValue Extract = N->getOperand(i);
+    unsigned ExtIndex =
+        cast<ConstantSDNode>(Extract.getOperand(1))->getZExtValue();
 
-    // Check that we were able to transform all incoming values to the same
-    // type.
-    if (VecIn2.getValueType() != VecIn1.getValueType() ||
-        VecIn1.getValueType() != VT)
-          return SDValue();
+    if (VectorMask[i] == 1) {
+      Mask.push_back(ExtIndex);
+      continue;
+    }
 
-    // Return the new VECTOR_SHUFFLE node.
-    SDValue Ops[2];
-    Ops[0] = VecIn1;
-    Ops[1] = VecIn2;
-    return DAG.getVectorShuffle(VT, dl, Ops[0], Ops[1], Mask);
+    assert(VectorMask[i] == 2 && "Expected input to be from second vector");
+    Mask.push_back(Vec2Offset + ExtIndex);
   }
 
+  // Avoid introducing illegal shuffles with zero.
+  // TODO: This doesn't actually do anything smart at the moment.
+  // We should either delete this, or check legality for all the shuffles
+  // we create.
+  if (UsesZeroVector && !TLI.isVectorClearMaskLegal(Mask, VT))
+    return SDValue();
+
+  // If we already have a VecIn2, it should have the same type as VecIn1.
+  // If we don't, get an undef/zero vector of the appropriate type.
+  VecIn2 =
+      getRightHandValue(DAG, dl, VecIn2, VecIn1.getValueType(), UsesZeroVector);
+  assert(VecIn1.getValueType() == VecIn2.getValueType() &&
+         "Unexpected second input type.");
+
+  // Return the new VECTOR_SHUFFLE node.
+  SDValue Ops[2];
+  Ops[0] = VecIn1;
+  Ops[1] = VecIn2;
+  return DAG.getVectorShuffle(VT, dl, Ops[0], Ops[1], Mask);
+}
+
+SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
+  EVT VT = N->getValueType(0);
+
+  // A vector built entirely of undefs is undef.
+  if (ISD::allOperandsUndef(N))
+    return DAG.getUNDEF(VT);
+
+  if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
+    return V;
+
+  if (SDValue V = reduceBuildVecConvertToConvertBuildVec(N))
+    return V;
+
+  if (SDValue V = reduceBuildVecToShuffle(N))
+    return V;
+
   return SDValue();
 }