diff options
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 58 | 
1 file changed, 58 insertions, 0 deletions
| diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index c7b4fa5ec6a..3ffd6666f55 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -4332,6 +4332,17 @@ static bool isSequentialOrUndefInRange(ArrayRef<int> Mask,    return true;  } +/// Return true if every element in Mask, beginning +/// from position Pos and ending in Pos+Size, falls within the specified +/// sequential range (Low, Low+Size], or is undef or is zero. +static bool isSequentialOrUndefOrZeroInRange(ArrayRef<int> Mask, unsigned Pos, +                                             unsigned Size, int Low) { +  for (unsigned i = Pos, e = Pos + Size; i != e; ++i, ++Low) +    if (!isUndefOrZero(Mask[i]) && Mask[i] != Low) +      return false; +  return true; +} +  /// Return true if the specified EXTRACT_SUBVECTOR operand specifies a vector  /// extract that is suitable for instruction that extract 128 or 256 bit vectors  static bool isVEXTRACTIndex(SDNode *N, unsigned vecWidth) { @@ -23666,6 +23677,53 @@ static bool combineX86ShuffleChain(SDValue Input, SDValue Root,      return true;    } +  // Attempt to blend with zero. +  if (VT.getVectorNumElements() <= 8 && +      ((Subtarget.hasSSE41() && VT.is128BitVector()) || +       (Subtarget.hasAVX() && VT.is256BitVector()))) { +    // Convert VT to a type compatible with X86ISD::BLENDI. +    // TODO - add 16i16 support (requires lane duplication). 
+    MVT ShuffleVT = VT; +    if (Subtarget.hasAVX2()) { +      if (VT == MVT::v4i64) +        ShuffleVT = MVT::v8i32; +      else if (VT == MVT::v2i64) +        ShuffleVT = MVT::v4i32; +    } else { +      if (VT == MVT::v2i64 || VT == MVT::v4i32) +        ShuffleVT = MVT::v8i16; +      else if (VT == MVT::v4i64) +        ShuffleVT = MVT::v4f64; +      else if (VT == MVT::v8i32) +        ShuffleVT = MVT::v8f32; +    } + +    if (isSequentialOrUndefOrZeroInRange(Mask, /*Pos*/ 0, /*Size*/ Mask.size(), +                                         /*Low*/ 0) && +        Mask.size() <= ShuffleVT.getVectorNumElements()) { +      unsigned BlendMask = 0; +      unsigned ShuffleSize = ShuffleVT.getVectorNumElements(); +      unsigned MaskRatio = ShuffleSize / Mask.size(); + +      for (unsigned i = 0; i != ShuffleSize; ++i) +        if (Mask[i / MaskRatio] < 0) +          BlendMask |= 1u << i; + +      if (Root.getOpcode() != X86ISD::BLENDI || +          Root->getConstantOperandVal(2) != BlendMask) { +        SDValue Zero = getZeroVector(ShuffleVT, Subtarget, DAG, DL); +        Res = DAG.getBitcast(ShuffleVT, Input); +        DCI.AddToWorklist(Res.getNode()); +        Res = DAG.getNode(X86ISD::BLENDI, DL, ShuffleVT, Res, Zero, +                          DAG.getConstant(BlendMask, DL, MVT::i8)); +        DCI.AddToWorklist(Res.getNode()); +        DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Res), +                      /*AddTo*/ true); +        return true; +      } +    } +  } +    // Don't try to re-form single instruction chains under any circumstances now    // that we've done encoding canonicalization for them.    if (Depth < 2) | 

