diff options
Diffstat (limited to 'llvm/lib/Target/X86/X86ISelLowering.cpp')
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 152 | 
1 files changed, 82 insertions, 70 deletions
| diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 74a8fb189ac..93da540cd6e 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -8207,6 +8207,24 @@ static bool isTargetShuffleEquivalent(ArrayRef<int> Mask,    return true;  } +// Merges a general DAG shuffle mask and zeroable bit mask into a target shuffle +// mask: undef entries stay undef, zeroable ones become SM_SentinelZero. +static SmallVector<int, 64> createTargetShuffleMask(ArrayRef<int> Mask, +                                                    const APInt &Zeroable) { +  int NumElts = Mask.size(); +  assert(NumElts == (int)Zeroable.getBitWidth() && "Mismatch mask sizes"); + +  SmallVector<int, 64> TargetMask(NumElts, SM_SentinelUndef); +  for (int i = 0; i != NumElts; ++i) { +    int M = Mask[i]; +    if (M == SM_SentinelUndef) +      continue; +    assert(0 <= M && M < (2 * NumElts) && "Out of range shuffle index"); +    TargetMask[i] = (Zeroable[i] ? SM_SentinelZero : M); +  } +  return TargetMask; +} +  // Check if the shuffle mask is suitable for the AVX vpunpcklwd or vpunpckhwd  // instructions.  static bool isUnpackWdShuffleMask(ArrayRef<int> Mask, MVT VT) { @@ -8626,29 +8644,25 @@ static SDValue getVectorMaskingNode(SDValue Op, SDValue Mask,                                      const X86Subtarget &Subtarget,                                      SelectionDAG &DAG); -/// \brief Try to emit a blend instruction for a shuffle. -/// -/// This doesn't do any checks for the availability of instructions for blending -/// these values. It relies on the availability of the X86ISD::BLENDI pattern to -/// be matched in the backend with the type given. What it does check for is -/// that the shuffle mask is a blend, or convertible into a blend with zero. 
-static SDValue lowerVectorShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1, -                                         SDValue V2, ArrayRef<int> Original, -                                         const APInt &Zeroable, -                                         const X86Subtarget &Subtarget, -                                         SelectionDAG &DAG) { -  bool V1IsZero = ISD::isBuildVectorAllZeros(V1.getNode()); -  bool V2IsZero = ISD::isBuildVectorAllZeros(V2.getNode()); -  SmallVector<int, 8> Mask(Original.begin(), Original.end()); -  bool ForceV1Zero = false, ForceV2Zero = false; +static bool matchVectorShuffleAsBlend(SDValue V1, SDValue V2, +                                      MutableArrayRef<int> TargetMask, +                                      bool &ForceV1Zero, bool &ForceV2Zero, +                                      uint64_t &BlendMask) { +  bool V1IsZeroOrUndef = +      V1.isUndef() || ISD::isBuildVectorAllZeros(V1.getNode()); +  bool V2IsZeroOrUndef = +      V2.isUndef() || ISD::isBuildVectorAllZeros(V2.getNode()); + +  BlendMask = 0; +  ForceV1Zero = false, ForceV2Zero = false; +  assert(TargetMask.size() <= 64 && "Shuffle mask too big for blend mask");    // Attempt to generate the binary blend mask. If an input is zero then    // we can use any lane.    // TODO: generalize the zero matching to any scalar like isShuffleEquivalent. 
-  uint64_t BlendMask = 0; -  for (int i = 0, Size = Mask.size(); i < Size; ++i) { -    int M = Mask[i]; -    if (M < 0) +  for (int i = 0, Size = TargetMask.size(); i < Size; ++i) { +    int M = TargetMask[i]; +    if (M == SM_SentinelUndef)        continue;      if (M == i)        continue; @@ -8656,21 +8670,50 @@ static SDValue lowerVectorShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1,        BlendMask |= 1ull << i;        continue;      } -    if (Zeroable[i]) { -      if (V1IsZero) { +    if (M == SM_SentinelZero) { +      if (V1IsZeroOrUndef) {          ForceV1Zero = true; -        Mask[i] = i; +        TargetMask[i] = i;          continue;        } -      if (V2IsZero) { +      if (V2IsZeroOrUndef) {          ForceV2Zero = true;          BlendMask |= 1ull << i; -        Mask[i] = i + Size; +        TargetMask[i] = i + Size;          continue;        }      } -    return SDValue(); // Shuffled input! +    return false;    } +  return true; +} + +// Widens a blend mask: every set bit i of the Size-bit BlendMask becomes a run +// of Scale set bits starting at bit (i * Scale) of the result. +static uint64_t scaleVectorShuffleBlendMask(uint64_t BlendMask, int Size, +                                            int Scale) { +  uint64_t ScaledMask = 0; +  for (int i = 0; i != Size; ++i) +    if (BlendMask & (1ull << i)) +      ScaledMask |= ((1ull << Scale) - 1) << (i * Scale); +  return ScaledMask; +} + +/// \brief Try to emit a blend instruction for a shuffle. +/// +/// This doesn't do any checks for the availability of instructions for blending +/// these values. It relies on the availability of the X86ISD::BLENDI pattern to +/// be matched in the backend with the type given. What it does check for is +/// that the shuffle mask is a blend, or convertible into a blend with zero. 
+static SDValue lowerVectorShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1, +                                         SDValue V2, ArrayRef<int> Original, +                                         const APInt &Zeroable, +                                         const X86Subtarget &Subtarget, +                                         SelectionDAG &DAG) { +  SmallVector<int, 64> Mask = createTargetShuffleMask(Original, Zeroable); + +  uint64_t BlendMask = 0; +  bool ForceV1Zero = false, ForceV2Zero = false; +  if (!matchVectorShuffleAsBlend(V1, V2, Mask, ForceV1Zero, ForceV2Zero, +                                 BlendMask)) +    return SDValue();    // Create a REAL zero vector - ISD::isBuildVectorAllZeros allows UNDEFs.    if (ForceV1Zero) @@ -8678,14 +8721,6 @@ static SDValue lowerVectorShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1,    if (ForceV2Zero)      V2 = getZeroVector(VT, Subtarget, DAG, DL); -  auto ScaleBlendMask = [](uint64_t BlendMask, int Size, int Scale) { -    uint64_t ScaledMask = 0; -    for (int i = 0; i != Size; ++i) -      if (BlendMask & (1ull << i)) -        ScaledMask |= ((1ull << Scale) - 1) << (i * Scale); -    return ScaledMask; -  }; -    switch (VT.SimpleTy) {    case MVT::v2f64:    case MVT::v4f32: @@ -8705,7 +8740,7 @@ static SDValue lowerVectorShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1,      if (Subtarget.hasAVX2()) {        // Scale the blend by the number of 32-bit dwords per element.        int Scale =  VT.getScalarSizeInBits() / 32; -      BlendMask = ScaleBlendMask(BlendMask, Mask.size(), Scale); +      BlendMask = scaleVectorShuffleBlendMask(BlendMask, Mask.size(), Scale);        MVT BlendVT = VT.getSizeInBits() > 128 ? 
MVT::v8i32 : MVT::v4i32;        V1 = DAG.getBitcast(BlendVT, V1);        V2 = DAG.getBitcast(BlendVT, V2); @@ -8718,7 +8753,7 @@ static SDValue lowerVectorShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1,      // For integer shuffles we need to expand the mask and cast the inputs to      // v8i16s prior to blending.      int Scale = 8 / VT.getVectorNumElements(); -    BlendMask = ScaleBlendMask(BlendMask, Mask.size(), Scale); +    BlendMask = scaleVectorShuffleBlendMask(BlendMask, Mask.size(), Scale);      V1 = DAG.getBitcast(MVT::v8i16, V1);      V2 = DAG.getBitcast(MVT::v8i16, V2);      return DAG.getBitcast(VT, @@ -27124,44 +27159,21 @@ static bool matchBinaryPermuteVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,          BlendVT = MVT::v8f32;      } -    unsigned BlendSize = BlendVT.getVectorNumElements(); -    unsigned MaskRatio = BlendSize / NumMaskElts; - -    // Can we blend with zero? -    if (isSequentialOrUndefOrZeroInRange(Mask, /*Pos*/ 0, /*Size*/ NumMaskElts, -                                         /*Low*/ 0) && -        NumMaskElts <= BlendVT.getVectorNumElements()) { -      PermuteImm = 0; -      for (unsigned i = 0; i != BlendSize; ++i) -        if (Mask[i / MaskRatio] < 0) -          PermuteImm |= 1u << i; - -      V2 = getZeroVector(BlendVT, Subtarget, DAG, DL); -      Shuffle = X86ISD::BLENDI; -      ShuffleVT = BlendVT; -      return true; -    } - -    // Attempt to match as a binary blend.      
if (NumMaskElts <= BlendVT.getVectorNumElements()) { -      bool MatchBlend = true; -      for (int i = 0; i != (int)NumMaskElts; ++i) { -        int M = Mask[i]; -        if (M == SM_SentinelUndef) -          continue; -        if ((M == SM_SentinelZero) || -            ((M != i) && (M != (i + (int)NumMaskElts)))) { -          MatchBlend = false; -          break; +      uint64_t BlendMask = 0; +      bool ForceV1Zero = false, ForceV2Zero = false; +      SmallVector<int, 8> TargetMask(Mask.begin(), Mask.end()); +      if (matchVectorShuffleAsBlend(V1, V2, TargetMask, ForceV1Zero, +                                    ForceV2Zero, BlendMask)) { +        if (NumMaskElts < BlendVT.getVectorNumElements()) { +          int Scale = BlendVT.getVectorNumElements() / NumMaskElts; +          BlendMask = +              scaleVectorShuffleBlendMask(BlendMask, NumMaskElts, Scale);          } -      } - -      if (MatchBlend) { -        PermuteImm = 0; -        for (unsigned i = 0; i != BlendSize; ++i) -          if ((int)NumMaskElts <= Mask[i / MaskRatio]) -            PermuteImm |= 1u << i; +        V1 = ForceV1Zero ? getZeroVector(BlendVT, Subtarget, DAG, DL) : V1; +        V2 = ForceV2Zero ? getZeroVector(BlendVT, Subtarget, DAG, DL) : V2; +        PermuteImm = (unsigned)BlendMask;          Shuffle = X86ISD::BLENDI;          ShuffleVT = BlendVT;          return true; | 

