Diffstat (limited to 'llvm/lib')
-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.cpp  77
1 file changed, 54 insertions, 23 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 4d090529784..17f925bd3e8 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -9863,14 +9863,48 @@ static SDValue lower256BitVectorShuffle(SDValue Op, SDValue V1, SDValue V2,
   }
 }
 
-/// \brief Tiny helper function to test whether a shuffle mask could be
+/// \brief Helper function to test whether a shuffle mask could be
 /// simplified by widening the elements being shuffled.
-static bool canWidenShuffleElements(ArrayRef<int> Mask) {
-  for (int i = 0, Size = Mask.size(); i < Size; i += 2)
-    if ((Mask[i] != -1 && Mask[i] % 2 != 0) ||
-        (Mask[i + 1] != -1 && (Mask[i + 1] % 2 != 1 ||
-                               (Mask[i] != -1 && Mask[i] + 1 != Mask[i + 1]))))
-      return false;
+///
+/// Appends the mask for wider elements in WidenedMask if valid. Otherwise
+/// leaves it in an unspecified state.
+///
+/// NOTE: This must handle normal vector shuffle masks and *target* vector
+/// shuffle masks. The latter have the special property of a '-2' representing
+/// a zero-ed lane of a vector.
+static bool canWidenShuffleElements(ArrayRef<int> Mask,
+                                    SmallVectorImpl<int> &WidenedMask) {
+  for (int i = 0, Size = Mask.size(); i < Size; i += 2) {
+    // Check for any of the sentinel values (negative) and if they are the same,
+    // we can widen to that.
+    if (Mask[i] < 0 && Mask[i] == Mask[i + 1]) {
+      WidenedMask.push_back(Mask[i]);
+      continue;
+    }
+
+    // Check for an undef mask and a mask value properly aligned to fit with
+    // a pair of values. If we find such a case, use the non-undef mask's value.
+    if (Mask[i] == -1 && Mask[i + 1] % 2 == 1) {
+      WidenedMask.push_back(Mask[i + 1] / 2);
+      continue;
+    }
+    if (Mask[i + 1] == -1 && Mask[i] % 2 == 0) {
+      WidenedMask.push_back(Mask[i] / 2);
+      continue;
+    }
+
+    // Finally check if the two mask values are adjacent and aligned with
+    // a pair.
+    if (Mask[i] != -1 && Mask[i] % 2 == 0 && Mask[i] + 1 == Mask[i + 1]) {
+      WidenedMask.push_back(Mask[i] / 2);
+      continue;
+    }
+
+    // Otherwise we can't safely widen the elements used in this shuffle.
+    return false;
+  }
+  assert(WidenedMask.size() == Mask.size() / 2 &&
+         "Incorrect size of mask after widening the elements!");
 
   return true;
 }
@@ -9922,20 +9956,16 @@ static SDValue lowerVectorShuffle(SDValue Op, const X86Subtarget *Subtarget,
   // lanes but wider integers. We cap this to not form integers larger than i64
   // but it might be interesting to form i128 integers to handle flipping the
   // low and high halves of AVX 256-bit vectors.
+  SmallVector<int, 16> WidenedMask;
   if (VT.isInteger() && VT.getScalarSizeInBits() < 64 &&
-      canWidenShuffleElements(Mask)) {
-    SmallVector<int, 8> NewMask;
-    for (int i = 0, Size = Mask.size(); i < Size; i += 2)
-      NewMask.push_back(Mask[i] != -1
-                            ? Mask[i] / 2
-                            : (Mask[i + 1] != -1 ? Mask[i + 1] / 2 : -1));
+      canWidenShuffleElements(Mask, WidenedMask)) {
     MVT NewVT =
         MVT::getVectorVT(MVT::getIntegerVT(VT.getScalarSizeInBits() * 2),
                          VT.getVectorNumElements() / 2);
     V1 = DAG.getNode(ISD::BITCAST, dl, NewVT, V1);
     V2 = DAG.getNode(ISD::BITCAST, dl, NewVT, V2);
     return DAG.getNode(ISD::BITCAST, dl, VT,
-                       DAG.getVectorShuffle(NewVT, dl, V1, V2, NewMask));
+                       DAG.getVectorShuffle(NewVT, dl, V1, V2, WidenedMask));
   }
 
   int NumV1Elements = 0, NumUndefElements = 0, NumV2Elements = 0;
@@ -20697,10 +20727,10 @@ static bool combineX86ShufflesRecursively(SDValue Op, SDValue Root,
   // elements, and shrink them to the half-width mask. It does this in a loop
   // so it will reduce the size of the mask to the minimal width mask which
   // performs an equivalent shuffle.
-  while (Mask.size() > 1 && canWidenShuffleElements(Mask)) {
-    for (int i = 0, e = Mask.size() / 2; i < e; ++i)
-      Mask[i] = Mask[2 * i] / 2;
-    Mask.resize(Mask.size() / 2);
+  SmallVector<int, 16> WidenedMask;
+  while (Mask.size() > 1 && canWidenShuffleElements(Mask, WidenedMask)) {
+    Mask = std::move(WidenedMask);
+    WidenedMask.clear();
   }
 
   return combineX86ShuffleChain(Op, Root, Mask, Depth, HasPSHUFB, DAG, DCI,
@@ -20971,12 +21001,13 @@ static SDValue PerformTargetShuffleCombine(SDValue N, SelectionDAG &DAG,
       return SDValue(); // We combined away this shuffle, so we're done.
 
     // See if this reduces to a PSHUFD which is no more expensive and can
-    // combine with more operations.
-    if (canWidenShuffleElements(Mask)) {
-      int DMask[] = {-1, -1, -1, -1};
+    // combine with more operations. Note that it has to at least flip the
+    // dwords as otherwise it would have been removed as a no-op.
+    if (Mask[0] == 2 && Mask[1] == 3 && Mask[2] == 0 && Mask[3] == 1) {
+      int DMask[] = {0, 1, 2, 3};
       int DOffset = N.getOpcode() == X86ISD::PSHUFLW ? 0 : 2;
-      DMask[DOffset + 0] = DOffset + Mask[0] / 2;
-      DMask[DOffset + 1] = DOffset + Mask[2] / 2;
+      DMask[DOffset + 0] = DOffset + 1;
+      DMask[DOffset + 1] = DOffset + 0;
       V = DAG.getNode(ISD::BITCAST, DL, MVT::v4i32, V);
       DCI.AddToWorklist(V.getNode());
       V = DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32, V,
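Below is a minimal standalone sketch of the widening rule in the first hunk, for experimenting outside of LLVM. The std::vector signature and the main() test driver are illustrative assumptions, not LLVM code; only the pairing logic follows the patch: matching negative sentinels widen as-is, an undef (-1) may pair with an aligned neighbor, and adjacent even/odd values collapse into one wide lane.

#include <cassert>
#include <vector>

// Sketch of canWidenShuffleElements with std::vector standing in for
// ArrayRef/SmallVectorImpl. Mask.size() is assumed to be even.
static bool canWidenShuffleElements(const std::vector<int> &Mask,
                                    std::vector<int> &WidenedMask) {
  for (size_t i = 0, Size = Mask.size(); i < Size; i += 2) {
    // Matching sentinels (-1 undef, -2 zero) widen to the same sentinel.
    if (Mask[i] < 0 && Mask[i] == Mask[i + 1]) {
      WidenedMask.push_back(Mask[i]);
      continue;
    }
    // An undef lane paired with a properly aligned value takes that value.
    if (Mask[i] == -1 && Mask[i + 1] % 2 == 1) {
      WidenedMask.push_back(Mask[i + 1] / 2);
      continue;
    }
    if (Mask[i + 1] == -1 && Mask[i] % 2 == 0) {
      WidenedMask.push_back(Mask[i] / 2);
      continue;
    }
    // Adjacent, pair-aligned values collapse into one wide element.
    if (Mask[i] >= 0 && Mask[i] % 2 == 0 && Mask[i] + 1 == Mask[i + 1]) {
      WidenedMask.push_back(Mask[i] / 2);
      continue;
    }
    return false; // This pair cannot be expressed with wider elements.
  }
  return true;
}

int main() {
  std::vector<int> Widened;
  // The identity v8i16 mask widens to the identity v4i32 mask.
  bool OK = canWidenShuffleElements({0, 1, 2, 3, 4, 5, 6, 7}, Widened);
  assert(OK && Widened == std::vector<int>({0, 1, 2, 3}));

  Widened.clear();
  // Undefs pair with any properly aligned non-undef lane.
  OK = canWidenShuffleElements({-1, 1, 2, -1}, Widened);
  assert(OK && Widened == std::vector<int>({0, 1}));

  Widened.clear();
  // A pair that straddles a wide element cannot be widened.
  OK = canWidenShuffleElements({1, 2, 3, 0}, Widened);
  assert(!OK);
  (void)OK;
  return 0;
}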

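For intuition on the shrink loop in combineX86ShufflesRecursively, a hypothetical driver on top of the sketch above (same assumptions) shows how a mask collapses to its minimal-width equivalent:

  std::vector<int> Mask = {0, 1, 2, 3, 4, 5, 6, 7};
  std::vector<int> Widened;
  // Mirror the patched loop: keep widening until the mask no longer splits
  // into valid pairs, moving each widened mask into place for the next pass.
  while (Mask.size() > 1 && canWidenShuffleElements(Mask, Widened)) {
    Mask = std::move(Widened);
    Widened.clear(); // Moved-from vector; clear() restores a known-empty state.
  }
  // Mask is now {0}: an identity shuffle at every element width, i.e. the
  // minimal-width mask performing an equivalent shuffle, as the hunk's
  // comment describes.

Each pass halves the mask: <0,1,2,3,4,5,6,7> becomes <0,1,2,3>, then <0,1>, then <0>.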

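The last hunk replaces the general widening check with the single word mask <2,3,0,1>: the only PSHUF(L|H)W mask that both survives earlier no-op elimination and maps onto a dword shuffle. A small plain-C++ check, independent of LLVM (the printing scaffold is an assumption), of the DMask values it builds:

#include <initializer_list>
#include <iostream>

int main() {
  // DOffset 0 rewrites the low dwords (PSHUFLW), 2 the high ones (PSHUFHW).
  for (int DOffset : {0, 2}) {
    int DMask[] = {0, 1, 2, 3}; // identity on the untouched half
    DMask[DOffset + 0] = DOffset + 1;
    DMask[DOffset + 1] = DOffset + 0; // swap the two dwords in this half
    std::cout << '<' << DMask[0] << ',' << DMask[1] << ',' << DMask[2] << ','
              << DMask[3] << ">\n";
  }
  // Prints <1,0,2,3> (PSHUFLW case) and <0,1,3,2> (PSHUFHW case): the word
  // mask <2,3,0,1> within one half is exactly a swap of that half's dwords.
  return 0;
}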