diff options
Diffstat (limited to 'llvm/lib/Target/X86/X86ISelLowering.cpp')
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 77 |
1 files changed, 54 insertions, 23 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 4d090529784..17f925bd3e8 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -9863,14 +9863,48 @@ static SDValue lower256BitVectorShuffle(SDValue Op, SDValue V1, SDValue V2, } } -/// \brief Tiny helper function to test whether a shuffle mask could be +/// \brief Helper function to test whether a shuffle mask could be /// simplified by widening the elements being shuffled. -static bool canWidenShuffleElements(ArrayRef<int> Mask) { - for (int i = 0, Size = Mask.size(); i < Size; i += 2) - if ((Mask[i] != -1 && Mask[i] % 2 != 0) || - (Mask[i + 1] != -1 && (Mask[i + 1] % 2 != 1 || - (Mask[i] != -1 && Mask[i] + 1 != Mask[i + 1])))) - return false; +/// +/// Appends the mask for wider elements in WidenedMask if valid. Otherwise +/// leaves it in an unspecified state. +/// +/// NOTE: This must handle normal vector shuffle masks and *target* vector +/// shuffle masks. The latter have the special property of a '-2' representing +/// a zero-ed lane of a vector. +static bool canWidenShuffleElements(ArrayRef<int> Mask, + SmallVectorImpl<int> &WidenedMask) { + for (int i = 0, Size = Mask.size(); i < Size; i += 2) { + // Check for any of the sentinel values (negative) and if they are the same, + // we can widen to that. + if (Mask[i] < 0 && Mask[i] == Mask[i + 1]) { + WidenedMask.push_back(Mask[i]); + continue; + } + + // Check for an undef mask and a mask value properly aligned to fit with + // a pair of values. If we find such a case, use the non-undef mask's value. + if (Mask[i] == -1 && Mask[i + 1] % 2 == 1) { + WidenedMask.push_back(Mask[i + 1] / 2); + continue; + } + if (Mask[i + 1] == -1 && Mask[i] % 2 == 0) { + WidenedMask.push_back(Mask[i] / 2); + continue; + } + + // Finally check if the two mask values are adjacent and aligned with + // a pair. 
+ if (Mask[i] != -1 && Mask[i] % 2 == 0 && Mask[i] + 1 == Mask[i + 1]) { + WidenedMask.push_back(Mask[i] / 2); + continue; + } + + // Otherwise we can't safely widen the elements used in this shuffle. + return false; + } + assert(WidenedMask.size() == Mask.size() / 2 && + "Incorrect size of mask after widening the elements!"); return true; } @@ -9922,20 +9956,16 @@ static SDValue lowerVectorShuffle(SDValue Op, const X86Subtarget *Subtarget, // lanes but wider integers. We cap this to not form integers larger than i64 // but it might be interesting to form i128 integers to handle flipping the // low and high halves of AVX 256-bit vectors. + SmallVector<int, 16> WidenedMask; if (VT.isInteger() && VT.getScalarSizeInBits() < 64 && - canWidenShuffleElements(Mask)) { - SmallVector<int, 8> NewMask; - for (int i = 0, Size = Mask.size(); i < Size; i += 2) - NewMask.push_back(Mask[i] != -1 - ? Mask[i] / 2 - : (Mask[i + 1] != -1 ? Mask[i + 1] / 2 : -1)); + canWidenShuffleElements(Mask, WidenedMask)) { MVT NewVT = MVT::getVectorVT(MVT::getIntegerVT(VT.getScalarSizeInBits() * 2), VT.getVectorNumElements() / 2); V1 = DAG.getNode(ISD::BITCAST, dl, NewVT, V1); V2 = DAG.getNode(ISD::BITCAST, dl, NewVT, V2); return DAG.getNode(ISD::BITCAST, dl, VT, - DAG.getVectorShuffle(NewVT, dl, V1, V2, NewMask)); + DAG.getVectorShuffle(NewVT, dl, V1, V2, WidenedMask)); } int NumV1Elements = 0, NumUndefElements = 0, NumV2Elements = 0; @@ -20697,10 +20727,10 @@ static bool combineX86ShufflesRecursively(SDValue Op, SDValue Root, // elements, and shrink them to the half-width mask. It does this in a loop // so it will reduce the size of the mask to the minimal width mask which // performs an equivalent shuffle. 
- while (Mask.size() > 1 && canWidenShuffleElements(Mask)) { - for (int i = 0, e = Mask.size() / 2; i < e; ++i) - Mask[i] = Mask[2 * i] / 2; - Mask.resize(Mask.size() / 2); + SmallVector<int, 16> WidenedMask; + while (Mask.size() > 1 && canWidenShuffleElements(Mask, WidenedMask)) { + Mask = std::move(WidenedMask); + WidenedMask.clear(); } return combineX86ShuffleChain(Op, Root, Mask, Depth, HasPSHUFB, DAG, DCI, @@ -20971,12 +21001,13 @@ static SDValue PerformTargetShuffleCombine(SDValue N, SelectionDAG &DAG, return SDValue(); // We combined away this shuffle, so we're done. // See if this reduces to a PSHUFD which is no more expensive and can - // combine with more operations. - if (canWidenShuffleElements(Mask)) { - int DMask[] = {-1, -1, -1, -1}; + // combine with more operations. Note that it has to at least flip the + // dwords as otherwise it would have been removed as a no-op. + if (Mask[0] == 2 && Mask[1] == 3 && Mask[2] == 0 && Mask[3] == 1) { + int DMask[] = {0, 1, 2, 3}; int DOffset = N.getOpcode() == X86ISD::PSHUFLW ? 0 : 2; - DMask[DOffset + 0] = DOffset + Mask[0] / 2; - DMask[DOffset + 1] = DOffset + Mask[2] / 2; + DMask[DOffset + 0] = DOffset + 1; + DMask[DOffset + 1] = DOffset + 0; V = DAG.getNode(ISD::BITCAST, DL, MVT::v4i32, V); DCI.AddToWorklist(V.getNode()); V = DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32, V, |

