Diffstat (limited to 'llvm/lib/Target/X86/X86ISelLowering.cpp')
-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.cpp | 77
1 file changed, 54 insertions(+), 23 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 4d090529784..17f925bd3e8 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -9863,14 +9863,48 @@ static SDValue lower256BitVectorShuffle(SDValue Op, SDValue V1, SDValue V2,
   }
 }
 
-/// \brief Tiny helper function to test whether a shuffle mask could be
+/// \brief Helper function to test whether a shuffle mask could be
 /// simplified by widening the elements being shuffled.
-static bool canWidenShuffleElements(ArrayRef<int> Mask) {
-  for (int i = 0, Size = Mask.size(); i < Size; i += 2)
-    if ((Mask[i] != -1 && Mask[i] % 2 != 0) ||
-        (Mask[i + 1] != -1 && (Mask[i + 1] % 2 != 1 ||
-                               (Mask[i] != -1 && Mask[i] + 1 != Mask[i + 1]))))
-      return false;
+///
+/// Appends the mask for wider elements in WidenedMask if valid. Otherwise
+/// leaves it in an unspecified state.
+///
+/// NOTE: This must handle normal vector shuffle masks and *target* vector
+/// shuffle masks. The latter have the special property of a '-2' representing
+/// a zero-ed lane of a vector.
+static bool canWidenShuffleElements(ArrayRef<int> Mask,
+                                     SmallVectorImpl<int> &WidenedMask) {
+  for (int i = 0, Size = Mask.size(); i < Size; i += 2) {
+    // Check for any of the sentinel values (negative) and if they are the same,
+    // we can widen to that.
+    if (Mask[i] < 0 && Mask[i] == Mask[i + 1]) {
+      WidenedMask.push_back(Mask[i]);
+      continue;
+    }
+
+    // Check for an undef mask and a mask value properly aligned to fit with
+    // a pair of values. If we find such a case, use the non-undef mask's value.
+    if (Mask[i] == -1 && Mask[i + 1] % 2 == 1) {
+      WidenedMask.push_back(Mask[i + 1] / 2);
+      continue;
+    }
+    if (Mask[i + 1] == -1 && Mask[i] % 2 == 0) {
+      WidenedMask.push_back(Mask[i] / 2);
+      continue;
+    }
+
+    // Finally check if the two mask values are adjacent and aligned with
+    // a pair.
+    if (Mask[i] != -1 && Mask[i] % 2 == 0 && Mask[i] + 1 == Mask[i + 1]) {
+      WidenedMask.push_back(Mask[i] / 2);
+      continue;
+    }
+
+    // Otherwise we can't safely widen the elements used in this shuffle.
+    return false;
+  }
+  assert(WidenedMask.size() == Mask.size() / 2 &&
+         "Incorrect size of mask after widening the elements!");
 
   return true;
 }
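The pairing rules added above can be illustrated outside of LLVM. The following is a minimal standalone sketch, not the actual implementation: the widenMask helper is a hypothetical stand-in for canWidenShuffleElements, using plain std::vector in place of ArrayRef/SmallVectorImpl, and it only demonstrates the four cases the new function handles.

#include <cstdio>
#include <vector>

// Hypothetical stand-in for canWidenShuffleElements(): fold each pair of
// adjacent lanes into one wide lane, or fail if the pair cannot be widened.
static bool widenMask(const std::vector<int> &Mask, std::vector<int> &Widened) {
  for (size_t i = 0; i + 1 < Mask.size(); i += 2) {
    int Lo = Mask[i], Hi = Mask[i + 1];
    if (Lo < 0 && Lo == Hi)                            // matching sentinels (-1 undef, -2 zero)
      Widened.push_back(Lo);
    else if (Lo == -1 && Hi % 2 == 1)                  // undef low lane, aligned high lane
      Widened.push_back(Hi / 2);
    else if (Hi == -1 && Lo % 2 == 0)                  // undef high lane, aligned low lane
      Widened.push_back(Lo / 2);
    else if (Lo >= 0 && Lo % 2 == 0 && Lo + 1 == Hi)   // adjacent, aligned pair
      Widened.push_back(Lo / 2);
    else
      return false;                                    // this shuffle cannot be widened
  }
  return true;
}

int main() {
  // <0,1,4,5,2,3,6,7> on 32-bit lanes widens to <0,2,1,3> on 64-bit lanes.
  std::vector<int> Mask = {0, 1, 4, 5, 2, 3, 6, 7}, Widened;
  if (widenMask(Mask, Widened))
    for (int M : Widened)
      std::printf("%d ", M);   // prints: 0 2 1 3
  std::printf("\n");
  return 0;
}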
@@ -9922,20 +9956,16 @@ static SDValue lowerVectorShuffle(SDValue Op, const X86Subtarget *Subtarget,
   // lanes but wider integers. We cap this to not form integers larger than i64
   // but it might be interesting to form i128 integers to handle flipping the
   // low and high halves of AVX 256-bit vectors.
+  SmallVector<int, 16> WidenedMask;
   if (VT.isInteger() && VT.getScalarSizeInBits() < 64 &&
-      canWidenShuffleElements(Mask)) {
-    SmallVector<int, 8> NewMask;
-    for (int i = 0, Size = Mask.size(); i < Size; i += 2)
-      NewMask.push_back(Mask[i] != -1
-                            ? Mask[i] / 2
-                            : (Mask[i + 1] != -1 ? Mask[i + 1] / 2 : -1));
+      canWidenShuffleElements(Mask, WidenedMask)) {
     MVT NewVT =
         MVT::getVectorVT(MVT::getIntegerVT(VT.getScalarSizeInBits() * 2),
                          VT.getVectorNumElements() / 2);
     V1 = DAG.getNode(ISD::BITCAST, dl, NewVT, V1);
     V2 = DAG.getNode(ISD::BITCAST, dl, NewVT, V2);
     return DAG.getNode(ISD::BITCAST, dl, VT,
-                       DAG.getVectorShuffle(NewVT, dl, V1, V2, NewMask));
+                       DAG.getVectorShuffle(NewVT, dl, V1, V2, WidenedMask));
   }
 
   int NumV1Elements = 0, NumUndefElements = 0, NumV2Elements = 0;
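The effect of the widening transform in this hunk can be checked with scalar code: shuffling 32-bit lanes with the original mask and shuffling the same bytes reinterpreted as 64-bit lanes with the widened mask produce identical results. A minimal sketch with hypothetical example values, using plain arrays and memcpy in place of the SelectionDAG bitcasts:

#include <array>
#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  // Two 8 x i32 inputs; shuffle indices 0..7 select from A, 8..15 from B.
  std::array<uint32_t, 8> A = {0, 1, 2, 3, 4, 5, 6, 7};
  std::array<uint32_t, 8> B = {8, 9, 10, 11, 12, 13, 14, 15};

  // A v8i32 mask and its widened v4i64 form (each aligned pair folded in half).
  const int Mask[8]     = {2, 3, 8, 9, 6, 7, 12, 13};
  const int WideMask[4] = {1, 4, 3, 6};

  // Shuffle on 32-bit lanes.
  std::array<uint32_t, 8> Narrow;
  for (int i = 0; i < 8; ++i)
    Narrow[i] = Mask[i] < 8 ? A[Mask[i]] : B[Mask[i] - 8];

  // "Bitcast" both inputs to 4 x i64 and shuffle on 64-bit lanes.
  std::array<uint64_t, 4> A64, B64, Wide;
  std::memcpy(A64.data(), A.data(), sizeof(A));
  std::memcpy(B64.data(), B.data(), sizeof(B));
  for (int i = 0; i < 4; ++i)
    Wide[i] = WideMask[i] < 4 ? A64[WideMask[i]] : B64[WideMask[i] - 4];

  // Identical bytes, so bitcasting the wide result back to v8i32 recovers
  // exactly the original narrow shuffle.
  assert(std::memcmp(Narrow.data(), Wide.data(), sizeof(Narrow)) == 0);
  return 0;
}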
@@ -20697,10 +20727,10 @@ static bool combineX86ShufflesRecursively(SDValue Op, SDValue Root,
   // elements, and shrink them to the half-width mask. It does this in a loop
   // so it will reduce the size of the mask to the minimal width mask which
   // performs an equivalent shuffle.
-  while (Mask.size() > 1 && canWidenShuffleElements(Mask)) {
-    for (int i = 0, e = Mask.size() / 2; i < e; ++i)
-      Mask[i] = Mask[2 * i] / 2;
-    Mask.resize(Mask.size() / 2);
+  SmallVector<int, 16> WidenedMask;
+  while (Mask.size() > 1 && canWidenShuffleElements(Mask, WidenedMask)) {
+    Mask = std::move(WidenedMask);
+    WidenedMask.clear();
   }
 
   return combineX86ShuffleChain(Op, Root, Mask, Depth, HasPSHUFB, DAG, DCI,
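A small sketch of the shrinking loop as rewritten above: each pass folds adjacent index pairs until the mask stops widening, so a pairwise-structured mask collapses to its minimal equivalent form. The widenOnce helper here is a hypothetical stand-in for canWidenShuffleElements and only handles the cases this example needs.

#include <cstdio>
#include <vector>

// Hypothetical stand-in for canWidenShuffleElements(); only the matching
// sentinel and adjacent aligned pair cases are demonstrated.
static bool widenOnce(const std::vector<int> &Mask, std::vector<int> &Out) {
  for (size_t i = 0; i + 1 < Mask.size(); i += 2) {
    if (Mask[i] < 0 && Mask[i] == Mask[i + 1])
      Out.push_back(Mask[i]);
    else if (Mask[i] >= 0 && Mask[i] % 2 == 0 && Mask[i] + 1 == Mask[i + 1])
      Out.push_back(Mask[i] / 2);
    else
      return false;
  }
  return true;
}

int main() {
  // A v8i16 mask that is really just a swap of two 64-bit halves.
  std::vector<int> Mask = {4, 5, 6, 7, 0, 1, 2, 3};
  std::vector<int> Widened;

  // Mirror of the loop in the diff: keep folding pairs until the mask no
  // longer widens; {4,5,6,7,0,1,2,3} -> {2,3,0,1} -> {1,0}.
  while (Mask.size() > 1 && widenOnce(Mask, Widened)) {
    Mask = std::move(Widened);
    Widened.clear();
  }

  for (int M : Mask)
    std::printf("%d ", M);   // prints the minimal equivalent mask: 1 0
  std::printf("\n");
  return 0;
}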
@@ -20971,12 +21001,13 @@ static SDValue PerformTargetShuffleCombine(SDValue N, SelectionDAG &DAG,
       return SDValue(); // We combined away this shuffle, so we're done.
 
     // See if this reduces to a PSHUFD which is no more expensive and can
-    // combine with more operations.
-    if (canWidenShuffleElements(Mask)) {
-      int DMask[] = {-1, -1, -1, -1};
+    // combine with more operations. Note that it has to at least flip the
+    // dwords as otherwise it would have been removed as a no-op.
+    if (Mask[0] == 2 && Mask[1] == 3 && Mask[2] == 0 && Mask[3] == 1) {
+      int DMask[] = {0, 1, 2, 3};
       int DOffset = N.getOpcode() == X86ISD::PSHUFLW ? 0 : 2;
-      DMask[DOffset + 0] = DOffset + Mask[0] / 2;
-      DMask[DOffset + 1] = DOffset + Mask[2] / 2;
+      DMask[DOffset + 0] = DOffset + 1;
+      DMask[DOffset + 1] = DOffset + 0;
       V = DAG.getNode(ISD::BITCAST, DL, MVT::v4i32, V);
       DCI.AddToWorklist(V.getNode());
       V = DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32, V,
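The explicit check added in this hunk relies on the fact that a PSHUFLW (or PSHUFHW) with word mask <2,3,0,1> only swaps one pair of dwords, so it can be re-expressed as a PSHUFD. A minimal sketch verifying that equivalence on plain arrays with hypothetical values, not the DAG code itself:

#include <array>
#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  std::array<uint16_t, 8> V = {10, 11, 12, 13, 14, 15, 16, 17};

  // PSHUFLW with word mask <2,3,0,1>: permute words 0..3, keep words 4..7.
  const int WMask[4] = {2, 3, 0, 1};
  std::array<uint16_t, 8> Lw;
  for (int i = 0; i < 4; ++i) Lw[i] = V[WMask[i]];
  for (int i = 4; i < 8; ++i) Lw[i] = V[i];

  // PSHUFD with dword mask <1,0,2,3> on the same bytes viewed as dwords
  // (the DOffset = 0 case for PSHUFLW; PSHUFHW would swap dwords 2 and 3).
  std::array<uint32_t, 4> D, Dp;
  std::memcpy(D.data(), V.data(), sizeof(V));
  const int DMask[4] = {1, 0, 2, 3};
  for (int i = 0; i < 4; ++i) Dp[i] = D[DMask[i]];

  // Both produce the same bytes, so the word shuffle can be rewritten as
  // the cheaper-to-combine dword shuffle.
  assert(std::memcmp(Lw.data(), Dp.data(), sizeof(Lw)) == 0);
  return 0;
}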