summary | refs | log | tree | commit | diff | stats
path: root/llvm/lib/Target/X86/X86ISelLowering.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/X86/X86ISelLowering.cpp')
-rw-r--r-- llvm/lib/Target/X86/X86ISelLowering.cpp 101
1 files changed, 66 insertions, 35 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 1d92d7b3392..f3b4c70b712 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -7170,6 +7170,59 @@ static SmallBitVector computeZeroableShuffleElements(ArrayRef<int> Mask,
return Zeroable;
}
+/// Mutate a shuffle mask, replacing zeroable elements with SM_SentinelZero.
+///
+/// The zeroable set comes from computeZeroableShuffleElements(Mask, V1, V2)
+/// and is indexed by mask position. \p Mask is rewritten in place. Entries
+/// equal to SM_SentinelUndef are never modified, even if their zeroable bit
+/// is set; every other zeroable entry becomes SM_SentinelZero.
+static void computeZeroableShuffleMask(MutableArrayRef<int> Mask,
+ SDValue V1, SDValue V2) {
+ SmallBitVector Zeroable = computeZeroableShuffleElements(Mask, V1, V2);
+ for (int i = 0, Size = Mask.size(); i < Size; ++i) {
+ // Keep undef entries as undef; only non-undef zeroable entries are rewritten.
+ if (Mask[i] != SM_SentinelUndef && Zeroable[i])
+ Mask[i] = SM_SentinelZero;
+ }
+}
+
+/// Try to lower a shuffle with a single PSHUFB of V1.
+/// This is only possible if V2 is unused (at all, or only for zero elements).
+///
+/// Returns an empty SDValue when the mask (after zeroable elements have been
+/// replaced by SM_SentinelZero) is not a single-input mask, or when \p Mask
+/// crosses 128-bit lanes. Otherwise returns an X86ISD::PSHUFB node on V1,
+/// built on a vector of i8 and bitcast back to \p VT. V2 is only consulted
+/// when computing the zeroable elements; it is never an operand of the
+/// emitted node.
+static SDValue lowerVectorShuffleWithPSHUFB(SDLoc DL, MVT VT,
+ ArrayRef<int> Mask, SDValue V1,
+ SDValue V2,
+ const X86Subtarget &Subtarget,
+ SelectionDAG &DAG) {
+ // Total byte width of the vector: 16 for 128-bit types, 32 otherwise (the
+ // assert below restricts VT to 128-bit or 256-bit vectors).
+ const int NumBytes = VT.is128BitVector() ? 16 : 32;
+ // Number of bytes occupied by one scalar element of VT.
+ const int NumEltBytes = VT.getScalarSizeInBits() / 8;
+
+ // 128-bit vectors require SSSE3; 256-bit vectors require AVX2.
+ assert((Subtarget.hasSSSE3() && VT.is128BitVector()) ||
+ (Subtarget.hasAVX2() && VT.is256BitVector()));
+
+ // Work on a local copy of the mask so zeroable entries can be rewritten to
+ // SM_SentinelZero without touching the caller's (read-only) Mask.
+ SmallVector<int, 32> ZeroableMask(Mask.begin(), Mask.end());
+ computeZeroableShuffleMask(ZeroableMask, V1, V2);
+
+ // Bail out unless the rewritten mask is single-input and the shuffle stays
+ // within 128-bit lanes.
+ // NOTE(review): the lane-crossing test is applied to the original Mask, not
+ // ZeroableMask, so elements that became zero are still taken into account.
+ // This looks conservative rather than incorrect - confirm.
+ if (!isSingleInputShuffleMask(ZeroableMask) ||
+ is128BitLaneCrossingShuffleMask(VT, Mask))
+ return SDValue();
+
+ // One i8 control value per byte of the result vector.
+ SmallVector<SDValue, 32> PSHUFBMask(NumBytes);
+ // Sign bit set in i8 mask means zero element.
+ SDValue ZeroMask = DAG.getConstant(0x80, DL, MVT::i8);
+
+ for (int i = 0; i < NumBytes; ++i) {
+ // Result byte i belongs to shuffle element i / NumEltBytes.
+ int M = ZeroableMask[i / NumEltBytes];
+ if (M == SM_SentinelUndef) {
+ PSHUFBMask[i] = DAG.getUNDEF(MVT::i8);
+ } else if (M == SM_SentinelZero) {
+ PSHUFBMask[i] = ZeroMask;
+ } else {
+ // Scale the source element index to a byte index and add the offset of
+ // this byte within its element.
+ M = M * NumEltBytes + (i % NumEltBytes);
+ // For result bytes in the upper 16 bytes, make the index relative to that
+ // 16-byte half (presumably because PSHUFB selects bytes within each
+ // 128-bit lane - confirm against the instruction reference).
+ M = i < 16 ? M : M - 16;
+ PSHUFBMask[i] = DAG.getConstant(M, DL, MVT::i8);
+ }
+ }
+
+ // Emit the PSHUFB on the i8 vector type of the same total width, then bitcast
+ // the result back to the requested type.
+ MVT I8VT = MVT::getVectorVT(MVT::i8, NumBytes);
+ return DAG.getBitcast(
+ VT, DAG.getNode(X86ISD::PSHUFB, DL, I8VT, DAG.getBitcast(I8VT, V1),
+ DAG.getBuildVector(I8VT, DL, PSHUFBMask)));
+}
+
// X86 has dedicated unpack instructions that can handle specific blend
// operations: UNPCKH and UNPCKL.
static SDValue lowerVectorShuffleWithUNPCK(SDLoc DL, MVT VT, ArrayRef<int> Mask,
@@ -11389,26 +11442,12 @@ static SDValue lowerV16I16VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
return lowerV8I16GeneralSingleInputVectorShuffle(
DL, MVT::v16i16, V1, RepeatedMask, Subtarget, DAG);
}
-
- SDValue PSHUFBMask[32];
- for (int i = 0; i < 16; ++i) {
- if (Mask[i] == -1) {
- PSHUFBMask[2 * i] = PSHUFBMask[2 * i + 1] = DAG.getUNDEF(MVT::i8);
- continue;
- }
-
- int M = i < 8 ? Mask[i] : Mask[i] - 8;
- assert(M >= 0 && M < 8 && "Invalid single-input mask!");
- PSHUFBMask[2 * i] = DAG.getConstant(2 * M, DL, MVT::i8);
- PSHUFBMask[2 * i + 1] = DAG.getConstant(2 * M + 1, DL, MVT::i8);
- }
- return DAG.getBitcast(
- MVT::v16i16,
- DAG.getNode(X86ISD::PSHUFB, DL, MVT::v32i8,
- DAG.getBitcast(MVT::v32i8, V1),
- DAG.getBuildVector(MVT::v32i8, DL, PSHUFBMask)));
}
+ if (SDValue PSHUFB = lowerVectorShuffleWithPSHUFB(DL, MVT::v16i16, Mask, V1,
+ V2, Subtarget, DAG))
+ return PSHUFB;
+
// Try to simplify this by merging 128-bit lanes to enable a lane-based
// shuffle.
if (SDValue Result = lowerVectorShuffleByMerging128BitLanes(
@@ -11471,24 +11510,16 @@ static SDValue lowerV32I8VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
DL, MVT::v32i8, V1, V2, Mask, Subtarget, DAG))
return V;
- if (isSingleInputShuffleMask(Mask)) {
- // There are no generalized cross-lane shuffle operations available on i8
- // element types.
- if (is128BitLaneCrossingShuffleMask(MVT::v32i8, Mask))
- return lowerVectorShuffleAsLanePermuteAndBlend(DL, MVT::v32i8, V1, V2,
- Mask, DAG);
-
- SDValue PSHUFBMask[32];
- for (int i = 0; i < 32; ++i)
- PSHUFBMask[i] =
- Mask[i] < 0
- ? DAG.getUNDEF(MVT::i8)
- : DAG.getConstant(Mask[i] < 16 ? Mask[i] : Mask[i] - 16, DL,
- MVT::i8);
+ // There are no generalized cross-lane shuffle operations available on i8
+ // element types.
+ if (isSingleInputShuffleMask(Mask) &&
+ is128BitLaneCrossingShuffleMask(MVT::v32i8, Mask))
+ return lowerVectorShuffleAsLanePermuteAndBlend(DL, MVT::v32i8, V1, V2, Mask,
+ DAG);
- return DAG.getNode(X86ISD::PSHUFB, DL, MVT::v32i8, V1,
- DAG.getBuildVector(MVT::v32i8, DL, PSHUFBMask));
- }
+ if (SDValue PSHUFB = lowerVectorShuffleWithPSHUFB(DL, MVT::v32i8, Mask, V1,
+ V2, Subtarget, DAG))
+ return PSHUFB;
// Try to simplify this by merging 128-bit lanes to enable a lane-based
// shuffle.
OpenPOWER on IntegriCloud