diff options
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 73 |
1 files changed, 73 insertions, 0 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 36d3fe1dd60..3247fc8f7b4 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -10935,6 +10935,69 @@ static SDValue lowerShuffleAsRotate(const SDLoc &DL, MVT VT, SDValue V1, DAG.getConstant(Rotation, DL, MVT::i8)); } +/// Try to lower a vector shuffle as a byte shift sequence. +static SDValue lowerVectorShuffleAsByteShiftMask( + const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask, + const APInt &Zeroable, const X86Subtarget &Subtarget, SelectionDAG &DAG) { + assert(!isNoopShuffleMask(Mask) && "We shouldn't lower no-op shuffles!"); + assert(VT.is128BitVector() && "Only 128-bit vectors supported"); + + // We need a shuffle that has zeros at one/both ends and a sequential + // shuffle from one source within. + unsigned ZeroLo = Zeroable.countTrailingOnes(); + unsigned ZeroHi = Zeroable.countLeadingOnes(); + if (!ZeroLo && !ZeroHi) + return SDValue(); + + unsigned NumElts = Mask.size(); + unsigned Len = NumElts - (ZeroLo + ZeroHi); + if (!isSequentialOrUndefInRange(Mask, ZeroLo, Len, Mask[ZeroLo])) + return SDValue(); + + unsigned Scale = VT.getScalarSizeInBits() / 8; + ArrayRef<int> StubMask = Mask.slice(ZeroLo, Len); + if (!isUndefOrInRange(StubMask, 0, NumElts) && + !isUndefOrInRange(StubMask, NumElts, 2 * NumElts)) + return SDValue(); + + SDValue Res = Mask[ZeroLo] < (int)NumElts ? V1 : V2; + Res = DAG.getBitcast(MVT::v16i8, Res); + + // Use VSHLDQ/VSRLDQ ops to zero the ends of a vector and leave an + // inner sequential set of elements, possibly offset: + // 01234567 --> zzzzzz01 --> 1zzzzzzz + // 01234567 --> 4567zzzz --> zzzzz456 + // 01234567 --> z0123456 --> 3456zzzz --> zz3456zz + if (ZeroLo == 0) { + unsigned Shift = (NumElts - 1) - (Mask[ZeroLo + Len - 1] % NumElts); + Res = DAG.getNode(X86ISD::VSHLDQ, DL, MVT::v16i8, Res, + DAG.getConstant(Scale * Shift, DL, MVT::i8)); + Res = DAG.getNode(X86ISD::VSRLDQ, DL, MVT::v16i8, Res, + DAG.getConstant(Scale * ZeroHi, DL, MVT::i8)); + } else if (ZeroHi == 0) { + unsigned Shift = Mask[ZeroLo] % NumElts; + Res = DAG.getNode(X86ISD::VSRLDQ, DL, MVT::v16i8, Res, + DAG.getConstant(Scale * Shift, DL, MVT::i8)); + Res = DAG.getNode(X86ISD::VSHLDQ, DL, MVT::v16i8, Res, + DAG.getConstant(Scale * ZeroLo, DL, MVT::i8)); + } else if (!Subtarget.hasSSSE3()) { + // If we don't have PSHUFB then its worth avoiding an AND constant mask + // by performing 3 byte shifts. Shuffle combining can kick in above that. + // TODO: There may be some cases where VSH{LR}DQ+PAND is still better. + unsigned Shift = (NumElts - 1) - (Mask[ZeroLo + Len - 1] % NumElts); + Res = DAG.getNode(X86ISD::VSHLDQ, DL, MVT::v16i8, Res, + DAG.getConstant(Scale * Shift, DL, MVT::i8)); + Shift += Mask[ZeroLo] % NumElts; + Res = DAG.getNode(X86ISD::VSRLDQ, DL, MVT::v16i8, Res, + DAG.getConstant(Scale * Shift, DL, MVT::i8)); + Res = DAG.getNode(X86ISD::VSHLDQ, DL, MVT::v16i8, Res, + DAG.getConstant(Scale * ZeroLo, DL, MVT::i8)); + } else + return SDValue(); + + return DAG.getBitcast(VT, Res); +} + /// Try to lower a vector shuffle as a bit shift (shifts in zeros). /// /// Attempts to match a shuffle mask against the PSLL(W/D/Q/DQ) and @@ -13339,6 +13402,11 @@ static SDValue lowerV8I16Shuffle(const SDLoc &DL, ArrayRef<int> Mask, lowerShuffleAsBitBlend(DL, MVT::v8i16, V1, V2, Mask, DAG)) return BitBlend; + // Try to use byte shift instructions to mask. + if (SDValue V = lowerVectorShuffleAsByteShiftMask( + DL, MVT::v8i16, V1, V2, Mask, Zeroable, Subtarget, DAG)) + return V; + // Try to lower by permuting the inputs into an unpack instruction. if (SDValue Unpack = lowerShuffleAsPermuteAndUnpack(DL, MVT::v8i16, V1, V2, Mask, Subtarget, DAG)) @@ -13588,6 +13656,11 @@ static SDValue lowerV16I8Shuffle(const SDLoc &DL, ArrayRef<int> Mask, if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v16i8, Mask, V1, V2, DAG)) return V; + // Try to use byte shift instructions to mask. + if (SDValue V = lowerVectorShuffleAsByteShiftMask( + DL, MVT::v16i8, V1, V2, Mask, Zeroable, Subtarget, DAG)) + return V; + // Check for SSSE3 which lets us lower all v16i8 shuffles much more directly // with PSHUFB. It is important to do this before we attempt to generate any // blends but after all of the single-input lowerings. If the single input |