diff options
author | Craig Topper <craig.topper@gmail.com> | 2016-11-12 05:05:27 +0000 |
---|---|---|
committer | Craig Topper <craig.topper@gmail.com> | 2016-11-12 05:05:27 +0000 |
commit | 5cb13062d27719879a6a58f18f458976df0ef102 (patch) | |
tree | 27ece8a59c9beacdf2db9330a58c1f6bd2207c8e /llvm/lib | |
parent | c557191b210650b1726d64d67a8b824b4d47cd5f (diff) | |
download | bcm5719-llvm-5cb13062d27719879a6a58f18f458976df0ef102.tar.gz bcm5719-llvm-5cb13062d27719879a6a58f18f458976df0ef102.zip |
[AVX-512] Add support for lowering shuffles to VALIGND/VALIGNQ
Summary: VALIGND and VALIGNQ are similar to PALIGNR but instead of working on a 128-bit lane they work on the entire vector register. This change leverages the shuffle rotate detection code used for PALIGNR to detect these cases.
Reviewers: delena, RKSimon
Subscribers: Farhana, llvm-commits
Differential Revision: https://reviews.llvm.org/D26297
llvm-svn: 286709
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 124 |
1 files changed, 96 insertions, 28 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 5f4a29d9812..cd1ce178112 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -7788,34 +7788,12 @@ static SDValue lowerVectorShuffleAsDecomposedShuffleBlend(const SDLoc &DL, return DAG.getVectorShuffle(VT, DL, V1, V2, BlendMask); } -/// \brief Try to lower a vector shuffle as a byte rotation. -/// -/// SSSE3 has a generic PALIGNR instruction in x86 that will do an arbitrary -/// byte-rotation of the concatenation of two vectors; pre-SSSE3 can use -/// a PSRLDQ/PSLLDQ/POR pattern to get a similar effect. This routine will -/// try to generically lower a vector shuffle through such an pattern. It -/// does not check for the profitability of lowering either as PALIGNR or -/// PSRLDQ/PSLLDQ/POR, only whether the mask is valid to lower in that form. -/// This matches shuffle vectors that look like: -/// -/// v8i16 [11, 12, 13, 14, 15, 0, 1, 2] +/// \brief Try to lower a vector shuffle as a rotation. /// -/// Essentially it concatenates V1 and V2, shifts right by some number of -/// elements, and takes the low elements as the result. Note that while this is -/// specified as a *right shift* because x86 is little-endian, it is a *left -/// rotate* of the vector lanes. -static int matchVectorShuffleAsByteRotate(MVT VT, SDValue &V1, SDValue &V2, - ArrayRef<int> Mask) { - // Don't accept any shuffles with zero elements. - if (any_of(Mask, [](int M) { return M == SM_SentinelZero; })) - return -1; - - // PALIGNR works on 128-bit lanes. - SmallVector<int, 16> RepeatedMask; - if (!is128BitLaneRepeatedShuffleMask(VT, Mask, RepeatedMask)) - return -1; - - int NumElts = RepeatedMask.size(); +/// This is used for support PALIGNR for SSSE3 or VALIGND/Q for AVX512. +static int matchVectorShuffleAsRotate(SDValue &V1, SDValue &V2, + ArrayRef<int> Mask) { + int NumElts = Mask.size(); // We need to detect various ways of spelling a rotation: // [11, 12, 13, 14, 15, 0, 1, 2] @@ -7827,7 +7805,7 @@ static int matchVectorShuffleAsByteRotate(MVT VT, SDValue &V1, SDValue &V2, int Rotation = 0; SDValue Lo, Hi; for (int i = 0; i < NumElts; ++i) { - int M = RepeatedMask[i]; + int M = Mask[i]; assert((M == SM_SentinelUndef || (0 <= M && M < (2*NumElts))) && "Unexpected mask index."); if (M < 0) @@ -7879,8 +7857,43 @@ static int matchVectorShuffleAsByteRotate(MVT VT, SDValue &V1, SDValue &V2, V1 = Lo; V2 = Hi; + return Rotation; +} + +/// \brief Try to lower a vector shuffle as a byte rotation. +/// +/// SSSE3 has a generic PALIGNR instruction in x86 that will do an arbitrary +/// byte-rotation of the concatenation of two vectors; pre-SSSE3 can use +/// a PSRLDQ/PSLLDQ/POR pattern to get a similar effect. This routine will +/// try to generically lower a vector shuffle through such an pattern. It +/// does not check for the profitability of lowering either as PALIGNR or +/// PSRLDQ/PSLLDQ/POR, only whether the mask is valid to lower in that form. +/// This matches shuffle vectors that look like: +/// +/// v8i16 [11, 12, 13, 14, 15, 0, 1, 2] +/// +/// Essentially it concatenates V1 and V2, shifts right by some number of +/// elements, and takes the low elements as the result. Note that while this is +/// specified as a *right shift* because x86 is little-endian, it is a *left +/// rotate* of the vector lanes. +static int matchVectorShuffleAsByteRotate(MVT VT, SDValue &V1, SDValue &V2, + ArrayRef<int> Mask) { + // Don't accept any shuffles with zero elements. + if (any_of(Mask, [](int M) { return M == SM_SentinelZero; })) + return -1; + + // PALIGNR works on 128-bit lanes. + SmallVector<int, 16> RepeatedMask; + if (!is128BitLaneRepeatedShuffleMask(VT, Mask, RepeatedMask)) + return -1; + + int Rotation = matchVectorShuffleAsRotate(V1, V2, RepeatedMask); + if (Rotation <= 0) + return -1; + // PALIGNR rotates bytes, so we need to scale the // rotation based on how many bytes are in the vector lane. + int NumElts = RepeatedMask.size(); int Scale = 16 / NumElts; return Rotation * Scale; } @@ -7931,6 +7944,37 @@ static SDValue lowerVectorShuffleAsByteRotate(const SDLoc &DL, MVT VT, DAG.getNode(ISD::OR, DL, MVT::v16i8, LoShift, HiShift)); } +/// \brief Try to lower a vector shuffle as a dword/qword rotation. +/// +/// AVX512 has a VALIGND/VALIGNQ instructions that will do an arbitrary +/// rotation of the concatenation of two vectors; This routine will +/// try to generically lower a vector shuffle through such an pattern. +/// +/// Essentially it concatenates V1 and V2, shifts right by some number of +/// elements, and takes the low elements as the result. Note that while this is +/// specified as a *right shift* because x86 is little-endian, it is a *left +/// rotate* of the vector lanes. +static SDValue lowerVectorShuffleAsRotate(const SDLoc &DL, MVT VT, + SDValue V1, SDValue V2, + ArrayRef<int> Mask, + const X86Subtarget &Subtarget, + SelectionDAG &DAG) { + assert((VT.getScalarType() == MVT::i32 || VT.getScalarType() == MVT::i64) && + "Only 32-bit and 64-bit elements are supported!"); + + // 128/256-bit vectors are only supported with VLX. + assert((Subtarget.hasVLX() || (!VT.is128BitVector() && !VT.is256BitVector())) + && "VLX required for 128/256-bit vectors"); + + SDValue Lo = V1, Hi = V2; + int Rotation = matchVectorShuffleAsRotate(Lo, Hi, Mask); + if (Rotation <= 0) + return SDValue(); + + return DAG.getNode(X86ISD::VALIGN, DL, VT, Lo, Hi, + DAG.getConstant(Rotation, DL, MVT::i8)); +} + /// \brief Try to lower a vector shuffle as a bit shift (shifts in zeros). /// /// Attempts to match a shuffle mask against the PSLL(W/D/Q/DQ) and @@ -11505,6 +11549,13 @@ static SDValue lowerV4I64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask, Zeroable, Subtarget, DAG)) return Shift; + // If we have VLX support, we can use VALIGN. + if (Subtarget.hasVLX()) + if (SDValue Rotate = lowerVectorShuffleAsRotate(DL, MVT::v4i64, V1, V2, + Mask, Subtarget, DAG)) + return Rotate; + + // Try to use PALIGNR. if (SDValue Rotate = lowerVectorShuffleAsByteRotate(DL, MVT::v4i64, V1, V2, Mask, Subtarget, DAG)) return Rotate; @@ -11666,6 +11717,12 @@ static SDValue lowerV8I32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask, Zeroable, Subtarget, DAG)) return Shift; + // If we have VLX support, we can use VALIGN. + if (Subtarget.hasVLX()) + if (SDValue Rotate = lowerVectorShuffleAsRotate(DL, MVT::v8i32, V1, V2, + Mask, Subtarget, DAG)) + return Rotate; + // Try to use byte rotation instructions. if (SDValue Rotate = lowerVectorShuffleAsByteRotate( DL, MVT::v8i32, V1, V2, Mask, Subtarget, DAG)) @@ -12094,6 +12151,12 @@ static SDValue lowerV8I64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask, Zeroable, Subtarget, DAG)) return Shift; + // Try to use VALIGN. + if (SDValue Rotate = lowerVectorShuffleAsRotate(DL, MVT::v8i64, V1, V2, + Mask, Subtarget, DAG)) + return Rotate; + + // Try to use PALIGNR. if (SDValue Rotate = lowerVectorShuffleAsByteRotate(DL, MVT::v8i64, V1, V2, Mask, Subtarget, DAG)) return Rotate; @@ -12143,6 +12206,11 @@ static SDValue lowerV16I32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask, Zeroable, Subtarget, DAG)) return Shift; + // Try to use VALIGN. + if (SDValue Rotate = lowerVectorShuffleAsRotate(DL, MVT::v16i32, V1, V2, + Mask, Subtarget, DAG)) + return Rotate; + // Try to use byte rotation instructions. if (Subtarget.hasBWI()) if (SDValue Rotate = lowerVectorShuffleAsByteRotate( |