diff options
| author | Elena Demikhovsky <elena.demikhovsky@intel.com> | 2015-06-22 09:01:15 +0000 |
|---|---|---|
| committer | Elena Demikhovsky <elena.demikhovsky@intel.com> | 2015-06-22 09:01:15 +0000 |
| commit | d78609a7acb3b06e70bf7e0bd38223faf1db1a43 (patch) | |
| tree | ac266ce8fbd36adf24194035e786a4c211b6e8ea /llvm/lib/Target | |
| parent | fc21951cd7fb7a680e81dd138e6e97c8248785a5 (diff) | |
| download | bcm5719-llvm-d78609a7acb3b06e70bf7e0bd38223faf1db1a43.tar.gz bcm5719-llvm-d78609a7acb3b06e70bf7e0bd38223faf1db1a43.zip | |
Reverted AVX-512 vector shuffle
llvm-svn: 240258
Diffstat (limited to 'llvm/lib/Target')
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 244 |
1 files changed, 64 insertions, 180 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index d41f7f64411..67e733384ab 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -6259,42 +6259,6 @@ is128BitLaneRepeatedShuffleMask(MVT VT, ArrayRef<int> Mask, return true; } -/// \brief Test whether a shuffle mask is equivalent within each 256-bit lane. -/// -/// This checks a shuffle mask to see if it is performing the same -/// 256-bit lane-relative shuffle in each 256-bit lane. This trivially implies -/// that it is also not lane-crossing. It may however involve a blend from the -/// same lane of a second vector. -/// -/// The specific repeated shuffle mask is populated in \p RepeatedMask, as it is -/// non-trivial to compute in the face of undef lanes. The representation is -/// *not* suitable for use with existing 256-bit shuffles as it will contain -/// entries from both V1 and V2 inputs to the wider mask. -static bool -is256BitLaneRepeatedShuffleMask(MVT VT, ArrayRef<int> Mask, - SmallVectorImpl<int> &RepeatedMask) { - int LaneSize = 256 / VT.getScalarSizeInBits(); - RepeatedMask.resize(LaneSize, -1); - int Size = Mask.size(); - for (int i = 0; i < Size; ++i) { - if (Mask[i] < 0) - continue; - if ((Mask[i] % Size) / LaneSize != i / LaneSize) - // This entry crosses lanes, so there is no way to model this shuffle. - return false; - - // Ok, handle the in-lane shuffles by detecting if and when they repeat. - if (RepeatedMask[i % LaneSize] == -1) - // This is the first non-undef entry in this slot of a 256-bit lane. - RepeatedMask[i % LaneSize] = - Mask[i] < Size ? Mask[i] % LaneSize : Mask[i] % LaneSize + Size; - else if (RepeatedMask[i % LaneSize] + (i / LaneSize) * LaneSize != Mask[i]) - // Found a mismatch with the repeated mask. - return false; - } - return true; -} - /// \brief Checks whether a shuffle mask is equivalent to an explicit list of /// arguments. /// @@ -6354,22 +6318,6 @@ static SDValue getV4X86ShuffleImm8ForMask(ArrayRef<int> Mask, SDLoc DL, return DAG.getConstant(Imm, DL, MVT::i8); } -/// \brief Get a 8-bit shuffle, 1 bit per lane, immediate for a mask. -/// -/// This helper function produces an 8-bit shuffle immediate corresponding to -/// the ubiquitous shuffle encoding scheme used in x86 instructions for -/// shuffling 8 lanes. -static SDValue get1bitLaneShuffleImm8ForMask(ArrayRef<int> Mask, SDLoc DL, - SelectionDAG &DAG) { - assert(Mask.size() <= 8 && - "Up to 8 elts may be in Imm8 1-bit lane shuffle mask"); - unsigned Imm = 0; - for (unsigned i = 0; i < Mask.size(); ++i) - if (Mask[i] >= 0) - Imm |= (Mask[i] % 2) << i; - return DAG.getConstant(Imm, DL, MVT::i8); -} - /// \brief Try to emit a blend instruction for a shuffle using bit math. /// /// This is used as a fallback approach when first class blend instructions are @@ -9385,30 +9333,6 @@ static SDValue lowerV2X128VectorShuffle(SDLoc DL, MVT VT, SDValue V1, DAG.getConstant(PermMask, DL, MVT::i8)); } -/// \brief Handle lowering 4-lane 128-bit shuffles. -static SDValue lowerV4X128VectorShuffle(SDLoc DL, MVT VT, SDValue V1, - SDValue V2, ArrayRef<int> WidenedMask, - SelectionDAG &DAG) { - - assert(WidenedMask.size() == 4 && "Unexpected mask size for 128bit shuffle!"); - // form a 128-bit permutation. - // convert the 64-bit shuffle mask selection values into 128-bit selection - // bits defined by a vshuf64x2 instruction's immediate control byte. - unsigned PermMask = 0, Imm = 0; - - for (int i = 0, Size = WidenedMask.size(); i < Size; ++i) { - if(WidenedMask[i] == SM_SentinelZero) - return SDValue(); - - // use first element in place of undef musk - Imm = (WidenedMask[i] == SM_SentinelUndef) ? 0 : WidenedMask[i]; - PermMask |= (Imm % 4) << (i * 2); - } - - return DAG.getNode(X86ISD::SHUF128, DL, VT, V1, V2, - DAG.getConstant(PermMask, DL, MVT::i8)); -} - /// \brief Lower a vector shuffle by first fixing the 128-bit lanes and then /// shuffling each lane. /// @@ -10144,105 +10068,86 @@ static SDValue lower256BitVectorShuffle(SDValue Op, SDValue V1, SDValue V2, } } -static SDValue lowerVectorShuffleWithVALIGN(SDLoc DL, MVT VT, - ArrayRef<int> Mask, SDValue V1, - SDValue V2, SelectionDAG &DAG) { - - assert(VT.getScalarSizeInBits() >= 32 && "Unexpected data type for VALIGN"); - // VALIGN pattern 2, 3, 4, 5, .. (sequential, shifted right) - int AlignVal = -1; - for (int i = 0; i < (signed)VT.getVectorNumElements(); ++i) { - if (Mask[i] < 0) - continue; - if (Mask[i] < i) - return SDValue(); - if (AlignVal == -1) - AlignVal = Mask[i] - i; - else if (Mask[i] - i != AlignVal) - return SDValue(); - } - // Vector source operands should be swapped - return DAG.getNode(X86ISD::VALIGN, DL, VT, V2, V1, - DAG.getConstant(AlignVal, DL, MVT::i8)); -} +/// \brief Handle lowering of 8-lane 64-bit floating point shuffles. +static SDValue lowerV8F64VectorShuffle(SDValue Op, SDValue V1, SDValue V2, + const X86Subtarget *Subtarget, + SelectionDAG &DAG) { + SDLoc DL(Op); + assert(V1.getSimpleValueType() == MVT::v8f64 && "Bad operand type!"); + assert(V2.getSimpleValueType() == MVT::v8f64 && "Bad operand type!"); + ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op); + ArrayRef<int> Mask = SVOp->getMask(); + assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!"); -static SDValue lowerVectorShuffleWithPERMV(SDLoc DL, MVT VT, - ArrayRef<int> Mask, SDValue V1, - SDValue V2, SelectionDAG &DAG) { + // X86 has dedicated unpack instructions that can handle specific blend + // operations: UNPCKH and UNPCKL. + if (isShuffleEquivalent(V1, V2, Mask, {0, 8, 2, 10, 4, 12, 6, 14})) + return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v8f64, V1, V2); + if (isShuffleEquivalent(V1, V2, Mask, {1, 9, 3, 11, 5, 13, 7, 15})) + return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v8f64, V1, V2); - assert(VT.getScalarSizeInBits() >= 16 && "Unexpected data type for PERMV"); + // FIXME: Implement direct support for this type! + return splitAndLowerVectorShuffle(DL, MVT::v8f64, V1, V2, Mask, DAG); +} - MVT MaskEltVT = MVT::getIntegerVT(VT.getScalarSizeInBits()); - MVT MaskVecVT = MVT::getVectorVT(MaskEltVT, VT.getVectorNumElements()); +/// \brief Handle lowering of 16-lane 32-bit floating point shuffles. +static SDValue lowerV16F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2, + const X86Subtarget *Subtarget, + SelectionDAG &DAG) { + SDLoc DL(Op); + assert(V1.getSimpleValueType() == MVT::v16f32 && "Bad operand type!"); + assert(V2.getSimpleValueType() == MVT::v16f32 && "Bad operand type!"); + ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op); + ArrayRef<int> Mask = SVOp->getMask(); + assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!"); - SmallVector<SDValue, 32> VPermMask; - for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) - VPermMask.push_back(Mask[i] < 0 ? DAG.getUNDEF(MaskEltVT) : - DAG.getConstant(Mask[i], DL,MaskEltVT)); - SDValue MaskNode = DAG.getNode(ISD::BUILD_VECTOR, DL, MaskVecVT, - VPermMask); - if (isSingleInputShuffleMask(Mask)) - return DAG.getNode(X86ISD::VPERMV, DL, VT, MaskNode, V1); + // Use dedicated unpack instructions for masks that match their pattern. + if (isShuffleEquivalent(V1, V2, Mask, + {// First 128-bit lane. + 0, 16, 1, 17, 4, 20, 5, 21, + // Second 128-bit lane. + 8, 24, 9, 25, 12, 28, 13, 29})) + return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v16f32, V1, V2); + if (isShuffleEquivalent(V1, V2, Mask, + {// First 128-bit lane. + 2, 18, 3, 19, 6, 22, 7, 23, + // Second 128-bit lane. + 10, 26, 11, 27, 14, 30, 15, 31})) + return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v16f32, V1, V2); - return DAG.getNode(X86ISD::VPERMV3, DL, VT, MaskNode, V1, V2); + // FIXME: Implement direct support for this type! + return splitAndLowerVectorShuffle(DL, MVT::v16f32, V1, V2, Mask, DAG); } - -/// \brief Handle lowering of 8-lane 64-bit floating point shuffles. -static SDValue lowerV8X64VectorShuffle(SDValue Op, SDValue V1, SDValue V2, +/// \brief Handle lowering of 8-lane 64-bit integer shuffles. +static SDValue lowerV8I64VectorShuffle(SDValue Op, SDValue V1, SDValue V2, const X86Subtarget *Subtarget, SelectionDAG &DAG) { SDLoc DL(Op); - MVT VT = Op.getSimpleValueType(); - assert((V1.getSimpleValueType() == MVT::v8f64 || - V1.getSimpleValueType() == MVT::v8i64) && "Bad operand type!"); - assert((V2.getSimpleValueType() == MVT::v8f64 || - V2.getSimpleValueType() == MVT::v8i64) && "Bad operand type!"); + assert(V1.getSimpleValueType() == MVT::v8i64 && "Bad operand type!"); + assert(V2.getSimpleValueType() == MVT::v8i64 && "Bad operand type!"); ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op); ArrayRef<int> Mask = SVOp->getMask(); assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!"); - SmallVector<int, 4> WidenedMask; - if (canWidenShuffleElements(Mask, WidenedMask)) - if(SDValue Op = lowerV4X128VectorShuffle(DL, VT, V1, V2, WidenedMask, DAG)) - return Op; // X86 has dedicated unpack instructions that can handle specific blend // operations: UNPCKH and UNPCKL. if (isShuffleEquivalent(V1, V2, Mask, {0, 8, 2, 10, 4, 12, 6, 14})) - return DAG.getNode(X86ISD::UNPCKL, DL, VT, V1, V2); + return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v8i64, V1, V2); if (isShuffleEquivalent(V1, V2, Mask, {1, 9, 3, 11, 5, 13, 7, 15})) - return DAG.getNode(X86ISD::UNPCKH, DL, VT, V1, V2); - - if (SDValue Op = lowerVectorShuffleWithVALIGN(DL, VT, Mask, V1, V2, DAG)) - return Op; + return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v8i64, V1, V2); - if (SDValue Op = lowerVectorShuffleWithSHUFPD(DL, VT, Mask, V1, V2, DAG)) - return Op; - - // PERMILPD instruction - mask 0/1, 0/1, 2/3, 2/3, 4/5, 4/5, 6/7, 6/7 - if (isSingleInputShuffleMask(Mask)) { - if (!is128BitLaneCrossingShuffleMask(VT, Mask)) - return DAG.getNode(X86ISD::VPERMILPI, DL, VT, V1, - get1bitLaneShuffleImm8ForMask(Mask, DL, DAG)); - - SmallVector<int, 4> RepeatedMask; - if (is256BitLaneRepeatedShuffleMask(VT, Mask, RepeatedMask)) - return DAG.getNode(X86ISD::VPERMI, DL, VT, V1, - getV4X86ShuffleImm8ForMask(RepeatedMask, DL, DAG)); - } - return lowerVectorShuffleWithPERMV(DL, VT, Mask, V1, V2, DAG); + // FIXME: Implement direct support for this type! + return splitAndLowerVectorShuffle(DL, MVT::v8i64, V1, V2, Mask, DAG); } /// \brief Handle lowering of 16-lane 32-bit integer shuffles. -static SDValue lowerV16X32VectorShuffle(SDValue Op, SDValue V1, SDValue V2, +static SDValue lowerV16I32VectorShuffle(SDValue Op, SDValue V1, SDValue V2, const X86Subtarget *Subtarget, SelectionDAG &DAG) { - MVT VT = Op.getSimpleValueType(); SDLoc DL(Op); - assert((V1.getSimpleValueType() == MVT::v16i32 || - V1.getSimpleValueType() == MVT::v16f32) && "Bad operand type!"); - assert((V2.getSimpleValueType() == MVT::v16i32 || - V2.getSimpleValueType() == MVT::v16f32) && "Bad operand type!"); + assert(V1.getSimpleValueType() == MVT::v16i32 && "Bad operand type!"); + assert(V2.getSimpleValueType() == MVT::v16i32 && "Bad operand type!"); ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op); ArrayRef<int> Mask = SVOp->getMask(); assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!"); @@ -10253,39 +10158,16 @@ static SDValue lowerV16X32VectorShuffle(SDValue Op, SDValue V1, SDValue V2, 0, 16, 1, 17, 4, 20, 5, 21, // Second 128-bit lane. 8, 24, 9, 25, 12, 28, 13, 29})) - return DAG.getNode(X86ISD::UNPCKL, DL, VT, V1, V2); + return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v16i32, V1, V2); if (isShuffleEquivalent(V1, V2, Mask, {// First 128-bit lane. 2, 18, 3, 19, 6, 22, 7, 23, // Second 128-bit lane. 10, 26, 11, 27, 14, 30, 15, 31})) - return DAG.getNode(X86ISD::UNPCKH, DL, VT, V1, V2); - - if (isShuffleEquivalent(V1, V2, Mask, {0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, - 12, 12, 14, 14})) - return DAG.getNode(X86ISD::MOVSLDUP, DL, VT, V1); - if (isShuffleEquivalent(V1, V2, Mask, {1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, - 13, 13, 15, 15})) - return DAG.getNode(X86ISD::MOVSHDUP, DL, VT, V1); - - SmallVector<int, 4> RepeatedMask; - if (is128BitLaneRepeatedShuffleMask(VT, Mask, RepeatedMask)) { - if (isSingleInputShuffleMask(Mask)) { - unsigned Opc = VT.isInteger() ? X86ISD::PSHUFD : X86ISD::VPERMILPI; - return DAG.getNode(Opc, DL, VT, V1, - getV4X86ShuffleImm8ForMask(RepeatedMask, DL, DAG)); - } + return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v16i32, V1, V2); - for (int i = 0; i < 4; ++i) - if (RepeatedMask[i] >= 16) - RepeatedMask[i] -= 12; - return lowerVectorShuffleWithSHUFPS(DL, VT, RepeatedMask, V1, V2, DAG); - } - - if (SDValue Op = lowerVectorShuffleWithVALIGN(DL, VT, Mask, V1, V2, DAG)) - return Op; - - return lowerVectorShuffleWithPERMV(DL, VT, Mask, V1, V2, DAG); + // FIXME: Implement direct support for this type! + return splitAndLowerVectorShuffle(DL, MVT::v16i32, V1, V2, Mask, DAG); } /// \brief Handle lowering of 32-lane 16-bit integer shuffles. @@ -10345,11 +10227,13 @@ static SDValue lower512BitVectorShuffle(SDValue Op, SDValue V1, SDValue V2, // the requisite ISA extensions for that element type are available. switch (VT.SimpleTy) { case MVT::v8f64: - case MVT::v8i64: - return lowerV8X64VectorShuffle(Op, V1, V2, Subtarget, DAG); + return lowerV8F64VectorShuffle(Op, V1, V2, Subtarget, DAG); case MVT::v16f32: + return lowerV16F32VectorShuffle(Op, V1, V2, Subtarget, DAG); + case MVT::v8i64: + return lowerV8I64VectorShuffle(Op, V1, V2, Subtarget, DAG); case MVT::v16i32: - return lowerV16X32VectorShuffle(Op, V1, V2, Subtarget, DAG); + return lowerV16I32VectorShuffle(Op, V1, V2, Subtarget, DAG); case MVT::v32i16: if (Subtarget->hasBWI()) return lowerV32I16VectorShuffle(Op, V1, V2, Subtarget, DAG); |

