diff options
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 68 |
1 files changed, 52 insertions, 16 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 70bb7d2f337..92db12abfdf 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -8480,10 +8480,15 @@ static SDValue lowerVectorShuffleAsInsertPS(SDValue Op, SDValue V1, SDValue V2, /// /// This specifically targets cases where we end up with alternating between /// the two inputs, and so can permute them into something that feeds a single -/// UNPCK instruction. +/// UNPCK instruction. Note that this routine only targets integer vectors +/// because for floating point vectors we have a generalized SHUFPS lowering +/// strategy that handles everything that doesn't *exactly* match an unpack, +/// making this clever lowering unnecessary. static SDValue lowerVectorShuffleAsUnpack(MVT VT, SDLoc DL, SDValue V1, SDValue V2, ArrayRef<int> Mask, SelectionDAG &DAG) { + assert(!VT.isFloatingPoint() && + "This routine only supports integer vectors."); assert(!isSingleInputShuffleMask(Mask) && "This routine should only be used when blending two inputs."); assert(Mask.size() >= 2 && "Single element masks are invalid."); @@ -8498,25 +8503,56 @@ static SDValue lowerVectorShuffleAsUnpack(MVT VT, SDLoc DL, SDValue V1, bool UnpackLo = NumLoInputs >= NumHiInputs; - SmallVector<int, 32> V1Mask(Mask.size(), -1); - SmallVector<int, 32> V2Mask(Mask.size(), -1); - for (int i = 0; i < Size; ++i) { - if (Mask[i] < 0) - continue; + auto TryUnpack = [&](MVT UnpackVT, int Scale) { + SmallVector<int, 32> V1Mask(Mask.size(), -1); + SmallVector<int, 32> V2Mask(Mask.size(), -1); - // We only handle the case where V1 feeds even mask slots and V2 feeds odd - // mask slots. We rely on canonicalization to ensure this is the case. - if ((i % 2 == 0) != (Mask[i] < Size)) - return SDValue(); + for (int i = 0; i < Size; ++i) { + if (Mask[i] < 0) + continue; + + // Each element of the unpack contains Scale elements from this mask. + int UnpackIdx = i / Scale; + + // We only handle the case where V1 feeds the first slots of the unpack. + // We rely on canonicalization to ensure this is the case. + if ((UnpackIdx % 2 == 0) != (Mask[i] < Size)) + return SDValue(); + + // Setup the mask for this input. The indexing is tricky as we have to + // handle the unpack stride. + SmallVectorImpl<int> &VMask = (UnpackIdx % 2 == 0) ? V1Mask : V2Mask; + VMask[(UnpackIdx / 2) * Scale + i % Scale + (UnpackLo ? 0 : Size / 2)] = + Mask[i] % Size; + } - SmallVectorImpl<int> &VMask = (i % 2 == 0) ? V1Mask : V2Mask; - VMask[i / 2 + (UnpackLo ? 0 : Size / 2)] = Mask[i] % Size; + // Shuffle the inputs into place. + V1 = DAG.getVectorShuffle(VT, DL, V1, DAG.getUNDEF(VT), V1Mask); + V2 = DAG.getVectorShuffle(VT, DL, V2, DAG.getUNDEF(VT), V2Mask); + + // Cast the inputs to the type we will use to unpack them. + V1 = DAG.getNode(ISD::BITCAST, DL, UnpackVT, V1); + V2 = DAG.getNode(ISD::BITCAST, DL, UnpackVT, V2); + + // Unpack the inputs and cast the result back to the desired type. + return DAG.getNode(ISD::BITCAST, DL, VT, + DAG.getNode(UnpackLo ? X86ISD::UNPCKL : X86ISD::UNPCKH, + DL, UnpackVT, V1, V2)); + }; + + // We try each unpack from the largest to the smallest to try and find one + // that fits this mask. + int OrigNumElements = VT.getVectorNumElements(); + int OrigScalarSize = VT.getScalarSizeInBits(); + for (int ScalarSize = 64; ScalarSize >= OrigScalarSize; ScalarSize /= 2) { + int Scale = ScalarSize / OrigScalarSize; + int NumElements = OrigNumElements / Scale; + MVT UnpackVT = MVT::getVectorVT(MVT::getIntegerVT(ScalarSize), NumElements); + if (SDValue Unpack = TryUnpack(UnpackVT, Scale)) + return Unpack; } - V1 = DAG.getVectorShuffle(VT, DL, V1, DAG.getUNDEF(VT), V1Mask); - V2 = DAG.getVectorShuffle(VT, DL, V2, DAG.getUNDEF(VT), V2Mask); - return DAG.getNode(UnpackLo ? X86ISD::UNPCKL : X86ISD::UNPCKH, DL, VT, V1, - V2); + return SDValue(); } /// \brief Handle lowering of 2-lane 64-bit floating point shuffles. |

