diff options
author | Chandler Carruth <chandlerc@gmail.com> | 2015-02-16 12:28:18 +0000 |
---|---|---|
committer | Chandler Carruth <chandlerc@gmail.com> | 2015-02-16 12:28:18 +0000 |
commit | 1e57e2deb84aa812e15a09bdced71682d814e538 (patch) | |
tree | a19358c851f4d06b86ad756d341606f035a68ac4 /llvm/lib | |
parent | 50dc783d75247022bdba75eb8408cdff2149cfae (diff) | |
download | bcm5719-llvm-1e57e2deb84aa812e15a09bdced71682d814e538.tar.gz bcm5719-llvm-1e57e2deb84aa812e15a09bdced71682d814e538.zip |
[x86] Add a generic unpack-targeted lowering technique. This can be used
to generically lower blends and is particularly nice because it is
available from SSE2 onward. This removes a lot of the remaining domain
crossing blends in SSE2 code.
I'm hoping to replace some of the "interleaved" lowering hacks with
something closer to this which should be more principled. First, this
needs to learn how to detect and use other interleavings besides that of
the natural type provided. That will be a follow-up patch though.
llvm-svn: 229378
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 54 |
1 files changed, 54 insertions, 0 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 067ad6dcc5b..00095f04c67 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -8463,6 +8463,50 @@ static SDValue lowerVectorShuffleAsInsertPS(SDValue Op, SDValue V1, SDValue V2,
                      DAG.getConstant(InsertPSMask, MVT::i8));
 }
 
+/// \brief Try to lower a two-input shuffle as per-input permutes feeding one
+/// UNPCKL/UNPCKH node.
+///
+/// Only masks whose even slots read V1 and odd slots read V2 are handled; any
+/// other mask returns an empty SDValue so the caller can fall back. The low or
+/// high unpack is picked by which half of the inputs the mask reads more often.
+static SDValue lowerVectorShuffleAsUnpack(MVT VT, SDLoc DL, SDValue V1,
+                                          SDValue V2, ArrayRef<int> Mask,
+                                          SelectionDAG &DAG) {
+  assert(!isSingleInputShuffleMask(Mask) &&
+         "This routine should only be used when blending two inputs.");
+  assert(Mask.size() >= 2 && "Single element masks are invalid.");
+
+  int Size = Mask.size();
+  // Count mask elements reading the low and the high half of either input.
+  int NumLoInputs = std::count_if(Mask.begin(), Mask.end(), [Size](int M) {
+    return M >= 0 && M % Size < Size / 2;
+  });
+  int NumHiInputs = std::count_if(
+      Mask.begin(), Mask.end(), [Size](int M) { return M % Size >= Size / 2; });
+
+  bool UnpackLo = NumLoInputs >= NumHiInputs;
+
+  SmallVector<int, 32> V1Mask(Mask.size(), -1);
+  SmallVector<int, 32> V2Mask(Mask.size(), -1);
+  for (int i = 0; i < Size; ++i) {
+    if (Mask[i] < 0)
+      continue;
+
+    // We only handle the case where V1 feeds even mask slots and V2 feeds odd
+    // mask slots. We rely on canonicalization to ensure this is the case.
+    if ((i % 2 == 0) != (Mask[i] < Size))
+      return SDValue();
+    // Place the element where the chosen unpack reads it for output slot i.
+    SmallVectorImpl<int> &VMask = (i % 2 == 0) ? V1Mask : V2Mask;
+    VMask[i / 2 + (UnpackLo ? 0 : Size / 2)] = Mask[i] % Size;
+  }
+
+  V1 = DAG.getVectorShuffle(VT, DL, V1, DAG.getUNDEF(VT), V1Mask);
+  V2 = DAG.getVectorShuffle(VT, DL, V2, DAG.getUNDEF(VT), V2Mask);
+  return DAG.getNode(UnpackLo ? X86ISD::UNPCKL : X86ISD::UNPCKH, DL, VT, V1,
+                     V2);
+}
+
 /// \brief Handle lowering of 2-lane 64-bit floating point shuffles.
 ///
 /// This is the basis function for the 2-lane 64-bit shuffles as we have full
@@ -8921,6 +8965,11 @@ static SDValue lowerV4I32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
     return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v4i32, V1, V2,
                                                       Mask, DAG);
 
+  // Try to lower by permuting the inputs into an unpack instruction.
+  if (SDValue Unpack =
+          lowerVectorShuffleAsUnpack(MVT::v4i32, DL, V1, V2, Mask, DAG))
+    return Unpack;
+
   // We implement this with SHUFPS because it can blend from two vectors.
   // Because we're going to eventually use SHUFPS, we use SHUFPS even to build
   // up the inputs, bypassing domain shift penalties that we would encur if we
@@ -9670,6 +9719,11 @@ static SDValue lowerV8I16VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
     return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v8i16, V1, V2,
                                                       Mask, DAG);
 
+  // Try to lower by permuting the inputs into an unpack instruction.
+  if (SDValue Unpack =
+          lowerVectorShuffleAsUnpack(MVT::v8i16, DL, V1, V2, Mask, DAG))
+    return Unpack;
+
   int LoBlendMask[8] = {-1, -1, -1, -1, -1, -1, -1, -1};
   int HiBlendMask[8] = {-1, -1, -1, -1, -1, -1, -1, -1};
 