From bcb6c5f62d375e45927a0b6e1177f45286dbcfab Mon Sep 17 00:00:00 2001 From: Chandler Carruth Date: Thu, 19 Feb 2015 10:46:52 +0000 Subject: [x86] Add support for bit-wise blending and use it in the v8 and v16 lowering paths. I'm going to be leveraging this to simplify a lot of the overly complex lowering of v8 and v16 shuffles in pre-SSSE3 modes. Sadly, this isn't profitable on v4i32 and v2i64. There, the float and double blending instructions for pre-SSE4.1 are actually pretty good, and we can't beat them with bit math. And once SSE4.1 comes around we have direct blending support and this ceases to be relevant. Also, some of the test cases look odd because the domain fixer canonicalizes these to floating point domain. That's OK, it'll use the integer domain when it matters and some day I may be able to update enough of LLVM to canonicalize the other way. This restores almost all of the regressions from teaching x86's vselect lowering to always use vector shuffle lowering for blends. The remaining problems are because the v16 lowering path is still doing crazy things. I'll be re-arranging that strategy in more detail in subsequent commits to finish recovering the performance here. llvm-svn: 229836 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 40 ++++++++++++++++++++++++++++++++- 1 file changed, 39 insertions(+), 1 deletion(-) (limited to 'llvm/lib/Target/X86/X86ISelLowering.cpp') diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index d00b09e71d1..a07718b9bd8 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -7438,6 +7438,37 @@ static SDValue getV4X86ShuffleImm8ForMask(ArrayRef Mask, return DAG.getConstant(Imm, MVT::i8); } +/// \brief Try to emit a blend instruction for a shuffle using bit math. +/// +/// This is used as a fallback approach when first class blend instructions are +/// unavailable. Currently it is only suitable for integer vectors, but could +/// be generalized for floating point vectors if desirable. +static SDValue lowerVectorShuffleAsBitBlend(SDLoc DL, MVT VT, SDValue V1, + SDValue V2, ArrayRef Mask, + SelectionDAG &DAG) { + assert(VT.isInteger() && "Only supports integer vector types!"); + MVT EltVT = VT.getScalarType(); + int NumEltBits = EltVT.getSizeInBits(); + SDValue Zero = DAG.getConstant(0, EltVT); + SDValue AllOnes = DAG.getConstant(APInt::getAllOnesValue(NumEltBits), EltVT); + SmallVector MaskOps; + for (int i = 0, Size = Mask.size(); i < Size; ++i) { + if (Mask[i] != -1 && Mask[i] != i && Mask[i] != i + Size) + return SDValue(); // Shuffled input! + MaskOps.push_back(Mask[i] < Size ? AllOnes : Zero); + } + + SDValue V1Mask = DAG.getNode(ISD::BUILD_VECTOR, DL, VT, MaskOps); + V1 = DAG.getNode(ISD::AND, DL, VT, V1, V1Mask); + // We have to cast V2 around. + MVT MaskVT = MVT::getVectorVT(MVT::i64, VT.getSizeInBits() / 64); + V2 = DAG.getNode(ISD::BITCAST, DL, VT, + DAG.getNode(X86ISD::ANDNP, DL, MaskVT, + DAG.getNode(ISD::BITCAST, DL, MaskVT, V1Mask), + DAG.getNode(ISD::BITCAST, DL, MaskVT, V2))); + return DAG.getNode(ISD::OR, DL, VT, V1, V2); +} + /// \brief Try to emit a blend instruction for a shuffle. /// /// This doesn't do any checks for the availability of instructions for blending @@ -7448,7 +7479,6 @@ static SDValue lowerVectorShuffleAsBlend(SDLoc DL, MVT VT, SDValue V1, SDValue V2, ArrayRef Mask, const X86Subtarget *Subtarget, SelectionDAG &DAG) { - unsigned BlendMask = 0; for (int i = 0, Size = Mask.size(); i < Size; ++i) { if (Mask[i] >= Size) { @@ -9667,6 +9697,10 @@ static SDValue lowerV8I16VectorShuffle(SDValue Op, SDValue V1, SDValue V2, DL, MVT::v8i16, V1, V2, Mask, Subtarget, DAG)) return Rotate; + if (SDValue BitBlend = + lowerVectorShuffleAsBitBlend(DL, MVT::v8i16, V1, V2, Mask, DAG)) + return BitBlend; + if (NumV1Inputs + NumV2Inputs <= 4) return lowerV8I16BasicBlendVectorShuffle(DL, V1, V2, Mask, Subtarget, DAG); @@ -10034,6 +10068,10 @@ static SDValue lowerV16I8VectorShuffle(SDValue Op, SDValue V1, SDValue V2, Mask, Subtarget, DAG)) return V; + if (SDValue BitBlend = + lowerVectorShuffleAsBitBlend(DL, MVT::v16i8, V1, V2, Mask, DAG)) + return BitBlend; + // Check whether a compaction lowering can be done. This handles shuffles // which take every Nth element for some even N. See the helper function for // details. -- cgit v1.2.3