summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
authorChandler Carruth <chandlerc@gmail.com>2015-02-19 10:46:52 +0000
committerChandler Carruth <chandlerc@gmail.com>2015-02-19 10:46:52 +0000
commitbcb6c5f62d375e45927a0b6e1177f45286dbcfab (patch)
tree9fde5c65d8bfeaba8198dd9780f02bb74903c697 /llvm/lib
parentb89464a9b637bd57daa03507d14a7824ecc63e8e (diff)
downloadbcm5719-llvm-bcb6c5f62d375e45927a0b6e1177f45286dbcfab.tar.gz
bcm5719-llvm-bcb6c5f62d375e45927a0b6e1177f45286dbcfab.zip
[x86] Add support for bit-wise blending and use it in the v8 and v16
lowering paths. I'm going to be leveraging this to simplify a lot of the overly complex lowering of v8 and v16 shuffles in pre-SSSE3 modes. Sadly, this isn't profitable on v4i32 and v2i64. There, the float and double blending instructions for pre-SSE4.1 are actually pretty good, and we can't beat them with bit math. And once SSE4.1 comes around we have direct blending support and this ceases to be relevant. Also, some of the test cases look odd because the domain fixer canonicalizes these to floating point domain. That's OK, it'll use the integer domain when it matters and some day I may be able to update enough of LLVM to canonicalize the other way. This restores almost all of the regressions from teaching x86's vselect lowering to always use vector shuffle lowering for blends. The remaining problems are because the v16 lowering path is still doing crazy things. I'll be re-arranging that strategy in more detail in subsequent commits to finish recovering the performance here. llvm-svn: 229836
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp40
1 files changed, 39 insertions, 1 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index d00b09e71d1..a07718b9bd8 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -7438,6 +7438,37 @@ static SDValue getV4X86ShuffleImm8ForMask(ArrayRef<int> Mask,
return DAG.getConstant(Imm, MVT::i8);
}
+/// \brief Try to emit a blend instruction for a shuffle using bit math.
+///
+/// This is used as a fallback approach when first class blend instructions are
+/// unavailable. Currently it is only suitable for integer vectors, but could
+/// be generalized for floating point vectors if desirable.
+static SDValue lowerVectorShuffleAsBitBlend(SDLoc DL, MVT VT, SDValue V1,
+ SDValue V2, ArrayRef<int> Mask,
+ SelectionDAG &DAG) {
+ assert(VT.isInteger() && "Only supports integer vector types!");
+ MVT EltVT = VT.getScalarType();
+ int NumEltBits = EltVT.getSizeInBits();
+ SDValue Zero = DAG.getConstant(0, EltVT);
+ SDValue AllOnes = DAG.getConstant(APInt::getAllOnesValue(NumEltBits), EltVT);
+ SmallVector<SDValue, 16> MaskOps;
+ for (int i = 0, Size = Mask.size(); i < Size; ++i) {
+ if (Mask[i] != -1 && Mask[i] != i && Mask[i] != i + Size)
+ return SDValue(); // Shuffled input!
+ MaskOps.push_back(Mask[i] < Size ? AllOnes : Zero);
+ }
+
+ SDValue V1Mask = DAG.getNode(ISD::BUILD_VECTOR, DL, VT, MaskOps);
+ V1 = DAG.getNode(ISD::AND, DL, VT, V1, V1Mask);
+ // We have to cast V2 around.
+ MVT MaskVT = MVT::getVectorVT(MVT::i64, VT.getSizeInBits() / 64);
+ V2 = DAG.getNode(ISD::BITCAST, DL, VT,
+ DAG.getNode(X86ISD::ANDNP, DL, MaskVT,
+ DAG.getNode(ISD::BITCAST, DL, MaskVT, V1Mask),
+ DAG.getNode(ISD::BITCAST, DL, MaskVT, V2)));
+ return DAG.getNode(ISD::OR, DL, VT, V1, V2);
+}
+
/// \brief Try to emit a blend instruction for a shuffle.
///
/// This doesn't do any checks for the availability of instructions for blending
@@ -7448,7 +7479,6 @@ static SDValue lowerVectorShuffleAsBlend(SDLoc DL, MVT VT, SDValue V1,
SDValue V2, ArrayRef<int> Mask,
const X86Subtarget *Subtarget,
SelectionDAG &DAG) {
-
unsigned BlendMask = 0;
for (int i = 0, Size = Mask.size(); i < Size; ++i) {
if (Mask[i] >= Size) {
@@ -9667,6 +9697,10 @@ static SDValue lowerV8I16VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
DL, MVT::v8i16, V1, V2, Mask, Subtarget, DAG))
return Rotate;
+ if (SDValue BitBlend =
+ lowerVectorShuffleAsBitBlend(DL, MVT::v8i16, V1, V2, Mask, DAG))
+ return BitBlend;
+
if (NumV1Inputs + NumV2Inputs <= 4)
return lowerV8I16BasicBlendVectorShuffle(DL, V1, V2, Mask, Subtarget, DAG);
@@ -10034,6 +10068,10 @@ static SDValue lowerV16I8VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
Mask, Subtarget, DAG))
return V;
+ if (SDValue BitBlend =
+ lowerVectorShuffleAsBitBlend(DL, MVT::v16i8, V1, V2, Mask, DAG))
+ return BitBlend;
+
// Check whether a compaction lowering can be done. This handles shuffles
// which take every Nth element for some even N. See the helper function for
// details.
OpenPOWER on IntegriCloud