[x86] Add a generic unpack-targeted lowering technique. This can be used

to generically lower blends and is particularly nice because it is available from SSE2 onward. This removes a lot of the remaining domain crossing blends in SSE2 code. I'm hoping to replace some of the "interleaved" lowering hacks with something closer to this which should be more principled. First, this needs to learn how to detect and use other interleavings besides that of the natural type provided. That will be a follow-up patch though. llvm-svn: 229378
author: Chandler Carruth <chandlerc@gmail.com> 2015-02-16 12:28:18 +0000
committer: Chandler Carruth <chandlerc@gmail.com> 2015-02-16 12:28:18 +0000
commit: 1e57e2deb84aa812e15a09bdced71682d814e538 (patch)
tree: a19358c851f4d06b86ad756d341606f035a68ac4 /llvm/lib
parent: 50dc783d75247022bdba75eb8408cdff2149cfae (diff)
download: bcm5719-llvm-1e57e2deb84aa812e15a09bdced71682d814e538.tar.gz
bcm5719-llvm-1e57e2deb84aa812e15a09bdced71682d814e538.zip
1 files changed, 54 insertions, 0 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 067ad6dcc5b..00095f04c67 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -8463,6 +8463,50 @@ static SDValue lowerVectorShuffleAsInsertPS(SDValue Op, SDValue V1, SDValue V2,
                      DAG.getConstant(InsertPSMask, MVT::i8));
 }
 
+/// \brief Try to lower a shuffle as a permute of the inputs followed by an
+/// UNPCK instruction.
+///
+/// This specifically targets cases where we end up with alternating between
+/// the two inputs, and so can permute them into something that feeds a single
+/// UNPCK instruction.
+///
+/// Mask elements below Mask.size() select from V1 and the remaining
+/// non-negative elements select from V2; negative elements are skipped and
+/// leave the corresponding permute slot undefined (-1). Returns an empty
+/// SDValue when some even slot is not fed by V1 or some odd slot is not fed by
+/// V2.
+static SDValue lowerVectorShuffleAsUnpack(MVT VT, SDLoc DL, SDValue V1,
+                                          SDValue V2, ArrayRef<int> Mask,
+                                          SelectionDAG &DAG) {
+  assert(!isSingleInputShuffleMask(Mask) &&
+         "This routine should only be used when blending two inputs.");
+  assert(Mask.size() >= 2 && "Single element masks are invalid.");
+
+  int Size = Mask.size();
+
+  // Count how many mask elements read from the low and from the high half of
+  // their input. The high half starts at index Size / 2 inclusive, so an
+  // element at exactly Size / 2 has to be counted as a high input.
+  int NumLoInputs = std::count_if(Mask.begin(), Mask.end(), [Size](int M) {
+    return M >= 0 && M % Size < Size / 2;
+  });
+  int NumHiInputs = std::count_if(Mask.begin(), Mask.end(), [Size](int M) {
+    return M >= 0 && M % Size >= Size / 2;
+  });
+
+  // Unpack the half that already supplies the most elements, preferring the
+  // low half on a tie.
+  bool UnpackLo = NumLoInputs >= NumHiInputs;
+
+  // Per-input permute masks; slots that are never written stay undefined.
+  SmallVector<int, 32> V1Mask(Mask.size(), -1);
+  SmallVector<int, 32> V2Mask(Mask.size(), -1);
+  for (int i = 0; i < Size; ++i) {
+    if (Mask[i] < 0)
+      continue;
+
+    // We only handle the case where V1 feeds even mask slots and V2 feeds odd
+    // mask slots. We rely on canonicalization to ensure this is the case.
+    if ((i % 2 == 0) != (Mask[i] < Size))
+      return SDValue();
+
+    // Result slot i is fed from position i / 2 of the chosen half of its
+    // input, so move the requested source element to that position.
+    SmallVectorImpl<int> &VMask = (i % 2 == 0) ? V1Mask : V2Mask;
+    VMask[i / 2 + (UnpackLo ? 0 : Size / 2)] = Mask[i] % Size;
+  }
+
+  V1 = DAG.getVectorShuffle(VT, DL, V1, DAG.getUNDEF(VT), V1Mask);
+  V2 = DAG.getVectorShuffle(VT, DL, V2, DAG.getUNDEF(VT), V2Mask);
+  return DAG.getNode(UnpackLo ? X86ISD::UNPCKL : X86ISD::UNPCKH, DL, VT, V1,
+                     V2);
+}
+
 /// \brief Handle lowering of 2-lane 64-bit floating point shuffles.
 ///
 /// This is the basis function for the 2-lane 64-bit shuffles as we have full
@@ -8921,6 +8965,11 @@ static SDValue lowerV4I32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
     return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v4i32, V1, V2,
                                                       Mask, DAG);
 
+  // Try to lower by permuting the inputs into an unpack instruction.
+  if (SDValue Unpack =
+          lowerVectorShuffleAsUnpack(MVT::v4i32, DL, V1, V2, Mask, DAG))
+    return Unpack;
+
   // We implement this with SHUFPS because it can blend from two vectors.
   // Because we're going to eventually use SHUFPS, we use SHUFPS even to build
   // up the inputs, bypassing domain shift penalties that we would encur if we
@@ -9670,6 +9719,11 @@ static SDValue lowerV8I16VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
     return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v8i16, V1, V2,
                                                       Mask, DAG);
 
+  // Try to lower by permuting the inputs into an unpack instruction.
+  if (SDValue Unpack =
+          lowerVectorShuffleAsUnpack(MVT::v8i16, DL, V1, V2, Mask, DAG))
+    return Unpack;
+
   int LoBlendMask[8] = {-1, -1, -1, -1, -1, -1, -1, -1};
   int HiBlendMask[8] = {-1, -1, -1, -1, -1, -1, -1, -1};
author	Chandler Carruth <chandlerc@gmail.com>	2015-02-16 12:28:18 +0000
committer	Chandler Carruth <chandlerc@gmail.com>	2015-02-16 12:28:18 +0000
commit	1e57e2deb84aa812e15a09bdced71682d814e538 (patch)
tree	a19358c851f4d06b86ad756d341606f035a68ac4 /llvm/lib
parent	50dc783d75247022bdba75eb8408cdff2149cfae (diff)
download	bcm5719-llvm-1e57e2deb84aa812e15a09bdced71682d814e538.tar.gz bcm5719-llvm-1e57e2deb84aa812e15a09bdced71682d814e538.zip