[x86] Initial improvements to the new shuffle lowering for v16i8

shuffles specifically for cases where a small subset of the elements in the input vector are actually used. This is specifically targeted at improving the shuffles generated for trunc operations, but also helps out splat-like operations. There is still some really low-hanging fruit here that I want to address, but this is a huge step in the right direction. llvm-svn: 212680
author: Chandler Carruth <chandlerc@gmail.com> 2014-07-10 04:34:06 +0000
committer: Chandler Carruth <chandlerc@gmail.com> 2014-07-10 04:34:06 +0000
commit: 7d2ffb549285b814da284cfced222c827bffa90f (patch)
tree: f1fd9dc3ad057ad0b3eb4b56b38cf8a990f41142 /llvm/lib/Target/X86/X86ISelLowering.cpp
parent: 05b9ebf2f96b7d6f4d8110195d4d728e892e84c8 (diff)
download: bcm5719-llvm-7d2ffb549285b814da284cfced222c827bffa90f.tar.gz
bcm5719-llvm-7d2ffb549285b814da284cfced222c827bffa90f.zip
1 files changed, 36 insertions, 10 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 140b2a830ce..57b21cce12c 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -7815,16 +7815,42 @@ static SDValue lowerV16I8VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
 
   SDValue Zero = getZeroVector(MVT::v8i16, Subtarget, DAG, DL);
 
-  auto buildLoAndHiV8s =
-      [&](SDValue V, ArrayRef<int> LoBlendMask, ArrayRef<int> HiBlendMask) {
-    SDValue LoV =
-        DAG.getNode(ISD::BITCAST, DL, MVT::v8i16,
-                    DAG.getNode(X86ISD::UNPCKL, DL, MVT::v16i8, V, Zero));
-    SDValue HiV =
-        DAG.getNode(ISD::BITCAST, DL, MVT::v8i16,
-                    DAG.getNode(X86ISD::UNPCKH, DL, MVT::v16i8, V, Zero));
-    SDValue BlendedLo = DAG.getVectorShuffle(MVT::v8i16, DL, LoV, HiV, LoBlendMask);
-    SDValue BlendedHi = DAG.getVectorShuffle(MVT::v8i16, DL, LoV, HiV, HiBlendMask);
+  auto buildLoAndHiV8s = [&](SDValue V, MutableArrayRef<int> LoBlendMask,
+                             MutableArrayRef<int> HiBlendMask) {
+    SDValue V1, V2;
+    // Check if any of the odd lanes in the v16i8 are used. If not, we can mask
+    // them out and avoid using UNPCK{L,H} to extract the elements of V as
+    // i16s.
+    if (std::none_of(LoBlendMask.begin(), LoBlendMask.end(),
+                     [](int M) { return M >= 0 && M % 2 == 1; }) &&
+        std::none_of(HiBlendMask.begin(), HiBlendMask.end(),
+                     [](int M) { return M >= 0 && M % 2 == 1; })) {
+      // Use a mask to drop the high bytes.
+      V1 = DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, V);
+      V1 = DAG.getNode(ISD::AND, DL, MVT::v8i16, V1,
+                       DAG.getConstant(0x00FF, MVT::v8i16));
+
+      // This will be a single vector shuffle instead of a blend so nuke V2.
+      V2 = DAG.getUNDEF(MVT::v8i16);
+
+      // Squash the masks to point directly into V1.
+      for (int &M : LoBlendMask)
+        if (M >= 0)
+          M /= 2;
+      for (int &M : HiBlendMask)
+        if (M >= 0)
+          M /= 2;
+    } else {
+      // Otherwise just unpack the low half of V into V1 and the high half into
+      // V2 so that we can blend them as i16s.
+      V1 = DAG.getNode(ISD::BITCAST, DL, MVT::v8i16,
+                       DAG.getNode(X86ISD::UNPCKL, DL, MVT::v16i8, V, Zero));
+      V2 = DAG.getNode(ISD::BITCAST, DL, MVT::v8i16,
+                       DAG.getNode(X86ISD::UNPCKH, DL, MVT::v16i8, V, Zero));
+    }
+
+    SDValue BlendedLo = DAG.getVectorShuffle(MVT::v8i16, DL, V1, V2, LoBlendMask);
+    SDValue BlendedHi = DAG.getVectorShuffle(MVT::v8i16, DL, V1, V2, HiBlendMask);
     return std::make_pair(BlendedLo, BlendedHi);
   };
   SDValue V1Lo, V1Hi, V2Lo, V2Hi;
author	Chandler Carruth <chandlerc@gmail.com>	2014-07-10 04:34:06 +0000
committer	Chandler Carruth <chandlerc@gmail.com>	2014-07-10 04:34:06 +0000
commit	7d2ffb549285b814da284cfced222c827bffa90f (patch)
tree	f1fd9dc3ad057ad0b3eb4b56b38cf8a990f41142 /llvm/lib/Target/X86/X86ISelLowering.cpp
parent	05b9ebf2f96b7d6f4d8110195d4d728e892e84c8 (diff)
download	bcm5719-llvm-7d2ffb549285b814da284cfced222c827bffa90f.tar.gz bcm5719-llvm-7d2ffb549285b814da284cfced222c827bffa90f.zip