[X86][SSE] Combine UNPCKL with vector_shuffle into UNPCKH to save one instruction for sext from v16i8 to v16i16 and v8i16 to v8i32.

This patch enables combining an X86ISD::UNPCKL with a vector_shuffle that moves the upper half of a vector into the lower half, producing a single UNPCKH instruction. For example:

  t2: v16i8 = vector_shuffle<8,9,10,11,12,13,14,15,u,u,u,u,u,u,u,u> t1, undef:v16i8
  t3: v16i8 = X86ISD::UNPCKL undef:v16i8, t2

will be combined to:

  t3: v16i8 = X86ISD::UNPCKH undef:v16i8, t1

Differential revision: http://reviews.llvm.org/D14399

llvm-svn: 253067
author: Cong Hou <congh@google.com> 2015-11-13 19:47:43 +0000
committer: Cong Hou <congh@google.com> 2015-11-13 19:47:43 +0000
commit: ef4074bac29bcc69451d13835aa0bf4cf0f6c1a2 (patch)
tree: 5264097280a42ad51fd190d68aa119efccf5dc46 /llvm/lib
parent: 8759cd393004ab675331af96dddb6ed2d11c30ac (diff)
download: bcm5719-llvm-ef4074bac29bcc69451d13835aa0bf4cf0f6c1a2.tar.gz
bcm5719-llvm-ef4074bac29bcc69451d13835aa0bf4cf0f6c1a2.zip
1 files changed, 35 insertions, 0 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 93a4dc291f5..aef70af08a4 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -22860,6 +22860,41 @@ static SDValue PerformTargetShuffleCombine(SDValue N, SelectionDAG &DAG,
     Mask = getPSHUFShuffleMask(N);
     assert(Mask.size() == 4);
     break;
+  case X86ISD::UNPCKL: {
+    // Combine X86ISD::UNPCKL and ISD::VECTOR_SHUFFLE into X86ISD::UNPCKH, in
+    // which X86ISD::UNPCKL has a ISD::UNDEF operand, and ISD::VECTOR_SHUFFLE
+    // moves upper half elements into the lower half part. For example:
+    //
+    // t2: v16i8 = vector_shuffle<8,9,10,11,12,13,14,15,u,u,u,u,u,u,u,u> t1,
+    //     undef:v16i8
+    // t3: v16i8 = X86ISD::UNPCKL undef:v16i8, t2
+    //
+    // will be combined to:
+    //
+    // t3: v16i8 = X86ISD::UNPCKH undef:v16i8, t1
+
+    // This is only for 128-bit vectors. From SSE4.1 onward this combine may not
+    // happen due to advanced instructions.
+    if (!VT.is128BitVector())
+      return SDValue();
+
+    auto Op0 = N.getOperand(0);
+    auto Op1 = N.getOperand(1);
+    if (Op0.getOpcode() == ISD::UNDEF &&
+        Op1.getNode()->getOpcode() == ISD::VECTOR_SHUFFLE) {
+      ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op1.getNode())->getMask();
+
+      unsigned NumElts = VT.getVectorNumElements();
+      SmallVector<int, 8> ExpectedMask(NumElts, -1);
+      std::iota(ExpectedMask.begin(), ExpectedMask.begin() + NumElts / 2,
+                NumElts / 2);
+
+      auto ShufOp = Op1.getOperand(0);
+      if (isShuffleEquivalent(Op1, ShufOp, Mask, ExpectedMask))
+        return DAG.getNode(X86ISD::UNPCKH, DL, VT, N.getOperand(0), ShufOp);
+    }
+    return SDValue();
+  }
   default:
     return SDValue();
   }
author	Cong Hou <congh@google.com>	2015-11-13 19:47:43 +0000
committer	Cong Hou <congh@google.com>	2015-11-13 19:47:43 +0000
commit	ef4074bac29bcc69451d13835aa0bf4cf0f6c1a2 (patch)
tree	5264097280a42ad51fd190d68aa119efccf5dc46 /llvm/lib
parent	8759cd393004ab675331af96dddb6ed2d11c30ac (diff)
download	bcm5719-llvm-ef4074bac29bcc69451d13835aa0bf4cf0f6c1a2.tar.gz bcm5719-llvm-ef4074bac29bcc69451d13835aa0bf4cf0f6c1a2.zip