[X86][XOP] Add support for combining target shuffles to VPPERM

llvm-svn: 278114
author: Simon Pilgrim <llvm-dev@redking.me.uk> 2016-08-09 10:56:29 +0000
committer: Simon Pilgrim <llvm-dev@redking.me.uk> 2016-08-09 10:56:29 +0000
commit: aae7d4a1b6726a91fcd6441f3bbd9ca49461399f (patch)
tree: ae418b007d9b1ac024d55d52db83b189b89b6f91 /llvm
parent: 3a25d84a510c7aec668138960394c3d300259cae (diff)
download: bcm5719-llvm-aae7d4a1b6726a91fcd6441f3bbd9ca49461399f.tar.gz
bcm5719-llvm-aae7d4a1b6726a91fcd6441f3bbd9ca49461399f.zip
2 files changed, 51 insertions, 0 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 8a7dc3555b4..77fa9ff26f2 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -3844,6 +3844,7 @@ static bool isTargetShuffleVariableMask(unsigned Opcode) {
   default: return false;
   case X86ISD::PSHUFB:
   case X86ISD::VPERMILPV:
+  case X86ISD::VPPERM:
     return true;
   }
 }
@@ -25325,6 +25326,44 @@ static bool combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
     return true;
   }
 
+  // With XOP, if we have a 128-bit binary input shuffle we can always combine
+  // to VPPERM. We match the depth requirement of PSHUFB - VPPERM is never
+  // slower than PSHUFB on targets that support both.
+  if ((Depth >= 3 || HasVariableMask) && RootVT.is128BitVector() &&
+      Subtarget.hasXOP()) {
+    // VPPERM Mask Operation
+    // Bits[4:0] - Byte Index (0 - 31)
+    // Bits[7:5] - Permute Operation (0 - Source byte, 4 - ZERO)
+    SmallVector<SDValue, 16> VPPERMMask;
+    int NumBytes = 16;
+    int Ratio = NumBytes / NumMaskElts;
+    for (int i = 0; i < NumBytes; ++i) {
+      int M = Mask[i / Ratio];
+      if (M == SM_SentinelUndef) {
+        VPPERMMask.push_back(DAG.getUNDEF(MVT::i8));
+        continue;
+      }
+      if (M == SM_SentinelZero) {
+        VPPERMMask.push_back(DAG.getConstant(128, DL, MVT::i8));
+        continue;
+      }
+      M = Ratio * M + i % Ratio;
+      VPPERMMask.push_back(DAG.getConstant(M, DL, MVT::i8));
+    }
+    MVT ByteVT = MVT::v16i8;
+    V1 = DAG.getBitcast(ByteVT, V1);
+    DCI.AddToWorklist(V1.getNode());
+    V2 = DAG.getBitcast(ByteVT, V2);
+    DCI.AddToWorklist(V2.getNode());
+    SDValue VPPERMMaskOp = DAG.getBuildVector(ByteVT, DL, VPPERMMask);
+    DCI.AddToWorklist(VPPERMMaskOp.getNode());
+    Res = DAG.getNode(X86ISD::VPPERM, DL, ByteVT, V1, V2, VPPERMMaskOp);
+    DCI.AddToWorklist(Res.getNode());
+    DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Res),
+                  /*AddTo*/ true);
+    return true;
+  }
+
   // Failed to find any combines.
   return false;
 }
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-xop.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-xop.ll
index 76226065fd7..aefe3d03a19 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining-xop.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-xop.ll
@@ -131,3 +131,15 @@ define <16 x i8> @combine_vpperm_as_unpckhwd(<16 x i8> %a0, <16 x i8> %a1) {
   %res0 = call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> <i8 8, i8 24, i8 9, i8 25, i8 10, i8 26, i8 11, i8 27, i8 12, i8 28, i8 13, i8 29, i8 14, i8 30, i8 15, i8 31>)
   ret <16 x i8> %res0
 }
+
+define <4 x i32> @combine_vpperm_10zz32BA(<4 x i32> %a0, <4 x i32> %a1) {
+; CHECK-LABEL: combine_vpperm_10zz32BA:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    vpperm {{.*#+}} xmm0 = xmm0[2,3,0,1],zero,zero,zero,zero,xmm0[6,7,4,5],xmm1[6,7,4,5]
+; CHECK-NEXT:    retq
+  %res0 = shufflevector <4 x i32> %a0, <4 x i32> %a1, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+  %res1 = bitcast <4 x i32> %res0 to <16 x i8>
+  %res2 = call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %res1, <16 x i8> undef, <16 x i8> <i8 2, i8 3, i8 0, i8 1, i8 128, i8 128, i8 128, i8 128, i8 10, i8 11, i8 8, i8 9, i8 14, i8 15, i8 12, i8 13>)
+  %res3 = bitcast <16 x i8> %res2 to <4 x i32>
+  ret <4 x i32> %res3
+}
author	Simon Pilgrim <llvm-dev@redking.me.uk>	2016-08-09 10:56:29 +0000
committer	Simon Pilgrim <llvm-dev@redking.me.uk>	2016-08-09 10:56:29 +0000
commit	aae7d4a1b6726a91fcd6441f3bbd9ca49461399f (patch)
tree	ae418b007d9b1ac024d55d52db83b189b89b6f91 /llvm
parent	3a25d84a510c7aec668138960394c3d300259cae (diff)
download	bcm5719-llvm-aae7d4a1b6726a91fcd6441f3bbd9ca49461399f.tar.gz bcm5719-llvm-aae7d4a1b6726a91fcd6441f3bbd9ca49461399f.zip