[X86][SSE4A] Add support for shuffle combining to EXTRQ.

llvm-svn: 307254
author: Simon Pilgrim <llvm-dev@redking.me.uk> 2017-07-06 12:22:58 +0000
committer: Simon Pilgrim <llvm-dev@redking.me.uk> 2017-07-06 12:22:58 +0000
commit: cc0f785dcab367ad737e99ff12da5b79846b48eb (patch)
tree: 801365fb12d052112b8c9705f761c8254f29be56
parent: 21fb07b7157311fd2625805d83a3b23689e554fe (diff)
download: bcm5719-llvm-cc0f785dcab367ad737e99ff12da5b79846b48eb.tar.gz
bcm5719-llvm-cc0f785dcab367ad737e99ff12da5b79846b48eb.zip
2 files changed, 40 insertions, 25 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index f1269822463..02c8ec4d8f7 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -9364,7 +9364,7 @@ static bool matchVectorShuffleAsEXTRQ(MVT VT, SDValue &V1, SDValue &V2,
   int Idx = -1;
   for (int i = 0; i != Len; ++i) {
     int M = Mask[i];
-    if (M < 0)
+    if (M == SM_SentinelUndef)
       continue;
     SDValue &V = (M < Size ? V1 : V2);
     M = M % Size;
@@ -27696,6 +27696,33 @@ static bool combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
     return true;
   }
 
+  // Annoyingly, SSE4A instructions don't map into the above match helpers.
+  if (Subtarget.hasSSE4A() && AllowIntDomain && RootSizeInBits == 128) {
+    ShuffleVT = MVT::getIntegerVT(MaskEltSizeInBits);
+    ShuffleVT = MVT::getVectorVT(ShuffleVT, NumMaskElts);
+
+    APInt Zeroable(NumMaskElts, 0);
+    for (unsigned i = 0; i != NumMaskElts; ++i)
+      if (isUndefOrZero(Mask[i]))
+        Zeroable.setBit(i);
+
+    uint64_t BitLen, BitIdx;
+    if (matchVectorShuffleAsEXTRQ(ShuffleVT, V1, V2, Mask, BitLen, BitIdx,
+                                  Zeroable)) {
+      if (Depth == 1 && Root.getOpcode() == X86ISD::EXTRQI)
+        return false; // Nothing to do!
+      V1 = DAG.getBitcast(ShuffleVT, V1);
+      DCI.AddToWorklist(V1.getNode());
+      Res = DAG.getNode(X86ISD::EXTRQI, DL, ShuffleVT, V1,
+                        DAG.getConstant(BitLen, DL, MVT::i8),
+                        DAG.getConstant(BitIdx, DL, MVT::i8));
+      DCI.AddToWorklist(Res.getNode());
+      DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Res),
+                    /*AddTo*/ true);
+      return true;
+    }
+  }
+
   // Don't try to re-form single instruction chains under any circumstances now
   // that we've done encoding canonicalization for them.
   if (Depth < 2)
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-sse4a.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-sse4a.ll
index 1669317f4aa..9d9c29121f8 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining-sse4a.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-sse4a.ll
@@ -9,30 +9,20 @@
 declare <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8>, <16 x i8>)
 
 define <16 x i8> @combine_extrqi_pshufb_16i8(<16 x i8> %a0) {
-; SSE-LABEL: combine_extrqi_pshufb_16i8:
-; SSE:       # BB#0:
-; SSE-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[1,2],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
-; SSE-NEXT:    retq
-;
-; AVX-LABEL: combine_extrqi_pshufb_16i8:
-; AVX:       # BB#0:
-; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[1,2],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
-; AVX-NEXT:    retq
+; ALL-LABEL: combine_extrqi_pshufb_16i8:
+; ALL:       # BB#0:
+; ALL-NEXT:    extrq {{.*#+}} xmm0 = xmm0[1,2],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
+; ALL-NEXT:    retq
   %1 = shufflevector <16 x i8> %a0, <16 x i8> zeroinitializer, <16 x i32> <i32 1, i32 2, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
   %2 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %1, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 255, i8 255, i8 255, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>)
   ret <16 x i8> %2
 }
 
 define <8 x i16> @combine_extrqi_pshufb_8i16(<8 x i16> %a0) {
-; SSE-LABEL: combine_extrqi_pshufb_8i16:
-; SSE:       # BB#0:
-; SSE-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[2,3],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
-; SSE-NEXT:    retq
-;
-; AVX-LABEL: combine_extrqi_pshufb_8i16:
-; AVX:       # BB#0:
-; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[2,3],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
-; AVX-NEXT:    retq
+; ALL-LABEL: combine_extrqi_pshufb_8i16:
+; ALL:       # BB#0:
+; ALL-NEXT:    extrq {{.*#+}} xmm0 = xmm0[2,3],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
+; ALL-NEXT:    retq
   %1 = shufflevector <8 x i16> %a0, <8 x i16> zeroinitializer, <8 x i32> <i32 1, i32 2, i32 8, i32 8, i32 undef, i32 undef, i32 undef, i32 undef>
   %2 = bitcast <8 x i16> %1 to <16 x i8>
   %3 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %2, <16 x i8> <i8 0, i8 1, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>)
@@ -43,9 +33,8 @@ define <8 x i16> @combine_extrqi_pshufb_8i16(<8 x i16> %a0) {
 define <16 x i8> @combine_insertqi_pshufb_16i8(<16 x i8> %a0, <16 x i8> %a1) {
 ; SSSE3-LABEL: combine_insertqi_pshufb_16i8:
 ; SSSE3:       # BB#0:
-; SSSE3-NEXT:    movl $65535, %eax # imm = 0xFFFF
-; SSSE3-NEXT:    movd %eax, %xmm0
-; SSSE3-NEXT:    pand %xmm1, %xmm0
+; SSSE3-NEXT:    extrq {{.*#+}} xmm1 = xmm1[0,1],zero,zero,zero,zero,zero,zero,xmm1[u,u,u,u,u,u,u,u]
+; SSSE3-NEXT:    movdqa %xmm1, %xmm0
 ; SSSE3-NEXT:    retq
 ;
 ; SSE42-LABEL: combine_insertqi_pshufb_16i8:
@@ -65,9 +54,8 @@ define <16 x i8> @combine_insertqi_pshufb_16i8(<16 x i8> %a0, <16 x i8> %a1) {
 define <8 x i16> @combine_insertqi_pshufb_8i16(<8 x i16> %a0, <8 x i16> %a1) {
 ; SSSE3-LABEL: combine_insertqi_pshufb_8i16:
 ; SSSE3:       # BB#0:
-; SSSE3-NEXT:    movl $65535, %eax # imm = 0xFFFF
-; SSSE3-NEXT:    movd %eax, %xmm0
-; SSSE3-NEXT:    pand %xmm1, %xmm0
+; SSSE3-NEXT:    extrq {{.*#+}} xmm1 = xmm1[0,1],zero,zero,zero,zero,zero,zero,xmm1[u,u,u,u,u,u,u,u]
+; SSSE3-NEXT:    movdqa %xmm1, %xmm0
 ; SSSE3-NEXT:    retq
 ;
 ; SSE42-LABEL: combine_insertqi_pshufb_8i16:
author	Simon Pilgrim <llvm-dev@redking.me.uk>	2017-07-06 12:22:58 +0000
committer	Simon Pilgrim <llvm-dev@redking.me.uk>	2017-07-06 12:22:58 +0000
commit	cc0f785dcab367ad737e99ff12da5b79846b48eb (patch)
tree	801365fb12d052112b8c9705f761c8254f29be56
parent	21fb07b7157311fd2625805d83a3b23689e554fe (diff)
download	bcm5719-llvm-cc0f785dcab367ad737e99ff12da5b79846b48eb.tar.gz bcm5719-llvm-cc0f785dcab367ad737e99ff12da5b79846b48eb.zip