summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSimon Pilgrim <llvm-dev@redking.me.uk>2017-07-06 15:34:17 +0000
committerSimon Pilgrim <llvm-dev@redking.me.uk>2017-07-06 15:34:17 +0000
commit713600747e93574c1b3ec76d7df5b40e5d19b2e3 (patch)
tree10897ce5eaf306ab905736e39431b411d5bea70f
parent0c37b19331ca1f5b10463c9a1303b09446d482e8 (diff)
downloadbcm5719-llvm-713600747e93574c1b3ec76d7df5b40e5d19b2e3.tar.gz
bcm5719-llvm-713600747e93574c1b3ec76d7df5b40e5d19b2e3.zip
[X86][SSE4A] Add support for shuffle combining to INSERTQI.
llvm-svn: 307268
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp16
-rw-r--r--llvm/test/CodeGen/X86/vector-shuffle-combining-sse4a.ll17
2 files changed, 20 insertions, 13 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 60bc5a5c8e0..30353c85ccc 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -27714,6 +27714,22 @@ static bool combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
/*AddTo*/ true);
return true;
}
+
+ if (matchVectorShuffleAsINSERTQ(IntMaskVT, V1, V2, Mask, BitLen, BitIdx)) {
+ if (Depth == 1 && Root.getOpcode() == X86ISD::INSERTQI)
+ return false; // Nothing to do!
+ V1 = DAG.getBitcast(IntMaskVT, V1);
+ DCI.AddToWorklist(V1.getNode());
+ V2 = DAG.getBitcast(IntMaskVT, V2);
+ DCI.AddToWorklist(V2.getNode());
+ Res = DAG.getNode(X86ISD::INSERTQI, DL, IntMaskVT, V1, V2,
+ DAG.getConstant(BitLen, DL, MVT::i8),
+ DAG.getConstant(BitIdx, DL, MVT::i8));
+ DCI.AddToWorklist(Res.getNode());
+ DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Res),
+ /*AddTo*/ true);
+ return true;
+ }
}
// Don't try to re-form single instruction chains under any circumstances now
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-sse4a.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-sse4a.ll
index a63c77bad1e..af69a5ac228 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining-sse4a.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-sse4a.ll
@@ -75,19 +75,10 @@ define <8 x i16> @combine_insertqi_pshufb_8i16(<8 x i16> %a0, <8 x i16> %a1) {
}
define <16 x i8> @combine_pshufb_insertqi_pshufb(<16 x i8> %a0, <16 x i8> %a1) {
-; SSE-LABEL: combine_pshufb_insertqi_pshufb:
-; SSE: # BB#0:
-; SSE-NEXT: pshufb {{.*#+}} xmm0 = xmm0[7,6,5,4,3,2,1,0,u,u,u,u,u,u,u,u]
-; SSE-NEXT: insertq {{.*#+}} xmm0 = xmm0[0],xmm1[0,1],xmm0[3,4,5,6,7,u,u,u,u,u,u,u,u]
-; SSE-NEXT: pshufb {{.*#+}} xmm0 = xmm0[7,1,2,4,3,u,u,0,u,u,u,u,u,u,u,u]
-; SSE-NEXT: retq
-;
-; AVX-LABEL: combine_pshufb_insertqi_pshufb:
-; AVX: # BB#0:
-; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[7,6,5,4,3,2,1,0,u,u,u,u,u,u,u,u]
-; AVX-NEXT: insertq {{.*#+}} xmm0 = xmm0[0],xmm1[0,1],xmm0[3,4,5,6,7,u,u,u,u,u,u,u,u]
-; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[7,1,2,4,3,u,u,0,u,u,u,u,u,u,u,u]
-; AVX-NEXT: retq
+; ALL-LABEL: combine_pshufb_insertqi_pshufb:
+; ALL: # BB#0:
+; ALL-NEXT: insertq {{.*#+}} xmm0 = xmm0[0],xmm1[0,1],xmm0[3,4,5,6,7,u,u,u,u,u,u,u,u]
+; ALL-NEXT: retq
%1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a0, <16 x i8> <i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>)
%2 = shufflevector <16 x i8> %1, <16 x i8> %a1, <16 x i32> <i32 0, i32 16, i32 17, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
%3 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %2, <16 x i8> <i8 7, i8 1, i8 2, i8 4, i8 3, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>)
OpenPOWER on IntegriCloud