diff options
| author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2017-07-06 15:34:17 +0000 |
|---|---|---|
| committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2017-07-06 15:34:17 +0000 |
| commit | 713600747e93574c1b3ec76d7df5b40e5d19b2e3 (patch) | |
| tree | 10897ce5eaf306ab905736e39431b411d5bea70f | |
| parent | 0c37b19331ca1f5b10463c9a1303b09446d482e8 (diff) | |
| download | bcm5719-llvm-713600747e93574c1b3ec76d7df5b40e5d19b2e3.tar.gz bcm5719-llvm-713600747e93574c1b3ec76d7df5b40e5d19b2e3.zip | |
[X86][SSE4A] Add support for shuffle combining to INSERTQI.
llvm-svn: 307268
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 16 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/vector-shuffle-combining-sse4a.ll | 17 |
2 files changed, 20 insertions, 13 deletions
```diff
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 60bc5a5c8e0..30353c85ccc 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -27714,6 +27714,22 @@ static bool combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
                     /*AddTo*/ true);
       return true;
     }
+
+    if (matchVectorShuffleAsINSERTQ(IntMaskVT, V1, V2, Mask, BitLen, BitIdx)) {
+      if (Depth == 1 && Root.getOpcode() == X86ISD::INSERTQI)
+        return false; // Nothing to do!
+      V1 = DAG.getBitcast(IntMaskVT, V1);
+      DCI.AddToWorklist(V1.getNode());
+      V2 = DAG.getBitcast(IntMaskVT, V2);
+      DCI.AddToWorklist(V2.getNode());
+      Res = DAG.getNode(X86ISD::INSERTQI, DL, IntMaskVT, V1, V2,
+                        DAG.getConstant(BitLen, DL, MVT::i8),
+                        DAG.getConstant(BitIdx, DL, MVT::i8));
+      DCI.AddToWorklist(Res.getNode());
+      DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Res),
+                    /*AddTo*/ true);
+      return true;
+    }
   }
 
   // Don't try to re-form single instruction chains under any circumstances now
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-sse4a.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-sse4a.ll
index a63c77bad1e..af69a5ac228 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining-sse4a.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-sse4a.ll
@@ -75,19 +75,10 @@ define <8 x i16> @combine_insertqi_pshufb_8i16(<8 x i16> %a0, <8 x i16> %a1) {
 }
 
 define <16 x i8> @combine_pshufb_insertqi_pshufb(<16 x i8> %a0, <16 x i8> %a1) {
-; SSE-LABEL: combine_pshufb_insertqi_pshufb:
-; SSE: # BB#0:
-; SSE-NEXT: pshufb {{.*#+}} xmm0 = xmm0[7,6,5,4,3,2,1,0,u,u,u,u,u,u,u,u]
-; SSE-NEXT: insertq {{.*#+}} xmm0 = xmm0[0],xmm1[0,1],xmm0[3,4,5,6,7,u,u,u,u,u,u,u,u]
-; SSE-NEXT: pshufb {{.*#+}} xmm0 = xmm0[7,1,2,4,3,u,u,0,u,u,u,u,u,u,u,u]
-; SSE-NEXT: retq
-;
-; AVX-LABEL: combine_pshufb_insertqi_pshufb:
-; AVX: # BB#0:
-; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[7,6,5,4,3,2,1,0,u,u,u,u,u,u,u,u]
-; AVX-NEXT: insertq {{.*#+}} xmm0 = xmm0[0],xmm1[0,1],xmm0[3,4,5,6,7,u,u,u,u,u,u,u,u]
-; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[7,1,2,4,3,u,u,0,u,u,u,u,u,u,u,u]
-; AVX-NEXT: retq
+; ALL-LABEL: combine_pshufb_insertqi_pshufb:
+; ALL: # BB#0:
+; ALL-NEXT: insertq {{.*#+}} xmm0 = xmm0[0],xmm1[0,1],xmm0[3,4,5,6,7,u,u,u,u,u,u,u,u]
+; ALL-NEXT: retq
   %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a0, <16 x i8> <i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>)
   %2 = shufflevector <16 x i8> %1, <16 x i8> %a1, <16 x i32> <i32 0, i32 16, i32 17, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
   %3 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %2, <16 x i8> <i8 7, i8 1, i8 2, i8 4, i8 3, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>)
```

