 llvm/lib/Target/X86/X86ISelLowering.cpp                 |  3 +++
 llvm/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll | 18 ++++++------------
 2 files changed, 9 insertions(+), 12 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 3ffd6666f55..1c26f5789c2 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -28886,6 +28886,9 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
   case X86ISD::PSHUFD:
   case X86ISD::PSHUFHW:
   case X86ISD::PSHUFLW:
+  case X86ISD::MOVSHDUP:
+  case X86ISD::MOVSLDUP:
+  case X86ISD::MOVDDUP:
   case X86ISD::MOVSS:
   case X86ISD::MOVSD:
   case X86ISD::VPERMILPI:
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll
index fc8bdd01f11..d2cef39c99e 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll
@@ -12,14 +12,12 @@ declare <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8>, <16 x i8>)
 define <4 x float> @combine_pshufb_movddup(<4 x float> %a0) {
 ; SSE-LABEL: combine_pshufb_movddup:
 ; SSE:       # BB#0:
-; SSE-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[5,5,5,5,7,7,7,7,1,1,1,1,3,3,3,3]
-; SSE-NEXT:    movddup {{.*#+}} xmm0 = xmm0[0,0]
+; SSE-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[5,5,5,5,7,7,7,7,5,5,5,5,7,7,7,7]
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: combine_pshufb_movddup:
 ; AVX:       # BB#0:
-; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[5,5,5,5,7,7,7,7,1,1,1,1,3,3,3,3]
-; AVX-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
+; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[5,5,5,5,7,7,7,7,5,5,5,5,7,7,7,7]
 ; AVX-NEXT:    retq
   %1 = bitcast <4 x float> %a0 to <16 x i8>
   %2 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %1, <16 x i8> <i8 5, i8 5, i8 5, i8 5, i8 7, i8 7, i8 7, i8 7, i8 1, i8 1, i8 1, i8 1, i8 3, i8 3, i8 3, i8 3>)
@@ -31,14 +29,12 @@ define <4 x float> @combine_pshufb_movddup(<4 x float> %a0) {
 define <4 x float> @combine_pshufb_movshdup(<4 x float> %a0) {
 ; SSE-LABEL: combine_pshufb_movshdup:
 ; SSE:       # BB#0:
-; SSE-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[5,5,5,5,7,7,7,7,1,1,1,1,3,3,3,3]
-; SSE-NEXT:    movshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; SSE-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[7,7,7,7,7,7,7,7,3,3,3,3,3,3,3,3]
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: combine_pshufb_movshdup:
 ; AVX:       # BB#0:
-; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[5,5,5,5,7,7,7,7,1,1,1,1,3,3,3,3]
-; AVX-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[7,7,7,7,7,7,7,7,3,3,3,3,3,3,3,3]
 ; AVX-NEXT:    retq
   %1 = bitcast <4 x float> %a0 to <16 x i8>
   %2 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %1, <16 x i8> <i8 5, i8 5, i8 5, i8 5, i8 7, i8 7, i8 7, i8 7, i8 1, i8 1, i8 1, i8 1, i8 3, i8 3, i8 3, i8 3>)
@@ -50,14 +46,12 @@ define <4 x float> @combine_pshufb_movshdup(<4 x float> %a0) {
 define <4 x float> @combine_pshufb_movsldup(<4 x float> %a0) {
 ; SSE-LABEL: combine_pshufb_movsldup:
 ; SSE:       # BB#0:
-; SSE-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[5,5,5,5,7,7,7,7,1,1,1,1,3,3,3,3]
-; SSE-NEXT:    movsldup {{.*#+}} xmm0 = xmm0[0,0,2,2]
+; SSE-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[5,5,5,5,5,5,5,5,1,1,1,1,1,1,1,1]
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: combine_pshufb_movsldup:
 ; AVX:       # BB#0:
-; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[5,5,5,5,7,7,7,7,1,1,1,1,3,3,3,3]
-; AVX-NEXT:    vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2]
+; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[5,5,5,5,5,5,5,5,1,1,1,1,1,1,1,1]
 ; AVX-NEXT:    retq
   %1 = bitcast <4 x float> %a0 to <16 x i8>
   %2 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %1, <16 x i8> <i8 5, i8 5, i8 5, i8 5, i8 7, i8 7, i8 7, i8 7, i8 1, i8 1, i8 1, i8 1, i8 3, i8 3, i8 3, i8 3>)
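
The change adds MOVSHDUP, MOVSLDUP and MOVDDUP to the target-shuffle cases that PerformDAGCombine dispatches on, so a dup node that consumes the result of another target shuffle (here a PSHUFB) can be folded into a single shuffle with a composed mask, which is exactly what the updated CHECK lines show. The following is a minimal standalone C++ sketch of that mask composition, not LLVM's actual combine code; the Mask type, the compose helper, and the hard-coded dup byte masks are assumptions made for illustration, while the PSHUFB mask and the three expected folded masks are taken verbatim from the tests above.

// Standalone sketch: folding "dup(pshufb(x, m))" into a single pshufb
// by composing the dup's byte mask through the original pshufb mask.
#include <array>
#include <cassert>
#include <cstdio>

using Mask = std::array<int, 16>;

// Compose two byte shuffles applied in sequence: First runs first,
// Second runs on its result, so result[i] = First[Second[i]].
static Mask compose(const Mask &First, const Mask &Second) {
  Mask Out{};
  for (int i = 0; i != 16; ++i)
    Out[i] = First[Second[i]];
  return Out;
}

// Byte-level masks of the three dup instructions on a 128-bit vector.
static const Mask MovDDup  = {0,1,2,3,4,5,6,7, 0,1,2,3,4,5,6,7};          // qwords [0,0]
static const Mask MovSHDup = {4,5,6,7,4,5,6,7, 12,13,14,15,12,13,14,15};  // dwords [1,1,3,3]
static const Mask MovSLDup = {0,1,2,3,0,1,2,3, 8,9,10,11,8,9,10,11};      // dwords [0,0,2,2]

int main() {
  // The PSHUFB mask shared by all three tests in the diff above.
  Mask PShufB = {5,5,5,5,7,7,7,7,1,1,1,1,3,3,3,3};

  // Each composition reproduces the folded mask in the new CHECK lines.
  assert((compose(PShufB, MovDDup) ==
          Mask{5,5,5,5,7,7,7,7,5,5,5,5,7,7,7,7}));
  assert((compose(PShufB, MovSHDup) ==
          Mask{7,7,7,7,7,7,7,7,3,3,3,3,3,3,3,3}));
  assert((compose(PShufB, MovSLDup) ==
          Mask{5,5,5,5,5,5,5,5,1,1,1,1,1,1,1,1}));
  std::puts("all folded masks match the updated CHECK lines");
}

Since the pshufb already needs its constant-pool mask either way, replacing the pshufb+dup pair with one pshufb carrying the composed mask removes an instruction at no extra cost, which is why each test drops from two shuffle instructions to one.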