summaryrefslogtreecommitdiffstats
path: root/llvm/test/CodeGen/X86
diff options
context:
space:
mode:
authorSimon Pilgrim <llvm-dev@redking.me.uk>2017-04-03 21:06:51 +0000
committerSimon Pilgrim <llvm-dev@redking.me.uk>2017-04-03 21:06:51 +0000
commitaf33757b5dec5f99bc78f724a2eb2cd822c14b73 (patch)
treeb79baa363e9ca2afbdb8470ff7b522eba00312f8 /llvm/test/CodeGen/X86
parent3b392bb8d85d3cd6cf265e940884394d5f25d641 (diff)
downloadbcm5719-llvm-af33757b5dec5f99bc78f724a2eb2cd822c14b73.tar.gz
bcm5719-llvm-af33757b5dec5f99bc78f724a2eb2cd822c14b73.zip
[X86][SSE]] Lower BUILD_VECTOR with repeated elts as BUILD_VECTOR + VECTOR_SHUFFLE
It can be costly to transfer from the gprs to the xmm registers and can prevent loads merging. This patch splits vXi16/vXi32/vXi64 BUILD_VECTORS that use the same operand in multiple elements into a BUILD_VECTOR with only a single insertion of each of those elements and then performs an unary shuffle to duplicate the values. There are a couple of minor regressions this patch unearths due to some missing MOVDDUP/BROADCAST folds that I will address in a future patch. Note: Now that vector shuffle lowering and combining is pretty good we should be reusing that instead of duplicating so much in LowerBUILD_VECTOR - this is the first of several patches to address this. Differential Revision: https://reviews.llvm.org/D31373 llvm-svn: 299387
Diffstat (limited to 'llvm/test/CodeGen/X86')
-rw-r--r--llvm/test/CodeGen/X86/avx-intrinsics-fast-isel.ll9
-rw-r--r--llvm/test/CodeGen/X86/avx-vbroadcast.ll48
-rw-r--r--llvm/test/CodeGen/X86/avx2-vbroadcast.ll35
-rw-r--r--llvm/test/CodeGen/X86/merge-consecutive-loads-128.ll48
-rw-r--r--llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll3
-rw-r--r--llvm/test/CodeGen/X86/vec_fp_to_int.ll2
-rw-r--r--llvm/test/CodeGen/X86/vec_int_to_fp.ll4
-rw-r--r--llvm/test/CodeGen/X86/vector-sext.ll11
-rw-r--r--llvm/test/CodeGen/X86/vector-shuffle-combining-xop.ll11
-rw-r--r--llvm/test/CodeGen/X86/vshift-1.ll9
-rw-r--r--llvm/test/CodeGen/X86/vshift-2.ll9
11 files changed, 87 insertions, 102 deletions
diff --git a/llvm/test/CodeGen/X86/avx-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/avx-intrinsics-fast-isel.ll
index 4a86fa22f08..d8a92f8eedd 100644
--- a/llvm/test/CodeGen/X86/avx-intrinsics-fast-isel.ll
+++ b/llvm/test/CodeGen/X86/avx-intrinsics-fast-isel.ll
@@ -2425,12 +2425,9 @@ define <4 x i64> @test_mm256_set1_epi32(i32 %a0) nounwind {
define <4 x i64> @test_mm256_set1_epi64x(i64 %a0) nounwind {
; X32-LABEL: test_mm256_set1_epi64x:
; X32: # BB#0:
-; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT: vmovd %ecx, %xmm0
-; X32-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
-; X32-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0
-; X32-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0
+; X32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
+; X32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; X32-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X32-NEXT: retl
;
diff --git a/llvm/test/CodeGen/X86/avx-vbroadcast.ll b/llvm/test/CodeGen/X86/avx-vbroadcast.ll
index 37b8753097c..620603925d0 100644
--- a/llvm/test/CodeGen/X86/avx-vbroadcast.ll
+++ b/llvm/test/CodeGen/X86/avx-vbroadcast.ll
@@ -6,12 +6,8 @@ define <4 x i64> @A(i64* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: A:
; X32: ## BB#0: ## %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: movl (%eax), %ecx
-; X32-NEXT: movl 4(%eax), %eax
-; X32-NEXT: vmovd %ecx, %xmm0
-; X32-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
-; X32-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0
-; X32-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0
+; X32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; X32-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; X32-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X32-NEXT: retl
;
@@ -31,17 +27,21 @@ entry:
define <4 x i64> @A2(i64* %ptr, i64* %ptr2) nounwind uwtable readnone ssp {
; X32-LABEL: A2:
; X32: ## BB#0: ## %entry
+; X32-NEXT: pushl %esi
+; X32-NEXT: Lcfi0:
+; X32-NEXT: .cfi_def_cfa_offset 8
+; X32-NEXT: Lcfi1:
+; X32-NEXT: .cfi_offset %esi, -8
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movl (%ecx), %edx
-; X32-NEXT: movl 4(%ecx), %ecx
-; X32-NEXT: movl %ecx, 4(%eax)
+; X32-NEXT: movl 4(%ecx), %esi
+; X32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; X32-NEXT: movl %esi, 4(%eax)
; X32-NEXT: movl %edx, (%eax)
-; X32-NEXT: vmovd %edx, %xmm0
-; X32-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0
-; X32-NEXT: vpinsrd $2, %edx, %xmm0, %xmm0
-; X32-NEXT: vpinsrd $3, %ecx, %xmm0, %xmm0
+; X32-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; X32-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X32-NEXT: popl %esi
; X32-NEXT: retl
;
; X64-LABEL: A2:
@@ -592,12 +592,8 @@ define <2 x i64> @G(i64* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: G:
; X32: ## BB#0: ## %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: movl (%eax), %ecx
-; X32-NEXT: movl 4(%eax), %eax
-; X32-NEXT: vmovd %ecx, %xmm0
-; X32-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
-; X32-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0
-; X32-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0
+; X32-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
+; X32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; X32-NEXT: retl
;
; X64-LABEL: G:
@@ -615,16 +611,20 @@ entry:
define <2 x i64> @G2(i64* %ptr, i64* %ptr2) nounwind uwtable readnone ssp {
; X32-LABEL: G2:
; X32: ## BB#0: ## %entry
+; X32-NEXT: pushl %esi
+; X32-NEXT: Lcfi2:
+; X32-NEXT: .cfi_def_cfa_offset 8
+; X32-NEXT: Lcfi3:
+; X32-NEXT: .cfi_offset %esi, -8
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movl (%ecx), %edx
-; X32-NEXT: movl 4(%ecx), %ecx
-; X32-NEXT: movl %ecx, 4(%eax)
+; X32-NEXT: movl 4(%ecx), %esi
+; X32-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
+; X32-NEXT: movl %esi, 4(%eax)
; X32-NEXT: movl %edx, (%eax)
-; X32-NEXT: vmovd %edx, %xmm0
-; X32-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0
-; X32-NEXT: vpinsrd $2, %edx, %xmm0, %xmm0
-; X32-NEXT: vpinsrd $3, %ecx, %xmm0, %xmm0
+; X32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
+; X32-NEXT: popl %esi
; X32-NEXT: retl
;
; X64-LABEL: G2:
diff --git a/llvm/test/CodeGen/X86/avx2-vbroadcast.ll b/llvm/test/CodeGen/X86/avx2-vbroadcast.ll
index ba47e2ba15c..5df58f9fbae 100644
--- a/llvm/test/CodeGen/X86/avx2-vbroadcast.ll
+++ b/llvm/test/CodeGen/X86/avx2-vbroadcast.ll
@@ -189,12 +189,7 @@ define <2 x i64> @Q64(i64* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: Q64:
; X32: ## BB#0: ## %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: movl (%eax), %ecx
-; X32-NEXT: movl 4(%eax), %eax
-; X32-NEXT: vmovd %ecx, %xmm0
-; X32-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
-; X32-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0
-; X32-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0
+; X32-NEXT: vpbroadcastq (%eax), %xmm0
; X32-NEXT: retl
;
; X64-LABEL: Q64:
@@ -212,13 +207,8 @@ define <4 x i64> @QQ64(i64* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: QQ64:
; X32: ## BB#0: ## %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: movl (%eax), %ecx
-; X32-NEXT: movl 4(%eax), %eax
-; X32-NEXT: vmovd %ecx, %xmm0
-; X32-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
-; X32-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0
-; X32-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0
-; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
+; X32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; X32-NEXT: vbroadcastsd %xmm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: QQ64:
@@ -1440,12 +1430,8 @@ define void @isel_crash_2q(i64* %cV_R.addr) {
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT: vmovaps %xmm0, (%esp)
-; X32-NEXT: movl (%eax), %ecx
-; X32-NEXT: movl 4(%eax), %eax
-; X32-NEXT: vmovd %ecx, %xmm1
-; X32-NEXT: vpinsrd $1, %eax, %xmm1, %xmm1
-; X32-NEXT: vpinsrd $2, %ecx, %xmm1, %xmm1
-; X32-NEXT: vpinsrd $3, %eax, %xmm1, %xmm1
+; X32-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
+; X32-NEXT: vpbroadcastq %xmm1, %xmm1
; X32-NEXT: vmovaps %xmm0, {{[0-9]+}}(%esp)
; X32-NEXT: vmovdqa %xmm1, {{[0-9]+}}(%esp)
; X32-NEXT: addl $60, %esp
@@ -1501,15 +1487,10 @@ define void @isel_crash_4q(i64* %cV_R.addr) {
; X32-NEXT: movl 8(%ebp), %eax
; X32-NEXT: vxorps %ymm0, %ymm0, %ymm0
; X32-NEXT: vmovaps %ymm0, (%esp)
-; X32-NEXT: movl (%eax), %ecx
-; X32-NEXT: movl 4(%eax), %eax
-; X32-NEXT: vmovd %ecx, %xmm1
-; X32-NEXT: vpinsrd $1, %eax, %xmm1, %xmm1
-; X32-NEXT: vpinsrd $2, %ecx, %xmm1, %xmm1
-; X32-NEXT: vpinsrd $3, %eax, %xmm1, %xmm1
-; X32-NEXT: vinserti128 $1, %xmm1, %ymm1, %ymm1
+; X32-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
+; X32-NEXT: vbroadcastsd %xmm1, %ymm1
; X32-NEXT: vmovaps %ymm0, {{[0-9]+}}(%esp)
-; X32-NEXT: vmovdqa %ymm1, {{[0-9]+}}(%esp)
+; X32-NEXT: vmovaps %ymm1, {{[0-9]+}}(%esp)
; X32-NEXT: movl %ebp, %esp
; X32-NEXT: popl %ebp
; X32-NEXT: vzeroupper
diff --git a/llvm/test/CodeGen/X86/merge-consecutive-loads-128.ll b/llvm/test/CodeGen/X86/merge-consecutive-loads-128.ll
index 71417694b0d..b4aae93143a 100644
--- a/llvm/test/CodeGen/X86/merge-consecutive-loads-128.ll
+++ b/llvm/test/CodeGen/X86/merge-consecutive-loads-128.ll
@@ -1102,28 +1102,44 @@ define <4 x float> @merge_4f32_f32_2345_volatile(float* %ptr) nounwind uwtable n
;
define <4 x float> @merge_4f32_f32_X0YY(float* %ptr0, float* %ptr1) nounwind uwtable noinline ssp {
-; SSE-LABEL: merge_4f32_f32_X0YY:
-; SSE: # BB#0:
-; SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
-; SSE-NEXT: retq
+; SSE2-LABEL: merge_4f32_f32_X0YY:
+; SSE2: # BB#0:
+; SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: merge_4f32_f32_X0YY:
+; SSE41: # BB#0:
+; SSE41-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],zero,mem[0],zero
+; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1,2,2]
+; SSE41-NEXT: retq
;
; AVX-LABEL: merge_4f32_f32_X0YY:
; AVX: # BB#0:
; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0,0]
+; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],zero,mem[0],zero
+; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,1,2,2]
; AVX-NEXT: retq
;
-; X32-SSE-LABEL: merge_4f32_f32_X0YY:
-; X32-SSE: # BB#0:
-; X32-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-SSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; X32-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X32-SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
-; X32-SSE-NEXT: retl
+; X32-SSE1-LABEL: merge_4f32_f32_X0YY:
+; X32-SSE1: # BB#0:
+; X32-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-SSE1-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-SSE1-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X32-SSE1-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X32-SSE1-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
+; X32-SSE1-NEXT: retl
+;
+; X32-SSE41-LABEL: merge_4f32_f32_X0YY:
+; X32-SSE41: # BB#0:
+; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-SSE41-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X32-SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],zero,mem[0],zero
+; X32-SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1,2,2]
+; X32-SSE41-NEXT: retl
%val0 = load float, float* %ptr0, align 4
%val1 = load float, float* %ptr1, align 4
%res0 = insertelement <4 x float> undef, float %val0, i32 0
diff --git a/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
index 3071155172e..030ad7683f0 100644
--- a/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
+++ b/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
@@ -2425,10 +2425,9 @@ define <2 x i64> @test_mm_set1_epi64x(i64 %a0) nounwind {
; X32-LABEL: test_mm_set1_epi64x:
; X32: # BB#0:
; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; X32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; X32-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; X32-NEXT: retl
;
; X64-LABEL: test_mm_set1_epi64x:
diff --git a/llvm/test/CodeGen/X86/vec_fp_to_int.ll b/llvm/test/CodeGen/X86/vec_fp_to_int.ll
index a345f78e18c..8baef924521 100644
--- a/llvm/test/CodeGen/X86/vec_fp_to_int.ll
+++ b/llvm/test/CodeGen/X86/vec_fp_to_int.ll
@@ -537,7 +537,7 @@ define <4 x i32> @fptoui_4f64_to_2i32(<2 x double> %a) {
; VEX-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
; VEX-NEXT: vcvttsd2si %xmm0, %rax
; VEX-NEXT: vpinsrd $2, %eax, %xmm0, %xmm0
-; VEX-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0
+; VEX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,2,2]
; VEX-NEXT: retq
;
; AVX512F-LABEL: fptoui_4f64_to_2i32:
diff --git a/llvm/test/CodeGen/X86/vec_int_to_fp.ll b/llvm/test/CodeGen/X86/vec_int_to_fp.ll
index 649b45712f5..35d935b107f 100644
--- a/llvm/test/CodeGen/X86/vec_int_to_fp.ll
+++ b/llvm/test/CodeGen/X86/vec_int_to_fp.ll
@@ -1177,8 +1177,8 @@ define <4 x float> @sitofp_4i64_to_4f32_undef(<2 x i64> %a) {
; SSE-NEXT: movd %xmm0, %rax
; SSE-NEXT: xorps %xmm0, %xmm0
; SSE-NEXT: cvtsi2ssq %rax, %xmm0
-; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1,2,2]
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
@@ -1879,8 +1879,8 @@ define <4 x float> @uitofp_4i64_to_4f32_undef(<2 x i64> %a) {
; SSE-NEXT: cvtsi2ssq %rax, %xmm1
; SSE-NEXT: addss %xmm1, %xmm1
; SSE-NEXT: .LBB41_8:
-; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1,2,2]
; SSE-NEXT: retq
;
; VEX-LABEL: uitofp_4i64_to_4f32_undef:
diff --git a/llvm/test/CodeGen/X86/vector-sext.ll b/llvm/test/CodeGen/X86/vector-sext.ll
index e9f1d1d8522..914dc7423a2 100644
--- a/llvm/test/CodeGen/X86/vector-sext.ll
+++ b/llvm/test/CodeGen/X86/vector-sext.ll
@@ -1263,14 +1263,13 @@ define <2 x i64> @load_sext_2i1_to_2i64(<2 x i1> *%ptr) {
; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-SSE41-NEXT: movzbl (%eax), %eax
; X32-SSE41-NEXT: movl %eax, %ecx
-; X32-SSE41-NEXT: shll $31, %ecx
+; X32-SSE41-NEXT: shll $30, %ecx
; X32-SSE41-NEXT: sarl $31, %ecx
-; X32-SSE41-NEXT: movd %ecx, %xmm0
-; X32-SSE41-NEXT: pinsrd $1, %ecx, %xmm0
-; X32-SSE41-NEXT: shll $30, %eax
+; X32-SSE41-NEXT: shll $31, %eax
; X32-SSE41-NEXT: sarl $31, %eax
-; X32-SSE41-NEXT: pinsrd $2, %eax, %xmm0
-; X32-SSE41-NEXT: pinsrd $3, %eax, %xmm0
+; X32-SSE41-NEXT: movd %eax, %xmm0
+; X32-SSE41-NEXT: pinsrd $2, %ecx, %xmm0
+; X32-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
; X32-SSE41-NEXT: retl
entry:
%X = load <2 x i1>, <2 x i1>* %ptr
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-xop.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-xop.ll
index a9dff916431..91d31173933 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining-xop.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-xop.ll
@@ -318,21 +318,20 @@ define <4 x i32> @combine_vpperm_10zz32BA(<4 x i32> %a0, <4 x i32> %a1) {
ret <4 x i32> %res3
}
-; FIXME: Duplicated load in i686
define void @buildvector_v4f32_0404(float %a, float %b, <4 x float>* %ptr) {
; X32-LABEL: buildvector_v4f32_0404:
; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; X32-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
-; X32-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
-; X32-NEXT: vmovaps %xmm0, (%eax)
+; X32-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
+; X32-NEXT: vmovapd %xmm0, (%eax)
; X32-NEXT: retl
;
; X64-LABEL: buildvector_v4f32_0404:
; X64: # BB#0:
-; X64-NEXT: vpermil2ps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[0],xmm1[0]
-; X64-NEXT: vmovaps %xmm0, (%rdi)
+; X64-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
+; X64-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
+; X64-NEXT: vmovapd %xmm0, (%rdi)
; X64-NEXT: retq
%v0 = insertelement <4 x float> undef, float %a, i32 0
%v1 = insertelement <4 x float> %v0, float %b, i32 1
diff --git a/llvm/test/CodeGen/X86/vshift-1.ll b/llvm/test/CodeGen/X86/vshift-1.ll
index 7ad5706592e..4bc65908ed6 100644
--- a/llvm/test/CodeGen/X86/vshift-1.ll
+++ b/llvm/test/CodeGen/X86/vshift-1.ll
@@ -28,12 +28,9 @@ define void @shift1b(<2 x i64> %val, <2 x i64>* %dst, i64 %amt) nounwind {
; X32-LABEL: shift1b:
; X32: # BB#0: # %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; X32-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
-; X32-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; X32-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,1,1]
-; X32-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
-; X32-NEXT: psllq %xmm2, %xmm0
+; X32-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
+; X32-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
+; X32-NEXT: psllq %xmm1, %xmm0
; X32-NEXT: movdqa %xmm0, (%eax)
; X32-NEXT: retl
;
diff --git a/llvm/test/CodeGen/X86/vshift-2.ll b/llvm/test/CodeGen/X86/vshift-2.ll
index f79fc5bff96..024727323ac 100644
--- a/llvm/test/CodeGen/X86/vshift-2.ll
+++ b/llvm/test/CodeGen/X86/vshift-2.ll
@@ -28,12 +28,9 @@ define void @shift1b(<2 x i64> %val, <2 x i64>* %dst, i64 %amt) nounwind {
; X32-LABEL: shift1b:
; X32: # BB#0: # %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; X32-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
-; X32-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; X32-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,1,1]
-; X32-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
-; X32-NEXT: psrlq %xmm2, %xmm0
+; X32-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
+; X32-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
+; X32-NEXT: psrlq %xmm1, %xmm0
; X32-NEXT: movdqa %xmm0, (%eax)
; X32-NEXT: retl
;
OpenPOWER on IntegriCloud