author    | Simon Pilgrim <llvm-dev@redking.me.uk> | 2017-02-15 17:41:33 +0000
committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2017-02-15 17:41:33 +0000
commit    | da25d5c7b60aa32adc3e4175718aac5ac87cc0f7
tree      | 3d0f71f6490eddb2c45cac798488e8f1b2b14602 /llvm/test
parent    | f9e7b3caba516685a52137878170e36e7e0fda51
[X86][SSE] Propagate undef upper elements from scalar_to_vector during shuffle combining
Only do this for integer types currently - for float types (in particular insertps), load folding often fails with this.
llvm-svn: 295208
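
To illustrate the effect, below is a hedged reconstruction of the load_sext_2i16_to_2i64 test retuned in vector-sext.ll (the IR body is inferred from the test name and signature shown in the diff, not copied from the source file). The <2 x i16> load is lowered to a movd, i.e. a scalar_to_vector whose upper lanes are undef; once shuffle combining propagates that undef, the sign-extension only needs to rearrange the low half with pshuflw instead of a full punpcklwd unpack.

```llvm
; Hedged sketch of llvm/test/CodeGen/X86/vector-sext.ll: load_sext_2i16_to_2i64.
; SSE2 codegen change shown in the diff below:
;   before:  punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
;   after:   pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,1,4,5,6,7]
define <2 x i64> @load_sext_2i16_to_2i64(<2 x i16>* %ptr) {
entry:
  ; The 32-bit load becomes a movd (scalar_to_vector); lanes above the loaded
  ; element are undef, which the shuffle combiner can now exploit.
  %x = load <2 x i16>, <2 x i16>* %ptr
  %y = sext <2 x i16> %x to <2 x i64>
  ret <2 x i64> %y
}
```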
Diffstat (limited to 'llvm/test')
-rw-r--r-- | llvm/test/CodeGen/X86/shrink_vmul.ll             |  4
-rw-r--r-- | llvm/test/CodeGen/X86/vec_extract-mmx.ll         |  2
-rw-r--r-- | llvm/test/CodeGen/X86/vec_int_to_fp.ll           |  2
-rw-r--r-- | llvm/test/CodeGen/X86/vector-half-conversions.ll | 56
-rw-r--r-- | llvm/test/CodeGen/X86/vector-sext.ll             |  4
-rw-r--r-- | llvm/test/CodeGen/X86/widen_conv-3.ll            |  4
-rw-r--r-- | llvm/test/CodeGen/X86/widen_conv-4.ll            |  4
7 files changed, 30 insertions, 46 deletions
diff --git a/llvm/test/CodeGen/X86/shrink_vmul.ll b/llvm/test/CodeGen/X86/shrink_vmul.ll index d7e99afb2f5..930af226b95 100644 --- a/llvm/test/CodeGen/X86/shrink_vmul.ll +++ b/llvm/test/CodeGen/X86/shrink_vmul.ll @@ -449,7 +449,7 @@ define void @mul_2xi16_sext_zext(i8* nocapture readonly %a, i8* nocapture readon ; CHECK: # BB#0: # %entry ; CHECK-NEXT: movq {{.*}}(%rip), %rax ; CHECK-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero -; CHECK-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] +; CHECK-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,1,4,5,6,7] ; CHECK-NEXT: psrad $16, %xmm0 ; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3] ; CHECK-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero @@ -835,7 +835,7 @@ define void @mul_2xi16_varconst4(i8* nocapture readonly %a, i64 %index) { ; CHECK: # BB#0: # %entry ; CHECK-NEXT: movq {{.*}}(%rip), %rax ; CHECK-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero -; CHECK-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] +; CHECK-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,1,4,5,6,7] ; CHECK-NEXT: psrad $16, %xmm0 ; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3] ; CHECK-NEXT: movl $32768, %ecx # imm = 0x8000 diff --git a/llvm/test/CodeGen/X86/vec_extract-mmx.ll b/llvm/test/CodeGen/X86/vec_extract-mmx.ll index ed957728aef..e99d6fd18f9 100644 --- a/llvm/test/CodeGen/X86/vec_extract-mmx.ll +++ b/llvm/test/CodeGen/X86/vec_extract-mmx.ll @@ -150,7 +150,7 @@ define i32 @test4(x86_mmx %a) nounwind { ; X32-NEXT: subl $8, %esp ; X32-NEXT: movq %mm0, (%esp) ; X32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; X32-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,3,0,1] +; X32-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,0,1] ; X32-NEXT: movd %xmm0, %eax ; X32-NEXT: movl %ebp, %esp ; X32-NEXT: popl %ebp diff --git a/llvm/test/CodeGen/X86/vec_int_to_fp.ll b/llvm/test/CodeGen/X86/vec_int_to_fp.ll index d3f286d9fae..1c2def445a5 100644 --- a/llvm/test/CodeGen/X86/vec_int_to_fp.ll +++ b/llvm/test/CodeGen/X86/vec_int_to_fp.ll @@ -2661,7 +2661,7 @@ define <2 x double> @sitofp_load_2i16_to_2f64(<2 x i16> *%a) { ; SSE-LABEL: sitofp_load_2i16_to_2f64: ; SSE: # BB#0: ; SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero -; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] +; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,1,4,5,6,7] ; SSE-NEXT: psrad $16, %xmm0 ; SSE-NEXT: cvtdq2pd %xmm0, %xmm0 ; SSE-NEXT: retq diff --git a/llvm/test/CodeGen/X86/vector-half-conversions.ll b/llvm/test/CodeGen/X86/vector-half-conversions.ll index c870022b26d..f62c25051e4 100644 --- a/llvm/test/CodeGen/X86/vector-half-conversions.ll +++ b/llvm/test/CodeGen/X86/vector-half-conversions.ll @@ -2822,7 +2822,7 @@ define <8 x i16> @cvt_4f32_to_8i16_undef(<4 x float> %a0) nounwind { ; AVX1-NEXT: shlq $32, %rdx ; AVX1-NEXT: orq %rcx, %rdx ; AVX1-NEXT: vmovq %rdx, %xmm0 -; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15] +; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] ; AVX1-NEXT: retq ; ; AVX2-LABEL: cvt_4f32_to_8i16_undef: @@ -2847,7 +2847,7 @@ define <8 x i16> @cvt_4f32_to_8i16_undef(<4 x float> %a0) nounwind { ; AVX2-NEXT: shlq $32, %rdx ; AVX2-NEXT: orq %rcx, %rdx ; AVX2-NEXT: vmovq %rdx, %xmm0 -; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15] +; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] ; AVX2-NEXT: retq ; ; AVX512F-LABEL: cvt_4f32_to_8i16_undef: @@ -2873,7 +2873,7 @@ define <8 x i16> @cvt_4f32_to_8i16_undef(<4 x float> %a0) nounwind { ; AVX512F-NEXT: shlq $32, %rdx ; AVX512F-NEXT: orq %rcx, %rdx ; AVX512F-NEXT: vmovq 
%rdx, %xmm0 -; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15] +; AVX512F-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: cvt_4f32_to_8i16_undef: @@ -2899,7 +2899,6 @@ define <8 x i16> @cvt_4f32_to_8i16_undef(<4 x float> %a0) nounwind { ; AVX512VL-NEXT: orq %rcx, %rdx ; AVX512VL-NEXT: vmovq %rdx, %xmm0 ; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] -; AVX512VL-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7] ; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] ; AVX512VL-NEXT: retq %1 = fptrunc <4 x float> %a0 to <4 x half> @@ -2931,7 +2930,7 @@ define <8 x i16> @cvt_4f32_to_8i16_zero(<4 x float> %a0) nounwind { ; AVX1-NEXT: shlq $32, %rdx ; AVX1-NEXT: orq %rcx, %rdx ; AVX1-NEXT: vmovq %rdx, %xmm0 -; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero +; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,u,u,u,u],zero,zero,zero,zero,zero,zero,zero,zero ; AVX1-NEXT: retq ; ; AVX2-LABEL: cvt_4f32_to_8i16_zero: @@ -2956,7 +2955,7 @@ define <8 x i16> @cvt_4f32_to_8i16_zero(<4 x float> %a0) nounwind { ; AVX2-NEXT: shlq $32, %rdx ; AVX2-NEXT: orq %rcx, %rdx ; AVX2-NEXT: vmovq %rdx, %xmm0 -; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero +; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,u,u,u,u],zero,zero,zero,zero,zero,zero,zero,zero ; AVX2-NEXT: retq ; ; AVX512F-LABEL: cvt_4f32_to_8i16_zero: @@ -2982,7 +2981,7 @@ define <8 x i16> @cvt_4f32_to_8i16_zero(<4 x float> %a0) nounwind { ; AVX512F-NEXT: shlq $32, %rdx ; AVX512F-NEXT: orq %rcx, %rdx ; AVX512F-NEXT: vmovq %rdx, %xmm0 -; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero +; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,u,u,u,u],zero,zero,zero,zero,zero,zero,zero,zero ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: cvt_4f32_to_8i16_zero: @@ -3008,7 +3007,6 @@ define <8 x i16> @cvt_4f32_to_8i16_zero(<4 x float> %a0) nounwind { ; AVX512VL-NEXT: orq %rcx, %rdx ; AVX512VL-NEXT: vmovq %rdx, %xmm0 ; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] -; AVX512VL-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7] ; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,2] ; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512VL-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] @@ -3631,7 +3629,7 @@ define void @store_cvt_4f32_to_8i16_undef(<4 x float> %a0, <8 x i16>* %a1) nounw ; AVX1-NEXT: shlq $32, %rdx ; AVX1-NEXT: orq %rcx, %rdx ; AVX1-NEXT: vmovq %rdx, %xmm0 -; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15] +; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] ; AVX1-NEXT: vmovdqa %xmm0, (%rdi) ; AVX1-NEXT: retq ; @@ -3657,7 +3655,7 @@ define void @store_cvt_4f32_to_8i16_undef(<4 x float> %a0, <8 x i16>* %a1) nounw ; AVX2-NEXT: shlq $32, %rdx ; AVX2-NEXT: orq %rcx, %rdx ; AVX2-NEXT: vmovq %rdx, %xmm0 -; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15] +; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] ; AVX2-NEXT: vmovdqa %xmm0, (%rdi) ; AVX2-NEXT: retq ; @@ -3684,7 +3682,7 @@ define void @store_cvt_4f32_to_8i16_undef(<4 x float> %a0, <8 x i16>* %a1) nounw ; AVX512F-NEXT: shlq $32, %rdx ; AVX512F-NEXT: orq %rcx, %rdx ; AVX512F-NEXT: vmovq %rdx, %xmm0 -; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15] +; AVX512F-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] ; 
AVX512F-NEXT: vmovdqa %xmm0, (%rdi) ; AVX512F-NEXT: retq ; @@ -3711,7 +3709,6 @@ define void @store_cvt_4f32_to_8i16_undef(<4 x float> %a0, <8 x i16>* %a1) nounw ; AVX512VL-NEXT: orq %rcx, %rdx ; AVX512VL-NEXT: vmovq %rdx, %xmm0 ; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] -; AVX512VL-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7] ; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] ; AVX512VL-NEXT: vmovdqa %xmm0, (%rdi) ; AVX512VL-NEXT: retq @@ -3745,7 +3742,7 @@ define void @store_cvt_4f32_to_8i16_zero(<4 x float> %a0, <8 x i16>* %a1) nounwi ; AVX1-NEXT: shlq $32, %rdx ; AVX1-NEXT: orq %rcx, %rdx ; AVX1-NEXT: vmovq %rdx, %xmm0 -; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero +; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,u,u,u,u],zero,zero,zero,zero,zero,zero,zero,zero ; AVX1-NEXT: vmovdqa %xmm0, (%rdi) ; AVX1-NEXT: retq ; @@ -3771,7 +3768,7 @@ define void @store_cvt_4f32_to_8i16_zero(<4 x float> %a0, <8 x i16>* %a1) nounwi ; AVX2-NEXT: shlq $32, %rdx ; AVX2-NEXT: orq %rcx, %rdx ; AVX2-NEXT: vmovq %rdx, %xmm0 -; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero +; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,u,u,u,u],zero,zero,zero,zero,zero,zero,zero,zero ; AVX2-NEXT: vmovdqa %xmm0, (%rdi) ; AVX2-NEXT: retq ; @@ -3798,7 +3795,7 @@ define void @store_cvt_4f32_to_8i16_zero(<4 x float> %a0, <8 x i16>* %a1) nounwi ; AVX512F-NEXT: shlq $32, %rdx ; AVX512F-NEXT: orq %rcx, %rdx ; AVX512F-NEXT: vmovq %rdx, %xmm0 -; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero +; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,u,u,u,u],zero,zero,zero,zero,zero,zero,zero,zero ; AVX512F-NEXT: vmovdqa %xmm0, (%rdi) ; AVX512F-NEXT: retq ; @@ -3825,7 +3822,6 @@ define void @store_cvt_4f32_to_8i16_zero(<4 x float> %a0, <8 x i16>* %a1) nounwi ; AVX512VL-NEXT: orq %rcx, %rdx ; AVX512VL-NEXT: vmovq %rdx, %xmm0 ; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] -; AVX512VL-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7] ; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,2] ; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512VL-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] @@ -4477,7 +4473,7 @@ define <8 x i16> @cvt_4f64_to_8i16_undef(<4 x double> %a0) nounwind { ; AVX1-NEXT: shlq $32, %rax ; AVX1-NEXT: orq %r14, %rax ; AVX1-NEXT: vmovq %rax, %xmm0 -; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15] +; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] ; AVX1-NEXT: addq $40, %rsp ; AVX1-NEXT: popq %rbx ; AVX1-NEXT: popq %r14 @@ -4515,7 +4511,7 @@ define <8 x i16> @cvt_4f64_to_8i16_undef(<4 x double> %a0) nounwind { ; AVX2-NEXT: shlq $32, %rax ; AVX2-NEXT: orq %r14, %rax ; AVX2-NEXT: vmovq %rax, %xmm0 -; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15] +; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] ; AVX2-NEXT: addq $40, %rsp ; AVX2-NEXT: popq %rbx ; AVX2-NEXT: popq %r14 @@ -4550,7 +4546,7 @@ define <8 x i16> @cvt_4f64_to_8i16_undef(<4 x double> %a0) nounwind { ; AVX512F-NEXT: shlq $32, %rax ; AVX512F-NEXT: orq %r14, %rax ; AVX512F-NEXT: vmovq %rax, %xmm0 -; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15] +; AVX512F-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] ; AVX512F-NEXT: addq $40, %rsp ; AVX512F-NEXT: popq %rbx ; AVX512F-NEXT: popq %r14 @@ -4586,7 +4582,6 @@ define <8 x i16> 
@cvt_4f64_to_8i16_undef(<4 x double> %a0) nounwind { ; AVX512VL-NEXT: orq %r14, %rax ; AVX512VL-NEXT: vmovq %rax, %xmm0 ; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] -; AVX512VL-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7] ; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] ; AVX512VL-NEXT: addq $40, %rsp ; AVX512VL-NEXT: popq %rbx @@ -4631,7 +4626,7 @@ define <8 x i16> @cvt_4f64_to_8i16_zero(<4 x double> %a0) nounwind { ; AVX1-NEXT: shlq $32, %rax ; AVX1-NEXT: orq %r14, %rax ; AVX1-NEXT: vmovq %rax, %xmm0 -; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero +; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,u,u,u,u],zero,zero,zero,zero,zero,zero,zero,zero ; AVX1-NEXT: addq $40, %rsp ; AVX1-NEXT: popq %rbx ; AVX1-NEXT: popq %r14 @@ -4669,7 +4664,7 @@ define <8 x i16> @cvt_4f64_to_8i16_zero(<4 x double> %a0) nounwind { ; AVX2-NEXT: shlq $32, %rax ; AVX2-NEXT: orq %r14, %rax ; AVX2-NEXT: vmovq %rax, %xmm0 -; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero +; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,u,u,u,u],zero,zero,zero,zero,zero,zero,zero,zero ; AVX2-NEXT: addq $40, %rsp ; AVX2-NEXT: popq %rbx ; AVX2-NEXT: popq %r14 @@ -4704,7 +4699,7 @@ define <8 x i16> @cvt_4f64_to_8i16_zero(<4 x double> %a0) nounwind { ; AVX512F-NEXT: shlq $32, %rax ; AVX512F-NEXT: orq %r14, %rax ; AVX512F-NEXT: vmovq %rax, %xmm0 -; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero +; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,u,u,u,u],zero,zero,zero,zero,zero,zero,zero,zero ; AVX512F-NEXT: addq $40, %rsp ; AVX512F-NEXT: popq %rbx ; AVX512F-NEXT: popq %r14 @@ -4740,7 +4735,6 @@ define <8 x i16> @cvt_4f64_to_8i16_zero(<4 x double> %a0) nounwind { ; AVX512VL-NEXT: orq %r14, %rax ; AVX512VL-NEXT: vmovq %rax, %xmm0 ; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] -; AVX512VL-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7] ; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,2] ; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512VL-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] @@ -5250,7 +5244,7 @@ define void @store_cvt_4f64_to_8i16_undef(<4 x double> %a0, <8 x i16>* %a1) noun ; AVX1-NEXT: shlq $32, %rax ; AVX1-NEXT: orq %rbx, %rax ; AVX1-NEXT: vmovq %rax, %xmm0 -; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15] +; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] ; AVX1-NEXT: vmovdqa %xmm0, (%r14) ; AVX1-NEXT: addq $32, %rsp ; AVX1-NEXT: popq %rbx @@ -5292,7 +5286,7 @@ define void @store_cvt_4f64_to_8i16_undef(<4 x double> %a0, <8 x i16>* %a1) noun ; AVX2-NEXT: shlq $32, %rax ; AVX2-NEXT: orq %rbx, %rax ; AVX2-NEXT: vmovq %rax, %xmm0 -; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15] +; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] ; AVX2-NEXT: vmovdqa %xmm0, (%r14) ; AVX2-NEXT: addq $32, %rsp ; AVX2-NEXT: popq %rbx @@ -5331,7 +5325,7 @@ define void @store_cvt_4f64_to_8i16_undef(<4 x double> %a0, <8 x i16>* %a1) noun ; AVX512F-NEXT: shlq $32, %rax ; AVX512F-NEXT: orq %rbx, %rax ; AVX512F-NEXT: vmovq %rax, %xmm0 -; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15] +; AVX512F-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] ; AVX512F-NEXT: vmovdqa %xmm0, (%r14) ; AVX512F-NEXT: addq $32, %rsp ; AVX512F-NEXT: popq %rbx @@ -5371,7 +5365,6 @@ define void @store_cvt_4f64_to_8i16_undef(<4 x 
double> %a0, <8 x i16>* %a1) noun ; AVX512VL-NEXT: orq %rbx, %rax ; AVX512VL-NEXT: vmovq %rax, %xmm0 ; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] -; AVX512VL-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7] ; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] ; AVX512VL-NEXT: vmovdqa %xmm0, (%r14) ; AVX512VL-NEXT: addq $32, %rsp @@ -5421,7 +5414,7 @@ define void @store_cvt_4f64_to_8i16_zero(<4 x double> %a0, <8 x i16>* %a1) nounw ; AVX1-NEXT: shlq $32, %rax ; AVX1-NEXT: orq %rbx, %rax ; AVX1-NEXT: vmovq %rax, %xmm0 -; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero +; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,u,u,u,u],zero,zero,zero,zero,zero,zero,zero,zero ; AVX1-NEXT: vmovdqa %xmm0, (%r14) ; AVX1-NEXT: addq $32, %rsp ; AVX1-NEXT: popq %rbx @@ -5463,7 +5456,7 @@ define void @store_cvt_4f64_to_8i16_zero(<4 x double> %a0, <8 x i16>* %a1) nounw ; AVX2-NEXT: shlq $32, %rax ; AVX2-NEXT: orq %rbx, %rax ; AVX2-NEXT: vmovq %rax, %xmm0 -; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero +; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,u,u,u,u],zero,zero,zero,zero,zero,zero,zero,zero ; AVX2-NEXT: vmovdqa %xmm0, (%r14) ; AVX2-NEXT: addq $32, %rsp ; AVX2-NEXT: popq %rbx @@ -5502,7 +5495,7 @@ define void @store_cvt_4f64_to_8i16_zero(<4 x double> %a0, <8 x i16>* %a1) nounw ; AVX512F-NEXT: shlq $32, %rax ; AVX512F-NEXT: orq %rbx, %rax ; AVX512F-NEXT: vmovq %rax, %xmm0 -; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero +; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,u,u,u,u],zero,zero,zero,zero,zero,zero,zero,zero ; AVX512F-NEXT: vmovdqa %xmm0, (%r14) ; AVX512F-NEXT: addq $32, %rsp ; AVX512F-NEXT: popq %rbx @@ -5542,7 +5535,6 @@ define void @store_cvt_4f64_to_8i16_zero(<4 x double> %a0, <8 x i16>* %a1) nounw ; AVX512VL-NEXT: orq %rbx, %rax ; AVX512VL-NEXT: vmovq %rax, %xmm0 ; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] -; AVX512VL-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7] ; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,2] ; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512VL-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] diff --git a/llvm/test/CodeGen/X86/vector-sext.ll b/llvm/test/CodeGen/X86/vector-sext.ll index 774d615ae89..3ab9e231c12 100644 --- a/llvm/test/CodeGen/X86/vector-sext.ll +++ b/llvm/test/CodeGen/X86/vector-sext.ll @@ -4435,7 +4435,7 @@ define <2 x i64> @load_sext_2i16_to_2i64(<2 x i16> *%ptr) { ; SSE2-LABEL: load_sext_2i16_to_2i64: ; SSE2: # BB#0: # %entry ; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero -; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,1,4,5,6,7] ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrad $31, %xmm1 ; SSE2-NEXT: psrad $16, %xmm0 @@ -4445,7 +4445,7 @@ define <2 x i64> @load_sext_2i16_to_2i64(<2 x i16> *%ptr) { ; SSSE3-LABEL: load_sext_2i16_to_2i64: ; SSSE3: # BB#0: # %entry ; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero -; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] +; SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,1,4,5,6,7] ; SSSE3-NEXT: movdqa %xmm0, %xmm1 ; SSSE3-NEXT: psrad $31, %xmm1 ; SSSE3-NEXT: psrad $16, %xmm0 diff --git a/llvm/test/CodeGen/X86/widen_conv-3.ll b/llvm/test/CodeGen/X86/widen_conv-3.ll index 4ae19b8f5d2..504485440ef 100644 --- a/llvm/test/CodeGen/X86/widen_conv-3.ll +++ b/llvm/test/CodeGen/X86/widen_conv-3.ll @@ -106,8 
+106,6 @@ define void @convert_v3i8_to_v3f32(<3 x float>* %dst.addr, <3 x i8>* %src.addr) ; X64-SSE2: # BB#0: # %entry ; X64-SSE2-NEXT: movzwl (%rsi), %eax ; X64-SSE2-NEXT: movd %rax, %xmm0 -; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; X64-SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] ; X64-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] ; X64-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] ; X64-SSE2-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp) @@ -132,8 +130,6 @@ define void @convert_v3i8_to_v3f32(<3 x float>* %dst.addr, <3 x i8>* %src.addr) ; X64-SSE42-NEXT: movzbl 2(%rsi), %eax ; X64-SSE42-NEXT: movzwl (%rsi), %ecx ; X64-SSE42-NEXT: movd %rcx, %xmm0 -; X64-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; X64-SSE42-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] ; X64-SSE42-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero ; X64-SSE42-NEXT: pinsrd $2, %eax, %xmm0 ; X64-SSE42-NEXT: pslld $24, %xmm0 diff --git a/llvm/test/CodeGen/X86/widen_conv-4.ll b/llvm/test/CodeGen/X86/widen_conv-4.ll index e574407f980..ef56692e947 100644 --- a/llvm/test/CodeGen/X86/widen_conv-4.ll +++ b/llvm/test/CodeGen/X86/widen_conv-4.ll @@ -131,8 +131,6 @@ define void @convert_v3i8_to_v3f32(<3 x float>* %dst.addr, <3 x i8>* %src.addr) ; X64-SSE2: # BB#0: # %entry ; X64-SSE2-NEXT: movzwl (%rsi), %eax ; X64-SSE2-NEXT: movd %rax, %xmm0 -; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; X64-SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] ; X64-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] ; X64-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] ; X64-SSE2-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp) @@ -157,8 +155,6 @@ define void @convert_v3i8_to_v3f32(<3 x float>* %dst.addr, <3 x i8>* %src.addr) ; X64-SSE42-NEXT: movzbl 2(%rsi), %eax ; X64-SSE42-NEXT: movzwl (%rsi), %ecx ; X64-SSE42-NEXT: movd %rcx, %xmm0 -; X64-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; X64-SSE42-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] ; X64-SSE42-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero ; X64-SSE42-NEXT: pinsrd $2, %eax, %xmm0 ; X64-SSE42-NEXT: pand {{.*}}(%rip), %xmm0 |