| author | Craig Topper <craig.topper@intel.com> | 2018-02-09 05:54:34 +0000 |
|---|---|---|
| committer | Craig Topper <craig.topper@intel.com> | 2018-02-09 05:54:34 +0000 |
| commit | 28166a877d5e4e886c2c254e8f68197b644b62a6 | |
| tree | 103ee20b1d0f1d0a18df0e7a4b42b6e5bbf873a4 /llvm/test/CodeGen | |
| parent | 090e41d0cc657ac03c638ceb1610b628a9ef65d1 | |
[X86] Teach shuffle lowering to recognize 128/256 bit insertions into a zero vector.
This regresses a couple of cases in the shuffle-combining tests, but those cases use intrinsics that InstCombine already knows how to turn into a generic shuffle, so there should be opportunities to fold the pattern earlier in InstCombine or DAG combine.
llvm-svn: 324709
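To make the new pattern concrete, here is a minimal IR sketch distilled from the updated tests in the diff below (the function name `insert_low128_into_zero` is hypothetical): a shuffle that keeps the low 128 bits of a 256-bit vector and takes its upper lanes from a zero vector. Because VEX-encoded moves that write an XMM register implicitly zero the upper bits of the containing YMM/ZMM register, the whole shuffle can be lowered to a single move instead of a zeroing xor plus a blend.

```llvm
; Hypothetical reduced example, modeled on shuffle_v4f64_01zz below:
; lanes 0-1 come from %a, lanes 2-3 come from the zero vector.
define <4 x double> @insert_low128_into_zero(<4 x double> %a) {
  %s = shufflevector <4 x double> %a, <4 x double> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  ret <4 x double> %s
}
; Before: vxorps %xmm1, %xmm1, %xmm1
;         vblendps ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; After:  vmovaps %xmm0, %xmm0   (the VEX move zeroes bits 255:128)
```

The 512-bit tests below follow the same logic one level up: a YMM-to-YMM move zeroes bits 511:256, replacing a vpxor plus vshufi32x4 or vinsertf64x4.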
Diffstat (limited to 'llvm/test/CodeGen')
| mode | path | lines changed |
|---|---|---|
| -rw-r--r-- | llvm/test/CodeGen/X86/avx-vperm2x128.ll | 12 |
| -rw-r--r-- | llvm/test/CodeGen/X86/avx512-mask-op.ll | 12 |
| -rw-r--r-- | llvm/test/CodeGen/X86/vector-shuffle-512-v8.ll | 6 |
| -rw-r--r-- | llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll | 10 |
4 files changed, 16 insertions, 24 deletions
```diff
diff --git a/llvm/test/CodeGen/X86/avx-vperm2x128.ll b/llvm/test/CodeGen/X86/avx-vperm2x128.ll
index 9d61f743875..d86d2cc4129 100644
--- a/llvm/test/CodeGen/X86/avx-vperm2x128.ll
+++ b/llvm/test/CodeGen/X86/avx-vperm2x128.ll
@@ -435,8 +435,7 @@ define <4 x double> @shuffle_v4f64_zz67_optsize(<4 x double> %a) optsize {
 define <4 x double> @shuffle_v4f64_01zz(<4 x double> %a) {
 ; ALL-LABEL: shuffle_v4f64_01zz:
 ; ALL:       # %bb.0:
-; ALL-NEXT:    vxorps %xmm1, %xmm1, %xmm1
-; ALL-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
+; ALL-NEXT:    vmovaps %xmm0, %xmm0
 ; ALL-NEXT:    retq
   %s = shufflevector <4 x double> %a, <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
   ret <4 x double> %s
@@ -444,8 +443,7 @@ define <4 x double> @shuffle_v4f64_01zz(<4 x double> %a) {
 define <4 x double> @shuffle_v4f64_01zz_optsize(<4 x double> %a) optsize {
 ; ALL-LABEL: shuffle_v4f64_01zz_optsize:
 ; ALL:       # %bb.0:
-; ALL-NEXT:    vxorps %xmm1, %xmm1, %xmm1
-; ALL-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
+; ALL-NEXT:    vmovaps %xmm0, %xmm0
 ; ALL-NEXT:    retq
   %s = shufflevector <4 x double> %a, <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
   ret <4 x double> %s
@@ -471,8 +469,7 @@ define <4 x double> @shuffle_v4f64_23zz_optsize(<4 x double> %a) optsize {
 define <4 x double> @shuffle_v4f64_45zz(<4 x double> %a) {
 ; ALL-LABEL: shuffle_v4f64_45zz:
 ; ALL:       # %bb.0:
-; ALL-NEXT:    vxorps %xmm1, %xmm1, %xmm1
-; ALL-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
+; ALL-NEXT:    vmovaps %xmm0, %xmm0
 ; ALL-NEXT:    retq
   %s = shufflevector <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x double> %a, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
   ret <4 x double> %s
@@ -480,8 +477,7 @@ define <4 x double> @shuffle_v4f64_45zz(<4 x double> %a) {
 define <4 x double> @shuffle_v4f64_45zz_optsize(<4 x double> %a) optsize {
 ; ALL-LABEL: shuffle_v4f64_45zz_optsize:
 ; ALL:       # %bb.0:
-; ALL-NEXT:    vxorps %xmm1, %xmm1, %xmm1
-; ALL-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
+; ALL-NEXT:    vmovaps %xmm0, %xmm0
 ; ALL-NEXT:    retq
   %s = shufflevector <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x double> %a, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
   ret <4 x double> %s
diff --git a/llvm/test/CodeGen/X86/avx512-mask-op.ll b/llvm/test/CodeGen/X86/avx512-mask-op.ll
index 6e9b286810f..40d91356843 100644
--- a/llvm/test/CodeGen/X86/avx512-mask-op.ll
+++ b/llvm/test/CodeGen/X86/avx512-mask-op.ll
@@ -2759,8 +2759,7 @@ define <8 x i64> @mask_widening(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c, <2 x i
 ; KNL-NEXT:    kshiftlw $12, %k0, %k0
 ; KNL-NEXT:    kshiftrw $12, %k0, %k1
 ; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; KNL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
-; KNL-NEXT:    vshufi32x4 {{.*#+}} zmm0 = zmm0[0,1,2,3,4,5,6,7],zmm1[8,9,10,11,12,13,14,15]
+; KNL-NEXT:    vmovdqa %ymm0, %ymm0
 ; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k1
 ; KNL-NEXT:    vpblendmd %zmm5, %zmm4, %zmm0 {%k1}
 ; KNL-NEXT:    retq
@@ -2769,8 +2768,7 @@ define <8 x i64> @mask_widening(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c, <2 x i
 ; SKX:       ## %bb.0: ## %entry
 ; SKX-NEXT:    vpcmpeqd %xmm1, %xmm0, %k0
 ; SKX-NEXT:    vpmovm2d %k0, %zmm0
-; SKX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
-; SKX-NEXT:    vshufi32x4 {{.*#+}} zmm0 = zmm0[0,1,2,3,4,5,6,7],zmm1[8,9,10,11,12,13,14,15]
+; SKX-NEXT:    vmovdqa %ymm0, %ymm0
 ; SKX-NEXT:    vpmovd2m %zmm0, %k1
 ; SKX-NEXT:    vpblendmd %zmm5, %zmm4, %zmm0 {%k1}
 ; SKX-NEXT:    retq
@@ -2783,8 +2781,7 @@ define <8 x i64> @mask_widening(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c, <2 x i
 ; AVX512BW-NEXT:    kshiftlw $12, %k0, %k0
 ; AVX512BW-NEXT:    kshiftrw $12, %k0, %k1
 ; AVX512BW-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
-; AVX512BW-NEXT:    vshufi32x4 {{.*#+}} zmm0 = zmm0[0,1,2,3,4,5,6,7],zmm1[8,9,10,11,12,13,14,15]
+; AVX512BW-NEXT:    vmovdqa %ymm0, %ymm0
 ; AVX512BW-NEXT:    vptestmd %zmm0, %zmm0, %k1
 ; AVX512BW-NEXT:    vpblendmd %zmm5, %zmm4, %zmm0 {%k1}
 ; AVX512BW-NEXT:    retq
@@ -2797,8 +2794,7 @@ define <8 x i64> @mask_widening(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c, <2 x i
 ; AVX512DQ-NEXT:    kshiftlb $4, %k0, %k0
 ; AVX512DQ-NEXT:    kshiftrb $4, %k0, %k0
 ; AVX512DQ-NEXT:    vpmovm2d %k0, %zmm0
-; AVX512DQ-NEXT:    vpxor %xmm1, %xmm1, %xmm1
-; AVX512DQ-NEXT:    vshufi32x4 {{.*#+}} zmm0 = zmm0[0,1,2,3,4,5,6,7],zmm1[8,9,10,11,12,13,14,15]
+; AVX512DQ-NEXT:    vmovdqa %ymm0, %ymm0
 ; AVX512DQ-NEXT:    vpmovd2m %zmm0, %k1
 ; AVX512DQ-NEXT:    vpblendmd %zmm5, %zmm4, %zmm0 {%k1}
 ; AVX512DQ-NEXT:    retq
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-512-v8.ll b/llvm/test/CodeGen/X86/vector-shuffle-512-v8.ll
index ca72ea0eea6..7a288d9db4d 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-512-v8.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-512-v8.ll
@@ -2783,14 +2783,12 @@ define <8 x i64> @test_v8i64_insert_zero_128(<8 x i64> %a) {
 define <8 x i64> @test_v8i64_insert_zero_256(<8 x i64> %a) {
 ; AVX512F-LABEL: test_v8i64_insert_zero_256:
 ; AVX512F:       # %bb.0:
-; AVX512F-NEXT:    vxorps %xmm1, %xmm1, %xmm1
-; AVX512F-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
+; AVX512F-NEXT:    vmovaps %ymm0, %ymm0
 ; AVX512F-NEXT:    retq
 ;
 ; AVX512F-32-LABEL: test_v8i64_insert_zero_256:
 ; AVX512F-32:       # %bb.0:
-; AVX512F-32-NEXT:    vpxor %xmm1, %xmm1, %xmm1
-; AVX512F-32-NEXT:    vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[0,1,0,1]
+; AVX512F-32-NEXT:    vmovaps %ymm0, %ymm0
 ; AVX512F-32-NEXT:    retl
   %res = shufflevector <8 x i64> %a, <8 x i64> <i64 0, i64 0, i64 0, i64 0, i64 undef, i64 undef, i64 undef, i64 undef>, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 8, i32 9>
   ret <8 x i64> %res
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll
index cd2a25c69b1..2b503c3694a 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll
@@ -203,14 +203,16 @@ define <8 x float> @combine_vpermilvar_vperm2f128_zero_8f32(<8 x float> %a0) {
 define <4 x double> @combine_vperm2f128_vpermilvar_as_vpblendpd(<4 x double> %a0) {
 ; X32-LABEL: combine_vperm2f128_vpermilvar_as_vpblendpd:
 ; X32:       # %bb.0:
-; X32-NEXT:    vxorps %xmm1, %xmm1, %xmm1
-; X32-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
+; X32-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
+; X32-NEXT:    vmovapd %xmm0, %xmm0
+; X32-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: combine_vperm2f128_vpermilvar_as_vpblendpd:
 ; X64:       # %bb.0:
-; X64-NEXT:    vxorps %xmm1, %xmm1, %xmm1
-; X64-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
+; X64-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
+; X64-NEXT:    vmovapd %xmm0, %xmm0
+; X64-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
 ; X64-NEXT:    retq
   %1 = tail call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %a0, <4 x i64> <i64 2, i64 0, i64 2, i64 0>)
   %2 = shufflevector <4 x double> %1, <4 x double> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
```
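Regarding the regression the commit message mentions: in `combine_vperm2f128_vpermilvar_as_vpblendpd` above, the `vpermilvar` intrinsic calls hide the blend-with-zero from shuffle combining, so codegen now brackets the move with two `vpermilpd` shuffles. Once InstCombine canonicalizes the constant-mask intrinsic into a generic shuffle, the sequence should fold as before. A hedged sketch of that canonicalization (function name hypothetical, not part of this patch):

```llvm
; For @llvm.x86.avx.vpermilvar.pd.256, bit 1 of each i64 selector picks
; an element within its 128-bit lane, so the constant mask <2,0,2,0>
; is the in-lane swap <1,0,3,2>. InstCombine rewrites:
;   %r = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(
;            <4 x double> %a0, <4 x i64> <i64 2, i64 0, i64 2, i64 0>)
; into the equivalent generic shuffle:
define <4 x double> @canonicalized_vpermilvar(<4 x double> %a0) {
  %r = shufflevector <4 x double> %a0, <4 x double> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
  ret <4 x double> %r
}
```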

