diff options
Diffstat (limited to 'llvm/test/CodeGen/X86/prefer-avx256-mask-extend.ll')
-rw-r--r-- | llvm/test/CodeGen/X86/prefer-avx256-mask-extend.ll | 40 |
1 file changed, 23 insertions, 17 deletions
diff --git a/llvm/test/CodeGen/X86/prefer-avx256-mask-extend.ll b/llvm/test/CodeGen/X86/prefer-avx256-mask-extend.ll index e78c1d2d787..dc60fd5c672 100644 --- a/llvm/test/CodeGen/X86/prefer-avx256-mask-extend.ll +++ b/llvm/test/CodeGen/X86/prefer-avx256-mask-extend.ll @@ -135,8 +135,10 @@ define <8 x i16> @testv8i1_zext_v8i16(<8 x i32>* %p) { ; AVX256: # %bb.0: ; AVX256-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; AVX256-NEXT: vpcmpeqd (%rdi), %ymm0, %k1 -; AVX256-NEXT: vpbroadcastd {{.*}}(%rip), %ymm0 {%k1} {z} +; AVX256-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 +; AVX256-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; AVX256-NEXT: vpmovdw %ymm0, %xmm0 +; AVX256-NEXT: vpsrlw $15, %xmm0, %xmm0 ; AVX256-NEXT: vzeroupper ; AVX256-NEXT: retq ; @@ -144,8 +146,10 @@ define <8 x i16> @testv8i1_zext_v8i16(<8 x i32>* %p) { ; AVX512VL: # %bb.0: ; AVX512VL-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; AVX512VL-NEXT: vpcmpeqd (%rdi), %ymm0, %k1 -; AVX512VL-NEXT: vpbroadcastd {{.*}}(%rip), %ymm0 {%k1} {z} +; AVX512VL-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 +; AVX512VL-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; AVX512VL-NEXT: vpmovdw %ymm0, %xmm0 +; AVX512VL-NEXT: vpsrlw $15, %xmm0, %xmm0 ; AVX512VL-NEXT: vzeroupper ; AVX512VL-NEXT: retq ; @@ -169,15 +173,14 @@ define <16 x i8> @testv16i1_zext_v16i8(<8 x i32>* %p, <8 x i32>* %q) { ; AVX256-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; AVX256-NEXT: vpcmpeqd (%rdi), %ymm0, %k1 ; AVX256-NEXT: vpcmpeqd (%rsi), %ymm0, %k2 -; AVX256-NEXT: movl {{.*}}(%rip), %eax -; AVX256-NEXT: vpbroadcastd %eax, %ymm0 {%k2} {z} +; AVX256-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 +; AVX256-NEXT: vmovdqa32 %ymm0, %ymm1 {%k2} {z} +; AVX256-NEXT: vpmovdw %ymm1, %xmm1 +; AVX256-NEXT: vpsrlw $15, %xmm1, %xmm1 +; AVX256-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; AVX256-NEXT: vpmovdw %ymm0, %xmm0 -; AVX256-NEXT: vmovdqa {{.*#+}} xmm1 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u> -; AVX256-NEXT: vpshufb %xmm1, %xmm0, %xmm0 -; AVX256-NEXT: vpbroadcastd %eax, %ymm2 {%k1} {z} -; AVX256-NEXT: vpmovdw %ymm2, %xmm2 -; AVX256-NEXT: 
vpshufb %xmm1, %xmm2, %xmm1 -; AVX256-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] +; AVX256-NEXT: vpsrlw $15, %xmm0, %xmm0 +; AVX256-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 ; AVX256-NEXT: vzeroupper ; AVX256-NEXT: retq ; @@ -218,12 +221,13 @@ define <16 x i16> @testv16i1_zext_v16i16(<8 x i32>* %p, <8 x i32>* %q) { ; AVX256-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; AVX256-NEXT: vpcmpeqd (%rdi), %ymm0, %k1 ; AVX256-NEXT: vpcmpeqd (%rsi), %ymm0, %k2 -; AVX256-NEXT: movl {{.*}}(%rip), %eax -; AVX256-NEXT: vpbroadcastd %eax, %ymm0 {%k1} {z} -; AVX256-NEXT: vpmovdw %ymm0, %xmm0 -; AVX256-NEXT: vpbroadcastd %eax, %ymm1 {%k2} {z} +; AVX256-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 +; AVX256-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1} {z} ; AVX256-NEXT: vpmovdw %ymm1, %xmm1 -; AVX256-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 +; AVX256-NEXT: vmovdqa32 %ymm0, %ymm0 {%k2} {z} +; AVX256-NEXT: vpmovdw %ymm0, %xmm0 +; AVX256-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 +; AVX256-NEXT: vpsrlw $15, %ymm0, %ymm0 ; AVX256-NEXT: retq ; ; AVX512VL-LABEL: testv16i1_zext_v16i16: @@ -232,8 +236,9 @@ define <16 x i16> @testv16i1_zext_v16i16(<8 x i32>* %p, <8 x i32>* %q) { ; AVX512VL-NEXT: vpcmpeqd (%rdi), %ymm0, %k0 ; AVX512VL-NEXT: vpcmpeqd (%rsi), %ymm0, %k1 ; AVX512VL-NEXT: kunpckbw %k0, %k1, %k1 -; AVX512VL-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z} +; AVX512VL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512VL-NEXT: vpmovdw %zmm0, %ymm0 +; AVX512VL-NEXT: vpsrlw $15, %ymm0, %ymm0 ; AVX512VL-NEXT: retq ; ; AVX512F-LABEL: testv16i1_zext_v16i16: @@ -243,8 +248,9 @@ define <16 x i16> @testv16i1_zext_v16i16(<8 x i32>* %p, <8 x i32>* %q) { ; AVX512F-NEXT: vmovdqa (%rsi), %ymm0 ; AVX512F-NEXT: vptestnmd %zmm0, %zmm0, %k1 ; AVX512F-NEXT: kunpckbw %k0, %k1, %k1 -; AVX512F-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z} +; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512F-NEXT: vpmovdw %zmm0, %ymm0 +; AVX512F-NEXT: vpsrlw $15, %ymm0, %ymm0 ; AVX512F-NEXT: retq %in = load <8 x 
i32>, <8 x i32>* %p %cmp = icmp eq <8 x i32> %in, zeroinitializer |