diff options
| author | Craig Topper <craig.topper@intel.com> | 2018-01-14 02:05:51 +0000 |
|---|---|---|
| committer | Craig Topper <craig.topper@intel.com> | 2018-01-14 02:05:51 +0000 |
| commit | e9fc0cd920f98e6726f5874634c2a5d4fa0e6f5c (patch) | |
| tree | 8bd0dff17227e31e392a4de88f6d665b0ed28353 /llvm/test/CodeGen/X86 | |
| parent | 7a3b10184bd39d234e4c0a9ed41be2b110d0048d (diff) | |
| download | bcm5719-llvm-e9fc0cd920f98e6726f5874634c2a5d4fa0e6f5c.tar.gz bcm5719-llvm-e9fc0cd920f98e6726f5874634c2a5d4fa0e6f5c.zip | |
[X86] Improve legalization of vXi16/vXi8 selects.
Extend vXi1 conditions of vXi8/vXi16 selects even before type legalization gets a chance to split wide vectors. Previously we would only extend 128- and 256-bit vectors. But if we start with a 512-bit vector or wider that needs to be split, we wouldn't extend until after the split had taken place. By extending early, we improve the results of type legalization.
Don't widen the condition of 128/256-bit vXi16/vXi8 selects when we have BWI but not VLX. We can still use a mask register by widening the select to 512 bits instead. This is similar to what we already do for compares.
llvm-svn: 322450
Diffstat (limited to 'llvm/test/CodeGen/X86')
| -rw-r--r-- | llvm/test/CodeGen/X86/avg-mask.ll | 26 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/avx512-ext.ll | 68 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/avx512-mask-op.ll | 20 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/avx512-vec-cmp.ll | 24 |
4 files changed, 70 insertions, 68 deletions
diff --git a/llvm/test/CodeGen/X86/avg-mask.ll b/llvm/test/CodeGen/X86/avg-mask.ll index d32b0e70791..ae466b68825 100644 --- a/llvm/test/CodeGen/X86/avg-mask.ll +++ b/llvm/test/CodeGen/X86/avg-mask.ll @@ -139,28 +139,27 @@ define <64 x i8> @avg_v64i8_mask(<64 x i8> %a, <64 x i8> %b, <64 x i8> %src, i64 ; AVX512F-NEXT: movq %rsp, %rbp ; AVX512F-NEXT: andq $-32, %rsp ; AVX512F-NEXT: subq $64, %rsp -; AVX512F-NEXT: movq %rdi, %rax -; AVX512F-NEXT: shrq $32, %rax -; AVX512F-NEXT: movl %eax, {{[0-9]+}}(%rsp) ; AVX512F-NEXT: movl %edi, (%rsp) +; AVX512F-NEXT: shrq $32, %rdi +; AVX512F-NEXT: movl %edi, {{[0-9]+}}(%rsp) ; AVX512F-NEXT: vpavgb %ymm3, %ymm1, %ymm1 ; AVX512F-NEXT: vpavgb %ymm2, %ymm0, %ymm0 -; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k1 +; AVX512F-NEXT: kmovw (%rsp), %k1 ; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k2 ; AVX512F-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; AVX512F-NEXT: vpmovdb %zmm2, %xmm2 ; AVX512F-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k2} {z} ; AVX512F-NEXT: vpmovdb %zmm3, %xmm3 ; AVX512F-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2 -; AVX512F-NEXT: vpblendvb %ymm2, %ymm1, %ymm5, %ymm1 -; AVX512F-NEXT: kmovw (%rsp), %k1 +; AVX512F-NEXT: vpblendvb %ymm2, %ymm0, %ymm4, %ymm0 +; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k1 ; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k2 ; AVX512F-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; AVX512F-NEXT: vpmovdb %zmm2, %xmm2 ; AVX512F-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k2} {z} ; AVX512F-NEXT: vpmovdb %zmm3, %xmm3 ; AVX512F-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2 -; AVX512F-NEXT: vpblendvb %ymm2, %ymm0, %ymm4, %ymm0 +; AVX512F-NEXT: vpblendvb %ymm2, %ymm1, %ymm5, %ymm1 ; AVX512F-NEXT: movq %rbp, %rsp ; AVX512F-NEXT: popq %rbp ; AVX512F-NEXT: retq @@ -189,28 +188,27 @@ define <64 x i8> @avg_v64i8_maskz(<64 x i8> %a, <64 x i8> %b, i64 %mask) nounwin ; AVX512F-NEXT: movq %rsp, %rbp ; AVX512F-NEXT: andq $-32, %rsp ; AVX512F-NEXT: subq $64, %rsp -; AVX512F-NEXT: movq %rdi, %rax -; 
AVX512F-NEXT: shrq $32, %rax -; AVX512F-NEXT: movl %eax, {{[0-9]+}}(%rsp) ; AVX512F-NEXT: movl %edi, (%rsp) +; AVX512F-NEXT: shrq $32, %rdi +; AVX512F-NEXT: movl %edi, {{[0-9]+}}(%rsp) ; AVX512F-NEXT: vpavgb %ymm3, %ymm1, %ymm1 ; AVX512F-NEXT: vpavgb %ymm2, %ymm0, %ymm0 -; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k1 +; AVX512F-NEXT: kmovw (%rsp), %k1 ; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k2 ; AVX512F-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; AVX512F-NEXT: vpmovdb %zmm2, %xmm2 ; AVX512F-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k2} {z} ; AVX512F-NEXT: vpmovdb %zmm3, %xmm3 ; AVX512F-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2 -; AVX512F-NEXT: vpand %ymm1, %ymm2, %ymm1 -; AVX512F-NEXT: kmovw (%rsp), %k1 +; AVX512F-NEXT: vpand %ymm0, %ymm2, %ymm0 +; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k1 ; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k2 ; AVX512F-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; AVX512F-NEXT: vpmovdb %zmm2, %xmm2 ; AVX512F-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k2} {z} ; AVX512F-NEXT: vpmovdb %zmm3, %xmm3 ; AVX512F-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2 -; AVX512F-NEXT: vpand %ymm0, %ymm2, %ymm0 +; AVX512F-NEXT: vpand %ymm1, %ymm2, %ymm1 ; AVX512F-NEXT: movq %rbp, %rsp ; AVX512F-NEXT: popq %rbp ; AVX512F-NEXT: retq diff --git a/llvm/test/CodeGen/X86/avx512-ext.ll b/llvm/test/CodeGen/X86/avx512-ext.ll index e3bde5d327e..5a6a2f09d77 100644 --- a/llvm/test/CodeGen/X86/avx512-ext.ll +++ b/llvm/test/CodeGen/X86/avx512-ext.ll @@ -152,18 +152,17 @@ define <16 x i16> @sext_16x8_to_16x16_mask(<16 x i8> %a ,<16 x i1> %mask) nounwi define <32 x i16> @zext_32x8mem_to_32x16(<32 x i8> *%i , <32 x i1> %mask) nounwind readnone { ; KNL-LABEL: zext_32x8mem_to_32x16: ; KNL: # %bb.0: -; KNL-NEXT: vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero -; KNL-NEXT: vpmovzxbw 
{{.*#+}} ymm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero -; KNL-NEXT: vpmovzxbw {{.*#+}} ymm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero -; KNL-NEXT: vpsllw $15, %ymm3, %ymm3 -; KNL-NEXT: vpsraw $15, %ymm3, %ymm3 -; KNL-NEXT: vpand %ymm2, %ymm3, %ymm2 -; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0 +; KNL-NEXT: vextracti128 $1, %ymm0, %xmm1 +; KNL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero +; KNL-NEXT: vpmovzxbw {{.*#+}} ymm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero +; KNL-NEXT: vpmovzxbw {{.*#+}} ymm3 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero ; KNL-NEXT: vpsllw $15, %ymm0, %ymm0 ; KNL-NEXT: vpsraw $15, %ymm0, %ymm0 -; KNL-NEXT: vpand %ymm1, %ymm0, %ymm1 -; KNL-NEXT: vmovdqa %ymm2, %ymm0 +; KNL-NEXT: vpand %ymm3, %ymm0, %ymm0 +; KNL-NEXT: vpsllw $15, %ymm1, %ymm1 +; KNL-NEXT: vpsraw $15, %ymm1, %ymm1 +; KNL-NEXT: vpand %ymm2, %ymm1, %ymm1 ; KNL-NEXT: retq ; ; SKX-LABEL: 
zext_32x8mem_to_32x16: @@ -181,18 +180,17 @@ define <32 x i16> @zext_32x8mem_to_32x16(<32 x i8> *%i , <32 x i1> %mask) nounwi define <32 x i16> @sext_32x8mem_to_32x16(<32 x i8> *%i , <32 x i1> %mask) nounwind readnone { ; KNL-LABEL: sext_32x8mem_to_32x16: ; KNL: # %bb.0: -; KNL-NEXT: vpmovsxbw 16(%rdi), %ymm1 -; KNL-NEXT: vpmovsxbw (%rdi), %ymm2 -; KNL-NEXT: vpmovzxbw {{.*#+}} ymm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero -; KNL-NEXT: vpsllw $15, %ymm3, %ymm3 -; KNL-NEXT: vpsraw $15, %ymm3, %ymm3 -; KNL-NEXT: vpand %ymm2, %ymm3, %ymm2 -; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0 +; KNL-NEXT: vextracti128 $1, %ymm0, %xmm1 +; KNL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero +; KNL-NEXT: vpmovsxbw 16(%rdi), %ymm2 +; KNL-NEXT: vpmovsxbw (%rdi), %ymm3 ; KNL-NEXT: vpsllw $15, %ymm0, %ymm0 ; KNL-NEXT: vpsraw $15, %ymm0, %ymm0 -; KNL-NEXT: vpand %ymm1, %ymm0, %ymm1 -; KNL-NEXT: vmovdqa %ymm2, %ymm0 +; KNL-NEXT: vpand %ymm3, %ymm0, %ymm0 +; KNL-NEXT: vpsllw $15, %ymm1, %ymm1 +; KNL-NEXT: vpsraw $15, %ymm1, %ymm1 +; KNL-NEXT: vpand %ymm2, %ymm1, %ymm1 ; KNL-NEXT: retq ; ; SKX-LABEL: sext_32x8mem_to_32x16: @@ -227,18 +225,18 @@ define <32 x i16> @zext_32x8_to_32x16(<32 x i8> %a ) nounwind readnone { define <32 x i16> @zext_32x8_to_32x16_mask(<32 x i8> %a ,<32 x i1> %mask) nounwind readnone { ; KNL-LABEL: zext_32x8_to_32x16_mask: ; KNL: # %bb.0: -; KNL-NEXT: 
vextracti128 $1, %ymm0, %xmm2 +; KNL-NEXT: vextracti128 $1, %ymm1, %xmm2 ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero -; KNL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero -; KNL-NEXT: vpmovzxbw {{.*#+}} ymm3 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero -; KNL-NEXT: vpsllw $15, %ymm3, %ymm3 -; KNL-NEXT: vpsraw $15, %ymm3, %ymm3 -; KNL-NEXT: vpand %ymm0, %ymm3, %ymm0 -; KNL-NEXT: vextracti128 $1, %ymm1, %xmm1 ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero +; KNL-NEXT: vextracti128 $1, %ymm0, %xmm3 +; KNL-NEXT: vpmovzxbw {{.*#+}} ymm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero,xmm3[8],zero,xmm3[9],zero,xmm3[10],zero,xmm3[11],zero,xmm3[12],zero,xmm3[13],zero,xmm3[14],zero,xmm3[15],zero +; KNL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero ; KNL-NEXT: vpsllw $15, %ymm1, %ymm1 ; KNL-NEXT: vpsraw $15, %ymm1, %ymm1 -; KNL-NEXT: vpand %ymm2, %ymm1, %ymm1 +; KNL-NEXT: vpand %ymm0, %ymm1, %ymm0 +; KNL-NEXT: vpsllw $15, %ymm2, %ymm1 +; KNL-NEXT: vpsraw 
$15, %ymm1, %ymm1 +; KNL-NEXT: vpand %ymm3, %ymm1, %ymm1 ; KNL-NEXT: retq ; ; SKX-LABEL: zext_32x8_to_32x16_mask: @@ -272,18 +270,18 @@ define <32 x i16> @sext_32x8_to_32x16(<32 x i8> %a ) nounwind readnone { define <32 x i16> @sext_32x8_to_32x16_mask(<32 x i8> %a ,<32 x i1> %mask) nounwind readnone { ; KNL-LABEL: sext_32x8_to_32x16_mask: ; KNL: # %bb.0: -; KNL-NEXT: vextracti128 $1, %ymm0, %xmm2 -; KNL-NEXT: vpmovsxbw %xmm2, %ymm2 -; KNL-NEXT: vpmovsxbw %xmm0, %ymm0 -; KNL-NEXT: vpmovzxbw {{.*#+}} ymm3 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero -; KNL-NEXT: vpsllw $15, %ymm3, %ymm3 -; KNL-NEXT: vpsraw $15, %ymm3, %ymm3 -; KNL-NEXT: vpand %ymm0, %ymm3, %ymm0 -; KNL-NEXT: vextracti128 $1, %ymm1, %xmm1 +; KNL-NEXT: vextracti128 $1, %ymm1, %xmm2 +; KNL-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero +; KNL-NEXT: vextracti128 $1, %ymm0, %xmm3 +; KNL-NEXT: vpmovsxbw %xmm3, %ymm3 +; KNL-NEXT: vpmovsxbw %xmm0, %ymm0 ; KNL-NEXT: vpsllw $15, %ymm1, %ymm1 ; KNL-NEXT: vpsraw $15, %ymm1, %ymm1 -; KNL-NEXT: vpand %ymm2, %ymm1, %ymm1 +; KNL-NEXT: vpand %ymm0, %ymm1, %ymm0 +; KNL-NEXT: vpsllw $15, %ymm2, %ymm1 +; KNL-NEXT: vpsraw $15, %ymm1, %ymm1 +; KNL-NEXT: vpand %ymm3, %ymm1, %ymm1 ; KNL-NEXT: retq ; ; SKX-LABEL: sext_32x8_to_32x16_mask: diff --git a/llvm/test/CodeGen/X86/avx512-mask-op.ll b/llvm/test/CodeGen/X86/avx512-mask-op.ll index 794ce0cacfd..df884bba5bc 
100644 --- a/llvm/test/CodeGen/X86/avx512-mask-op.ll +++ b/llvm/test/CodeGen/X86/avx512-mask-op.ll @@ -1281,14 +1281,14 @@ define <8 x i1> @test18(i8 %a, i16 %y) { define <32 x i16> @test21(<32 x i16> %x , <32 x i1> %mask) nounwind readnone { ; KNL-LABEL: test21: ; KNL: ## %bb.0: -; KNL-NEXT: vpmovzxbw {{.*#+}} ymm3 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero -; KNL-NEXT: vpsllw $15, %ymm3, %ymm3 -; KNL-NEXT: vpsraw $15, %ymm3, %ymm3 -; KNL-NEXT: vpand %ymm0, %ymm3, %ymm0 -; KNL-NEXT: vextracti128 $1, %ymm2, %xmm2 +; KNL-NEXT: vextracti128 $1, %ymm2, %xmm3 +; KNL-NEXT: vpmovzxbw {{.*#+}} ymm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero,xmm3[8],zero,xmm3[9],zero,xmm3[10],zero,xmm3[11],zero,xmm3[12],zero,xmm3[13],zero,xmm3[14],zero,xmm3[15],zero ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero ; KNL-NEXT: vpsllw $15, %ymm2, %ymm2 ; KNL-NEXT: vpsraw $15, %ymm2, %ymm2 +; KNL-NEXT: vpand %ymm0, %ymm2, %ymm0 +; KNL-NEXT: vpsllw $15, %ymm3, %ymm2 +; KNL-NEXT: vpsraw $15, %ymm2, %ymm2 ; KNL-NEXT: vpand %ymm1, %ymm2, %ymm1 ; KNL-NEXT: retq ; @@ -1308,14 +1308,14 @@ define <32 x i16> @test21(<32 x i16> %x , <32 x i1> %mask) nounwind readnone { ; ; AVX512DQ-LABEL: test21: ; AVX512DQ: ## %bb.0: -; AVX512DQ-NEXT: vpmovzxbw {{.*#+}} ymm3 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero -; AVX512DQ-NEXT: vpsllw $15, %ymm3, %ymm3 -; AVX512DQ-NEXT: vpsraw $15, %ymm3, %ymm3 -; 
AVX512DQ-NEXT: vpand %ymm0, %ymm3, %ymm0 -; AVX512DQ-NEXT: vextracti128 $1, %ymm2, %xmm2 +; AVX512DQ-NEXT: vextracti128 $1, %ymm2, %xmm3 +; AVX512DQ-NEXT: vpmovzxbw {{.*#+}} ymm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero,xmm3[8],zero,xmm3[9],zero,xmm3[10],zero,xmm3[11],zero,xmm3[12],zero,xmm3[13],zero,xmm3[14],zero,xmm3[15],zero ; AVX512DQ-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero ; AVX512DQ-NEXT: vpsllw $15, %ymm2, %ymm2 ; AVX512DQ-NEXT: vpsraw $15, %ymm2, %ymm2 +; AVX512DQ-NEXT: vpand %ymm0, %ymm2, %ymm0 +; AVX512DQ-NEXT: vpsllw $15, %ymm3, %ymm2 +; AVX512DQ-NEXT: vpsraw $15, %ymm2, %ymm2 ; AVX512DQ-NEXT: vpand %ymm1, %ymm2, %ymm1 ; AVX512DQ-NEXT: retq %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer diff --git a/llvm/test/CodeGen/X86/avx512-vec-cmp.ll b/llvm/test/CodeGen/X86/avx512-vec-cmp.ll index a141b8773a0..61c013a4240 100644 --- a/llvm/test/CodeGen/X86/avx512-vec-cmp.ll +++ b/llvm/test/CodeGen/X86/avx512-vec-cmp.ll @@ -961,10 +961,12 @@ define <16 x i8> @test47(<16 x i32> %a, <16 x i8> %b, <16 x i8> %c) { ; ; AVX512BW-LABEL: test47: ; AVX512BW: ## %bb.0: +; AVX512BW-NEXT: ## kill: def %xmm2 killed %xmm2 def %zmm2 +; AVX512BW-NEXT: ## kill: def %xmm1 killed %xmm1 def %zmm1 ; AVX512BW-NEXT: vpxor %xmm3, %xmm3, %xmm3 -; AVX512BW-NEXT: vpcmpeqd %zmm3, %zmm0, %k0 -; AVX512BW-NEXT: vpmovm2b %k0, %zmm0 -; AVX512BW-NEXT: vpblendvb %xmm0, %xmm1, %xmm2, %xmm0 +; AVX512BW-NEXT: vpcmpeqd %zmm3, %zmm0, %k1 +; AVX512BW-NEXT: vpblendmb %zmm1, %zmm2, %zmm0 {%k1} +; AVX512BW-NEXT: ## kill: def %xmm0 killed %xmm0 killed %zmm0 ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; @@ -992,10 +994,12 @@ define <16 x i16> @test48(<16 x i32> %a, <16 x i16> %b, <16 x i16> %c) { ; ; 
AVX512BW-LABEL: test48: ; AVX512BW: ## %bb.0: +; AVX512BW-NEXT: ## kill: def %ymm2 killed %ymm2 def %zmm2 +; AVX512BW-NEXT: ## kill: def %ymm1 killed %ymm1 def %zmm1 ; AVX512BW-NEXT: vpxor %xmm3, %xmm3, %xmm3 -; AVX512BW-NEXT: vpcmpeqd %zmm3, %zmm0, %k0 -; AVX512BW-NEXT: vpmovm2w %k0, %zmm0 -; AVX512BW-NEXT: vpblendvb %ymm0, %ymm1, %ymm2, %ymm0 +; AVX512BW-NEXT: vpcmpeqd %zmm3, %zmm0, %k1 +; AVX512BW-NEXT: vpblendmw %zmm1, %zmm2, %zmm0 {%k1} +; AVX512BW-NEXT: ## kill: def %ymm0 killed %ymm0 killed %zmm0 ; AVX512BW-NEXT: retq ; ; SKX-LABEL: test48: @@ -1022,10 +1026,12 @@ define <8 x i16> @test49(<8 x i64> %a, <8 x i16> %b, <8 x i16> %c) { ; ; AVX512BW-LABEL: test49: ; AVX512BW: ## %bb.0: +; AVX512BW-NEXT: ## kill: def %xmm2 killed %xmm2 def %zmm2 +; AVX512BW-NEXT: ## kill: def %xmm1 killed %xmm1 def %zmm1 ; AVX512BW-NEXT: vpxor %xmm3, %xmm3, %xmm3 -; AVX512BW-NEXT: vpcmpeqq %zmm3, %zmm0, %k0 -; AVX512BW-NEXT: vpmovm2w %k0, %zmm0 -; AVX512BW-NEXT: vpblendvb %xmm0, %xmm1, %xmm2, %xmm0 +; AVX512BW-NEXT: vpcmpeqq %zmm3, %zmm0, %k1 +; AVX512BW-NEXT: vpblendmw %zmm1, %zmm2, %zmm0 {%k1} +; AVX512BW-NEXT: ## kill: def %xmm0 killed %xmm0 killed %zmm0 ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; |

