summaryrefslogtreecommitdiffstats
path: root/llvm/test/CodeGen
diff options
context:
space:
mode:
authorCraig Topper <craig.topper@intel.com>2017-12-14 08:25:58 +0000
committerCraig Topper <craig.topper@intel.com>2017-12-14 08:25:58 +0000
commiteab2d4665fdb6a28664c6936d58adc6b35796db1 (patch)
tree231e6e3bab5b6b848067ff49d9ee3607931da46b /llvm/test/CodeGen
parent271a5c72a0f43f8da540800dba016f5d1337c997 (diff)
downloadbcm5719-llvm-eab2d4665fdb6a28664c6936d58adc6b35796db1.tar.gz
bcm5719-llvm-eab2d4665fdb6a28664c6936d58adc6b35796db1.zip
[SelectionDAG][X86] Improve legalization of v32i1 CONCAT_VECTORS of v16i1 for AVX512F.
A v32i1 CONCAT_VECTORS of v16i1 uses promotion to v32i8 to legalize the v32i1. This results in a bunch of extract_vector_elts and a build_vector that ultimately gets scalarized. This patch checks to see if v16i8 is legal and, if so, inserts an any_extend of each v16i1 operand to v16i8 so that we can concat the v16i8 values into a v32i8 and avoid creating the extracts. llvm-svn: 320674
Diffstat (limited to 'llvm/test/CodeGen')
-rw-r--r--llvm/test/CodeGen/X86/avx512-mask-op.ll816
-rw-r--r--llvm/test/CodeGen/X86/avx512-vec-cmp.ll264
-rw-r--r--llvm/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll5326
-rw-r--r--llvm/test/CodeGen/X86/bitcast-and-setcc-512.ll276
-rw-r--r--llvm/test/CodeGen/X86/bitcast-setcc-512.ll132
-rw-r--r--llvm/test/CodeGen/X86/vector-compare-results.ll544
6 files changed, 508 insertions, 6850 deletions
diff --git a/llvm/test/CodeGen/X86/avx512-mask-op.ll b/llvm/test/CodeGen/X86/avx512-mask-op.ll
index 3a4075b9d86..9b58ee5935d 100644
--- a/llvm/test/CodeGen/X86/avx512-mask-op.ll
+++ b/llvm/test/CodeGen/X86/avx512-mask-op.ll
@@ -1802,138 +1802,137 @@ define void @ktest_2(<32 x float> %in, float * %base) {
; KNL-NEXT: subq $32, %rsp
; KNL-NEXT: vmovups (%rdi), %zmm2
; KNL-NEXT: vmovups 64(%rdi), %zmm3
-; KNL-NEXT: vcmpltps %zmm1, %zmm3, %k1
+; KNL-NEXT: vcmpltps %zmm0, %zmm2, %k1
; KNL-NEXT: kshiftlw $14, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: kshiftlw $15, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %ecx
-; KNL-NEXT: vmovd %ecx, %xmm3
-; KNL-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3
+; KNL-NEXT: vmovd %ecx, %xmm2
+; KNL-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
; KNL-NEXT: kshiftlw $13, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3
+; KNL-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
; KNL-NEXT: kshiftlw $12, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3
+; KNL-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
; KNL-NEXT: kshiftlw $11, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3
+; KNL-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
; KNL-NEXT: kshiftlw $10, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3
+; KNL-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
; KNL-NEXT: kshiftlw $9, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3
+; KNL-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
; KNL-NEXT: kshiftlw $8, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3
+; KNL-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
; KNL-NEXT: kshiftlw $7, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3
+; KNL-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
; KNL-NEXT: kshiftlw $6, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3
+; KNL-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
; KNL-NEXT: kshiftlw $5, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3
+; KNL-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
; KNL-NEXT: kshiftlw $4, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3
+; KNL-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
; KNL-NEXT: kshiftlw $3, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3
+; KNL-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
; KNL-NEXT: kshiftlw $2, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3
+; KNL-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
; KNL-NEXT: kshiftlw $1, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3
+; KNL-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
; KNL-NEXT: kshiftrw $15, %k1, %k0
; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3
-; KNL-NEXT: vcmpltps %zmm0, %zmm2, %k2
+; KNL-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
+; KNL-NEXT: vcmpltps %zmm1, %zmm3, %k2
; KNL-NEXT: kshiftlw $14, %k2, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: kshiftlw $15, %k2, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %ecx
-; KNL-NEXT: vmovd %ecx, %xmm2
-; KNL-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
+; KNL-NEXT: vmovd %ecx, %xmm3
+; KNL-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $13, %k2, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
+; KNL-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $12, %k2, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
+; KNL-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $11, %k2, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
+; KNL-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $10, %k2, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
+; KNL-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $9, %k2, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
+; KNL-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $8, %k2, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
+; KNL-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $7, %k2, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
+; KNL-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $6, %k2, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
+; KNL-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $5, %k2, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
+; KNL-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $4, %k2, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
+; KNL-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $3, %k2, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
+; KNL-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $2, %k2, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
+; KNL-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $1, %k2, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
+; KNL-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftrw $15, %k2, %k0
; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
-; KNL-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
-; KNL-NEXT: vmovups 4(%rdi), %zmm3 {%k2} {z}
-; KNL-NEXT: vmovups 68(%rdi), %zmm4 {%k1} {z}
-; KNL-NEXT: vcmpltps %zmm4, %zmm1, %k0
+; KNL-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3
+; KNL-NEXT: vmovups 68(%rdi), %zmm5 {%k2} {z}
+; KNL-NEXT: vmovups 4(%rdi), %zmm4 {%k1} {z}
+; KNL-NEXT: vcmpltps %zmm4, %zmm0, %k0
; KNL-NEXT: kshiftlw $14, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
@@ -1997,77 +1996,76 @@ define void @ktest_2(<32 x float> %in, float * %base) {
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $15, %eax, %xmm4, %xmm4
-; KNL-NEXT: vcmpltps %zmm3, %zmm0, %k0
+; KNL-NEXT: vcmpltps %zmm5, %zmm1, %k0
; KNL-NEXT: kshiftlw $14, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: kshiftlw $15, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %ecx
-; KNL-NEXT: vmovd %ecx, %xmm3
-; KNL-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3
+; KNL-NEXT: vmovd %ecx, %xmm5
+; KNL-NEXT: vpinsrb $1, %eax, %xmm5, %xmm5
; KNL-NEXT: kshiftlw $13, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3
+; KNL-NEXT: vpinsrb $2, %eax, %xmm5, %xmm5
; KNL-NEXT: kshiftlw $12, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3
+; KNL-NEXT: vpinsrb $3, %eax, %xmm5, %xmm5
; KNL-NEXT: kshiftlw $11, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3
+; KNL-NEXT: vpinsrb $4, %eax, %xmm5, %xmm5
; KNL-NEXT: kshiftlw $10, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3
+; KNL-NEXT: vpinsrb $5, %eax, %xmm5, %xmm5
; KNL-NEXT: kshiftlw $9, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3
+; KNL-NEXT: vpinsrb $6, %eax, %xmm5, %xmm5
; KNL-NEXT: kshiftlw $8, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3
+; KNL-NEXT: vpinsrb $7, %eax, %xmm5, %xmm5
; KNL-NEXT: kshiftlw $7, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3
+; KNL-NEXT: vpinsrb $8, %eax, %xmm5, %xmm5
; KNL-NEXT: kshiftlw $6, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3
+; KNL-NEXT: vpinsrb $9, %eax, %xmm5, %xmm5
; KNL-NEXT: kshiftlw $5, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3
+; KNL-NEXT: vpinsrb $10, %eax, %xmm5, %xmm5
; KNL-NEXT: kshiftlw $4, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3
+; KNL-NEXT: vpinsrb $11, %eax, %xmm5, %xmm5
; KNL-NEXT: kshiftlw $3, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3
+; KNL-NEXT: vpinsrb $12, %eax, %xmm5, %xmm5
; KNL-NEXT: kshiftlw $2, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3
+; KNL-NEXT: vpinsrb $13, %eax, %xmm5, %xmm5
; KNL-NEXT: kshiftlw $1, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3
+; KNL-NEXT: vpinsrb $14, %eax, %xmm5, %xmm5
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3
-; KNL-NEXT: vinserti128 $1, %xmm4, %ymm3, %ymm3
-; KNL-NEXT: vpor %ymm3, %ymm2, %ymm2
-; KNL-NEXT: vextracti128 $1, %ymm2, %xmm3
+; KNL-NEXT: vpinsrb $15, %eax, %xmm5, %xmm5
+; KNL-NEXT: vpor %xmm5, %xmm3, %xmm3
; KNL-NEXT: vpmovsxbd %xmm3, %zmm3
; KNL-NEXT: vpslld $31, %zmm3, %zmm3
; KNL-NEXT: vptestmd %zmm3, %zmm3, %k0
; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; KNL-NEXT: vpor %xmm4, %xmm2, %xmm2
; KNL-NEXT: vpmovsxbd %xmm2, %zmm2
; KNL-NEXT: vpslld $31, %zmm2, %zmm2
; KNL-NEXT: vptestmd %zmm2, %zmm2, %k0
@@ -2150,138 +2148,137 @@ define void @ktest_2(<32 x float> %in, float * %base) {
; AVX512DQ-NEXT: subq $32, %rsp
; AVX512DQ-NEXT: vmovups (%rdi), %zmm2
; AVX512DQ-NEXT: vmovups 64(%rdi), %zmm3
-; AVX512DQ-NEXT: vcmpltps %zmm1, %zmm3, %k1
+; AVX512DQ-NEXT: vcmpltps %zmm0, %zmm2, %k1
; AVX512DQ-NEXT: kshiftlw $14, %k1, %k0
; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
; AVX512DQ-NEXT: kmovw %k0, %eax
; AVX512DQ-NEXT: kshiftlw $15, %k1, %k0
; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
; AVX512DQ-NEXT: kmovw %k0, %ecx
-; AVX512DQ-NEXT: vmovd %ecx, %xmm3
-; AVX512DQ-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3
+; AVX512DQ-NEXT: vmovd %ecx, %xmm2
+; AVX512DQ-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
; AVX512DQ-NEXT: kshiftlw $13, %k1, %k0
; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3
+; AVX512DQ-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
; AVX512DQ-NEXT: kshiftlw $12, %k1, %k0
; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3
+; AVX512DQ-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
; AVX512DQ-NEXT: kshiftlw $11, %k1, %k0
; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3
+; AVX512DQ-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
; AVX512DQ-NEXT: kshiftlw $10, %k1, %k0
; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3
+; AVX512DQ-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
; AVX512DQ-NEXT: kshiftlw $9, %k1, %k0
; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3
+; AVX512DQ-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
; AVX512DQ-NEXT: kshiftlw $8, %k1, %k0
; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3
+; AVX512DQ-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
; AVX512DQ-NEXT: kshiftlw $7, %k1, %k0
; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3
+; AVX512DQ-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
; AVX512DQ-NEXT: kshiftlw $6, %k1, %k0
; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3
+; AVX512DQ-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
; AVX512DQ-NEXT: kshiftlw $5, %k1, %k0
; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3
+; AVX512DQ-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
; AVX512DQ-NEXT: kshiftlw $4, %k1, %k0
; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3
+; AVX512DQ-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
; AVX512DQ-NEXT: kshiftlw $3, %k1, %k0
; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3
+; AVX512DQ-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
; AVX512DQ-NEXT: kshiftlw $2, %k1, %k0
; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3
+; AVX512DQ-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
; AVX512DQ-NEXT: kshiftlw $1, %k1, %k0
; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3
+; AVX512DQ-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
; AVX512DQ-NEXT: kshiftrw $15, %k1, %k0
; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3
-; AVX512DQ-NEXT: vcmpltps %zmm0, %zmm2, %k2
+; AVX512DQ-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
+; AVX512DQ-NEXT: vcmpltps %zmm1, %zmm3, %k2
; AVX512DQ-NEXT: kshiftlw $14, %k2, %k0
; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
; AVX512DQ-NEXT: kmovw %k0, %eax
; AVX512DQ-NEXT: kshiftlw $15, %k2, %k0
; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
; AVX512DQ-NEXT: kmovw %k0, %ecx
-; AVX512DQ-NEXT: vmovd %ecx, %xmm2
-; AVX512DQ-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
+; AVX512DQ-NEXT: vmovd %ecx, %xmm3
+; AVX512DQ-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3
; AVX512DQ-NEXT: kshiftlw $13, %k2, %k0
; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
+; AVX512DQ-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3
; AVX512DQ-NEXT: kshiftlw $12, %k2, %k0
; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
+; AVX512DQ-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3
; AVX512DQ-NEXT: kshiftlw $11, %k2, %k0
; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
+; AVX512DQ-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3
; AVX512DQ-NEXT: kshiftlw $10, %k2, %k0
; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
+; AVX512DQ-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3
; AVX512DQ-NEXT: kshiftlw $9, %k2, %k0
; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
+; AVX512DQ-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3
; AVX512DQ-NEXT: kshiftlw $8, %k2, %k0
; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
+; AVX512DQ-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3
; AVX512DQ-NEXT: kshiftlw $7, %k2, %k0
; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
+; AVX512DQ-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3
; AVX512DQ-NEXT: kshiftlw $6, %k2, %k0
; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
+; AVX512DQ-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3
; AVX512DQ-NEXT: kshiftlw $5, %k2, %k0
; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
+; AVX512DQ-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3
; AVX512DQ-NEXT: kshiftlw $4, %k2, %k0
; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
+; AVX512DQ-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3
; AVX512DQ-NEXT: kshiftlw $3, %k2, %k0
; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
+; AVX512DQ-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3
; AVX512DQ-NEXT: kshiftlw $2, %k2, %k0
; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
+; AVX512DQ-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3
; AVX512DQ-NEXT: kshiftlw $1, %k2, %k0
; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
+; AVX512DQ-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3
; AVX512DQ-NEXT: kshiftrw $15, %k2, %k0
; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
-; AVX512DQ-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
-; AVX512DQ-NEXT: vmovups 4(%rdi), %zmm3 {%k2} {z}
-; AVX512DQ-NEXT: vmovups 68(%rdi), %zmm4 {%k1} {z}
-; AVX512DQ-NEXT: vcmpltps %zmm4, %zmm1, %k0
+; AVX512DQ-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3
+; AVX512DQ-NEXT: vmovups 68(%rdi), %zmm5 {%k2} {z}
+; AVX512DQ-NEXT: vmovups 4(%rdi), %zmm4 {%k1} {z}
+; AVX512DQ-NEXT: vcmpltps %zmm4, %zmm0, %k0
; AVX512DQ-NEXT: kshiftlw $14, %k0, %k1
; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
; AVX512DQ-NEXT: kmovw %k1, %eax
@@ -2345,77 +2342,76 @@ define void @ktest_2(<32 x float> %in, float * %base) {
; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
; AVX512DQ-NEXT: kmovw %k0, %eax
; AVX512DQ-NEXT: vpinsrb $15, %eax, %xmm4, %xmm4
-; AVX512DQ-NEXT: vcmpltps %zmm3, %zmm0, %k0
+; AVX512DQ-NEXT: vcmpltps %zmm5, %zmm1, %k0
; AVX512DQ-NEXT: kshiftlw $14, %k0, %k1
; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
; AVX512DQ-NEXT: kmovw %k1, %eax
; AVX512DQ-NEXT: kshiftlw $15, %k0, %k1
; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
; AVX512DQ-NEXT: kmovw %k1, %ecx
-; AVX512DQ-NEXT: vmovd %ecx, %xmm3
-; AVX512DQ-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3
+; AVX512DQ-NEXT: vmovd %ecx, %xmm5
+; AVX512DQ-NEXT: vpinsrb $1, %eax, %xmm5, %xmm5
; AVX512DQ-NEXT: kshiftlw $13, %k0, %k1
; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3
+; AVX512DQ-NEXT: vpinsrb $2, %eax, %xmm5, %xmm5
; AVX512DQ-NEXT: kshiftlw $12, %k0, %k1
; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3
+; AVX512DQ-NEXT: vpinsrb $3, %eax, %xmm5, %xmm5
; AVX512DQ-NEXT: kshiftlw $11, %k0, %k1
; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3
+; AVX512DQ-NEXT: vpinsrb $4, %eax, %xmm5, %xmm5
; AVX512DQ-NEXT: kshiftlw $10, %k0, %k1
; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3
+; AVX512DQ-NEXT: vpinsrb $5, %eax, %xmm5, %xmm5
; AVX512DQ-NEXT: kshiftlw $9, %k0, %k1
; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3
+; AVX512DQ-NEXT: vpinsrb $6, %eax, %xmm5, %xmm5
; AVX512DQ-NEXT: kshiftlw $8, %k0, %k1
; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3
+; AVX512DQ-NEXT: vpinsrb $7, %eax, %xmm5, %xmm5
; AVX512DQ-NEXT: kshiftlw $7, %k0, %k1
; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3
+; AVX512DQ-NEXT: vpinsrb $8, %eax, %xmm5, %xmm5
; AVX512DQ-NEXT: kshiftlw $6, %k0, %k1
; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3
+; AVX512DQ-NEXT: vpinsrb $9, %eax, %xmm5, %xmm5
; AVX512DQ-NEXT: kshiftlw $5, %k0, %k1
; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3
+; AVX512DQ-NEXT: vpinsrb $10, %eax, %xmm5, %xmm5
; AVX512DQ-NEXT: kshiftlw $4, %k0, %k1
; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3
+; AVX512DQ-NEXT: vpinsrb $11, %eax, %xmm5, %xmm5
; AVX512DQ-NEXT: kshiftlw $3, %k0, %k1
; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3
+; AVX512DQ-NEXT: vpinsrb $12, %eax, %xmm5, %xmm5
; AVX512DQ-NEXT: kshiftlw $2, %k0, %k1
; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3
+; AVX512DQ-NEXT: vpinsrb $13, %eax, %xmm5, %xmm5
; AVX512DQ-NEXT: kshiftlw $1, %k0, %k1
; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3
+; AVX512DQ-NEXT: vpinsrb $14, %eax, %xmm5, %xmm5
; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3
-; AVX512DQ-NEXT: vinserti128 $1, %xmm4, %ymm3, %ymm3
-; AVX512DQ-NEXT: vpor %ymm3, %ymm2, %ymm2
-; AVX512DQ-NEXT: vextracti128 $1, %ymm2, %xmm3
+; AVX512DQ-NEXT: vpinsrb $15, %eax, %xmm5, %xmm5
+; AVX512DQ-NEXT: vpor %xmm5, %xmm3, %xmm3
; AVX512DQ-NEXT: vpmovsxbd %xmm3, %zmm3
; AVX512DQ-NEXT: vpslld $31, %zmm3, %zmm3
; AVX512DQ-NEXT: vptestmd %zmm3, %zmm3, %k0
; AVX512DQ-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; AVX512DQ-NEXT: vpor %xmm4, %xmm2, %xmm2
; AVX512DQ-NEXT: vpmovsxbd %xmm2, %zmm2
; AVX512DQ-NEXT: vpslld $31, %zmm2, %zmm2
; AVX512DQ-NEXT: vptestmd %zmm2, %zmm2, %k0
@@ -2909,310 +2905,22 @@ define void @store_64i1(<64 x i1>* %a, <64 x i1> %v) {
;
; KNL-LABEL: store_64i1:
; KNL: ## %bb.0:
-; KNL-NEXT: pushq %rbp
-; KNL-NEXT: .cfi_def_cfa_offset 16
-; KNL-NEXT: pushq %r15
-; KNL-NEXT: .cfi_def_cfa_offset 24
-; KNL-NEXT: pushq %r14
-; KNL-NEXT: .cfi_def_cfa_offset 32
-; KNL-NEXT: pushq %r13
-; KNL-NEXT: .cfi_def_cfa_offset 40
-; KNL-NEXT: pushq %r12
-; KNL-NEXT: .cfi_def_cfa_offset 48
-; KNL-NEXT: pushq %rbx
-; KNL-NEXT: .cfi_def_cfa_offset 56
-; KNL-NEXT: .cfi_offset %rbx, -56
-; KNL-NEXT: .cfi_offset %r12, -48
-; KNL-NEXT: .cfi_offset %r13, -40
-; KNL-NEXT: .cfi_offset %r14, -32
-; KNL-NEXT: .cfi_offset %r15, -24
-; KNL-NEXT: .cfi_offset %rbp, -16
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
-; KNL-NEXT: vpslld $31, %zmm0, %zmm0
-; KNL-NEXT: vpmovsxbd %xmm1, %zmm1
-; KNL-NEXT: vpslld $31, %zmm1, %zmm1
-; KNL-NEXT: vpmovsxbd %xmm2, %zmm2
-; KNL-NEXT: vpslld $31, %zmm2, %zmm2
; KNL-NEXT: vpmovsxbd %xmm3, %zmm3
; KNL-NEXT: vpslld $31, %zmm3, %zmm3
; KNL-NEXT: vptestmd %zmm3, %zmm3, %k0
-; KNL-NEXT: kshiftlw $14, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %r8d
-; KNL-NEXT: kshiftlw $15, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %r9d
-; KNL-NEXT: kshiftlw $13, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %r10d
-; KNL-NEXT: kshiftlw $12, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %r11d
-; KNL-NEXT: kshiftlw $11, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %r14d
-; KNL-NEXT: kshiftlw $10, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %r15d
-; KNL-NEXT: kshiftlw $9, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %r12d
-; KNL-NEXT: kshiftlw $8, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %r13d
-; KNL-NEXT: kshiftlw $7, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %ebx
-; KNL-NEXT: kshiftlw $6, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %ebp
-; KNL-NEXT: kshiftlw $5, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: kshiftlw $4, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %ecx
-; KNL-NEXT: kshiftlw $3, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %edx
-; KNL-NEXT: kshiftlw $2, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %esi
-; KNL-NEXT: kshiftlw $1, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: vmovd %r9d, %xmm3
-; KNL-NEXT: kmovw %k1, %r9d
-; KNL-NEXT: vptestmd %zmm2, %zmm2, %k2
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: vpinsrb $1, %r8d, %xmm3, %xmm2
-; KNL-NEXT: vpinsrb $2, %r10d, %xmm2, %xmm2
-; KNL-NEXT: vpinsrb $3, %r11d, %xmm2, %xmm2
-; KNL-NEXT: vpinsrb $4, %r14d, %xmm2, %xmm2
-; KNL-NEXT: vpinsrb $5, %r15d, %xmm2, %xmm2
-; KNL-NEXT: vpinsrb $6, %r12d, %xmm2, %xmm2
-; KNL-NEXT: vpinsrb $7, %r13d, %xmm2, %xmm2
-; KNL-NEXT: vpinsrb $8, %ebx, %xmm2, %xmm2
-; KNL-NEXT: vpinsrb $9, %ebp, %xmm2, %xmm2
-; KNL-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
-; KNL-NEXT: vpinsrb $11, %ecx, %xmm2, %xmm2
-; KNL-NEXT: vpinsrb $12, %edx, %xmm2, %xmm2
-; KNL-NEXT: vpinsrb $13, %esi, %xmm2, %xmm2
-; KNL-NEXT: vpinsrb $14, %r9d, %xmm2, %xmm2
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
+; KNL-NEXT: kmovw %k0, 6(%rdi)
; KNL-NEXT: vpmovsxbd %xmm2, %zmm2
; KNL-NEXT: vpslld $31, %zmm2, %zmm2
; KNL-NEXT: vptestmd %zmm2, %zmm2, %k0
-; KNL-NEXT: kmovw %k0, 6(%rdi)
-; KNL-NEXT: kshiftlw $14, %k2, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %r8d
-; KNL-NEXT: kshiftlw $15, %k2, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %r10d
-; KNL-NEXT: kshiftlw $13, %k2, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %r9d
-; KNL-NEXT: kshiftlw $12, %k2, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %r11d
-; KNL-NEXT: kshiftlw $11, %k2, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %r14d
-; KNL-NEXT: kshiftlw $10, %k2, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %r15d
-; KNL-NEXT: kshiftlw $9, %k2, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %r12d
-; KNL-NEXT: kshiftlw $8, %k2, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %r13d
-; KNL-NEXT: kshiftlw $7, %k2, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %ecx
-; KNL-NEXT: kshiftlw $6, %k2, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %esi
-; KNL-NEXT: kshiftlw $5, %k2, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %ebp
-; KNL-NEXT: kshiftlw $4, %k2, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %ebx
-; KNL-NEXT: kshiftlw $3, %k2, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: kshiftlw $2, %k2, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %edx
-; KNL-NEXT: kshiftlw $1, %k2, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: vmovd %r10d, %xmm2
-; KNL-NEXT: kmovw %k0, %r10d
-; KNL-NEXT: vptestmd %zmm1, %zmm1, %k1
-; KNL-NEXT: kshiftrw $15, %k2, %k0
-; KNL-NEXT: vpinsrb $1, %r8d, %xmm2, %xmm1
-; KNL-NEXT: vpinsrb $2, %r9d, %xmm1, %xmm1
-; KNL-NEXT: vpinsrb $3, %r11d, %xmm1, %xmm1
-; KNL-NEXT: vpinsrb $4, %r14d, %xmm1, %xmm1
-; KNL-NEXT: vpinsrb $5, %r15d, %xmm1, %xmm1
-; KNL-NEXT: vpinsrb $6, %r12d, %xmm1, %xmm1
-; KNL-NEXT: vpinsrb $7, %r13d, %xmm1, %xmm1
-; KNL-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
-; KNL-NEXT: vpinsrb $9, %esi, %xmm1, %xmm1
-; KNL-NEXT: vpinsrb $10, %ebp, %xmm1, %xmm1
-; KNL-NEXT: vpinsrb $11, %ebx, %xmm1, %xmm1
-; KNL-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
-; KNL-NEXT: vpinsrb $13, %edx, %xmm1, %xmm1
-; KNL-NEXT: vpinsrb $14, %r10d, %xmm1, %xmm1
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
+; KNL-NEXT: kmovw %k0, 4(%rdi)
; KNL-NEXT: vpmovsxbd %xmm1, %zmm1
; KNL-NEXT: vpslld $31, %zmm1, %zmm1
; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
-; KNL-NEXT: kmovw %k0, 4(%rdi)
-; KNL-NEXT: kshiftlw $14, %k1, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %r8d
-; KNL-NEXT: kshiftlw $15, %k1, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %r10d
-; KNL-NEXT: kshiftlw $13, %k1, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %r9d
-; KNL-NEXT: kshiftlw $12, %k1, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %r11d
-; KNL-NEXT: kshiftlw $11, %k1, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %r14d
-; KNL-NEXT: kshiftlw $10, %k1, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %r15d
-; KNL-NEXT: kshiftlw $9, %k1, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %r12d
-; KNL-NEXT: kshiftlw $8, %k1, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %r13d
-; KNL-NEXT: kshiftlw $7, %k1, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %ecx
-; KNL-NEXT: kshiftlw $6, %k1, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %esi
-; KNL-NEXT: kshiftlw $5, %k1, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %ebp
-; KNL-NEXT: kshiftlw $4, %k1, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %ebx
-; KNL-NEXT: kshiftlw $3, %k1, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: kshiftlw $2, %k1, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %edx
-; KNL-NEXT: kshiftlw $1, %k1, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: vmovd %r10d, %xmm1
-; KNL-NEXT: kmovw %k0, %r10d
-; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: vpinsrb $1, %r8d, %xmm1, %xmm0
-; KNL-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; KNL-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; KNL-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; KNL-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; KNL-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; KNL-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; KNL-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
-; KNL-NEXT: vpinsrb $9, %esi, %xmm0, %xmm0
-; KNL-NEXT: vpinsrb $10, %ebp, %xmm0, %xmm0
-; KNL-NEXT: vpinsrb $11, %ebx, %xmm0, %xmm0
-; KNL-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
-; KNL-NEXT: vpinsrb $13, %edx, %xmm0, %xmm0
-; KNL-NEXT: vpinsrb $14, %r10d, %xmm0, %xmm0
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
-; KNL-NEXT: vpslld $31, %zmm0, %zmm0
-; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
-; KNL-NEXT: kmovw %k1, 2(%rdi)
-; KNL-NEXT: kshiftlw $14, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %r8d
-; KNL-NEXT: kshiftlw $15, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %r9d
-; KNL-NEXT: kshiftlw $13, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %r10d
-; KNL-NEXT: kshiftlw $12, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %r11d
-; KNL-NEXT: kshiftlw $11, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %r14d
-; KNL-NEXT: kshiftlw $10, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %r15d
-; KNL-NEXT: kshiftlw $9, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %r12d
-; KNL-NEXT: kshiftlw $8, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %r13d
-; KNL-NEXT: kshiftlw $7, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %edx
-; KNL-NEXT: kshiftlw $6, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %esi
-; KNL-NEXT: kshiftlw $5, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %ebp
-; KNL-NEXT: kshiftlw $4, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %ebx
-; KNL-NEXT: kshiftlw $3, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: kshiftlw $2, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %ecx
-; KNL-NEXT: kshiftlw $1, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: vmovd %r9d, %xmm0
-; KNL-NEXT: kmovw %k1, %r9d
-; KNL-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; KNL-NEXT: vpinsrb $2, %r10d, %xmm0, %xmm0
-; KNL-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; KNL-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; KNL-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; KNL-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; KNL-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; KNL-NEXT: vpinsrb $8, %edx, %xmm0, %xmm0
-; KNL-NEXT: vpinsrb $9, %esi, %xmm0, %xmm0
-; KNL-NEXT: vpinsrb $10, %ebp, %xmm0, %xmm0
-; KNL-NEXT: vpinsrb $11, %ebx, %xmm0, %xmm0
-; KNL-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
-; KNL-NEXT: vpinsrb $14, %r9d, %xmm0, %xmm0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; KNL-NEXT: kmovw %k0, 2(%rdi)
; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT: kmovw %k0, (%rdi)
-; KNL-NEXT: popq %rbx
-; KNL-NEXT: popq %r12
-; KNL-NEXT: popq %r13
-; KNL-NEXT: popq %r14
-; KNL-NEXT: popq %r15
-; KNL-NEXT: popq %rbp
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
@@ -3234,310 +2942,22 @@ define void @store_64i1(<64 x i1>* %a, <64 x i1> %v) {
;
; AVX512DQ-LABEL: store_64i1:
; AVX512DQ: ## %bb.0:
-; AVX512DQ-NEXT: pushq %rbp
-; AVX512DQ-NEXT: .cfi_def_cfa_offset 16
-; AVX512DQ-NEXT: pushq %r15
-; AVX512DQ-NEXT: .cfi_def_cfa_offset 24
-; AVX512DQ-NEXT: pushq %r14
-; AVX512DQ-NEXT: .cfi_def_cfa_offset 32
-; AVX512DQ-NEXT: pushq %r13
-; AVX512DQ-NEXT: .cfi_def_cfa_offset 40
-; AVX512DQ-NEXT: pushq %r12
-; AVX512DQ-NEXT: .cfi_def_cfa_offset 48
-; AVX512DQ-NEXT: pushq %rbx
-; AVX512DQ-NEXT: .cfi_def_cfa_offset 56
-; AVX512DQ-NEXT: .cfi_offset %rbx, -56
-; AVX512DQ-NEXT: .cfi_offset %r12, -48
-; AVX512DQ-NEXT: .cfi_offset %r13, -40
-; AVX512DQ-NEXT: .cfi_offset %r14, -32
-; AVX512DQ-NEXT: .cfi_offset %r15, -24
-; AVX512DQ-NEXT: .cfi_offset %rbp, -16
-; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm0
-; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0
-; AVX512DQ-NEXT: vpmovsxbd %xmm1, %zmm1
-; AVX512DQ-NEXT: vpslld $31, %zmm1, %zmm1
-; AVX512DQ-NEXT: vpmovsxbd %xmm2, %zmm2
-; AVX512DQ-NEXT: vpslld $31, %zmm2, %zmm2
; AVX512DQ-NEXT: vpmovsxbd %xmm3, %zmm3
; AVX512DQ-NEXT: vpslld $31, %zmm3, %zmm3
; AVX512DQ-NEXT: vptestmd %zmm3, %zmm3, %k0
-; AVX512DQ-NEXT: kshiftlw $14, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %r8d
-; AVX512DQ-NEXT: kshiftlw $15, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %r9d
-; AVX512DQ-NEXT: kshiftlw $13, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %r10d
-; AVX512DQ-NEXT: kshiftlw $12, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %r11d
-; AVX512DQ-NEXT: kshiftlw $11, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %r14d
-; AVX512DQ-NEXT: kshiftlw $10, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %r15d
-; AVX512DQ-NEXT: kshiftlw $9, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %r12d
-; AVX512DQ-NEXT: kshiftlw $8, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %r13d
-; AVX512DQ-NEXT: kshiftlw $7, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %ebx
-; AVX512DQ-NEXT: kshiftlw $6, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %ebp
-; AVX512DQ-NEXT: kshiftlw $5, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: kshiftlw $4, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %ecx
-; AVX512DQ-NEXT: kshiftlw $3, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %edx
-; AVX512DQ-NEXT: kshiftlw $2, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %esi
-; AVX512DQ-NEXT: kshiftlw $1, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: vmovd %r9d, %xmm3
-; AVX512DQ-NEXT: kmovw %k1, %r9d
-; AVX512DQ-NEXT: vptestmd %zmm2, %zmm2, %k2
-; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
-; AVX512DQ-NEXT: vpinsrb $1, %r8d, %xmm3, %xmm2
-; AVX512DQ-NEXT: vpinsrb $2, %r10d, %xmm2, %xmm2
-; AVX512DQ-NEXT: vpinsrb $3, %r11d, %xmm2, %xmm2
-; AVX512DQ-NEXT: vpinsrb $4, %r14d, %xmm2, %xmm2
-; AVX512DQ-NEXT: vpinsrb $5, %r15d, %xmm2, %xmm2
-; AVX512DQ-NEXT: vpinsrb $6, %r12d, %xmm2, %xmm2
-; AVX512DQ-NEXT: vpinsrb $7, %r13d, %xmm2, %xmm2
-; AVX512DQ-NEXT: vpinsrb $8, %ebx, %xmm2, %xmm2
-; AVX512DQ-NEXT: vpinsrb $9, %ebp, %xmm2, %xmm2
-; AVX512DQ-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
-; AVX512DQ-NEXT: vpinsrb $11, %ecx, %xmm2, %xmm2
-; AVX512DQ-NEXT: vpinsrb $12, %edx, %xmm2, %xmm2
-; AVX512DQ-NEXT: vpinsrb $13, %esi, %xmm2, %xmm2
-; AVX512DQ-NEXT: vpinsrb $14, %r9d, %xmm2, %xmm2
-; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
+; AVX512DQ-NEXT: kmovw %k0, 6(%rdi)
; AVX512DQ-NEXT: vpmovsxbd %xmm2, %zmm2
; AVX512DQ-NEXT: vpslld $31, %zmm2, %zmm2
; AVX512DQ-NEXT: vptestmd %zmm2, %zmm2, %k0
-; AVX512DQ-NEXT: kmovw %k0, 6(%rdi)
-; AVX512DQ-NEXT: kshiftlw $14, %k2, %k0
-; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
-; AVX512DQ-NEXT: kmovw %k0, %r8d
-; AVX512DQ-NEXT: kshiftlw $15, %k2, %k0
-; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
-; AVX512DQ-NEXT: kmovw %k0, %r10d
-; AVX512DQ-NEXT: kshiftlw $13, %k2, %k0
-; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
-; AVX512DQ-NEXT: kmovw %k0, %r9d
-; AVX512DQ-NEXT: kshiftlw $12, %k2, %k0
-; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
-; AVX512DQ-NEXT: kmovw %k0, %r11d
-; AVX512DQ-NEXT: kshiftlw $11, %k2, %k0
-; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
-; AVX512DQ-NEXT: kmovw %k0, %r14d
-; AVX512DQ-NEXT: kshiftlw $10, %k2, %k0
-; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
-; AVX512DQ-NEXT: kmovw %k0, %r15d
-; AVX512DQ-NEXT: kshiftlw $9, %k2, %k0
-; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
-; AVX512DQ-NEXT: kmovw %k0, %r12d
-; AVX512DQ-NEXT: kshiftlw $8, %k2, %k0
-; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
-; AVX512DQ-NEXT: kmovw %k0, %r13d
-; AVX512DQ-NEXT: kshiftlw $7, %k2, %k0
-; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
-; AVX512DQ-NEXT: kmovw %k0, %ecx
-; AVX512DQ-NEXT: kshiftlw $6, %k2, %k0
-; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
-; AVX512DQ-NEXT: kmovw %k0, %esi
-; AVX512DQ-NEXT: kshiftlw $5, %k2, %k0
-; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
-; AVX512DQ-NEXT: kmovw %k0, %ebp
-; AVX512DQ-NEXT: kshiftlw $4, %k2, %k0
-; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
-; AVX512DQ-NEXT: kmovw %k0, %ebx
-; AVX512DQ-NEXT: kshiftlw $3, %k2, %k0
-; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
-; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: kshiftlw $2, %k2, %k0
-; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
-; AVX512DQ-NEXT: kmovw %k0, %edx
-; AVX512DQ-NEXT: kshiftlw $1, %k2, %k0
-; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
-; AVX512DQ-NEXT: vmovd %r10d, %xmm2
-; AVX512DQ-NEXT: kmovw %k0, %r10d
-; AVX512DQ-NEXT: vptestmd %zmm1, %zmm1, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k2, %k0
-; AVX512DQ-NEXT: vpinsrb $1, %r8d, %xmm2, %xmm1
-; AVX512DQ-NEXT: vpinsrb $2, %r9d, %xmm1, %xmm1
-; AVX512DQ-NEXT: vpinsrb $3, %r11d, %xmm1, %xmm1
-; AVX512DQ-NEXT: vpinsrb $4, %r14d, %xmm1, %xmm1
-; AVX512DQ-NEXT: vpinsrb $5, %r15d, %xmm1, %xmm1
-; AVX512DQ-NEXT: vpinsrb $6, %r12d, %xmm1, %xmm1
-; AVX512DQ-NEXT: vpinsrb $7, %r13d, %xmm1, %xmm1
-; AVX512DQ-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
-; AVX512DQ-NEXT: vpinsrb $9, %esi, %xmm1, %xmm1
-; AVX512DQ-NEXT: vpinsrb $10, %ebp, %xmm1, %xmm1
-; AVX512DQ-NEXT: vpinsrb $11, %ebx, %xmm1, %xmm1
-; AVX512DQ-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
-; AVX512DQ-NEXT: vpinsrb $13, %edx, %xmm1, %xmm1
-; AVX512DQ-NEXT: vpinsrb $14, %r10d, %xmm1, %xmm1
-; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
+; AVX512DQ-NEXT: kmovw %k0, 4(%rdi)
; AVX512DQ-NEXT: vpmovsxbd %xmm1, %zmm1
; AVX512DQ-NEXT: vpslld $31, %zmm1, %zmm1
; AVX512DQ-NEXT: vptestmd %zmm1, %zmm1, %k0
-; AVX512DQ-NEXT: kmovw %k0, 4(%rdi)
-; AVX512DQ-NEXT: kshiftlw $14, %k1, %k0
-; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
-; AVX512DQ-NEXT: kmovw %k0, %r8d
-; AVX512DQ-NEXT: kshiftlw $15, %k1, %k0
-; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
-; AVX512DQ-NEXT: kmovw %k0, %r10d
-; AVX512DQ-NEXT: kshiftlw $13, %k1, %k0
-; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
-; AVX512DQ-NEXT: kmovw %k0, %r9d
-; AVX512DQ-NEXT: kshiftlw $12, %k1, %k0
-; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
-; AVX512DQ-NEXT: kmovw %k0, %r11d
-; AVX512DQ-NEXT: kshiftlw $11, %k1, %k0
-; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
-; AVX512DQ-NEXT: kmovw %k0, %r14d
-; AVX512DQ-NEXT: kshiftlw $10, %k1, %k0
-; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
-; AVX512DQ-NEXT: kmovw %k0, %r15d
-; AVX512DQ-NEXT: kshiftlw $9, %k1, %k0
-; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
-; AVX512DQ-NEXT: kmovw %k0, %r12d
-; AVX512DQ-NEXT: kshiftlw $8, %k1, %k0
-; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
-; AVX512DQ-NEXT: kmovw %k0, %r13d
-; AVX512DQ-NEXT: kshiftlw $7, %k1, %k0
-; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
-; AVX512DQ-NEXT: kmovw %k0, %ecx
-; AVX512DQ-NEXT: kshiftlw $6, %k1, %k0
-; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
-; AVX512DQ-NEXT: kmovw %k0, %esi
-; AVX512DQ-NEXT: kshiftlw $5, %k1, %k0
-; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
-; AVX512DQ-NEXT: kmovw %k0, %ebp
-; AVX512DQ-NEXT: kshiftlw $4, %k1, %k0
-; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
-; AVX512DQ-NEXT: kmovw %k0, %ebx
-; AVX512DQ-NEXT: kshiftlw $3, %k1, %k0
-; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
-; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: kshiftlw $2, %k1, %k0
-; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
-; AVX512DQ-NEXT: kmovw %k0, %edx
-; AVX512DQ-NEXT: kshiftlw $1, %k1, %k0
-; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
-; AVX512DQ-NEXT: vmovd %r10d, %xmm1
-; AVX512DQ-NEXT: kmovw %k0, %r10d
-; AVX512DQ-NEXT: vptestmd %zmm0, %zmm0, %k0
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: vpinsrb $1, %r8d, %xmm1, %xmm0
-; AVX512DQ-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; AVX512DQ-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; AVX512DQ-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; AVX512DQ-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; AVX512DQ-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; AVX512DQ-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; AVX512DQ-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
-; AVX512DQ-NEXT: vpinsrb $9, %esi, %xmm0, %xmm0
-; AVX512DQ-NEXT: vpinsrb $10, %ebp, %xmm0, %xmm0
-; AVX512DQ-NEXT: vpinsrb $11, %ebx, %xmm0, %xmm0
-; AVX512DQ-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
-; AVX512DQ-NEXT: vpinsrb $13, %edx, %xmm0, %xmm0
-; AVX512DQ-NEXT: vpinsrb $14, %r10d, %xmm0, %xmm0
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm0
-; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0
-; AVX512DQ-NEXT: vptestmd %zmm0, %zmm0, %k1
-; AVX512DQ-NEXT: kmovw %k1, 2(%rdi)
-; AVX512DQ-NEXT: kshiftlw $14, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %r8d
-; AVX512DQ-NEXT: kshiftlw $15, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %r9d
-; AVX512DQ-NEXT: kshiftlw $13, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %r10d
-; AVX512DQ-NEXT: kshiftlw $12, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %r11d
-; AVX512DQ-NEXT: kshiftlw $11, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %r14d
-; AVX512DQ-NEXT: kshiftlw $10, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %r15d
-; AVX512DQ-NEXT: kshiftlw $9, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %r12d
-; AVX512DQ-NEXT: kshiftlw $8, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %r13d
-; AVX512DQ-NEXT: kshiftlw $7, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %edx
-; AVX512DQ-NEXT: kshiftlw $6, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %esi
-; AVX512DQ-NEXT: kshiftlw $5, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %ebp
-; AVX512DQ-NEXT: kshiftlw $4, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %ebx
-; AVX512DQ-NEXT: kshiftlw $3, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: kshiftlw $2, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %ecx
-; AVX512DQ-NEXT: kshiftlw $1, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: vmovd %r9d, %xmm0
-; AVX512DQ-NEXT: kmovw %k1, %r9d
-; AVX512DQ-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; AVX512DQ-NEXT: vpinsrb $2, %r10d, %xmm0, %xmm0
-; AVX512DQ-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; AVX512DQ-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; AVX512DQ-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; AVX512DQ-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; AVX512DQ-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; AVX512DQ-NEXT: vpinsrb $8, %edx, %xmm0, %xmm0
-; AVX512DQ-NEXT: vpinsrb $9, %esi, %xmm0, %xmm0
-; AVX512DQ-NEXT: vpinsrb $10, %ebp, %xmm0, %xmm0
-; AVX512DQ-NEXT: vpinsrb $11, %ebx, %xmm0, %xmm0
-; AVX512DQ-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
-; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
-; AVX512DQ-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
-; AVX512DQ-NEXT: vpinsrb $14, %r9d, %xmm0, %xmm0
-; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; AVX512DQ-NEXT: kmovw %k0, 2(%rdi)
; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm0
; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0
; AVX512DQ-NEXT: vptestmd %zmm0, %zmm0, %k0
; AVX512DQ-NEXT: kmovw %k0, (%rdi)
-; AVX512DQ-NEXT: popq %rbx
-; AVX512DQ-NEXT: popq %r12
-; AVX512DQ-NEXT: popq %r13
-; AVX512DQ-NEXT: popq %r14
-; AVX512DQ-NEXT: popq %r15
-; AVX512DQ-NEXT: popq %rbp
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
store <64 x i1> %v, <64 x i1>* %a
diff --git a/llvm/test/CodeGen/X86/avx512-vec-cmp.ll b/llvm/test/CodeGen/X86/avx512-vec-cmp.ll
index 9c25ba6c5b6..8c1fbd64539 100644
--- a/llvm/test/CodeGen/X86/avx512-vec-cmp.ll
+++ b/llvm/test/CodeGen/X86/avx512-vec-cmp.ll
@@ -355,284 +355,28 @@ define i64 @test12_v64i16(<64 x i16> %a, <64 x i16> %b) nounwind {
; KNL-NEXT: subq $64, %rsp
; KNL-NEXT: vpcmpeqw %ymm5, %ymm1, %ymm1
; KNL-NEXT: vpmovsxwd %ymm1, %zmm1
-; KNL-NEXT: vpslld $31, %zmm1, %zmm1
-; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
-; KNL-NEXT: kshiftlw $14, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: kshiftlw $15, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %ecx
-; KNL-NEXT: vmovd %ecx, %xmm1
-; KNL-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
-; KNL-NEXT: kshiftlw $13, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
-; KNL-NEXT: kshiftlw $12, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
-; KNL-NEXT: kshiftlw $11, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
-; KNL-NEXT: kshiftlw $10, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
-; KNL-NEXT: kshiftlw $9, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
-; KNL-NEXT: kshiftlw $8, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
-; KNL-NEXT: kshiftlw $7, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
-; KNL-NEXT: kshiftlw $6, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1
-; KNL-NEXT: kshiftlw $5, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1
-; KNL-NEXT: kshiftlw $4, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1
-; KNL-NEXT: kshiftlw $3, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
-; KNL-NEXT: kshiftlw $2, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1
-; KNL-NEXT: kshiftlw $1, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
+; KNL-NEXT: vpmovdb %zmm1, %xmm1
; KNL-NEXT: vpmovsxbd %xmm1, %zmm1
; KNL-NEXT: vpslld $31, %zmm1, %zmm1
; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; KNL-NEXT: vpcmpeqw %ymm4, %ymm0, %ymm0
; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
-; KNL-NEXT: vpslld $31, %zmm0, %zmm0
-; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
-; KNL-NEXT: kshiftlw $14, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: kshiftlw $15, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %ecx
-; KNL-NEXT: vmovd %ecx, %xmm0
-; KNL-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
-; KNL-NEXT: kshiftlw $13, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; KNL-NEXT: kshiftlw $12, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; KNL-NEXT: kshiftlw $11, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
-; KNL-NEXT: kshiftlw $10, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; KNL-NEXT: kshiftlw $9, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; KNL-NEXT: kshiftlw $8, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
-; KNL-NEXT: kshiftlw $7, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
-; KNL-NEXT: kshiftlw $6, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
-; KNL-NEXT: kshiftlw $5, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
-; KNL-NEXT: kshiftlw $4, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; KNL-NEXT: kshiftlw $3, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
-; KNL-NEXT: kshiftlw $2, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
-; KNL-NEXT: kshiftlw $1, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; KNL-NEXT: vpmovdb %zmm0, %xmm0
; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT: kmovw %k0, (%rsp)
; KNL-NEXT: vpcmpeqw %ymm7, %ymm3, %ymm0
; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
-; KNL-NEXT: vpslld $31, %zmm0, %zmm0
-; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
-; KNL-NEXT: kshiftlw $14, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: kshiftlw $15, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %ecx
-; KNL-NEXT: vmovd %ecx, %xmm0
-; KNL-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
-; KNL-NEXT: kshiftlw $13, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; KNL-NEXT: kshiftlw $12, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; KNL-NEXT: kshiftlw $11, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
-; KNL-NEXT: kshiftlw $10, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; KNL-NEXT: kshiftlw $9, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; KNL-NEXT: kshiftlw $8, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
-; KNL-NEXT: kshiftlw $7, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
-; KNL-NEXT: kshiftlw $6, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
-; KNL-NEXT: kshiftlw $5, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
-; KNL-NEXT: kshiftlw $4, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; KNL-NEXT: kshiftlw $3, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
-; KNL-NEXT: kshiftlw $2, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
-; KNL-NEXT: kshiftlw $1, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; KNL-NEXT: vpmovdb %zmm0, %xmm0
; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; KNL-NEXT: vpcmpeqw %ymm6, %ymm2, %ymm0
; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
-; KNL-NEXT: vpslld $31, %zmm0, %zmm0
-; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
-; KNL-NEXT: kshiftlw $14, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: kshiftlw $15, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %ecx
-; KNL-NEXT: vmovd %ecx, %xmm0
-; KNL-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
-; KNL-NEXT: kshiftlw $13, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; KNL-NEXT: kshiftlw $12, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; KNL-NEXT: kshiftlw $11, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
-; KNL-NEXT: kshiftlw $10, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; KNL-NEXT: kshiftlw $9, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; KNL-NEXT: kshiftlw $8, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
-; KNL-NEXT: kshiftlw $7, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
-; KNL-NEXT: kshiftlw $6, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
-; KNL-NEXT: kshiftlw $5, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
-; KNL-NEXT: kshiftlw $4, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; KNL-NEXT: kshiftlw $3, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
-; KNL-NEXT: kshiftlw $2, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
-; KNL-NEXT: kshiftlw $1, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; KNL-NEXT: vpmovdb %zmm0, %xmm0
; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
diff --git a/llvm/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll b/llvm/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll
index 82dba2993e7..de47780651b 100644
--- a/llvm/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll
+++ b/llvm/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll
@@ -16,98 +16,17 @@ define zeroext i32 @test_vpcmpeqb_v16i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -134,98 +53,17 @@ define zeroext i32 @test_vpcmpeqb_v16i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpcmpeqb (%rdi), %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -499,103 +337,22 @@ define zeroext i64 @test_vpcmpeqb_v16i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -622,103 +379,22 @@ define zeroext i64 @test_vpcmpeqb_v16i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpcmpeqb (%rdi), %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -1963,98 +1639,19 @@ define zeroext i32 @test_vpcmpeqw_v16i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -2082,98 +1679,19 @@ define zeroext i32 @test_vpcmpeqw_v16i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -2450,89 +1968,15 @@ define zeroext i64 @test_vpcmpeqw_v16i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -2541,12 +1985,7 @@ define zeroext i64 @test_vpcmpeqw_v16i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -2574,89 +2013,15 @@ define zeroext i64 @test_vpcmpeqw_v16i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -2665,12 +2030,7 @@ define zeroext i64 @test_vpcmpeqw_v16i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -3132,141 +2492,13 @@ define zeroext i64 @test_vpcmpeqw_v32i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__
; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm3, %ymm1
; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
-; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm1
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpmovdb %zmm1, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -3395,142 +2627,14 @@ define zeroext i64 @test_vpcmpeqw_v32i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>
; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1
; NoVLX-NEXT: vpcmpeqw 32(%rdi), %ymm1, %ymm1
; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
-; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %eax, %xmm1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpmovdb %zmm1, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -3731,15 +2835,15 @@ define zeroext i64 @test_masked_vpcmpeqw_v32i1_v64i1_mask(i32 zeroext %__u, <8 x
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
; NoVLX-NEXT: vinserti128 $1, %xmm9, %ymm4, %ymm1
-; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm0, %ymm4
+; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm0, %ymm0
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
; NoVLX-NEXT: vinserti128 $1, %xmm7, %ymm3, %ymm3
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
-; NoVLX-NEXT: vpcmpeqw %ymm3, %ymm1, %ymm3
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z}
-; NoVLX-NEXT: vpmovdb %zmm1, %xmm1
+; NoVLX-NEXT: vpternlogd $255, %zmm4, %zmm4, %zmm4 {%k1} {z}
+; NoVLX-NEXT: vpmovdb %zmm4, %xmm4
+; NoVLX-NEXT: vpcmpeqw %ymm3, %ymm1, %ymm1
+; NoVLX-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k2} {z}
+; NoVLX-NEXT: vpmovdb %zmm3, %xmm3
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
@@ -3752,145 +2856,17 @@ define zeroext i64 @test_masked_vpcmpeqw_v32i1_v64i1_mask(i32 zeroext %__u, <8 x
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm2, %ymm2
-; NoVLX-NEXT: vpcmpeqw %ymm2, %ymm4, %ymm2
-; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
-; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpmovsxwd %ymm3, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm3
-; NoVLX-NEXT: vptestmd %zmm3, %zmm3, %k0
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm3
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpand %xmm1, %xmm3, %xmm1
+; NoVLX-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
+; NoVLX-NEXT: vpand %xmm4, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
+; NoVLX-NEXT: vpmovdb %zmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm3, %xmm1, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpand %xmm0, %xmm2, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -3999,174 +2975,46 @@ define zeroext i64 @test_masked_vpcmpeqw_v32i1_v64i1_mask_mem(i32 zeroext %__u,
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: vmovq %xmm0, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm4
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm3
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vmovd %ecx, %xmm4
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm0
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm5
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vpternlogd $255, %zmm4, %zmm4, %zmm4 {%k1} {z}
+; NoVLX-NEXT: vpmovdb %zmm4, %xmm4
+; NoVLX-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1
+; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k2} {z}
+; NoVLX-NEXT: vpmovdb %zmm2, %xmm2
+; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
+; NoVLX-NEXT: vpcmpeqw (%rsi), %ymm0, %ymm0
+; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
-; NoVLX-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm3
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z}
+; NoVLX-NEXT: vpand %xmm4, %xmm0, %xmm0
+; NoVLX-NEXT: vpcmpeqw 32(%rsi), %ymm1, %ymm1
+; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
; NoVLX-NEXT: vpmovdb %zmm1, %xmm1
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm5, %xmm2
-; NoVLX-NEXT: vinserti128 $1, %xmm4, %ymm2, %ymm2
-; NoVLX-NEXT: vpcmpeqw (%rsi), %ymm2, %ymm2
-; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
-; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %eax, %xmm2
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpcmpeqw 32(%rsi), %ymm3, %ymm3
-; NoVLX-NEXT: vpmovsxwd %ymm3, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm3
-; NoVLX-NEXT: vptestmd %zmm3, %zmm3, %k0
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm3
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpand %xmm1, %xmm3, %xmm1
+; NoVLX-NEXT: vpand %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpand %xmm0, %xmm2, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -12251,98 +11099,17 @@ define zeroext i32 @test_vpcmpsgtb_v16i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %_
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -12369,98 +11136,17 @@ define zeroext i32 @test_vpcmpsgtb_v16i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpcmpgtb (%rdi), %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -12734,103 +11420,22 @@ define zeroext i64 @test_vpcmpsgtb_v16i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %_
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -12857,103 +11462,22 @@ define zeroext i64 @test_vpcmpsgtb_v16i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpcmpgtb (%rdi), %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -14198,98 +12722,19 @@ define zeroext i32 @test_vpcmpsgtw_v16i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %_
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -14317,98 +12762,19 @@ define zeroext i32 @test_vpcmpsgtw_v16i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -14685,89 +13051,15 @@ define zeroext i64 @test_vpcmpsgtw_v16i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %_
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -14776,12 +13068,7 @@ define zeroext i64 @test_vpcmpsgtw_v16i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %_
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -14809,89 +13096,15 @@ define zeroext i64 @test_vpcmpsgtw_v16i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -14900,12 +13113,7 @@ define zeroext i64 @test_vpcmpsgtw_v16i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -15367,141 +13575,13 @@ define zeroext i64 @test_vpcmpsgtw_v32i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %_
; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm3, %ymm1
; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
-; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm1
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpmovdb %zmm1, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -15630,142 +13710,14 @@ define zeroext i64 @test_vpcmpsgtw_v32i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64
; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtw 32(%rdi), %ymm1, %ymm1
; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
-; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %eax, %xmm1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpmovdb %zmm1, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -15966,15 +13918,15 @@ define zeroext i64 @test_masked_vpcmpsgtw_v32i1_v64i1_mask(i32 zeroext %__u, <8
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
; NoVLX-NEXT: vinserti128 $1, %xmm9, %ymm4, %ymm1
-; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm0, %ymm4
+; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm0, %ymm0
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
; NoVLX-NEXT: vinserti128 $1, %xmm7, %ymm3, %ymm3
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
-; NoVLX-NEXT: vpcmpgtw %ymm3, %ymm1, %ymm3
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z}
-; NoVLX-NEXT: vpmovdb %zmm1, %xmm1
+; NoVLX-NEXT: vpternlogd $255, %zmm4, %zmm4, %zmm4 {%k1} {z}
+; NoVLX-NEXT: vpmovdb %zmm4, %xmm4
+; NoVLX-NEXT: vpcmpgtw %ymm3, %ymm1, %ymm1
+; NoVLX-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k2} {z}
+; NoVLX-NEXT: vpmovdb %zmm3, %xmm3
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
@@ -15987,145 +13939,17 @@ define zeroext i64 @test_masked_vpcmpsgtw_v32i1_v64i1_mask(i32 zeroext %__u, <8
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm2, %ymm2
-; NoVLX-NEXT: vpcmpgtw %ymm2, %ymm4, %ymm2
-; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
-; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpmovsxwd %ymm3, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm3
-; NoVLX-NEXT: vptestmd %zmm3, %zmm3, %k0
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm3
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpand %xmm1, %xmm3, %xmm1
+; NoVLX-NEXT: vpcmpgtw %ymm2, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
+; NoVLX-NEXT: vpand %xmm4, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
+; NoVLX-NEXT: vpmovdb %zmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm3, %xmm1, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpand %xmm0, %xmm2, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -16234,174 +14058,46 @@ define zeroext i64 @test_masked_vpcmpsgtw_v32i1_v64i1_mask_mem(i32 zeroext %__u,
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: vmovq %xmm0, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm4
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm3
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vmovd %ecx, %xmm4
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm0
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm5
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vpternlogd $255, %zmm4, %zmm4, %zmm4 {%k1} {z}
+; NoVLX-NEXT: vpmovdb %zmm4, %xmm4
+; NoVLX-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1
+; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k2} {z}
+; NoVLX-NEXT: vpmovdb %zmm2, %xmm2
+; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
+; NoVLX-NEXT: vpcmpgtw (%rsi), %ymm0, %ymm0
+; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
-; NoVLX-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm3
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z}
+; NoVLX-NEXT: vpand %xmm4, %xmm0, %xmm0
+; NoVLX-NEXT: vpcmpgtw 32(%rsi), %ymm1, %ymm1
+; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
; NoVLX-NEXT: vpmovdb %zmm1, %xmm1
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm5, %xmm2
-; NoVLX-NEXT: vinserti128 $1, %xmm4, %ymm2, %ymm2
-; NoVLX-NEXT: vpcmpgtw (%rsi), %ymm2, %ymm2
-; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
-; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %eax, %xmm2
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpcmpgtw 32(%rsi), %ymm3, %ymm3
-; NoVLX-NEXT: vpmovsxwd %ymm3, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm3
-; NoVLX-NEXT: vptestmd %zmm3, %zmm3, %k0
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm3
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpand %xmm1, %xmm3, %xmm1
+; NoVLX-NEXT: vpand %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpand %xmm0, %xmm2, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -24486,100 +22182,19 @@ define zeroext i32 @test_vpcmpsgeb_v16i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %_
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -24606,101 +22221,20 @@ define zeroext i32 @test_vpcmpsgeb_v16i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -24979,105 +22513,24 @@ define zeroext i64 @test_vpcmpsgeb_v16i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %_
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -25104,106 +22557,25 @@ define zeroext i64 @test_vpcmpsgeb_v16i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -26493,100 +23865,21 @@ define zeroext i32 @test_vpcmpsgew_v16i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %_
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -26614,101 +23907,22 @@ define zeroext i32 @test_vpcmpsgew_v16i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -26990,91 +24204,17 @@ define zeroext i64 @test_vpcmpsgew_v16i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %_
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -27083,12 +24223,7 @@ define zeroext i64 @test_vpcmpsgew_v16i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %_
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -27116,92 +24251,18 @@ define zeroext i64 @test_vpcmpsgew_v16i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -27210,12 +24271,7 @@ define zeroext i64 @test_vpcmpsgew_v16i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -27680,146 +24736,18 @@ define zeroext i64 @test_vpcmpsgew_v32i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %_
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
-; NoVLX-NEXT: vpcmpgtw %ymm3, %ymm1, %ymm2
-; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
-; NoVLX-NEXT: vpxor %ymm1, %ymm2, %ymm2
-; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
-; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpmovsxbd %xmm2, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
-; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
+; NoVLX-NEXT: vpcmpgtw %ymm3, %ymm1, %ymm1
+; NoVLX-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
+; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
+; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
+; NoVLX-NEXT: vpmovdb %zmm1, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -27949,146 +24877,18 @@ define zeroext i64 @test_vpcmpsgew_v32i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64
; NoVLX-NEXT: vmovdqa (%rdi), %ymm2
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm2, %ymm0
; NoVLX-NEXT: vmovdqa 32(%rdi), %ymm2
-; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm2, %ymm2
-; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
-; NoVLX-NEXT: vpxor %ymm1, %ymm2, %ymm2
-; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
-; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpmovsxbd %xmm2, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
-; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
+; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm2, %ymm1
+; NoVLX-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
+; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
+; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
+; NoVLX-NEXT: vpmovdb %zmm1, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -28289,15 +25089,15 @@ define zeroext i64 @test_masked_vpcmpsgew_v32i1_v64i1_mask(i32 zeroext %__u, <8
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
; NoVLX-NEXT: vinserti128 $1, %xmm9, %ymm4, %ymm1
-; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm0, %ymm4
+; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm0, %ymm0
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
; NoVLX-NEXT: vinserti128 $1, %xmm7, %ymm3, %ymm3
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
-; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm3, %ymm3
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z}
-; NoVLX-NEXT: vpmovdb %zmm1, %xmm1
+; NoVLX-NEXT: vpternlogd $255, %zmm4, %zmm4, %zmm4 {%k1} {z}
+; NoVLX-NEXT: vpmovdb %zmm4, %xmm4
+; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm3, %ymm1
+; NoVLX-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k2} {z}
+; NoVLX-NEXT: vpmovdb %zmm3, %xmm3
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
@@ -28310,148 +25110,20 @@ define zeroext i64 @test_masked_vpcmpsgew_v32i1_v64i1_mask(i32 zeroext %__u, <8
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm2, %ymm2
-; NoVLX-NEXT: vpcmpgtw %ymm4, %ymm2, %ymm2
-; NoVLX-NEXT: vpcmpeqd %ymm4, %ymm4, %ymm4
-; NoVLX-NEXT: vpxor %ymm4, %ymm2, %ymm2
-; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
-; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpxor %ymm4, %ymm3, %ymm3
-; NoVLX-NEXT: vpmovsxwd %ymm3, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm3
-; NoVLX-NEXT: vptestmd %zmm3, %zmm3, %k0
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm3
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpand %xmm1, %xmm3, %xmm1
+; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm2, %ymm0
+; NoVLX-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
+; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
+; NoVLX-NEXT: vpand %xmm4, %xmm0, %xmm0
+; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
+; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
+; NoVLX-NEXT: vpmovdb %zmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm3, %xmm1, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpand %xmm0, %xmm2, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -28577,162 +25249,34 @@ define zeroext i64 @test_masked_vpcmpsgew_v32i1_v64i1_mask_mem(i32 zeroext %__u,
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm4
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
-; NoVLX-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm2
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z}
-; NoVLX-NEXT: vpmovdb %zmm1, %xmm1
+; NoVLX-NEXT: vpternlogd $255, %zmm4, %zmm4, %zmm4 {%k1} {z}
+; NoVLX-NEXT: vpmovdb %zmm4, %xmm4
+; NoVLX-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1
+; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k2} {z}
+; NoVLX-NEXT: vpmovdb %zmm2, %xmm2
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm4, %ymm3
-; NoVLX-NEXT: vmovdqa (%rsi), %ymm4
-; NoVLX-NEXT: vpcmpgtw %ymm3, %ymm4, %ymm5
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
+; NoVLX-NEXT: vmovdqa (%rsi), %ymm3
+; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm3, %ymm0
; NoVLX-NEXT: vmovdqa 32(%rsi), %ymm3
-; NoVLX-NEXT: vpcmpgtw %ymm2, %ymm3, %ymm3
-; NoVLX-NEXT: vpcmpeqd %ymm4, %ymm4, %ymm4
-; NoVLX-NEXT: vpxor %ymm4, %ymm5, %ymm2
-; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
-; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpxor %ymm4, %ymm3, %ymm3
-; NoVLX-NEXT: vpmovsxwd %ymm3, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm3
-; NoVLX-NEXT: vptestmd %zmm3, %zmm3, %k0
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm3
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpand %xmm1, %xmm3, %xmm1
+; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm3, %ymm1
+; NoVLX-NEXT: vpcmpeqd %ymm3, %ymm3, %ymm3
+; NoVLX-NEXT: vpxor %ymm3, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
+; NoVLX-NEXT: vpand %xmm4, %xmm0, %xmm0
+; NoVLX-NEXT: vpxor %ymm3, %ymm1, %ymm1
+; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
+; NoVLX-NEXT: vpmovdb %zmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpand %xmm0, %xmm2, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -36945,18 +33489,10 @@ define zeroext i32 @test_vpcmpultb_v16i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %_
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
@@ -36964,82 +33500,9 @@ define zeroext i32 @test_vpcmpultb_v16i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %_
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -37066,18 +33529,10 @@ define zeroext i32 @test_vpcmpultb_v16i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1
@@ -37085,82 +33540,9 @@ define zeroext i32 @test_vpcmpultb_v16i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -37440,18 +33822,12 @@ define zeroext i64 @test_vpcmpultb_v16i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %_
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
@@ -37459,87 +33835,12 @@ define zeroext i64 @test_vpcmpultb_v16i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %_
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -37566,18 +33867,12 @@ define zeroext i64 @test_vpcmpultb_v16i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1
@@ -37585,87 +33880,12 @@ define zeroext i64 @test_vpcmpultb_v16i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -38964,101 +35184,22 @@ define zeroext i32 @test_vpcmpultw_v16i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %_
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vmovdqa {{.*#+}} ymm2 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768]
; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -39086,101 +35227,22 @@ define zeroext i32 @test_vpcmpultw_v16i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vmovdqa {{.*#+}} ymm1 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768]
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpxor (%rdi), %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -39463,92 +35525,18 @@ define zeroext i64 @test_vpcmpultw_v16i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %_
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vmovdqa {{.*#+}} ymm2 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768]
; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -39557,12 +35545,7 @@ define zeroext i64 @test_vpcmpultw_v16i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %_
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -39590,92 +35573,18 @@ define zeroext i64 @test_vpcmpultw_v16i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vmovdqa {{.*#+}} ymm1 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768]
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpxor (%rdi), %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -39684,12 +35593,7 @@ define zeroext i64 @test_vpcmpultw_v16i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -40153,150 +36057,22 @@ define zeroext i64 @test_vpcmpultw_v32i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %_
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm4, %xmm4
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm4, %ymm4
-; NoVLX-NEXT: vmovdqa {{.*#+}} ymm2 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768]
-; NoVLX-NEXT: vpxor %ymm2, %ymm3, %ymm3
-; NoVLX-NEXT: vpxor %ymm2, %ymm4, %ymm4
-; NoVLX-NEXT: vpcmpgtw %ymm3, %ymm4, %ymm3
-; NoVLX-NEXT: vpmovsxwd %ymm3, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm3
-; NoVLX-NEXT: vptestmd %zmm3, %zmm3, %k0
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm3
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpmovsxbd %xmm3, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm3
-; NoVLX-NEXT: vptestmd %zmm3, %zmm3, %k0
+; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm4, %ymm2
+; NoVLX-NEXT: vmovdqa {{.*#+}} ymm4 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768]
+; NoVLX-NEXT: vpxor %ymm4, %ymm3, %ymm3
+; NoVLX-NEXT: vpxor %ymm4, %ymm2, %ymm2
+; NoVLX-NEXT: vpcmpgtw %ymm3, %ymm2, %ymm2
+; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
+; NoVLX-NEXT: vpmovdb %zmm2, %xmm2
+; NoVLX-NEXT: vpmovsxbd %xmm2, %zmm2
+; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
+; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
-; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
+; NoVLX-NEXT: vpxor %ymm4, %ymm0, %ymm0
+; NoVLX-NEXT: vpxor %ymm4, %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -40422,150 +36198,22 @@ define zeroext i64 @test_vpcmpultw_v32i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm2
-; NoVLX-NEXT: vmovdqa {{.*#+}} ymm1 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768]
-; NoVLX-NEXT: vpxor %ymm1, %ymm2, %ymm2
-; NoVLX-NEXT: vpxor 32(%rdi), %ymm1, %ymm3
-; NoVLX-NEXT: vpcmpgtw %ymm2, %ymm3, %ymm2
-; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
-; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %eax, %xmm2
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpmovsxbd %xmm2, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
-; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
+; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1
+; NoVLX-NEXT: vmovdqa {{.*#+}} ymm2 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768]
+; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
+; NoVLX-NEXT: vpxor 32(%rdi), %ymm2, %ymm3
+; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm3, %ymm1
+; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
+; NoVLX-NEXT: vpmovdb %zmm1, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
-; NoVLX-NEXT: vpxor (%rdi), %ymm1, %ymm1
+; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
+; NoVLX-NEXT: vpxor (%rdi), %ymm2, %ymm1
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -40765,15 +36413,15 @@ define zeroext i64 @test_masked_vpcmpultw_v32i1_v64i1_mask(i32 zeroext %__u, <8
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
-; NoVLX-NEXT: vinserti128 $1, %xmm9, %ymm4, %ymm8
+; NoVLX-NEXT: vinserti128 $1, %xmm9, %ymm4, %ymm1
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
-; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm0, %ymm6
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
-; NoVLX-NEXT: vinserti128 $1, %xmm7, %ymm3, %ymm4
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z}
-; NoVLX-NEXT: vpmovdb %zmm1, %xmm1
+; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm0, %ymm0
+; NoVLX-NEXT: vpternlogd $255, %zmm4, %zmm4, %zmm4 {%k1} {z}
+; NoVLX-NEXT: vpmovdb %zmm4, %xmm4
+; NoVLX-NEXT: vinserti128 $1, %xmm7, %ymm3, %ymm3
+; NoVLX-NEXT: vpternlogd $255, %zmm6, %zmm6, %zmm6 {%k2} {z}
+; NoVLX-NEXT: vpmovdb %zmm6, %xmm6
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
@@ -40787,150 +36435,22 @@ define zeroext i64 @test_masked_vpcmpultw_v32i1_v64i1_mask(i32 zeroext %__u, <8
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm2, %ymm2
; NoVLX-NEXT: vmovdqa {{.*#+}} ymm5 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768]
-; NoVLX-NEXT: vpxor %ymm5, %ymm6, %ymm3
+; NoVLX-NEXT: vpxor %ymm5, %ymm0, %ymm0
; NoVLX-NEXT: vpxor %ymm5, %ymm2, %ymm2
-; NoVLX-NEXT: vpcmpgtw %ymm3, %ymm2, %ymm2
-; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
-; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm3
-; NoVLX-NEXT: vpxor %ymm5, %ymm8, %ymm2
-; NoVLX-NEXT: vpxor %ymm5, %ymm4, %ymm4
-; NoVLX-NEXT: vpcmpgtw %ymm2, %ymm4, %ymm2
-; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
-; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpand %xmm1, %xmm2, %xmm1
+; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm2, %ymm0
+; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
+; NoVLX-NEXT: vpand %xmm4, %xmm0, %xmm0
+; NoVLX-NEXT: vpxor %ymm5, %ymm1, %ymm1
+; NoVLX-NEXT: vpxor %ymm5, %ymm3, %ymm2
+; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm2, %ymm1
+; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
+; NoVLX-NEXT: vpmovdb %zmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm6, %xmm1, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpand %xmm0, %xmm3, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -40999,219 +36519,91 @@ define zeroext i64 @test_masked_vpcmpultw_v32i1_v64i1_mask_mem(i32 zeroext %__u,
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vmovq %xmm4, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm2
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm1
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vmovd %ecx, %xmm2
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: vmovq %xmm3, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm4
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm1
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vmovd %ecx, %xmm4
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
; NoVLX-NEXT: vpextrq $1, %xmm3, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: vmovq %xmm0, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm5
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm1
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vmovd %ecx, %xmm4
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm1, %xmm0
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm0
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm6
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vpternlogd $255, %zmm4, %zmm4, %zmm4 {%k1} {z}
+; NoVLX-NEXT: vpmovdb %zmm4, %xmm4
+; NoVLX-NEXT: vpternlogd $255, %zmm5, %zmm5, %zmm5 {%k2} {z}
+; NoVLX-NEXT: vpmovdb %zmm5, %xmm5
+; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
+; NoVLX-NEXT: vmovdqa {{.*#+}} ymm2 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768]
+; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
+; NoVLX-NEXT: vpxor (%rsi), %ymm2, %ymm3
+; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm3, %ymm0
+; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z}
+; NoVLX-NEXT: vpand %xmm4, %xmm0, %xmm0
+; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
+; NoVLX-NEXT: vpxor 32(%rsi), %ymm2, %ymm2
+; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm2, %ymm1
+; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
; NoVLX-NEXT: vpmovdb %zmm1, %xmm1
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm4, %ymm3
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm6, %xmm2
-; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm2, %ymm2
-; NoVLX-NEXT: vmovdqa {{.*#+}} ymm4 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768]
-; NoVLX-NEXT: vpxor %ymm4, %ymm2, %ymm2
-; NoVLX-NEXT: vpxor (%rsi), %ymm4, %ymm5
-; NoVLX-NEXT: vpcmpgtw %ymm2, %ymm5, %ymm2
-; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
-; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %eax, %xmm2
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpxor %ymm4, %ymm3, %ymm3
-; NoVLX-NEXT: vpxor 32(%rsi), %ymm4, %ymm4
-; NoVLX-NEXT: vpcmpgtw %ymm3, %ymm4, %ymm3
-; NoVLX-NEXT: vpmovsxwd %ymm3, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm3
-; NoVLX-NEXT: vptestmd %zmm3, %zmm3, %k0
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm3
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpand %xmm1, %xmm3, %xmm1
+; NoVLX-NEXT: vpand %xmm5, %xmm1, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpand %xmm0, %xmm2, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
diff --git a/llvm/test/CodeGen/X86/bitcast-and-setcc-512.ll b/llvm/test/CodeGen/X86/bitcast-and-setcc-512.ll
index 4180bc22cce..dfda374aa52 100644
--- a/llvm/test/CodeGen/X86/bitcast-and-setcc-512.ll
+++ b/llvm/test/CodeGen/X86/bitcast-and-setcc-512.ll
@@ -287,278 +287,20 @@ define i32 @v32i16(<32 x i16> %a, <32 x i16> %b, <32 x i16> %c, <32 x i16> %d) {
; AVX512F-NEXT: .cfi_def_cfa_register %rbp
; AVX512F-NEXT: andq $-32, %rsp
; AVX512F-NEXT: subq $32, %rsp
-; AVX512F-NEXT: vpcmpgtw %ymm3, %ymm1, %ymm1
-; AVX512F-NEXT: vpmovsxwd %ymm1, %zmm1
-; AVX512F-NEXT: vpslld $31, %zmm1, %zmm1
-; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
-; AVX512F-NEXT: kshiftlw $14, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: kshiftlw $15, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %ecx
-; AVX512F-NEXT: vmovd %ecx, %xmm1
-; AVX512F-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftlw $13, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftlw $12, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftlw $11, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftlw $10, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftlw $9, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftlw $8, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftlw $7, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftlw $6, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftlw $5, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftlw $4, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftlw $3, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftlw $2, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftlw $1, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftrw $15, %k0, %k0
-; AVX512F-NEXT: kmovw %k0, %eax
-; AVX512F-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
; AVX512F-NEXT: vpcmpgtw %ymm2, %ymm0, %ymm0
; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0
-; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
-; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
-; AVX512F-NEXT: kshiftlw $14, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: kshiftlw $15, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %ecx
-; AVX512F-NEXT: vmovd %ecx, %xmm0
-; AVX512F-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftlw $13, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftlw $12, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftlw $11, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftlw $10, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftlw $9, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftlw $8, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftlw $7, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftlw $6, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftlw $5, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftlw $4, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftlw $3, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftlw $2, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftlw $1, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftrw $15, %k0, %k0
-; AVX512F-NEXT: kmovw %k0, %eax
-; AVX512F-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX512F-NEXT: vpcmpgtw %ymm7, %ymm5, %ymm1
+; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512F-NEXT: vpcmpgtw %ymm3, %ymm1, %ymm1
; AVX512F-NEXT: vpmovsxwd %ymm1, %zmm1
-; AVX512F-NEXT: vpslld $31, %zmm1, %zmm1
-; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
-; AVX512F-NEXT: kshiftlw $14, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: kshiftlw $15, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %ecx
-; AVX512F-NEXT: vmovd %ecx, %xmm1
-; AVX512F-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftlw $13, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftlw $12, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftlw $11, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftlw $10, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftlw $9, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftlw $8, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftlw $7, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftlw $6, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftlw $5, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftlw $4, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftlw $3, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftlw $2, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftlw $1, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftrw $15, %k0, %k0
-; AVX512F-NEXT: kmovw %k0, %eax
-; AVX512F-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
+; AVX512F-NEXT: vpmovdb %zmm1, %xmm1
; AVX512F-NEXT: vpcmpgtw %ymm6, %ymm4, %ymm2
; AVX512F-NEXT: vpmovsxwd %ymm2, %zmm2
-; AVX512F-NEXT: vpslld $31, %zmm2, %zmm2
-; AVX512F-NEXT: vptestmd %zmm2, %zmm2, %k0
-; AVX512F-NEXT: kshiftlw $14, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: kshiftlw $15, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %ecx
-; AVX512F-NEXT: vmovd %ecx, %xmm2
-; AVX512F-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
-; AVX512F-NEXT: kshiftlw $13, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
-; AVX512F-NEXT: kshiftlw $12, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
-; AVX512F-NEXT: kshiftlw $11, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
-; AVX512F-NEXT: kshiftlw $10, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
-; AVX512F-NEXT: kshiftlw $9, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
-; AVX512F-NEXT: kshiftlw $8, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
-; AVX512F-NEXT: kshiftlw $7, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
-; AVX512F-NEXT: kshiftlw $6, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
-; AVX512F-NEXT: kshiftlw $5, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
-; AVX512F-NEXT: kshiftlw $4, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
-; AVX512F-NEXT: kshiftlw $3, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
-; AVX512F-NEXT: kshiftlw $2, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
-; AVX512F-NEXT: kshiftlw $1, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
-; AVX512F-NEXT: kshiftrw $15, %k0, %k0
-; AVX512F-NEXT: kmovw %k0, %eax
-; AVX512F-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
-; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1
-; AVX512F-NEXT: vpand %ymm1, %ymm0, %ymm0
-; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512F-NEXT: vpmovdb %zmm2, %xmm2
+; AVX512F-NEXT: vpand %xmm2, %xmm0, %xmm0
+; AVX512F-NEXT: vpcmpgtw %ymm7, %ymm5, %ymm2
+; AVX512F-NEXT: vpmovsxwd %ymm2, %zmm2
+; AVX512F-NEXT: vpmovdb %zmm2, %xmm2
+; AVX512F-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX512F-NEXT: vpmovsxbd %xmm1, %zmm1
; AVX512F-NEXT: vpslld $31, %zmm1, %zmm1
; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
diff --git a/llvm/test/CodeGen/X86/bitcast-setcc-512.ll b/llvm/test/CodeGen/X86/bitcast-setcc-512.ll
index 371aef00c8d..9914f0b9343 100644
--- a/llvm/test/CodeGen/X86/bitcast-setcc-512.ll
+++ b/llvm/test/CodeGen/X86/bitcast-setcc-512.ll
@@ -60,142 +60,14 @@ define i32 @v32i16(<32 x i16> %a, <32 x i16> %b) {
; AVX512F-NEXT: subq $32, %rsp
; AVX512F-NEXT: vpcmpgtw %ymm3, %ymm1, %ymm1
; AVX512F-NEXT: vpmovsxwd %ymm1, %zmm1
-; AVX512F-NEXT: vpslld $31, %zmm1, %zmm1
-; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
-; AVX512F-NEXT: kshiftlw $14, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: kshiftlw $15, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %ecx
-; AVX512F-NEXT: vmovd %ecx, %xmm1
-; AVX512F-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftlw $13, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftlw $12, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftlw $11, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftlw $10, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftlw $9, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftlw $8, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftlw $7, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftlw $6, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftlw $5, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftlw $4, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftlw $3, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftlw $2, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftlw $1, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftrw $15, %k0, %k0
-; AVX512F-NEXT: kmovw %k0, %eax
-; AVX512F-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
+; AVX512F-NEXT: vpmovdb %zmm1, %xmm1
; AVX512F-NEXT: vpmovsxbd %xmm1, %zmm1
; AVX512F-NEXT: vpslld $31, %zmm1, %zmm1
; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
; AVX512F-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; AVX512F-NEXT: vpcmpgtw %ymm2, %ymm0, %ymm0
; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0
-; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
-; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
-; AVX512F-NEXT: kshiftlw $14, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: kshiftlw $15, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %ecx
-; AVX512F-NEXT: vmovd %ecx, %xmm0
-; AVX512F-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftlw $13, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftlw $12, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftlw $11, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftlw $10, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftlw $9, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftlw $8, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftlw $7, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftlw $6, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftlw $5, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftlw $4, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftlw $3, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftlw $2, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftlw $1, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftrw $15, %k0, %k0
-; AVX512F-NEXT: kmovw %k0, %eax
-; AVX512F-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
diff --git a/llvm/test/CodeGen/X86/vector-compare-results.ll b/llvm/test/CodeGen/X86/vector-compare-results.ll
index 029217cc32d..47ed70ce0ed 100644
--- a/llvm/test/CodeGen/X86/vector-compare-results.ll
+++ b/llvm/test/CodeGen/X86/vector-compare-results.ll
@@ -4259,280 +4259,24 @@ define <64 x i1> @test_cmp_v64i16(<64 x i16> %a0, <64 x i16> %a1) nounwind {
;
; AVX512F-LABEL: test_cmp_v64i16:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: vpcmpgtw %ymm7, %ymm3, %ymm3
-; AVX512F-NEXT: vpmovsxwd %ymm3, %zmm3
-; AVX512F-NEXT: vpslld $31, %zmm3, %zmm3
-; AVX512F-NEXT: vptestmd %zmm3, %zmm3, %k0
-; AVX512F-NEXT: kshiftlw $14, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: kshiftlw $15, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %ecx
-; AVX512F-NEXT: vmovd %ecx, %xmm3
-; AVX512F-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3
-; AVX512F-NEXT: kshiftlw $13, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3
-; AVX512F-NEXT: kshiftlw $12, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3
-; AVX512F-NEXT: kshiftlw $11, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3
-; AVX512F-NEXT: kshiftlw $10, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3
-; AVX512F-NEXT: kshiftlw $9, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3
-; AVX512F-NEXT: kshiftlw $8, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3
-; AVX512F-NEXT: kshiftlw $7, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3
-; AVX512F-NEXT: kshiftlw $6, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3
-; AVX512F-NEXT: kshiftlw $5, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3
-; AVX512F-NEXT: kshiftlw $4, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3
-; AVX512F-NEXT: kshiftlw $3, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3
-; AVX512F-NEXT: kshiftlw $2, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3
-; AVX512F-NEXT: kshiftlw $1, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3
-; AVX512F-NEXT: kshiftrw $15, %k0, %k0
-; AVX512F-NEXT: kmovw %k0, %eax
-; AVX512F-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3
; AVX512F-NEXT: vpcmpgtw %ymm6, %ymm2, %ymm2
; AVX512F-NEXT: vpmovsxwd %ymm2, %zmm2
-; AVX512F-NEXT: vpslld $31, %zmm2, %zmm2
-; AVX512F-NEXT: vptestmd %zmm2, %zmm2, %k0
-; AVX512F-NEXT: kshiftlw $14, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: kshiftlw $15, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %ecx
-; AVX512F-NEXT: vmovd %ecx, %xmm2
-; AVX512F-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
-; AVX512F-NEXT: kshiftlw $13, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
-; AVX512F-NEXT: kshiftlw $12, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
-; AVX512F-NEXT: kshiftlw $11, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
-; AVX512F-NEXT: kshiftlw $10, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
-; AVX512F-NEXT: kshiftlw $9, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
-; AVX512F-NEXT: kshiftlw $8, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
-; AVX512F-NEXT: kshiftlw $7, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
-; AVX512F-NEXT: kshiftlw $6, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
-; AVX512F-NEXT: kshiftlw $5, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
-; AVX512F-NEXT: kshiftlw $4, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
-; AVX512F-NEXT: kshiftlw $3, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
-; AVX512F-NEXT: kshiftlw $2, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
-; AVX512F-NEXT: kshiftlw $1, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
-; AVX512F-NEXT: kshiftrw $15, %k0, %k0
-; AVX512F-NEXT: kmovw %k0, %eax
-; AVX512F-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
+; AVX512F-NEXT: vpmovdb %zmm2, %xmm2
+; AVX512F-NEXT: vpcmpgtw %ymm7, %ymm3, %ymm3
+; AVX512F-NEXT: vpmovsxwd %ymm3, %zmm3
+; AVX512F-NEXT: vpmovdb %zmm3, %xmm3
; AVX512F-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
; AVX512F-NEXT: vpsllw $7, %ymm2, %ymm2
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; AVX512F-NEXT: vpand %ymm3, %ymm2, %ymm2
; AVX512F-NEXT: vpxor %xmm6, %xmm6, %xmm6
; AVX512F-NEXT: vpcmpgtb %ymm2, %ymm6, %ymm2
-; AVX512F-NEXT: vpcmpgtw %ymm5, %ymm1, %ymm1
-; AVX512F-NEXT: vpmovsxwd %ymm1, %zmm1
-; AVX512F-NEXT: vpslld $31, %zmm1, %zmm1
-; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
-; AVX512F-NEXT: kshiftlw $14, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: kshiftlw $15, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %ecx
-; AVX512F-NEXT: vmovd %ecx, %xmm1
-; AVX512F-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftlw $13, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftlw $12, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftlw $11, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftlw $10, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftlw $9, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftlw $8, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftlw $7, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftlw $6, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftlw $5, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftlw $4, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftlw $3, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftlw $2, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftlw $1, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftrw $15, %k0, %k0
-; AVX512F-NEXT: kmovw %k0, %eax
-; AVX512F-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
; AVX512F-NEXT: vpcmpgtw %ymm4, %ymm0, %ymm0
; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0
-; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
-; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
-; AVX512F-NEXT: kshiftlw $14, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: kshiftlw $15, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %ecx
-; AVX512F-NEXT: vmovd %ecx, %xmm0
-; AVX512F-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftlw $13, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftlw $12, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftlw $11, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftlw $10, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftlw $9, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftlw $8, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftlw $7, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftlw $6, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftlw $5, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftlw $4, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftlw $3, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftlw $2, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftlw $1, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftrw $15, %k0, %k0
-; AVX512F-NEXT: kmovw %k0, %eax
-; AVX512F-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512F-NEXT: vpcmpgtw %ymm5, %ymm1, %ymm1
+; AVX512F-NEXT: vpmovsxwd %ymm1, %zmm1
+; AVX512F-NEXT: vpmovdb %zmm1, %xmm1
; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-NEXT: vpsllw $7, %ymm0, %ymm0
; AVX512F-NEXT: vpand %ymm3, %ymm0, %ymm0
@@ -4546,280 +4290,24 @@ define <64 x i1> @test_cmp_v64i16(<64 x i16> %a0, <64 x i16> %a1) nounwind {
;
; AVX512DQ-LABEL: test_cmp_v64i16:
; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: vpcmpgtw %ymm7, %ymm3, %ymm3
-; AVX512DQ-NEXT: vpmovsxwd %ymm3, %zmm3
-; AVX512DQ-NEXT: vpslld $31, %zmm3, %zmm3
-; AVX512DQ-NEXT: vptestmd %zmm3, %zmm3, %k0
-; AVX512DQ-NEXT: kshiftlw $14, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: kshiftlw $15, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %ecx
-; AVX512DQ-NEXT: vmovd %ecx, %xmm3
-; AVX512DQ-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3
-; AVX512DQ-NEXT: kshiftlw $13, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3
-; AVX512DQ-NEXT: kshiftlw $12, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3
-; AVX512DQ-NEXT: kshiftlw $11, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3
-; AVX512DQ-NEXT: kshiftlw $10, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3
-; AVX512DQ-NEXT: kshiftlw $9, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3
-; AVX512DQ-NEXT: kshiftlw $8, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3
-; AVX512DQ-NEXT: kshiftlw $7, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3
-; AVX512DQ-NEXT: kshiftlw $6, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3
-; AVX512DQ-NEXT: kshiftlw $5, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3
-; AVX512DQ-NEXT: kshiftlw $4, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3
-; AVX512DQ-NEXT: kshiftlw $3, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3
-; AVX512DQ-NEXT: kshiftlw $2, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3
-; AVX512DQ-NEXT: kshiftlw $1, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3
-; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
-; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3
; AVX512DQ-NEXT: vpcmpgtw %ymm6, %ymm2, %ymm2
; AVX512DQ-NEXT: vpmovsxwd %ymm2, %zmm2
-; AVX512DQ-NEXT: vpslld $31, %zmm2, %zmm2
-; AVX512DQ-NEXT: vptestmd %zmm2, %zmm2, %k0
-; AVX512DQ-NEXT: kshiftlw $14, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: kshiftlw $15, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %ecx
-; AVX512DQ-NEXT: vmovd %ecx, %xmm2
-; AVX512DQ-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
-; AVX512DQ-NEXT: kshiftlw $13, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
-; AVX512DQ-NEXT: kshiftlw $12, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
-; AVX512DQ-NEXT: kshiftlw $11, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
-; AVX512DQ-NEXT: kshiftlw $10, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
-; AVX512DQ-NEXT: kshiftlw $9, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
-; AVX512DQ-NEXT: kshiftlw $8, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
-; AVX512DQ-NEXT: kshiftlw $7, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
-; AVX512DQ-NEXT: kshiftlw $6, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
-; AVX512DQ-NEXT: kshiftlw $5, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
-; AVX512DQ-NEXT: kshiftlw $4, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
-; AVX512DQ-NEXT: kshiftlw $3, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
-; AVX512DQ-NEXT: kshiftlw $2, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
-; AVX512DQ-NEXT: kshiftlw $1, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
-; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
-; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
+; AVX512DQ-NEXT: vpmovdb %zmm2, %xmm2
+; AVX512DQ-NEXT: vpcmpgtw %ymm7, %ymm3, %ymm3
+; AVX512DQ-NEXT: vpmovsxwd %ymm3, %zmm3
+; AVX512DQ-NEXT: vpmovdb %zmm3, %xmm3
; AVX512DQ-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
; AVX512DQ-NEXT: vpsllw $7, %ymm2, %ymm2
; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm3 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; AVX512DQ-NEXT: vpand %ymm3, %ymm2, %ymm2
; AVX512DQ-NEXT: vpxor %xmm6, %xmm6, %xmm6
; AVX512DQ-NEXT: vpcmpgtb %ymm2, %ymm6, %ymm2
-; AVX512DQ-NEXT: vpcmpgtw %ymm5, %ymm1, %ymm1
-; AVX512DQ-NEXT: vpmovsxwd %ymm1, %zmm1
-; AVX512DQ-NEXT: vpslld $31, %zmm1, %zmm1
-; AVX512DQ-NEXT: vptestmd %zmm1, %zmm1, %k0
-; AVX512DQ-NEXT: kshiftlw $14, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: kshiftlw $15, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %ecx
-; AVX512DQ-NEXT: vmovd %ecx, %xmm1
-; AVX512DQ-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
-; AVX512DQ-NEXT: kshiftlw $13, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
-; AVX512DQ-NEXT: kshiftlw $12, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
-; AVX512DQ-NEXT: kshiftlw $11, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
-; AVX512DQ-NEXT: kshiftlw $10, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
-; AVX512DQ-NEXT: kshiftlw $9, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
-; AVX512DQ-NEXT: kshiftlw $8, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
-; AVX512DQ-NEXT: kshiftlw $7, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
-; AVX512DQ-NEXT: kshiftlw $6, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1
-; AVX512DQ-NEXT: kshiftlw $5, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1
-; AVX512DQ-NEXT: kshiftlw $4, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1
-; AVX512DQ-NEXT: kshiftlw $3, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
-; AVX512DQ-NEXT: kshiftlw $2, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1
-; AVX512DQ-NEXT: kshiftlw $1, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1
-; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
-; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
; AVX512DQ-NEXT: vpcmpgtw %ymm4, %ymm0, %ymm0
; AVX512DQ-NEXT: vpmovsxwd %ymm0, %zmm0
-; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0
-; AVX512DQ-NEXT: vptestmd %zmm0, %zmm0, %k0
-; AVX512DQ-NEXT: kshiftlw $14, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: kshiftlw $15, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %ecx
-; AVX512DQ-NEXT: vmovd %ecx, %xmm0
-; AVX512DQ-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
-; AVX512DQ-NEXT: kshiftlw $13, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; AVX512DQ-NEXT: kshiftlw $12, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; AVX512DQ-NEXT: kshiftlw $11, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
-; AVX512DQ-NEXT: kshiftlw $10, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; AVX512DQ-NEXT: kshiftlw $9, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; AVX512DQ-NEXT: kshiftlw $8, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
-; AVX512DQ-NEXT: kshiftlw $7, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
-; AVX512DQ-NEXT: kshiftlw $6, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
-; AVX512DQ-NEXT: kshiftlw $5, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
-; AVX512DQ-NEXT: kshiftlw $4, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; AVX512DQ-NEXT: kshiftlw $3, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
-; AVX512DQ-NEXT: kshiftlw $2, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
-; AVX512DQ-NEXT: kshiftlw $1, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
-; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
-; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512DQ-NEXT: vpcmpgtw %ymm5, %ymm1, %ymm1
+; AVX512DQ-NEXT: vpmovsxwd %ymm1, %zmm1
+; AVX512DQ-NEXT: vpmovdb %zmm1, %xmm1
; AVX512DQ-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512DQ-NEXT: vpsllw $7, %ymm0, %ymm0
; AVX512DQ-NEXT: vpand %ymm3, %ymm0, %ymm0
OpenPOWER on IntegriCloud