summaryrefslogtreecommitdiffstats
path: root/llvm/test/CodeGen/X86/avx512-mask-op.ll
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test/CodeGen/X86/avx512-mask-op.ll')
-rw-r--r--llvm/test/CodeGen/X86/avx512-mask-op.ll639
1 files changed, 334 insertions, 305 deletions
diff --git a/llvm/test/CodeGen/X86/avx512-mask-op.ll b/llvm/test/CodeGen/X86/avx512-mask-op.ll
index 77a2a021416..2e6cce09f94 100644
--- a/llvm/test/CodeGen/X86/avx512-mask-op.ll
+++ b/llvm/test/CodeGen/X86/avx512-mask-op.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -stack-symbol-ordering=0 -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=KNL
-; RUN: llc < %s -stack-symbol-ordering=0 -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX
+; RUN: llc < %s -stack-symbol-ordering=0 -mtriple=x86_64-apple-darwin -mattr=+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=KNL
+; RUN: llc < %s -stack-symbol-ordering=0 -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq | FileCheck %s --check-prefix=CHECK --check-prefix=SKX
; RUN: llc < %s -stack-symbol-ordering=0 -mtriple=x86_64-apple-darwin -mattr=+avx512bw | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512BW
; RUN: llc < %s -stack-symbol-ordering=0 -mtriple=x86_64-apple-darwin -mattr=+avx512dq | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512DQ
@@ -287,7 +287,6 @@ define i8 @shuf_test1(i16 %v) nounwind {
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; KNL-NEXT: retq
-; KNL-NEXT: ## -- End function
;
; SKX-LABEL: shuf_test1:
; SKX: ## BB#0:
@@ -296,7 +295,6 @@ define i8 @shuf_test1(i16 %v) nounwind {
; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; SKX-NEXT: retq
-; SKX-NEXT: ## -- End function
;
; AVX512BW-LABEL: shuf_test1:
; AVX512BW: ## BB#0:
@@ -305,7 +303,6 @@ define i8 @shuf_test1(i16 %v) nounwind {
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512BW-NEXT: retq
-; AVX512BW-NEXT: ## -- End function
;
; AVX512DQ-LABEL: shuf_test1:
; AVX512DQ: ## BB#0:
@@ -314,7 +311,6 @@ define i8 @shuf_test1(i16 %v) nounwind {
; AVX512DQ-NEXT: kmovw %k0, %eax
; AVX512DQ-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512DQ-NEXT: retq
-; AVX512DQ-NEXT: ## -- End function
%v1 = bitcast i16 %v to <16 x i1>
%mask = shufflevector <16 x i1> %v1, <16 x i1> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%mask1 = bitcast <8 x i1> %mask to i8
@@ -329,6 +325,7 @@ define i32 @zext_test1(<16 x i32> %a, <16 x i32> %b) {
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: andl $1, %eax
+; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-LABEL: zext_test1:
@@ -375,6 +372,7 @@ define i16 @zext_test2(<16 x i32> %a, <16 x i32> %b) {
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: andl $1, %eax
; KNL-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
+; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-LABEL: zext_test2:
@@ -424,6 +422,7 @@ define i8 @zext_test3(<16 x i32> %a, <16 x i32> %b) {
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: andb $1, %al
; KNL-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
+; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-LABEL: zext_test3:
@@ -516,6 +515,7 @@ define <4 x i32> @test4(<4 x i64> %x, <4 x i64> %y, <4 x i64> %x1, <4 x i64> %y1
; KNL-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm1
; KNL-NEXT: vpmovqd %zmm1, %ymm1
; KNL-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
+; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-LABEL: test4:
@@ -611,6 +611,7 @@ define void @test7(<8 x i1> %mask) {
; KNL-NEXT: korw %k1, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: testb %al, %al
+; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-LABEL: test7:
@@ -661,8 +662,8 @@ false:
define <16 x i8> @test8(<16 x i32>%a, <16 x i32>%b, i32 %a1, i32 %b1) {
; KNL-LABEL: test8:
; KNL: ## BB#0:
-; KNL-NEXT: vpxor %xmm2, %xmm2, %xmm2
; KNL-NEXT: cmpl %esi, %edi
+; KNL-NEXT: vpxor %xmm2, %xmm2, %xmm2
; KNL-NEXT: jg LBB17_1
; KNL-NEXT: ## BB#2:
; KNL-NEXT: vpcmpltud %zmm2, %zmm1, %k1
@@ -672,12 +673,13 @@ define <16 x i8> @test8(<16 x i32>%a, <16 x i32>%b, i32 %a1, i32 %b1) {
; KNL-NEXT: LBB17_3:
; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: vpmovdb %zmm0, %xmm0
+; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-LABEL: test8:
; SKX: ## BB#0:
-; SKX-NEXT: vpxor %xmm2, %xmm2, %xmm2
; SKX-NEXT: cmpl %esi, %edi
+; SKX-NEXT: vpxor %xmm2, %xmm2, %xmm2
; SKX-NEXT: jg LBB17_1
; SKX-NEXT: ## BB#2:
; SKX-NEXT: vpcmpltud %zmm2, %zmm1, %k0
@@ -743,6 +745,7 @@ define <16 x i1> @test9(<16 x i1>%a, <16 x i1>%b, i32 %a1, i32 %b1) {
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: vpmovdb %zmm0, %xmm0
+; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-LABEL: test9:
@@ -883,6 +886,7 @@ define <16 x i1> @test15(i32 %x, i32 %y) {
; KNL-NEXT: kmovw %ecx, %k1
; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: vpmovdb %zmm0, %xmm0
+; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-LABEL: test15:
@@ -1189,6 +1193,7 @@ define <8 x i1> @test18(i8 %a, i16 %y) {
; KNL-NEXT: korw %k1, %k0, %k1
; KNL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: vpmovqw %zmm0, %xmm0
+; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-LABEL: test18:
@@ -1289,7 +1294,6 @@ define <32 x i16> @test21(<32 x i16> %x , <32 x i1> %mask) nounwind readnone {
; KNL-NEXT: vpsraw $15, %ymm2, %ymm2
; KNL-NEXT: vpand %ymm1, %ymm2, %ymm1
; KNL-NEXT: retq
-; KNL-NEXT: ## -- End function
;
; SKX-LABEL: test21:
; SKX: ## BB#0:
@@ -1297,7 +1301,6 @@ define <32 x i16> @test21(<32 x i16> %x , <32 x i1> %mask) nounwind readnone {
; SKX-NEXT: vpmovb2m %ymm1, %k1
; SKX-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z}
; SKX-NEXT: retq
-; SKX-NEXT: ## -- End function
;
; AVX512BW-LABEL: test21:
; AVX512BW: ## BB#0:
@@ -1305,7 +1308,6 @@ define <32 x i16> @test21(<32 x i16> %x , <32 x i1> %mask) nounwind readnone {
; AVX512BW-NEXT: vpmovb2m %zmm1, %k1
; AVX512BW-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z}
; AVX512BW-NEXT: retq
-; AVX512BW-NEXT: ## -- End function
;
; AVX512DQ-LABEL: test21:
; AVX512DQ: ## BB#0:
@@ -1319,7 +1321,6 @@ define <32 x i16> @test21(<32 x i16> %x , <32 x i1> %mask) nounwind readnone {
; AVX512DQ-NEXT: vpsraw $15, %ymm2, %ymm2
; AVX512DQ-NEXT: vpand %ymm1, %ymm2, %ymm1
; AVX512DQ-NEXT: retq
-; AVX512DQ-NEXT: ## -- End function
%ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
ret <32 x i16> %ret
}
@@ -1332,6 +1333,7 @@ define void @test22(<4 x i1> %a, <4 x i1>* %addr) {
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: movb %al, (%rdi)
+; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-LABEL: test22:
@@ -1371,6 +1373,7 @@ define void @test23(<2 x i1> %a, <2 x i1>* %addr) {
; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: movb %al, (%rdi)
+; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-LABEL: test23:
@@ -1450,6 +1453,7 @@ define void @store_v2i1(<2 x i1> %c , <2 x i1>* %ptr) {
; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: movb %al, (%rdi)
+; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-LABEL: store_v2i1:
@@ -1494,6 +1498,7 @@ define void @store_v4i1(<4 x i1> %c , <4 x i1>* %ptr) {
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: movb %al, (%rdi)
+; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-LABEL: store_v4i1:
@@ -1538,6 +1543,7 @@ define void @store_v8i1(<8 x i1> %c , <8 x i1>* %ptr) {
; KNL-NEXT: knotw %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: movb %al, (%rdi)
+; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-LABEL: store_v8i1:
@@ -1580,6 +1586,7 @@ define void @store_v16i1(<16 x i1> %c , <16 x i1>* %ptr) {
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT: knotw %k0, %k0
; KNL-NEXT: kmovw %k0, (%rdi)
+; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-LABEL: store_v16i1:
@@ -1738,9 +1745,11 @@ define void @ktest_1(<8 x double> %in, double * %base) {
; KNL-NEXT: je LBB41_2
; KNL-NEXT: ## BB#1: ## %L1
; KNL-NEXT: vmovapd %zmm0, (%rdi)
+; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
; KNL-NEXT: LBB41_2: ## %L2
; KNL-NEXT: vmovapd %zmm0, 8(%rdi)
+; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-LABEL: ktest_1:
@@ -1835,73 +1844,9 @@ define void @ktest_2(<32 x float> %in, float * %base) {
; KNL-NEXT: .cfi_def_cfa_register %rbp
; KNL-NEXT: andq $-32, %rsp
; KNL-NEXT: subq $32, %rsp
-; KNL-NEXT: vmovups 64(%rdi), %zmm2
-; KNL-NEXT: vcmpltps %zmm1, %zmm2, %k2
-; KNL-NEXT: kshiftlw $14, %k2, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: kshiftlw $15, %k2, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %ecx
-; KNL-NEXT: vmovd %ecx, %xmm2
-; KNL-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
-; KNL-NEXT: kshiftlw $13, %k2, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
-; KNL-NEXT: kshiftlw $12, %k2, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
-; KNL-NEXT: kshiftlw $11, %k2, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
-; KNL-NEXT: kshiftlw $10, %k2, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
-; KNL-NEXT: kshiftlw $9, %k2, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
-; KNL-NEXT: kshiftlw $8, %k2, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
-; KNL-NEXT: kshiftlw $7, %k2, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
-; KNL-NEXT: kshiftlw $6, %k2, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
-; KNL-NEXT: kshiftlw $5, %k2, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
-; KNL-NEXT: kshiftlw $4, %k2, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
-; KNL-NEXT: kshiftlw $3, %k2, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
-; KNL-NEXT: kshiftlw $2, %k2, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
-; KNL-NEXT: kshiftlw $1, %k2, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
-; KNL-NEXT: kshiftrw $15, %k2, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
-; KNL-NEXT: vmovups (%rdi), %zmm3
-; KNL-NEXT: vcmpltps %zmm0, %zmm3, %k1
+; KNL-NEXT: vmovups (%rdi), %zmm2
+; KNL-NEXT: vmovups 64(%rdi), %zmm3
+; KNL-NEXT: vcmpltps %zmm1, %zmm3, %k1
; KNL-NEXT: kshiftlw $14, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
@@ -1965,138 +1910,202 @@ define void @ktest_2(<32 x float> %in, float * %base) {
; KNL-NEXT: kshiftrw $15, %k1, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3
-; KNL-NEXT: vmovups 68(%rdi), %zmm4 {%k2} {z}
+; KNL-NEXT: vcmpltps %zmm0, %zmm2, %k2
+; KNL-NEXT: kshiftlw $14, %k2, %k0
+; KNL-NEXT: kshiftrw $15, %k0, %k0
+; KNL-NEXT: kmovw %k0, %eax
+; KNL-NEXT: kshiftlw $15, %k2, %k0
+; KNL-NEXT: kshiftrw $15, %k0, %k0
+; KNL-NEXT: kmovw %k0, %ecx
+; KNL-NEXT: vmovd %ecx, %xmm2
+; KNL-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
+; KNL-NEXT: kshiftlw $13, %k2, %k0
+; KNL-NEXT: kshiftrw $15, %k0, %k0
+; KNL-NEXT: kmovw %k0, %eax
+; KNL-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
+; KNL-NEXT: kshiftlw $12, %k2, %k0
+; KNL-NEXT: kshiftrw $15, %k0, %k0
+; KNL-NEXT: kmovw %k0, %eax
+; KNL-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
+; KNL-NEXT: kshiftlw $11, %k2, %k0
+; KNL-NEXT: kshiftrw $15, %k0, %k0
+; KNL-NEXT: kmovw %k0, %eax
+; KNL-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
+; KNL-NEXT: kshiftlw $10, %k2, %k0
+; KNL-NEXT: kshiftrw $15, %k0, %k0
+; KNL-NEXT: kmovw %k0, %eax
+; KNL-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
+; KNL-NEXT: kshiftlw $9, %k2, %k0
+; KNL-NEXT: kshiftrw $15, %k0, %k0
+; KNL-NEXT: kmovw %k0, %eax
+; KNL-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
+; KNL-NEXT: kshiftlw $8, %k2, %k0
+; KNL-NEXT: kshiftrw $15, %k0, %k0
+; KNL-NEXT: kmovw %k0, %eax
+; KNL-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
+; KNL-NEXT: kshiftlw $7, %k2, %k0
+; KNL-NEXT: kshiftrw $15, %k0, %k0
+; KNL-NEXT: kmovw %k0, %eax
+; KNL-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
+; KNL-NEXT: kshiftlw $6, %k2, %k0
+; KNL-NEXT: kshiftrw $15, %k0, %k0
+; KNL-NEXT: kmovw %k0, %eax
+; KNL-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
+; KNL-NEXT: kshiftlw $5, %k2, %k0
+; KNL-NEXT: kshiftrw $15, %k0, %k0
+; KNL-NEXT: kmovw %k0, %eax
+; KNL-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
+; KNL-NEXT: kshiftlw $4, %k2, %k0
+; KNL-NEXT: kshiftrw $15, %k0, %k0
+; KNL-NEXT: kmovw %k0, %eax
+; KNL-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
+; KNL-NEXT: kshiftlw $3, %k2, %k0
+; KNL-NEXT: kshiftrw $15, %k0, %k0
+; KNL-NEXT: kmovw %k0, %eax
+; KNL-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
+; KNL-NEXT: kshiftlw $2, %k2, %k0
+; KNL-NEXT: kshiftrw $15, %k0, %k0
+; KNL-NEXT: kmovw %k0, %eax
+; KNL-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
+; KNL-NEXT: kshiftlw $1, %k2, %k0
+; KNL-NEXT: kshiftrw $15, %k0, %k0
+; KNL-NEXT: kmovw %k0, %eax
+; KNL-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
+; KNL-NEXT: kshiftrw $15, %k2, %k0
+; KNL-NEXT: kmovw %k0, %eax
+; KNL-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
+; KNL-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
+; KNL-NEXT: vmovups 4(%rdi), %zmm3 {%k2} {z}
+; KNL-NEXT: vmovups 68(%rdi), %zmm4 {%k1} {z}
; KNL-NEXT: vcmpltps %zmm4, %zmm1, %k0
-; KNL-NEXT: kshiftlw $14, %k0, %k2
-; KNL-NEXT: kshiftrw $15, %k2, %k2
-; KNL-NEXT: kmovw %k2, %eax
-; KNL-NEXT: kshiftlw $15, %k0, %k2
-; KNL-NEXT: kshiftrw $15, %k2, %k2
-; KNL-NEXT: kmovw %k2, %ecx
+; KNL-NEXT: kshiftlw $14, %k0, %k1
+; KNL-NEXT: kshiftrw $15, %k1, %k1
+; KNL-NEXT: kmovw %k1, %eax
+; KNL-NEXT: kshiftlw $15, %k0, %k1
+; KNL-NEXT: kshiftrw $15, %k1, %k1
+; KNL-NEXT: kmovw %k1, %ecx
; KNL-NEXT: vmovd %ecx, %xmm4
; KNL-NEXT: vpinsrb $1, %eax, %xmm4, %xmm4
-; KNL-NEXT: kshiftlw $13, %k0, %k2
-; KNL-NEXT: kshiftrw $15, %k2, %k2
-; KNL-NEXT: kmovw %k2, %eax
+; KNL-NEXT: kshiftlw $13, %k0, %k1
+; KNL-NEXT: kshiftrw $15, %k1, %k1
+; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $2, %eax, %xmm4, %xmm4
-; KNL-NEXT: kshiftlw $12, %k0, %k2
-; KNL-NEXT: kshiftrw $15, %k2, %k2
-; KNL-NEXT: kmovw %k2, %eax
+; KNL-NEXT: kshiftlw $12, %k0, %k1
+; KNL-NEXT: kshiftrw $15, %k1, %k1
+; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $3, %eax, %xmm4, %xmm4
-; KNL-NEXT: kshiftlw $11, %k0, %k2
-; KNL-NEXT: kshiftrw $15, %k2, %k2
-; KNL-NEXT: kmovw %k2, %eax
+; KNL-NEXT: kshiftlw $11, %k0, %k1
+; KNL-NEXT: kshiftrw $15, %k1, %k1
+; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $4, %eax, %xmm4, %xmm4
-; KNL-NEXT: kshiftlw $10, %k0, %k2
-; KNL-NEXT: kshiftrw $15, %k2, %k2
-; KNL-NEXT: kmovw %k2, %eax
+; KNL-NEXT: kshiftlw $10, %k0, %k1
+; KNL-NEXT: kshiftrw $15, %k1, %k1
+; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $5, %eax, %xmm4, %xmm4
-; KNL-NEXT: kshiftlw $9, %k0, %k2
-; KNL-NEXT: kshiftrw $15, %k2, %k2
-; KNL-NEXT: kmovw %k2, %eax
+; KNL-NEXT: kshiftlw $9, %k0, %k1
+; KNL-NEXT: kshiftrw $15, %k1, %k1
+; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $6, %eax, %xmm4, %xmm4
-; KNL-NEXT: kshiftlw $8, %k0, %k2
-; KNL-NEXT: kshiftrw $15, %k2, %k2
-; KNL-NEXT: kmovw %k2, %eax
+; KNL-NEXT: kshiftlw $8, %k0, %k1
+; KNL-NEXT: kshiftrw $15, %k1, %k1
+; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $7, %eax, %xmm4, %xmm4
-; KNL-NEXT: kshiftlw $7, %k0, %k2
-; KNL-NEXT: kshiftrw $15, %k2, %k2
-; KNL-NEXT: kmovw %k2, %eax
+; KNL-NEXT: kshiftlw $7, %k0, %k1
+; KNL-NEXT: kshiftrw $15, %k1, %k1
+; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $8, %eax, %xmm4, %xmm4
-; KNL-NEXT: kshiftlw $6, %k0, %k2
-; KNL-NEXT: kshiftrw $15, %k2, %k2
-; KNL-NEXT: kmovw %k2, %eax
+; KNL-NEXT: kshiftlw $6, %k0, %k1
+; KNL-NEXT: kshiftrw $15, %k1, %k1
+; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $9, %eax, %xmm4, %xmm4
-; KNL-NEXT: kshiftlw $5, %k0, %k2
-; KNL-NEXT: kshiftrw $15, %k2, %k2
-; KNL-NEXT: kmovw %k2, %eax
+; KNL-NEXT: kshiftlw $5, %k0, %k1
+; KNL-NEXT: kshiftrw $15, %k1, %k1
+; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $10, %eax, %xmm4, %xmm4
-; KNL-NEXT: kshiftlw $4, %k0, %k2
-; KNL-NEXT: kshiftrw $15, %k2, %k2
-; KNL-NEXT: kmovw %k2, %eax
+; KNL-NEXT: kshiftlw $4, %k0, %k1
+; KNL-NEXT: kshiftrw $15, %k1, %k1
+; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $11, %eax, %xmm4, %xmm4
-; KNL-NEXT: kshiftlw $3, %k0, %k2
-; KNL-NEXT: kshiftrw $15, %k2, %k2
-; KNL-NEXT: kmovw %k2, %eax
+; KNL-NEXT: kshiftlw $3, %k0, %k1
+; KNL-NEXT: kshiftrw $15, %k1, %k1
+; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $12, %eax, %xmm4, %xmm4
-; KNL-NEXT: kshiftlw $2, %k0, %k2
-; KNL-NEXT: kshiftrw $15, %k2, %k2
-; KNL-NEXT: kmovw %k2, %eax
+; KNL-NEXT: kshiftlw $2, %k0, %k1
+; KNL-NEXT: kshiftrw $15, %k1, %k1
+; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $13, %eax, %xmm4, %xmm4
-; KNL-NEXT: kshiftlw $1, %k0, %k2
-; KNL-NEXT: kshiftrw $15, %k2, %k2
-; KNL-NEXT: kmovw %k2, %eax
+; KNL-NEXT: kshiftlw $1, %k0, %k1
+; KNL-NEXT: kshiftrw $15, %k1, %k1
+; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $14, %eax, %xmm4, %xmm4
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $15, %eax, %xmm4, %xmm4
-; KNL-NEXT: vmovups 4(%rdi), %zmm5 {%k1} {z}
-; KNL-NEXT: vcmpltps %zmm5, %zmm0, %k0
+; KNL-NEXT: vcmpltps %zmm3, %zmm0, %k0
; KNL-NEXT: kshiftlw $14, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: kshiftlw $15, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %ecx
-; KNL-NEXT: vmovd %ecx, %xmm5
-; KNL-NEXT: vpinsrb $1, %eax, %xmm5, %xmm5
+; KNL-NEXT: vmovd %ecx, %xmm3
+; KNL-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $13, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $2, %eax, %xmm5, %xmm5
+; KNL-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $12, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $3, %eax, %xmm5, %xmm5
+; KNL-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $11, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $4, %eax, %xmm5, %xmm5
+; KNL-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $10, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $5, %eax, %xmm5, %xmm5
+; KNL-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $9, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $6, %eax, %xmm5, %xmm5
+; KNL-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $8, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $7, %eax, %xmm5, %xmm5
+; KNL-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $7, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $8, %eax, %xmm5, %xmm5
+; KNL-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $6, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $9, %eax, %xmm5, %xmm5
+; KNL-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $5, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $10, %eax, %xmm5, %xmm5
+; KNL-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $4, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $11, %eax, %xmm5, %xmm5
+; KNL-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $3, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $12, %eax, %xmm5, %xmm5
+; KNL-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $2, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $13, %eax, %xmm5, %xmm5
+; KNL-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $1, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $14, %eax, %xmm5, %xmm5
+; KNL-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vpinsrb $15, %eax, %xmm5, %xmm5
-; KNL-NEXT: vinserti128 $1, %xmm2, %ymm3, %ymm2
-; KNL-NEXT: vinserti128 $1, %xmm4, %ymm5, %ymm3
+; KNL-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3
+; KNL-NEXT: vinserti128 $1, %xmm4, %ymm3, %ymm3
; KNL-NEXT: vpor %ymm3, %ymm2, %ymm2
; KNL-NEXT: vextracti128 $1, %ymm2, %xmm3
; KNL-NEXT: vpmovsxbd %xmm3, %zmm3
@@ -2119,6 +2128,7 @@ define void @ktest_2(<32 x float> %in, float * %base) {
; KNL-NEXT: LBB42_3: ## %End
; KNL-NEXT: movq %rbp, %rsp
; KNL-NEXT: popq %rbp
+; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-LABEL: ktest_2:
@@ -2565,6 +2575,7 @@ define <2 x i16> @load_2i1(<2 x i1>* %a) {
; KNL-NEXT: kmovw %eax, %k1
; KNL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
+; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-LABEL: load_2i1:
@@ -2602,6 +2613,7 @@ define <4 x i16> @load_4i1(<4 x i1>* %a) {
; KNL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: vpmovqd %zmm0, %ymm0
; KNL-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
+; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-LABEL: load_4i1:
@@ -2730,6 +2742,7 @@ define void @store_8i1(<8 x i1>* %a, <8 x i1> %v) {
; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: movb %al, (%rdi)
+; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-LABEL: store_8i1:
@@ -2768,6 +2781,7 @@ define void @store_8i1_1(<8 x i1>* %a, <8 x i16> %v) {
; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: movb %al, (%rdi)
+; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-LABEL: store_8i1_1:
@@ -2806,6 +2820,7 @@ define void @store_16i1(<16 x i1>* %a, <16 x i1> %v) {
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT: kmovw %k0, (%rdi)
+; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-LABEL: store_16i1:
@@ -2847,6 +2862,7 @@ define void @store_32i1(<32 x i1>* %a, <32 x i1> %v) {
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT: kmovw %k0, (%rdi)
+; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-LABEL: store_32i1:
@@ -2897,6 +2913,7 @@ define void @store_32i1_1(<32 x i1>* %a, <32 x i16> %v) {
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT: kmovw %k0, (%rdi)
+; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-LABEL: store_32i1_1:
@@ -2941,6 +2958,36 @@ define void @store_64i1(<64 x i1>* %a, <64 x i1> %v) {
;
; KNL-LABEL: store_64i1:
; KNL: ## BB#0:
+; KNL-NEXT: pushq %rbp
+; KNL-NEXT: Lcfi9:
+; KNL-NEXT: .cfi_def_cfa_offset 16
+; KNL-NEXT: pushq %r15
+; KNL-NEXT: Lcfi10:
+; KNL-NEXT: .cfi_def_cfa_offset 24
+; KNL-NEXT: pushq %r14
+; KNL-NEXT: Lcfi11:
+; KNL-NEXT: .cfi_def_cfa_offset 32
+; KNL-NEXT: pushq %r13
+; KNL-NEXT: Lcfi12:
+; KNL-NEXT: .cfi_def_cfa_offset 40
+; KNL-NEXT: pushq %r12
+; KNL-NEXT: Lcfi13:
+; KNL-NEXT: .cfi_def_cfa_offset 48
+; KNL-NEXT: pushq %rbx
+; KNL-NEXT: Lcfi14:
+; KNL-NEXT: .cfi_def_cfa_offset 56
+; KNL-NEXT: Lcfi15:
+; KNL-NEXT: .cfi_offset %rbx, -56
+; KNL-NEXT: Lcfi16:
+; KNL-NEXT: .cfi_offset %r12, -48
+; KNL-NEXT: Lcfi17:
+; KNL-NEXT: .cfi_offset %r13, -40
+; KNL-NEXT: Lcfi18:
+; KNL-NEXT: .cfi_offset %r14, -32
+; KNL-NEXT: Lcfi19:
+; KNL-NEXT: .cfi_offset %r15, -24
+; KNL-NEXT: Lcfi20:
+; KNL-NEXT: .cfi_offset %rbp, -16
; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
; KNL-NEXT: vpmovsxbd %xmm1, %zmm1
@@ -2952,66 +2999,66 @@ define void @store_64i1(<64 x i1>* %a, <64 x i1> %v) {
; KNL-NEXT: vptestmd %zmm3, %zmm3, %k0
; KNL-NEXT: kshiftlw $14, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
+; KNL-NEXT: kmovw %k1, %r8d
; KNL-NEXT: kshiftlw $15, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %ecx
+; KNL-NEXT: kmovw %k1, %r9d
; KNL-NEXT: kshiftlw $13, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %edx
+; KNL-NEXT: kmovw %k1, %r10d
; KNL-NEXT: kshiftlw $12, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: vmovd %ecx, %xmm3
-; KNL-NEXT: kmovw %k1, %ecx
+; KNL-NEXT: kmovw %k1, %r11d
; KNL-NEXT: kshiftlw $11, %k0, %k1
-; KNL-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
+; KNL-NEXT: kmovw %k1, %r14d
; KNL-NEXT: kshiftlw $10, %k0, %k1
-; KNL-NEXT: vpinsrb $2, %edx, %xmm3, %xmm3
; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %edx
+; KNL-NEXT: kmovw %k1, %r15d
; KNL-NEXT: kshiftlw $9, %k0, %k1
-; KNL-NEXT: vpinsrb $3, %ecx, %xmm3, %xmm3
; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %ecx
+; KNL-NEXT: kmovw %k1, %r12d
; KNL-NEXT: kshiftlw $8, %k0, %k1
-; KNL-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
+; KNL-NEXT: kmovw %k1, %r13d
; KNL-NEXT: kshiftlw $7, %k0, %k1
-; KNL-NEXT: vpinsrb $5, %edx, %xmm3, %xmm3
; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %edx
+; KNL-NEXT: kmovw %k1, %ebx
; KNL-NEXT: kshiftlw $6, %k0, %k1
-; KNL-NEXT: vpinsrb $6, %ecx, %xmm3, %xmm3
; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %ecx
+; KNL-NEXT: kmovw %k1, %ebp
; KNL-NEXT: kshiftlw $5, %k0, %k1
-; KNL-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: kshiftlw $4, %k0, %k1
-; KNL-NEXT: vpinsrb $8, %edx, %xmm3, %xmm3
; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %edx
+; KNL-NEXT: kmovw %k1, %ecx
; KNL-NEXT: kshiftlw $3, %k0, %k1
-; KNL-NEXT: vpinsrb $9, %ecx, %xmm3, %xmm3
; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %ecx
+; KNL-NEXT: kmovw %k1, %edx
; KNL-NEXT: kshiftlw $2, %k0, %k1
-; KNL-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
+; KNL-NEXT: kmovw %k1, %esi
; KNL-NEXT: kshiftlw $1, %k0, %k1
-; KNL-NEXT: vpinsrb $11, %edx, %xmm3, %xmm3
; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %edx
+; KNL-NEXT: vmovd %r9d, %xmm3
+; KNL-NEXT: kmovw %k1, %r9d
; KNL-NEXT: vptestmd %zmm2, %zmm2, %k2
-; KNL-NEXT: vpinsrb $12, %ecx, %xmm3, %xmm2
-; KNL-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
-; KNL-NEXT: vpinsrb $14, %edx, %xmm2, %xmm2
; KNL-NEXT: kshiftrw $15, %k0, %k0
+; KNL-NEXT: vpinsrb $1, %r8d, %xmm3, %xmm2
+; KNL-NEXT: vpinsrb $2, %r10d, %xmm2, %xmm2
+; KNL-NEXT: vpinsrb $3, %r11d, %xmm2, %xmm2
+; KNL-NEXT: vpinsrb $4, %r14d, %xmm2, %xmm2
+; KNL-NEXT: vpinsrb $5, %r15d, %xmm2, %xmm2
+; KNL-NEXT: vpinsrb $6, %r12d, %xmm2, %xmm2
+; KNL-NEXT: vpinsrb $7, %r13d, %xmm2, %xmm2
+; KNL-NEXT: vpinsrb $8, %ebx, %xmm2, %xmm2
+; KNL-NEXT: vpinsrb $9, %ebp, %xmm2, %xmm2
+; KNL-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
+; KNL-NEXT: vpinsrb $11, %ecx, %xmm2, %xmm2
+; KNL-NEXT: vpinsrb $12, %edx, %xmm2, %xmm2
+; KNL-NEXT: vpinsrb $13, %esi, %xmm2, %xmm2
+; KNL-NEXT: vpinsrb $14, %r9d, %xmm2, %xmm2
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
; KNL-NEXT: vpmovsxbd %xmm2, %zmm2
@@ -3020,66 +3067,66 @@ define void @store_64i1(<64 x i1>* %a, <64 x i1> %v) {
; KNL-NEXT: kmovw %k0, 6(%rdi)
; KNL-NEXT: kshiftlw $14, %k2, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %eax
+; KNL-NEXT: kmovw %k0, %r8d
; KNL-NEXT: kshiftlw $15, %k2, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %ecx
+; KNL-NEXT: kmovw %k0, %r10d
; KNL-NEXT: kshiftlw $13, %k2, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %edx
+; KNL-NEXT: kmovw %k0, %r9d
; KNL-NEXT: kshiftlw $12, %k2, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: vmovd %ecx, %xmm2
-; KNL-NEXT: kmovw %k0, %ecx
+; KNL-NEXT: kmovw %k0, %r11d
; KNL-NEXT: kshiftlw $11, %k2, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
-; KNL-NEXT: kmovw %k0, %eax
+; KNL-NEXT: kmovw %k0, %r14d
; KNL-NEXT: kshiftlw $10, %k2, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: vpinsrb $2, %edx, %xmm2, %xmm2
-; KNL-NEXT: kmovw %k0, %edx
+; KNL-NEXT: kmovw %k0, %r15d
; KNL-NEXT: kshiftlw $9, %k2, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: vpinsrb $3, %ecx, %xmm2, %xmm2
-; KNL-NEXT: kmovw %k0, %ecx
+; KNL-NEXT: kmovw %k0, %r12d
; KNL-NEXT: kshiftlw $8, %k2, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
-; KNL-NEXT: kmovw %k0, %eax
+; KNL-NEXT: kmovw %k0, %r13d
; KNL-NEXT: kshiftlw $7, %k2, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: vpinsrb $5, %edx, %xmm2, %xmm2
-; KNL-NEXT: kmovw %k0, %edx
+; KNL-NEXT: kmovw %k0, %ecx
; KNL-NEXT: kshiftlw $6, %k2, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: vpinsrb $6, %ecx, %xmm2, %xmm2
-; KNL-NEXT: kmovw %k0, %ecx
+; KNL-NEXT: kmovw %k0, %esi
; KNL-NEXT: kshiftlw $5, %k2, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
-; KNL-NEXT: kmovw %k0, %eax
+; KNL-NEXT: kmovw %k0, %ebp
; KNL-NEXT: kshiftlw $4, %k2, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: vpinsrb $8, %edx, %xmm2, %xmm2
-; KNL-NEXT: kmovw %k0, %edx
+; KNL-NEXT: kmovw %k0, %ebx
; KNL-NEXT: kshiftlw $3, %k2, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: vpinsrb $9, %ecx, %xmm2, %xmm2
-; KNL-NEXT: kmovw %k0, %ecx
+; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: kshiftlw $2, %k2, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
-; KNL-NEXT: kmovw %k0, %eax
+; KNL-NEXT: kmovw %k0, %edx
; KNL-NEXT: kshiftlw $1, %k2, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: vpinsrb $11, %edx, %xmm2, %xmm2
-; KNL-NEXT: kmovw %k0, %edx
+; KNL-NEXT: vmovd %r10d, %xmm2
+; KNL-NEXT: kmovw %k0, %r10d
; KNL-NEXT: vptestmd %zmm1, %zmm1, %k1
; KNL-NEXT: kshiftrw $15, %k2, %k0
-; KNL-NEXT: vpinsrb $12, %ecx, %xmm2, %xmm1
-; KNL-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1
-; KNL-NEXT: vpinsrb $14, %edx, %xmm1, %xmm1
+; KNL-NEXT: vpinsrb $1, %r8d, %xmm2, %xmm1
+; KNL-NEXT: vpinsrb $2, %r9d, %xmm1, %xmm1
+; KNL-NEXT: vpinsrb $3, %r11d, %xmm1, %xmm1
+; KNL-NEXT: vpinsrb $4, %r14d, %xmm1, %xmm1
+; KNL-NEXT: vpinsrb $5, %r15d, %xmm1, %xmm1
+; KNL-NEXT: vpinsrb $6, %r12d, %xmm1, %xmm1
+; KNL-NEXT: vpinsrb $7, %r13d, %xmm1, %xmm1
+; KNL-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
+; KNL-NEXT: vpinsrb $9, %esi, %xmm1, %xmm1
+; KNL-NEXT: vpinsrb $10, %ebp, %xmm1, %xmm1
+; KNL-NEXT: vpinsrb $11, %ebx, %xmm1, %xmm1
+; KNL-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; KNL-NEXT: vpinsrb $13, %edx, %xmm1, %xmm1
+; KNL-NEXT: vpinsrb $14, %r10d, %xmm1, %xmm1
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
; KNL-NEXT: vpmovsxbd %xmm1, %zmm1
@@ -3088,139 +3135,146 @@ define void @store_64i1(<64 x i1>* %a, <64 x i1> %v) {
; KNL-NEXT: kmovw %k0, 4(%rdi)
; KNL-NEXT: kshiftlw $14, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %eax
+; KNL-NEXT: kmovw %k0, %r8d
; KNL-NEXT: kshiftlw $15, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %ecx
+; KNL-NEXT: kmovw %k0, %r10d
; KNL-NEXT: kshiftlw $13, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %edx
+; KNL-NEXT: kmovw %k0, %r9d
; KNL-NEXT: kshiftlw $12, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: vmovd %ecx, %xmm1
-; KNL-NEXT: kmovw %k0, %ecx
+; KNL-NEXT: kmovw %k0, %r11d
; KNL-NEXT: kshiftlw $11, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
-; KNL-NEXT: kmovw %k0, %eax
+; KNL-NEXT: kmovw %k0, %r14d
; KNL-NEXT: kshiftlw $10, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: vpinsrb $2, %edx, %xmm1, %xmm1
-; KNL-NEXT: kmovw %k0, %edx
+; KNL-NEXT: kmovw %k0, %r15d
; KNL-NEXT: kshiftlw $9, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: vpinsrb $3, %ecx, %xmm1, %xmm1
-; KNL-NEXT: kmovw %k0, %ecx
+; KNL-NEXT: kmovw %k0, %r12d
; KNL-NEXT: kshiftlw $8, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
-; KNL-NEXT: kmovw %k0, %eax
+; KNL-NEXT: kmovw %k0, %r13d
; KNL-NEXT: kshiftlw $7, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: vpinsrb $5, %edx, %xmm1, %xmm1
-; KNL-NEXT: kmovw %k0, %edx
+; KNL-NEXT: kmovw %k0, %ecx
; KNL-NEXT: kshiftlw $6, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: vpinsrb $6, %ecx, %xmm1, %xmm1
-; KNL-NEXT: kmovw %k0, %ecx
+; KNL-NEXT: kmovw %k0, %esi
; KNL-NEXT: kshiftlw $5, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
-; KNL-NEXT: kmovw %k0, %eax
+; KNL-NEXT: kmovw %k0, %ebp
; KNL-NEXT: kshiftlw $4, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: vpinsrb $8, %edx, %xmm1, %xmm1
-; KNL-NEXT: kmovw %k0, %edx
+; KNL-NEXT: kmovw %k0, %ebx
; KNL-NEXT: kshiftlw $3, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: vpinsrb $9, %ecx, %xmm1, %xmm1
-; KNL-NEXT: kmovw %k0, %ecx
+; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: kshiftlw $2, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1
-; KNL-NEXT: kmovw %k0, %eax
+; KNL-NEXT: kmovw %k0, %edx
; KNL-NEXT: kshiftlw $1, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: vpinsrb $11, %edx, %xmm1, %xmm1
-; KNL-NEXT: kmovw %k0, %edx
+; KNL-NEXT: vmovd %r10d, %xmm1
+; KNL-NEXT: kmovw %k0, %r10d
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: vpinsrb $12, %ecx, %xmm1, %xmm0
-; KNL-NEXT: kmovw %k1, %ecx
+; KNL-NEXT: vpinsrb $1, %r8d, %xmm1, %xmm0
+; KNL-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; KNL-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; KNL-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; KNL-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; KNL-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; KNL-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; KNL-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
+; KNL-NEXT: vpinsrb $9, %esi, %xmm0, %xmm0
+; KNL-NEXT: vpinsrb $10, %ebp, %xmm0, %xmm0
+; KNL-NEXT: vpinsrb $11, %ebx, %xmm0, %xmm0
+; KNL-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
+; KNL-NEXT: vpinsrb $13, %edx, %xmm0, %xmm0
+; KNL-NEXT: vpinsrb $14, %r10d, %xmm0, %xmm0
+; KNL-NEXT: kmovw %k1, %eax
+; KNL-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
+; KNL-NEXT: vpslld $31, %zmm0, %zmm0
+; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
+; KNL-NEXT: kmovw %k1, 2(%rdi)
; KNL-NEXT: kshiftlw $14, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
-; KNL-NEXT: kmovw %k1, %eax
+; KNL-NEXT: kmovw %k1, %r8d
; KNL-NEXT: kshiftlw $15, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: vpinsrb $14, %edx, %xmm0, %xmm0
-; KNL-NEXT: kmovw %k1, %edx
+; KNL-NEXT: kmovw %k1, %r9d
; KNL-NEXT: kshiftlw $13, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: vpinsrb $15, %ecx, %xmm0, %xmm0
-; KNL-NEXT: kmovw %k1, %ecx
+; KNL-NEXT: kmovw %k1, %r10d
; KNL-NEXT: kshiftlw $12, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: vmovd %edx, %xmm1
-; KNL-NEXT: kmovw %k1, %edx
+; KNL-NEXT: kmovw %k1, %r11d
; KNL-NEXT: kshiftlw $11, %k0, %k1
-; KNL-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
+; KNL-NEXT: kmovw %k1, %r14d
; KNL-NEXT: kshiftlw $10, %k0, %k1
-; KNL-NEXT: vpinsrb $2, %ecx, %xmm1, %xmm1
; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %ecx
+; KNL-NEXT: kmovw %k1, %r15d
; KNL-NEXT: kshiftlw $9, %k0, %k1
-; KNL-NEXT: vpinsrb $3, %edx, %xmm1, %xmm1
; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %edx
+; KNL-NEXT: kmovw %k1, %r12d
; KNL-NEXT: kshiftlw $8, %k0, %k1
-; KNL-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
+; KNL-NEXT: kmovw %k1, %r13d
; KNL-NEXT: kshiftlw $7, %k0, %k1
-; KNL-NEXT: vpinsrb $5, %ecx, %xmm1, %xmm1
; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %ecx
+; KNL-NEXT: kmovw %k1, %edx
; KNL-NEXT: kshiftlw $6, %k0, %k1
-; KNL-NEXT: vpinsrb $6, %edx, %xmm1, %xmm1
; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %edx
+; KNL-NEXT: kmovw %k1, %esi
; KNL-NEXT: kshiftlw $5, %k0, %k1
-; KNL-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
+; KNL-NEXT: kmovw %k1, %ebp
; KNL-NEXT: kshiftlw $4, %k0, %k1
-; KNL-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %ecx
+; KNL-NEXT: kmovw %k1, %ebx
; KNL-NEXT: kshiftlw $3, %k0, %k1
-; KNL-NEXT: vpinsrb $9, %edx, %xmm1, %xmm1
; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %edx
+; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: kshiftlw $2, %k0, %k1
-; KNL-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1
; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
+; KNL-NEXT: kmovw %k1, %ecx
; KNL-NEXT: kshiftlw $1, %k0, %k1
-; KNL-NEXT: vpinsrb $11, %ecx, %xmm1, %xmm1
; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %ecx
-; KNL-NEXT: vpinsrb $12, %edx, %xmm1, %xmm1
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
-; KNL-NEXT: vpslld $31, %zmm0, %zmm0
-; KNL-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1
+; KNL-NEXT: vmovd %r9d, %xmm0
+; KNL-NEXT: kmovw %k1, %r9d
+; KNL-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; KNL-NEXT: vpinsrb $2, %r10d, %xmm0, %xmm0
+; KNL-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; KNL-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; KNL-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; KNL-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; KNL-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; KNL-NEXT: vpinsrb $8, %edx, %xmm0, %xmm0
+; KNL-NEXT: vpinsrb $9, %esi, %xmm0, %xmm0
+; KNL-NEXT: vpinsrb $10, %ebp, %xmm0, %xmm0
+; KNL-NEXT: vpinsrb $11, %ebx, %xmm0, %xmm0
+; KNL-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
+; KNL-NEXT: kshiftrw $15, %k0, %k0
+; KNL-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
+; KNL-NEXT: vpinsrb $14, %r9d, %xmm0, %xmm0
; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
-; KNL-NEXT: vpinsrb $14, %ecx, %xmm1, %xmm0
; KNL-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; KNL-NEXT: kmovw %k0, 2(%rdi)
; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT: kmovw %k0, (%rdi)
+; KNL-NEXT: popq %rbx
+; KNL-NEXT: popq %r12
+; KNL-NEXT: popq %r13
+; KNL-NEXT: popq %r14
+; KNL-NEXT: popq %r15
+; KNL-NEXT: popq %rbp
+; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-LABEL: store_64i1:
@@ -3571,6 +3625,7 @@ define i32 @test_bitcast_v8i1_zext(<16 x i32> %a) {
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: movzbl %al, %eax
; KNL-NEXT: addl %eax, %eax
+; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-LABEL: test_bitcast_v8i1_zext:
@@ -3609,40 +3664,14 @@ define i32 @test_bitcast_v8i1_zext(<16 x i32> %a) {
}
define i32 @test_bitcast_v16i1_zext(<16 x i32> %a) {
-; KNL-LABEL: test_bitcast_v16i1_zext:
-; KNL: ## BB#0:
-; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; KNL-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: addl %eax, %eax
-; KNL-NEXT: retq
-;
-; SKX-LABEL: test_bitcast_v16i1_zext:
-; SKX: ## BB#0:
-; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; SKX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
-; SKX-NEXT: kmovw %k0, %eax
-; SKX-NEXT: addl %eax, %eax
-; SKX-NEXT: vzeroupper
-; SKX-NEXT: retq
-;
-; AVX512BW-LABEL: test_bitcast_v16i1_zext:
-; AVX512BW: ## BB#0:
-; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX512BW-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
-; AVX512BW-NEXT: kmovw %k0, %eax
-; AVX512BW-NEXT: addl %eax, %eax
-; AVX512BW-NEXT: vzeroupper
-; AVX512BW-NEXT: retq
-;
-; AVX512DQ-LABEL: test_bitcast_v16i1_zext:
-; AVX512DQ: ## BB#0:
-; AVX512DQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX512DQ-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
-; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: addl %eax, %eax
-; AVX512DQ-NEXT: vzeroupper
-; AVX512DQ-NEXT: retq
+; CHECK-LABEL: test_bitcast_v16i1_zext:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: addl %eax, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
%v1 = icmp eq <16 x i32> %a, zeroinitializer
%mask1 = bitcast <16 x i1> %v1 to i16
%val = zext i16 %mask1 to i32
OpenPOWER on IntegriCloud