summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- llvm/lib/Target/X86/X86ISelLowering.cpp 10
-rw-r--r-- llvm/test/CodeGen/X86/avx512-ext.ll 33
-rwxr-xr-x llvm/test/CodeGen/X86/avx512-schedule.ll 6
-rw-r--r-- llvm/test/CodeGen/X86/bitcast-and-setcc-128.ll 5
-rw-r--r-- llvm/test/CodeGen/X86/bitcast-and-setcc-256.ll 10
-rw-r--r-- llvm/test/CodeGen/X86/bitcast-setcc-128.ll 5
-rw-r--r-- llvm/test/CodeGen/X86/bitcast-setcc-256.ll 10
-rw-r--r-- llvm/test/CodeGen/X86/broadcastm-lowering.ll 12
-rw-r--r-- llvm/test/CodeGen/X86/movmsk-cmp.ll 170
9 files changed, 61 insertions, 200 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 4056b4982b0..67f2929dae7 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -32737,9 +32737,17 @@ static SDValue combineBitcastvxi1(SelectionDAG &DAG, SDValue BitCast,
if (!VT.isScalarInteger() || !VecVT.isSimple())
return SDValue();
+ // If the input is a truncate from v16i8 or v32i8 go ahead and use a
+ // movmskb even with avx512. This will be better than truncating to vXi1 and
+ // using a kmov. This can especially help KNL if the input is a v16i8/v32i8
+ // vpcmpeqb/vpcmpgtb.
+ bool IsTruncated = N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
+ (N0.getOperand(0).getValueType() == MVT::v16i8 ||
+ N0.getOperand(0).getValueType() == MVT::v32i8);
+
// With AVX512 vxi1 types are legal and we prefer using k-regs.
// MOVMSK is supported in SSE2 or later.
- if (Subtarget.hasAVX512() || !Subtarget.hasSSE2())
+ if (!Subtarget.hasSSE2() || (Subtarget.hasAVX512() && !IsTruncated))
return SDValue();
// There are MOVMSK flavors for types v16i8, v32i8, v4f32, v8f32, v4f64 and
diff --git a/llvm/test/CodeGen/X86/avx512-ext.ll b/llvm/test/CodeGen/X86/avx512-ext.ll
index 2381180af76..072e3c8bdae 100644
--- a/llvm/test/CodeGen/X86/avx512-ext.ll
+++ b/llvm/test/CodeGen/X86/avx512-ext.ll
@@ -1644,33 +1644,12 @@ define <8 x i64> @zext_8i1_to_8xi64(i8 %b) {
}
define i16 @trunc_16i8_to_16i1(<16 x i8> %a) {
-; KNL-LABEL: trunc_16i8_to_16i1:
-; KNL: # %bb.0:
-; KNL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
-; KNL-NEXT: vpslld $31, %zmm0, %zmm0
-; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: # kill: def $ax killed $ax killed $eax
-; KNL-NEXT: vzeroupper
-; KNL-NEXT: retq
-;
-; SKX-LABEL: trunc_16i8_to_16i1:
-; SKX: # %bb.0:
-; SKX-NEXT: vpsllw $7, %xmm0, %xmm0
-; SKX-NEXT: vpmovb2m %xmm0, %k0
-; SKX-NEXT: kmovd %k0, %eax
-; SKX-NEXT: # kill: def $ax killed $ax killed $eax
-; SKX-NEXT: retq
-;
-; AVX512DQNOBW-LABEL: trunc_16i8_to_16i1:
-; AVX512DQNOBW: # %bb.0:
-; AVX512DQNOBW-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
-; AVX512DQNOBW-NEXT: vpslld $31, %zmm0, %zmm0
-; AVX512DQNOBW-NEXT: vpmovd2m %zmm0, %k0
-; AVX512DQNOBW-NEXT: kmovw %k0, %eax
-; AVX512DQNOBW-NEXT: # kill: def $ax killed $ax killed $eax
-; AVX512DQNOBW-NEXT: vzeroupper
-; AVX512DQNOBW-NEXT: retq
+; ALL-LABEL: trunc_16i8_to_16i1:
+; ALL: # %bb.0:
+; ALL-NEXT: vpsllw $7, %xmm0, %xmm0
+; ALL-NEXT: vpmovmskb %xmm0, %eax
+; ALL-NEXT: # kill: def $ax killed $ax killed $eax
+; ALL-NEXT: retq
%mask_b = trunc <16 x i8>%a to <16 x i1>
%mask = bitcast <16 x i1> %mask_b to i16
ret i16 %mask
diff --git a/llvm/test/CodeGen/X86/avx512-schedule.ll b/llvm/test/CodeGen/X86/avx512-schedule.ll
index 5c44d8679b9..2c9b6e13481 100755
--- a/llvm/test/CodeGen/X86/avx512-schedule.ll
+++ b/llvm/test/CodeGen/X86/avx512-schedule.ll
@@ -4285,16 +4285,14 @@ define i16 @trunc_16i8_to_16i1(<16 x i8> %a) {
; GENERIC-LABEL: trunc_16i8_to_16i1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovb2m %xmm0, %k0 # sched: [1:0.33]
-; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33]
+; GENERIC-NEXT: vpmovmskb %xmm0, %eax # sched: [2:1.00]
; GENERIC-NEXT: # kill: def $ax killed $ax killed $eax
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: trunc_16i8_to_16i1:
; SKX: # %bb.0:
; SKX-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT: vpmovb2m %xmm0, %k0 # sched: [1:1.00]
-; SKX-NEXT: kmovd %k0, %eax # sched: [3:1.00]
+; SKX-NEXT: vpmovmskb %xmm0, %eax # sched: [2:1.00]
; SKX-NEXT: # kill: def $ax killed $ax killed $eax
; SKX-NEXT: retq # sched: [7:1.00]
%mask_b = trunc <16 x i8>%a to <16 x i1>
diff --git a/llvm/test/CodeGen/X86/bitcast-and-setcc-128.ll b/llvm/test/CodeGen/X86/bitcast-and-setcc-128.ll
index 0a2e154f5b9..289ddcb194b 100644
--- a/llvm/test/CodeGen/X86/bitcast-and-setcc-128.ll
+++ b/llvm/test/CodeGen/X86/bitcast-and-setcc-128.ll
@@ -159,11 +159,8 @@ define i16 @v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) {
; AVX512F-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
; AVX512F-NEXT: vpcmpgtb %xmm3, %xmm2, %xmm1
; AVX512F-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
-; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
-; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: vpmovmskb %xmm0, %eax
; AVX512F-NEXT: # kill: def $ax killed $ax killed $eax
-; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: v16i8:
diff --git a/llvm/test/CodeGen/X86/bitcast-and-setcc-256.ll b/llvm/test/CodeGen/X86/bitcast-and-setcc-256.ll
index 0f3b8c94540..426cabe5f4c 100644
--- a/llvm/test/CodeGen/X86/bitcast-and-setcc-256.ll
+++ b/llvm/test/CodeGen/X86/bitcast-and-setcc-256.ll
@@ -399,15 +399,7 @@ define i32 @v32i8(<32 x i8> %a, <32 x i8> %b, <32 x i8> %c, <32 x i8> %d) {
; AVX512F-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0
; AVX512F-NEXT: vpcmpgtb %ymm3, %ymm2, %ymm1
; AVX512F-NEXT: vpand %ymm1, %ymm0, %ymm0
-; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm1
-; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
-; AVX512F-NEXT: kmovw %k0, %ecx
-; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm0
-; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
-; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
-; AVX512F-NEXT: kmovw %k0, %eax
-; AVX512F-NEXT: shll $16, %eax
-; AVX512F-NEXT: orl %ecx, %eax
+; AVX512F-NEXT: vpmovmskb %ymm0, %eax
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
diff --git a/llvm/test/CodeGen/X86/bitcast-setcc-128.ll b/llvm/test/CodeGen/X86/bitcast-setcc-128.ll
index f803901c0e7..fb585974e5b 100644
--- a/llvm/test/CodeGen/X86/bitcast-setcc-128.ll
+++ b/llvm/test/CodeGen/X86/bitcast-setcc-128.ll
@@ -128,11 +128,8 @@ define i16 @v16i8(<16 x i8> %a, <16 x i8> %b) {
; AVX512F-LABEL: v16i8:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
-; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
-; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
-; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: vpmovmskb %xmm0, %eax
; AVX512F-NEXT: # kill: def $ax killed $ax killed $eax
-; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: v16i8:
diff --git a/llvm/test/CodeGen/X86/bitcast-setcc-256.ll b/llvm/test/CodeGen/X86/bitcast-setcc-256.ll
index d349ae3bc40..b0af971366c 100644
--- a/llvm/test/CodeGen/X86/bitcast-setcc-256.ll
+++ b/llvm/test/CodeGen/X86/bitcast-setcc-256.ll
@@ -184,15 +184,7 @@ define i32 @v32i8(<32 x i8> %a, <32 x i8> %b) {
; AVX512F-LABEL: v32i8:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0
-; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm1
-; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
-; AVX512F-NEXT: kmovw %k0, %ecx
-; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm0
-; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
-; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
-; AVX512F-NEXT: kmovw %k0, %eax
-; AVX512F-NEXT: shll $16, %eax
-; AVX512F-NEXT: orl %ecx, %eax
+; AVX512F-NEXT: vpmovmskb %ymm0, %eax
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
diff --git a/llvm/test/CodeGen/X86/broadcastm-lowering.ll b/llvm/test/CodeGen/X86/broadcastm-lowering.ll
index d25a4da9843..986d313cb8d 100644
--- a/llvm/test/CodeGen/X86/broadcastm-lowering.ll
+++ b/llvm/test/CodeGen/X86/broadcastm-lowering.ll
@@ -43,15 +43,9 @@ define <4 x i32> @test_mm_epi32(<16 x i8> %a, <16 x i8> %b) {
; AVX512CD-LABEL: test_mm_epi32:
; AVX512CD: # %bb.0: # %entry
; AVX512CD-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
-; AVX512CD-NEXT: vpmovsxbd %xmm0, %zmm0
-; AVX512CD-NEXT: vptestmd %zmm0, %zmm0, %k0
-; AVX512CD-NEXT: kmovw %k0, %eax
-; AVX512CD-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; AVX512CD-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
-; AVX512CD-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0
-; AVX512CD-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
-; AVX512CD-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0
-; AVX512CD-NEXT: vzeroupper
+; AVX512CD-NEXT: vpmovmskb %xmm0, %eax
+; AVX512CD-NEXT: vmovd %eax, %xmm0
+; AVX512CD-NEXT: vpbroadcastd %xmm0, %xmm0
; AVX512CD-NEXT: retq
;
; AVX512VLCDBW-LABEL: test_mm_epi32:
diff --git a/llvm/test/CodeGen/X86/movmsk-cmp.ll b/llvm/test/CodeGen/X86/movmsk-cmp.ll
index 718ade02435..93d86b0e11d 100644
--- a/llvm/test/CodeGen/X86/movmsk-cmp.ll
+++ b/llvm/test/CodeGen/X86/movmsk-cmp.ll
@@ -22,13 +22,9 @@ define i1 @allones_v16i8_sign(<16 x i8> %arg) {
;
; KNL-LABEL: allones_v16i8_sign:
; KNL: # %bb.0:
-; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; KNL-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
-; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
-; KNL-NEXT: kortestw %k0, %k0
-; KNL-NEXT: setb %al
-; KNL-NEXT: vzeroupper
+; KNL-NEXT: vpmovmskb %xmm0, %eax
+; KNL-NEXT: cmpw $-1, %ax
+; KNL-NEXT: sete %al
; KNL-NEXT: retq
;
; SKX-LABEL: allones_v16i8_sign:
@@ -60,13 +56,9 @@ define i1 @allzeros_v16i8_sign(<16 x i8> %arg) {
;
; KNL-LABEL: allzeros_v16i8_sign:
; KNL: # %bb.0:
-; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; KNL-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
-; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
-; KNL-NEXT: kortestw %k0, %k0
+; KNL-NEXT: vpmovmskb %xmm0, %eax
+; KNL-NEXT: testw %ax, %ax
; KNL-NEXT: sete %al
-; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-LABEL: allzeros_v16i8_sign:
@@ -117,18 +109,8 @@ define i1 @allones_v32i8_sign(<32 x i8> %arg) {
;
; KNL-LABEL: allones_v32i8_sign:
; KNL: # %bb.0:
-; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; KNL-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm1
-; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
-; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
-; KNL-NEXT: kmovw %k0, %ecx
-; KNL-NEXT: shll $16, %ecx
-; KNL-NEXT: orl %eax, %ecx
-; KNL-NEXT: cmpl $-1, %ecx
+; KNL-NEXT: vpmovmskb %ymm0, %eax
+; KNL-NEXT: cmpl $-1, %eax
; KNL-NEXT: sete %al
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
@@ -180,17 +162,8 @@ define i1 @allzeros_v32i8_sign(<32 x i8> %arg) {
;
; KNL-LABEL: allzeros_v32i8_sign:
; KNL: # %bb.0:
-; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; KNL-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm1
-; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
-; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
-; KNL-NEXT: kmovw %k0, %ecx
-; KNL-NEXT: shll $16, %ecx
-; KNL-NEXT: orl %eax, %ecx
+; KNL-NEXT: vpmovmskb %ymm0, %eax
+; KNL-NEXT: testl %eax, %eax
; KNL-NEXT: sete %al
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
@@ -1438,14 +1411,10 @@ define i1 @allones_v16i8_and1(<16 x i8> %arg) {
;
; KNL-LABEL: allones_v16i8_and1:
; KNL: # %bb.0:
-; KNL-NEXT: vmovdqa {{.*#+}} xmm1 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
-; KNL-NEXT: vpand %xmm1, %xmm0, %xmm0
-; KNL-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
-; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
-; KNL-NEXT: kortestw %k0, %k0
-; KNL-NEXT: setb %al
-; KNL-NEXT: vzeroupper
+; KNL-NEXT: vpsllw $7, %xmm0, %xmm0
+; KNL-NEXT: vpmovmskb %xmm0, %eax
+; KNL-NEXT: cmpw $-1, %ax
+; KNL-NEXT: sete %al
; KNL-NEXT: retq
;
; SKX-LABEL: allones_v16i8_and1:
@@ -1480,14 +1449,10 @@ define i1 @allzeros_v16i8_and1(<16 x i8> %arg) {
;
; KNL-LABEL: allzeros_v16i8_and1:
; KNL: # %bb.0:
-; KNL-NEXT: vmovdqa {{.*#+}} xmm1 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
-; KNL-NEXT: vpand %xmm1, %xmm0, %xmm0
-; KNL-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
-; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
-; KNL-NEXT: kortestw %k0, %k0
+; KNL-NEXT: vpsllw $7, %xmm0, %xmm0
+; KNL-NEXT: vpmovmskb %xmm0, %eax
+; KNL-NEXT: testw %ax, %ax
; KNL-NEXT: sete %al
-; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-LABEL: allzeros_v16i8_and1:
@@ -1546,19 +1511,9 @@ define i1 @allones_v32i8_and1(<32 x i8> %arg) {
;
; KNL-LABEL: allones_v32i8_and1:
; KNL: # %bb.0:
-; KNL-NEXT: vmovdqa {{.*#+}} ymm1 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
-; KNL-NEXT: vpand %ymm1, %ymm0, %ymm0
-; KNL-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm1
-; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
-; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
-; KNL-NEXT: kmovw %k0, %ecx
-; KNL-NEXT: shll $16, %ecx
-; KNL-NEXT: orl %eax, %ecx
-; KNL-NEXT: cmpl $-1, %ecx
+; KNL-NEXT: vpsllw $7, %ymm0, %ymm0
+; KNL-NEXT: vpmovmskb %ymm0, %eax
+; KNL-NEXT: cmpl $-1, %eax
; KNL-NEXT: sete %al
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
@@ -1618,18 +1573,9 @@ define i1 @allzeros_v32i8_and1(<32 x i8> %arg) {
;
; KNL-LABEL: allzeros_v32i8_and1:
; KNL: # %bb.0:
-; KNL-NEXT: vmovdqa {{.*#+}} ymm1 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
-; KNL-NEXT: vpand %ymm1, %ymm0, %ymm0
-; KNL-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm1
-; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
-; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
-; KNL-NEXT: kmovw %k0, %ecx
-; KNL-NEXT: shll $16, %ecx
-; KNL-NEXT: orl %eax, %ecx
+; KNL-NEXT: vpsllw $7, %ymm0, %ymm0
+; KNL-NEXT: vpmovmskb %ymm0, %eax
+; KNL-NEXT: testl %eax, %eax
; KNL-NEXT: sete %al
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
@@ -3102,14 +3048,10 @@ define i1 @allones_v16i8_and4(<16 x i8> %arg) {
;
; KNL-LABEL: allones_v16i8_and4:
; KNL: # %bb.0:
-; KNL-NEXT: vmovdqa {{.*#+}} xmm1 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4]
-; KNL-NEXT: vpand %xmm1, %xmm0, %xmm0
-; KNL-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
-; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
-; KNL-NEXT: kortestw %k0, %k0
-; KNL-NEXT: setb %al
-; KNL-NEXT: vzeroupper
+; KNL-NEXT: vpsllw $5, %xmm0, %xmm0
+; KNL-NEXT: vpmovmskb %xmm0, %eax
+; KNL-NEXT: cmpw $-1, %ax
+; KNL-NEXT: sete %al
; KNL-NEXT: retq
;
; SKX-LABEL: allones_v16i8_and4:
@@ -3144,14 +3086,10 @@ define i1 @allzeros_v16i8_and4(<16 x i8> %arg) {
;
; KNL-LABEL: allzeros_v16i8_and4:
; KNL: # %bb.0:
-; KNL-NEXT: vmovdqa {{.*#+}} xmm1 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4]
-; KNL-NEXT: vpand %xmm1, %xmm0, %xmm0
-; KNL-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
-; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
-; KNL-NEXT: kortestw %k0, %k0
+; KNL-NEXT: vpsllw $5, %xmm0, %xmm0
+; KNL-NEXT: vpmovmskb %xmm0, %eax
+; KNL-NEXT: testw %ax, %ax
; KNL-NEXT: sete %al
-; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-LABEL: allzeros_v16i8_and4:
@@ -3210,19 +3148,9 @@ define i1 @allones_v32i8_and4(<32 x i8> %arg) {
;
; KNL-LABEL: allones_v32i8_and4:
; KNL: # %bb.0:
-; KNL-NEXT: vmovdqa {{.*#+}} ymm1 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4]
-; KNL-NEXT: vpand %ymm1, %ymm0, %ymm0
-; KNL-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm1
-; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
-; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
-; KNL-NEXT: kmovw %k0, %ecx
-; KNL-NEXT: shll $16, %ecx
-; KNL-NEXT: orl %eax, %ecx
-; KNL-NEXT: cmpl $-1, %ecx
+; KNL-NEXT: vpsllw $5, %ymm0, %ymm0
+; KNL-NEXT: vpmovmskb %ymm0, %eax
+; KNL-NEXT: cmpl $-1, %eax
; KNL-NEXT: sete %al
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
@@ -3282,18 +3210,9 @@ define i1 @allzeros_v32i8_and4(<32 x i8> %arg) {
;
; KNL-LABEL: allzeros_v32i8_and4:
; KNL: # %bb.0:
-; KNL-NEXT: vmovdqa {{.*#+}} ymm1 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4]
-; KNL-NEXT: vpand %ymm1, %ymm0, %ymm0
-; KNL-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm1
-; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
-; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
-; KNL-NEXT: kmovw %k0, %ecx
-; KNL-NEXT: shll $16, %ecx
-; KNL-NEXT: orl %eax, %ecx
+; KNL-NEXT: vpsllw $5, %ymm0, %ymm0
+; KNL-NEXT: vpmovmskb %ymm0, %eax
+; KNL-NEXT: testl %eax, %eax
; KNL-NEXT: sete %al
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
@@ -4926,12 +4845,7 @@ define i32 @movmskb(<16 x i8> %x) {
;
; KNL-LABEL: movmskb:
; KNL: # %bb.0:
-; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; KNL-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
-; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vzeroupper
+; KNL-NEXT: vpmovmskb %xmm0, %eax
; KNL-NEXT: retq
;
; SKX-LABEL: movmskb:
@@ -4975,17 +4889,7 @@ define i32 @movmskb256(<32 x i8> %x) {
;
; KNL-LABEL: movmskb256:
; KNL: # %bb.0:
-; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; KNL-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm1
-; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
-; KNL-NEXT: kmovw %k0, %ecx
-; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
-; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: shll $16, %eax
-; KNL-NEXT: orl %ecx, %eax
+; KNL-NEXT: vpmovmskb %ymm0, %eax
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
OpenPOWER on IntegriCloud