author     Craig Topper <craig.topper@intel.com>    2019-01-05 22:42:58 +0000
committer  Craig Topper <craig.topper@intel.com>    2019-01-05 22:42:58 +0000
commit     d0ba531a0cdb6e6d35570e9d398e34519816372f (patch)
tree       2d62aa6ea132f90cdc689c8a175c651e0b0423d3
parent     46f8b4a11e2c936568d6ba36f5186e8ddd96b2f2 (diff)
[X86] Use two pmovmskbs in combineBitcastvxi1 for (i64 (bitcast (v64i1 (truncate (v64i8))))) on KNL.
llvm-svn: 350481
-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.cpp    |  23
-rw-r--r--  llvm/test/CodeGen/X86/bitcast-setcc-512.ll |  22
-rw-r--r--  llvm/test/CodeGen/X86/movmsk-cmp.ll        | 184
3 files changed, 59 insertions, 170 deletions
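
For context, the IR pattern named in the commit subject packs the low bit of each of the 64 input bytes into a single i64. A minimal scalar sketch of that semantics follows; the helper name is illustrative only and not part of the patch.

    #include <cstdint>

    // Scalar model of (i64 (bitcast (v64i1 (truncate (v64i8))))):
    // truncating a byte to i1 keeps its low bit, and the bitcast places
    // the i1 from vector element I into bit I of the resulting i64.
    static uint64_t truncBitcastToI64(const uint8_t Bytes[64]) {
      uint64_t Mask = 0;
      for (unsigned I = 0; I != 64; ++I)
        Mask |= (uint64_t)(Bytes[I] & 1) << I;
      return Mask;
    }

The combine below sign-extends the v64i1 value back to v64i8 (0x00/0xFF per lane), so vpmovmskb, which collects the byte sign bits, produces exactly this mask.
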
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 67f2929dae7..f643482884e 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -32743,7 +32743,8 @@ static SDValue combineBitcastvxi1(SelectionDAG &DAG, SDValue BitCast,
   // vpcmpeqb/vpcmpgtb.
   bool IsTruncated = N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
                      (N0.getOperand(0).getValueType() == MVT::v16i8 ||
-                      N0.getOperand(0).getValueType() == MVT::v32i8);
+                      N0.getOperand(0).getValueType() == MVT::v32i8 ||
+                      N0.getOperand(0).getValueType() == MVT::v64i8);
 
   // With AVX512 vxi1 types are legal and we prefer using k-regs.
   // MOVMSK is supported in SSE2 or later.
@@ -32799,12 +32800,30 @@ static SDValue combineBitcastvxi1(SelectionDAG &DAG, SDValue BitCast,
   case MVT::v32i1:
     SExtVT = MVT::v32i8;
     break;
+  case MVT::v64i1:
+    // If we have AVX512F but not AVX512BW, and the input is truncated from
+    // v64i8 (checked earlier), split the input and emit two pmovmskbs.
+    if (Subtarget.hasAVX512() && !Subtarget.hasBWI()) {
+      SExtVT = MVT::v64i8;
+      break;
+    }
+    return SDValue();
   };
 
   SDLoc DL(BitCast);
   SDValue V = DAG.getNode(ISD::SIGN_EXTEND, DL, SExtVT, N0);
 
-  if (SExtVT == MVT::v16i8 || SExtVT == MVT::v32i8) {
+  if (SExtVT == MVT::v64i8) {
+    SDValue Lo, Hi;
+    std::tie(Lo, Hi) = DAG.SplitVector(V, DL);
+    Lo = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, Lo);
+    Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Lo);
+    Hi = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, Hi);
+    Hi = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Hi);
+    Hi = DAG.getNode(ISD::SHL, DL, MVT::i64, Hi,
+                     DAG.getConstant(32, DL, MVT::i8));
+    V = DAG.getNode(ISD::OR, DL, MVT::i64, Lo, Hi);
+  } else if (SExtVT == MVT::v16i8 || SExtVT == MVT::v32i8) {
     V = getPMOVMSKB(DL, V, DAG, Subtarget);
   } else {
     if (SExtVT == MVT::v8i16)
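
The scalar effect of the DAG nodes added in the hunk above for the v64i8 case is small enough to state directly. Here is a sketch with illustrative (non-LLVM) names, assuming each 256-bit half has already gone through vpmovmskb.

    #include <cstdint>

    // Each 256-bit half of the sign-extended v64i8 value yields a 32-bit
    // vpmovmskb result. The low mask is zero-extended, the high mask is
    // shifted into bits 63:32, and the two are OR'd together.
    static uint64_t combineHalfMasks(uint32_t LoMask, uint32_t HiMask) {
      uint64_t Lo = LoMask;                  // ISD::ZERO_EXTEND to i64
      uint64_t Hi = (uint64_t)HiMask << 32;  // ISD::ANY_EXTEND + ISD::SHL by 32
      return Lo | Hi;                        // ISD::OR
    }

This corresponds to the vpmovmskb / shlq $32 / orq sequence the updated AVX512F and KNL checks below now expect.
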
diff --git a/llvm/test/CodeGen/X86/bitcast-setcc-512.ll b/llvm/test/CodeGen/X86/bitcast-setcc-512.ll
index 1911aed2973..340c7ab3afd 100644
--- a/llvm/test/CodeGen/X86/bitcast-setcc-512.ll
+++ b/llvm/test/CodeGen/X86/bitcast-setcc-512.ll
@@ -256,26 +256,10 @@ define i64 @v64i8(<64 x i8> %a, <64 x i8> %b) {
;
; AVX512F-LABEL: v64i8:
; AVX512F: # %bb.0:
+; AVX512F-NEXT: vpcmpgtb %ymm3, %ymm1, %ymm1
; AVX512F-NEXT: vpcmpgtb %ymm2, %ymm0, %ymm0
-; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm2
-; AVX512F-NEXT: vptestmd %zmm2, %zmm2, %k0
-; AVX512F-NEXT: kmovw %k0, %eax
-; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm0
-; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
-; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
-; AVX512F-NEXT: kmovw %k0, %ecx
-; AVX512F-NEXT: shll $16, %ecx
-; AVX512F-NEXT: orl %eax, %ecx
-; AVX512F-NEXT: vpcmpgtb %ymm3, %ymm1, %ymm0
-; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm1
-; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
-; AVX512F-NEXT: kmovw %k0, %edx
-; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm0
-; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
-; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
-; AVX512F-NEXT: kmovw %k0, %eax
-; AVX512F-NEXT: shll $16, %eax
-; AVX512F-NEXT: orl %edx, %eax
+; AVX512F-NEXT: vpmovmskb %ymm0, %ecx
+; AVX512F-NEXT: vpmovmskb %ymm1, %eax
; AVX512F-NEXT: shlq $32, %rax
; AVX512F-NEXT: orq %rcx, %rax
; AVX512F-NEXT: vzeroupper
diff --git a/llvm/test/CodeGen/X86/movmsk-cmp.ll b/llvm/test/CodeGen/X86/movmsk-cmp.ll
index 93d86b0e11d..bc16d8c710e 100644
--- a/llvm/test/CodeGen/X86/movmsk-cmp.ll
+++ b/llvm/test/CodeGen/X86/movmsk-cmp.ll
@@ -235,30 +235,11 @@ define i1 @allones_v64i8_sign(<64 x i8> %arg) {
;
; KNL-LABEL: allones_v64i8_sign:
; KNL: # %bb.0:
-; KNL-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; KNL-NEXT: vpcmpgtb %ymm0, %ymm2, %ymm0
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm3
-; KNL-NEXT: vptestmd %zmm3, %zmm3, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
-; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
-; KNL-NEXT: kmovw %k0, %ecx
-; KNL-NEXT: shll $16, %ecx
-; KNL-NEXT: orl %eax, %ecx
-; KNL-NEXT: vpcmpgtb %ymm1, %ymm2, %ymm0
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm1
-; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
-; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
-; KNL-NEXT: kmovw %k0, %edx
-; KNL-NEXT: shll $16, %edx
-; KNL-NEXT: orl %eax, %edx
-; KNL-NEXT: shlq $32, %rdx
-; KNL-NEXT: orq %rcx, %rdx
-; KNL-NEXT: cmpq $-1, %rdx
+; KNL-NEXT: vpmovmskb %ymm1, %eax
+; KNL-NEXT: shlq $32, %rax
+; KNL-NEXT: vpmovmskb %ymm0, %ecx
+; KNL-NEXT: orq %rax, %rcx
+; KNL-NEXT: cmpq $-1, %rcx
; KNL-NEXT: sete %al
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
@@ -327,29 +308,10 @@ define i1 @allzeros_v64i8_sign(<64 x i8> %arg) {
;
; KNL-LABEL: allzeros_v64i8_sign:
; KNL: # %bb.0:
-; KNL-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; KNL-NEXT: vpcmpgtb %ymm0, %ymm2, %ymm0
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm3
-; KNL-NEXT: vptestmd %zmm3, %zmm3, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
-; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
-; KNL-NEXT: kmovw %k0, %ecx
-; KNL-NEXT: shll $16, %ecx
-; KNL-NEXT: orl %eax, %ecx
-; KNL-NEXT: vpcmpgtb %ymm1, %ymm2, %ymm0
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm1
-; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
-; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
-; KNL-NEXT: kmovw %k0, %edx
-; KNL-NEXT: shll $16, %edx
-; KNL-NEXT: orl %eax, %edx
-; KNL-NEXT: shlq $32, %rdx
-; KNL-NEXT: orq %rcx, %rdx
+; KNL-NEXT: vpmovmskb %ymm1, %eax
+; KNL-NEXT: shlq $32, %rax
+; KNL-NEXT: vpmovmskb %ymm0, %ecx
+; KNL-NEXT: orq %rax, %rcx
; KNL-NEXT: sete %al
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
@@ -1662,32 +1624,13 @@ define i1 @allones_v64i8_and1(<64 x i8> %arg) {
;
; KNL-LABEL: allones_v64i8_and1:
; KNL: # %bb.0:
-; KNL-NEXT: vmovdqa {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
-; KNL-NEXT: vpand %ymm2, %ymm1, %ymm1
-; KNL-NEXT: vpand %ymm2, %ymm0, %ymm0
-; KNL-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm0
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm3
-; KNL-NEXT: vptestmd %zmm3, %zmm3, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
-; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
-; KNL-NEXT: kmovw %k0, %ecx
-; KNL-NEXT: shll $16, %ecx
-; KNL-NEXT: orl %eax, %ecx
-; KNL-NEXT: vpcmpeqb %ymm2, %ymm1, %ymm0
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm1
-; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
-; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
-; KNL-NEXT: kmovw %k0, %edx
-; KNL-NEXT: shll $16, %edx
-; KNL-NEXT: orl %eax, %edx
-; KNL-NEXT: shlq $32, %rdx
-; KNL-NEXT: orq %rcx, %rdx
-; KNL-NEXT: cmpq $-1, %rdx
+; KNL-NEXT: vpsllw $7, %ymm0, %ymm0
+; KNL-NEXT: vpsllw $7, %ymm1, %ymm1
+; KNL-NEXT: vpmovmskb %ymm1, %eax
+; KNL-NEXT: shlq $32, %rax
+; KNL-NEXT: vpmovmskb %ymm0, %ecx
+; KNL-NEXT: orq %rax, %rcx
+; KNL-NEXT: cmpq $-1, %rcx
; KNL-NEXT: sete %al
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
@@ -1771,31 +1714,12 @@ define i1 @allzeros_v64i8_and1(<64 x i8> %arg) {
;
; KNL-LABEL: allzeros_v64i8_and1:
; KNL: # %bb.0:
-; KNL-NEXT: vmovdqa {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
-; KNL-NEXT: vpand %ymm2, %ymm1, %ymm1
-; KNL-NEXT: vpand %ymm2, %ymm0, %ymm0
-; KNL-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm0
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm3
-; KNL-NEXT: vptestmd %zmm3, %zmm3, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
-; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
-; KNL-NEXT: kmovw %k0, %ecx
-; KNL-NEXT: shll $16, %ecx
-; KNL-NEXT: orl %eax, %ecx
-; KNL-NEXT: vpcmpeqb %ymm2, %ymm1, %ymm0
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm1
-; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
-; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
-; KNL-NEXT: kmovw %k0, %edx
-; KNL-NEXT: shll $16, %edx
-; KNL-NEXT: orl %eax, %edx
-; KNL-NEXT: shlq $32, %rdx
-; KNL-NEXT: orq %rcx, %rdx
+; KNL-NEXT: vpsllw $7, %ymm0, %ymm0
+; KNL-NEXT: vpsllw $7, %ymm1, %ymm1
+; KNL-NEXT: vpmovmskb %ymm1, %eax
+; KNL-NEXT: shlq $32, %rax
+; KNL-NEXT: vpmovmskb %ymm0, %ecx
+; KNL-NEXT: orq %rax, %rcx
; KNL-NEXT: sete %al
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
@@ -3299,32 +3223,13 @@ define i1 @allones_v64i8_and4(<64 x i8> %arg) {
;
; KNL-LABEL: allones_v64i8_and4:
; KNL: # %bb.0:
-; KNL-NEXT: vmovdqa {{.*#+}} ymm2 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4]
-; KNL-NEXT: vpand %ymm2, %ymm1, %ymm1
-; KNL-NEXT: vpand %ymm2, %ymm0, %ymm0
-; KNL-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm0
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm3
-; KNL-NEXT: vptestmd %zmm3, %zmm3, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
-; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
-; KNL-NEXT: kmovw %k0, %ecx
-; KNL-NEXT: shll $16, %ecx
-; KNL-NEXT: orl %eax, %ecx
-; KNL-NEXT: vpcmpeqb %ymm2, %ymm1, %ymm0
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm1
-; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
-; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
-; KNL-NEXT: kmovw %k0, %edx
-; KNL-NEXT: shll $16, %edx
-; KNL-NEXT: orl %eax, %edx
-; KNL-NEXT: shlq $32, %rdx
-; KNL-NEXT: orq %rcx, %rdx
-; KNL-NEXT: cmpq $-1, %rdx
+; KNL-NEXT: vpsllw $5, %ymm0, %ymm0
+; KNL-NEXT: vpsllw $5, %ymm1, %ymm1
+; KNL-NEXT: vpmovmskb %ymm1, %eax
+; KNL-NEXT: shlq $32, %rax
+; KNL-NEXT: vpmovmskb %ymm0, %ecx
+; KNL-NEXT: orq %rax, %rcx
+; KNL-NEXT: cmpq $-1, %rcx
; KNL-NEXT: sete %al
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
@@ -3408,31 +3313,12 @@ define i1 @allzeros_v64i8_and4(<64 x i8> %arg) {
;
; KNL-LABEL: allzeros_v64i8_and4:
; KNL: # %bb.0:
-; KNL-NEXT: vmovdqa {{.*#+}} ymm2 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4]
-; KNL-NEXT: vpand %ymm2, %ymm1, %ymm1
-; KNL-NEXT: vpand %ymm2, %ymm0, %ymm0
-; KNL-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm0
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm3
-; KNL-NEXT: vptestmd %zmm3, %zmm3, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
-; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
-; KNL-NEXT: kmovw %k0, %ecx
-; KNL-NEXT: shll $16, %ecx
-; KNL-NEXT: orl %eax, %ecx
-; KNL-NEXT: vpcmpeqb %ymm2, %ymm1, %ymm0
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm1
-; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
-; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
-; KNL-NEXT: kmovw %k0, %edx
-; KNL-NEXT: shll $16, %edx
-; KNL-NEXT: orl %eax, %edx
-; KNL-NEXT: shlq $32, %rdx
-; KNL-NEXT: orq %rcx, %rdx
+; KNL-NEXT: vpsllw $5, %ymm0, %ymm0
+; KNL-NEXT: vpsllw $5, %ymm1, %ymm1
+; KNL-NEXT: vpmovmskb %ymm1, %eax
+; KNL-NEXT: shlq $32, %rax
+; KNL-NEXT: vpmovmskb %ymm0, %ecx
+; KNL-NEXT: orq %rax, %rcx
; KNL-NEXT: sete %al
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq