summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorCraig Topper <craig.topper@intel.com>2018-02-08 07:54:16 +0000
committerCraig Topper <craig.topper@intel.com>2018-02-08 07:54:16 +0000
commit93505707b6d3ec117e555c5a48adc2cc56470e38 (patch)
treecba43d0312a42a16177ecf0b1cdd7d43c042adba
parentf5465f98d2f210c20c94be9fe4edb9e60243ab80 (diff)
downloadbcm5719-llvm-93505707b6d3ec117e555c5a48adc2cc56470e38.tar.gz
bcm5719-llvm-93505707b6d3ec117e555c5a48adc2cc56470e38.zip
[X86] Allow KORTEST instruction to be used for testing if a mask is all ones
The KORTEST instruction sets the C flag if the result of ORing both operands together is all 1s. We can use this to lower (icmp eq/ne (bitcast (vXi1 X)), -1). Differential Revision: https://reviews.llvm.org/D42772 llvm-svn: 324577
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp10
-rw-r--r--llvm/test/CodeGen/X86/avx512-mask-op.ll41
-rwxr-xr-xllvm/test/CodeGen/X86/avx512-schedule.ll12
-rw-r--r--llvm/test/CodeGen/X86/setcc-lowering.ll3
4 files changed, 49 insertions, 17 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 6cc185be1c8..500a1d3ab03 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -18135,18 +18135,22 @@ static SDValue EmitKTEST(SDValue Op0, SDValue Op1, ISD::CondCode CC,
Op0 = Op0.getOperand(0);
MVT VT = Op0.getSimpleValueType();
- if (!(Subtarget.hasDQI() && (VT == MVT::v8i1 || VT == MVT::v16i1)) &&
+ if (!(Subtarget.hasAVX512() && VT == MVT::v16i1) &&
+ !(Subtarget.hasDQI() && VT == MVT::v8i1) &&
!(Subtarget.hasBWI() && (VT == MVT::v32i1 || VT == MVT::v64i1)))
return SDValue();
X86::CondCode X86CC;
if (isNullConstant(Op1)) {
X86CC = CC == ISD::SETEQ ? X86::COND_E : X86::COND_NE;
+ } else if (isAllOnesConstant(Op1)) {
+ // C flag is set for all ones.
+ X86CC = CC == ISD::SETEQ ? X86::COND_B : X86::COND_AE;
} else
return SDValue();
- SDValue KTEST = DAG.getNode(X86ISD::KTEST, dl, MVT::i32, Op0, Op0);
- return getSETCC(X86CC, KTEST, dl, DAG);
+ SDValue KORTEST = DAG.getNode(X86ISD::KORTEST, dl, MVT::i32, Op0, Op0);
+ return getSETCC(X86CC, KORTEST, dl, DAG);
}
SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
diff --git a/llvm/test/CodeGen/X86/avx512-mask-op.ll b/llvm/test/CodeGen/X86/avx512-mask-op.ll
index 6c2dfcb5cb7..4766c8b1560 100644
--- a/llvm/test/CodeGen/X86/avx512-mask-op.ll
+++ b/llvm/test/CodeGen/X86/avx512-mask-op.ll
@@ -584,7 +584,7 @@ define void @test7(<8 x i1> %mask) {
; SKX-NEXT: movb $85, %al
; SKX-NEXT: kmovd %eax, %k1
; SKX-NEXT: korb %k1, %k0, %k0
-; SKX-NEXT: ktestb %k0, %k0
+; SKX-NEXT: kortestb %k0, %k0
; SKX-NEXT: retq
;
; AVX512BW-LABEL: test7:
@@ -607,7 +607,7 @@ define void @test7(<8 x i1> %mask) {
; AVX512DQ-NEXT: movb $85, %al
; AVX512DQ-NEXT: kmovw %eax, %k1
; AVX512DQ-NEXT: korb %k1, %k0, %k0
-; AVX512DQ-NEXT: ktestb %k0, %k0
+; AVX512DQ-NEXT: kortestb %k0, %k0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
allocas:
@@ -1673,7 +1673,7 @@ define void @ktest_1(<8 x double> %in, double * %base) {
; SKX-NEXT: vcmpltpd %zmm0, %zmm1, %k1
; SKX-NEXT: vmovupd 8(%rdi), %zmm1 {%k1} {z}
; SKX-NEXT: vcmpltpd %zmm1, %zmm0, %k0 {%k1}
-; SKX-NEXT: ktestb %k0, %k0
+; SKX-NEXT: kortestb %k0, %k0
; SKX-NEXT: je LBB42_2
; SKX-NEXT: ## %bb.1: ## %L1
; SKX-NEXT: vmovapd %zmm0, (%rdi)
@@ -1708,7 +1708,7 @@ define void @ktest_1(<8 x double> %in, double * %base) {
; AVX512DQ-NEXT: vcmpltpd %zmm0, %zmm1, %k1
; AVX512DQ-NEXT: vmovupd 8(%rdi), %zmm1 {%k1} {z}
; AVX512DQ-NEXT: vcmpltpd %zmm1, %zmm0, %k0 {%k1}
-; AVX512DQ-NEXT: ktestb %k0, %k0
+; AVX512DQ-NEXT: kortestb %k0, %k0
; AVX512DQ-NEXT: je LBB42_2
; AVX512DQ-NEXT: ## %bb.1: ## %L1
; AVX512DQ-NEXT: vmovapd %zmm0, (%rdi)
@@ -1788,7 +1788,7 @@ define void @ktest_2(<32 x float> %in, float * %base) {
; SKX-NEXT: vcmpltps %zmm2, %zmm1, %k2
; SKX-NEXT: kunpckwd %k1, %k2, %k1
; SKX-NEXT: kord %k1, %k0, %k0
-; SKX-NEXT: ktestd %k0, %k0
+; SKX-NEXT: kortestd %k0, %k0
; SKX-NEXT: je LBB43_2
; SKX-NEXT: ## %bb.1: ## %L1
; SKX-NEXT: vmovaps %zmm0, (%rdi)
@@ -1814,7 +1814,7 @@ define void @ktest_2(<32 x float> %in, float * %base) {
; AVX512BW-NEXT: vcmpltps %zmm2, %zmm1, %k2
; AVX512BW-NEXT: kunpckwd %k1, %k2, %k1
; AVX512BW-NEXT: kord %k1, %k0, %k0
-; AVX512BW-NEXT: ktestd %k0, %k0
+; AVX512BW-NEXT: kortestd %k0, %k0
; AVX512BW-NEXT: je LBB43_2
; AVX512BW-NEXT: ## %bb.1: ## %L1
; AVX512BW-NEXT: vmovaps %zmm0, (%rdi)
@@ -2786,3 +2786,32 @@ bb.2:
}
declare void @foo()
+; Make sure we can use the C flag from kortest to check for all ones.
+define void @ktest_allones(<16 x i32> %x, <16 x i32> %y) {
+; CHECK-LABEL: ktest_allones:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: vporq %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: vptestnmd %zmm0, %zmm0, %k0
+; CHECK-NEXT: kortestw %k0, %k0
+; CHECK-NEXT: jb LBB65_2
+; CHECK-NEXT: ## %bb.1: ## %bb.1
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: callq _foo
+; CHECK-NEXT: LBB65_2: ## %bb.2
+; CHECK-NEXT: popq %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+ %a = icmp eq <16 x i32> %x, zeroinitializer
+ %b = icmp eq <16 x i32> %y, zeroinitializer
+ %c = and <16 x i1> %a, %b
+ %d = bitcast <16 x i1> %c to i16
+ %e = icmp eq i16 %d, -1
+ br i1 %e, label %bb.2, label %bb.1
+bb.1:
+ call void @foo()
+ br label %bb.2
+bb.2:
+ ret void
+}
diff --git a/llvm/test/CodeGen/X86/avx512-schedule.ll b/llvm/test/CodeGen/X86/avx512-schedule.ll
index 1f4e4c9062d..69d0a62f281 100755
--- a/llvm/test/CodeGen/X86/avx512-schedule.ll
+++ b/llvm/test/CodeGen/X86/avx512-schedule.ll
@@ -7031,7 +7031,7 @@ define void @vcmp_test7(<8 x i1> %mask) {
; GENERIC-NEXT: movb $85, %al # sched: [1:0.33]
; GENERIC-NEXT: kmovd %eax, %k1 # sched: [1:0.33]
; GENERIC-NEXT: korb %k1, %k0, %k0 # sched: [1:1.00]
-; GENERIC-NEXT: ktestb %k0, %k0 # sched: [1:1.00]
+; GENERIC-NEXT: kortestb %k0, %k0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: vcmp_test7:
@@ -7041,7 +7041,7 @@ define void @vcmp_test7(<8 x i1> %mask) {
; SKX-NEXT: movb $85, %al # sched: [1:0.25]
; SKX-NEXT: kmovd %eax, %k1 # sched: [1:1.00]
; SKX-NEXT: korb %k1, %k0, %k0 # sched: [1:1.00]
-; SKX-NEXT: ktestb %k0, %k0 # sched: [3:1.00]
+; SKX-NEXT: kortestb %k0, %k0 # sched: [3:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
allocas:
%a= or <8 x i1> %mask, <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>
@@ -7615,7 +7615,7 @@ define void @ktest_1(<8 x double> %in, double * %base) {
; GENERIC-NEXT: vcmpltpd %zmm0, %zmm1, %k1 # sched: [3:1.00]
; GENERIC-NEXT: vmovupd 8(%rdi), %zmm1 {%k1} {z} # sched: [4:0.50]
; GENERIC-NEXT: vcmpltpd %zmm1, %zmm0, %k0 {%k1} # sched: [3:1.00]
-; GENERIC-NEXT: ktestb %k0, %k0 # sched: [1:1.00]
+; GENERIC-NEXT: kortestb %k0, %k0 # sched: [1:1.00]
; GENERIC-NEXT: je .LBB410_2 # sched: [1:1.00]
; GENERIC-NEXT: # %bb.1: # %L1
; GENERIC-NEXT: vmovapd %zmm0, (%rdi) # sched: [1:1.00]
@@ -7632,7 +7632,7 @@ define void @ktest_1(<8 x double> %in, double * %base) {
; SKX-NEXT: vcmpltpd %zmm0, %zmm1, %k1 # sched: [3:1.00]
; SKX-NEXT: vmovupd 8(%rdi), %zmm1 {%k1} {z} # sched: [8:0.50]
; SKX-NEXT: vcmpltpd %zmm1, %zmm0, %k0 {%k1} # sched: [3:1.00]
-; SKX-NEXT: ktestb %k0, %k0 # sched: [3:1.00]
+; SKX-NEXT: kortestb %k0, %k0 # sched: [3:1.00]
; SKX-NEXT: je .LBB410_2 # sched: [1:0.50]
; SKX-NEXT: # %bb.1: # %L1
; SKX-NEXT: vmovapd %zmm0, (%rdi) # sched: [1:1.00]
@@ -7684,7 +7684,7 @@ define void @ktest_2(<32 x float> %in, float * %base) {
; GENERIC-NEXT: vcmpltps %zmm2, %zmm1, %k2 # sched: [3:1.00]
; GENERIC-NEXT: kunpckwd %k1, %k2, %k1 # sched: [1:1.00]
; GENERIC-NEXT: kord %k1, %k0, %k0 # sched: [1:1.00]
-; GENERIC-NEXT: ktestd %k0, %k0 # sched: [1:1.00]
+; GENERIC-NEXT: kortestd %k0, %k0 # sched: [1:1.00]
; GENERIC-NEXT: je .LBB411_2 # sched: [1:1.00]
; GENERIC-NEXT: # %bb.1: # %L1
; GENERIC-NEXT: vmovaps %zmm0, (%rdi) # sched: [1:1.00]
@@ -7710,7 +7710,7 @@ define void @ktest_2(<32 x float> %in, float * %base) {
; SKX-NEXT: vcmpltps %zmm2, %zmm1, %k2 # sched: [3:1.00]
; SKX-NEXT: kunpckwd %k1, %k2, %k1 # sched: [3:1.00]
; SKX-NEXT: kord %k1, %k0, %k0 # sched: [1:1.00]
-; SKX-NEXT: ktestd %k0, %k0 # sched: [3:1.00]
+; SKX-NEXT: kortestd %k0, %k0 # sched: [3:1.00]
; SKX-NEXT: je .LBB411_2 # sched: [1:0.50]
; SKX-NEXT: # %bb.1: # %L1
; SKX-NEXT: vmovaps %zmm0, (%rdi) # sched: [1:1.00]
diff --git a/llvm/test/CodeGen/X86/setcc-lowering.ll b/llvm/test/CodeGen/X86/setcc-lowering.ll
index 3ba2d8cb5ee..a9f5b68a232 100644
--- a/llvm/test/CodeGen/X86/setcc-lowering.ll
+++ b/llvm/test/CodeGen/X86/setcc-lowering.ll
@@ -84,8 +84,7 @@ define void @pr26232(i64 %a, <16 x i1> %b) {
; KNL-32-NEXT: cmovlw %dx, %si
; KNL-32-NEXT: kmovw %esi, %k1
; KNL-32-NEXT: kandw %k0, %k1, %k1
-; KNL-32-NEXT: kmovw %k1, %esi
-; KNL-32-NEXT: testw %si, %si
+; KNL-32-NEXT: kortestw %k1, %k1
; KNL-32-NEXT: jne .LBB1_1
; KNL-32-NEXT: # %bb.2: # %for_exit600
; KNL-32-NEXT: popl %esi
OpenPOWER on IntegriCloud