summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorCraig Topper <craig.topper@intel.com>2018-02-08 08:29:43 +0000
committerCraig Topper <craig.topper@intel.com>2018-02-08 08:29:43 +0000
commit8d0c8c9be158d2c83864c3124f258f8790476602 (patch)
tree7b8e62bbb3bbfabd9d20595076b7ad8821460b9d
parent93505707b6d3ec117e555c5a48adc2cc56470e38 (diff)
downloadbcm5719-llvm-8d0c8c9be158d2c83864c3124f258f8790476602.tar.gz
bcm5719-llvm-8d0c8c9be158d2c83864c3124f258f8790476602.zip
[X86] Support folding in a k-register OR when creating KORTEST from scalar compare of a bitcast from vXi1.
This should allow us to remove the kortest intrinsic from IR and use compare+bitcast+or in IR instead. llvm-svn: 324580
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp10
-rw-r--r--llvm/test/CodeGen/X86/avx512-mask-op.ll12
-rwxr-xr-xllvm/test/CodeGen/X86/avx512-schedule.ll14
3 files changed, 18 insertions, 18 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 500a1d3ab03..5aaf2417e1b 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -18149,7 +18149,15 @@ static SDValue EmitKTEST(SDValue Op0, SDValue Op1, ISD::CondCode CC,
} else
return SDValue();
- SDValue KORTEST = DAG.getNode(X86ISD::KORTEST, dl, MVT::i32, Op0, Op0);
+ // If the input is a single-use OR, fold its operands directly into the KORTEST.
+ SDValue LHS = Op0;
+ SDValue RHS = Op0;
+ if (Op0.getOpcode() == ISD::OR && Op0.hasOneUse()) {
+ LHS = Op0.getOperand(0);
+ RHS = Op0.getOperand(1);
+ }
+
+ SDValue KORTEST = DAG.getNode(X86ISD::KORTEST, dl, MVT::i32, LHS, RHS);
return getSETCC(X86CC, KORTEST, dl, DAG);
}
diff --git a/llvm/test/CodeGen/X86/avx512-mask-op.ll b/llvm/test/CodeGen/X86/avx512-mask-op.ll
index 4766c8b1560..6953cd9ed47 100644
--- a/llvm/test/CodeGen/X86/avx512-mask-op.ll
+++ b/llvm/test/CodeGen/X86/avx512-mask-op.ll
@@ -583,8 +583,7 @@ define void @test7(<8 x i1> %mask) {
; SKX-NEXT: vpmovw2m %xmm0, %k0
; SKX-NEXT: movb $85, %al
; SKX-NEXT: kmovd %eax, %k1
-; SKX-NEXT: korb %k1, %k0, %k0
-; SKX-NEXT: kortestb %k0, %k0
+; SKX-NEXT: kortestb %k1, %k0
; SKX-NEXT: retq
;
; AVX512BW-LABEL: test7:
@@ -606,8 +605,7 @@ define void @test7(<8 x i1> %mask) {
; AVX512DQ-NEXT: vptestmq %zmm0, %zmm0, %k0
; AVX512DQ-NEXT: movb $85, %al
; AVX512DQ-NEXT: kmovw %eax, %k1
-; AVX512DQ-NEXT: korb %k1, %k0, %k0
-; AVX512DQ-NEXT: kortestb %k0, %k0
+; AVX512DQ-NEXT: kortestb %k1, %k0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
allocas:
@@ -1787,8 +1785,7 @@ define void @ktest_2(<32 x float> %in, float * %base) {
; SKX-NEXT: vcmpltps %zmm3, %zmm0, %k1
; SKX-NEXT: vcmpltps %zmm2, %zmm1, %k2
; SKX-NEXT: kunpckwd %k1, %k2, %k1
-; SKX-NEXT: kord %k1, %k0, %k0
-; SKX-NEXT: kortestd %k0, %k0
+; SKX-NEXT: kortestd %k1, %k0
; SKX-NEXT: je LBB43_2
; SKX-NEXT: ## %bb.1: ## %L1
; SKX-NEXT: vmovaps %zmm0, (%rdi)
@@ -1813,8 +1810,7 @@ define void @ktest_2(<32 x float> %in, float * %base) {
; AVX512BW-NEXT: vcmpltps %zmm3, %zmm0, %k1
; AVX512BW-NEXT: vcmpltps %zmm2, %zmm1, %k2
; AVX512BW-NEXT: kunpckwd %k1, %k2, %k1
-; AVX512BW-NEXT: kord %k1, %k0, %k0
-; AVX512BW-NEXT: kortestd %k0, %k0
+; AVX512BW-NEXT: kortestd %k1, %k0
; AVX512BW-NEXT: je LBB43_2
; AVX512BW-NEXT: ## %bb.1: ## %L1
; AVX512BW-NEXT: vmovaps %zmm0, (%rdi)
diff --git a/llvm/test/CodeGen/X86/avx512-schedule.ll b/llvm/test/CodeGen/X86/avx512-schedule.ll
index 69d0a62f281..1055570de07 100755
--- a/llvm/test/CodeGen/X86/avx512-schedule.ll
+++ b/llvm/test/CodeGen/X86/avx512-schedule.ll
@@ -7030,8 +7030,7 @@ define void @vcmp_test7(<8 x i1> %mask) {
; GENERIC-NEXT: vpmovw2m %xmm0, %k0 # sched: [1:0.33]
; GENERIC-NEXT: movb $85, %al # sched: [1:0.33]
; GENERIC-NEXT: kmovd %eax, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: korb %k1, %k0, %k0 # sched: [1:1.00]
-; GENERIC-NEXT: kortestb %k0, %k0 # sched: [1:1.00]
+; GENERIC-NEXT: kortestb %k1, %k0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: vcmp_test7:
@@ -7040,8 +7039,7 @@ define void @vcmp_test7(<8 x i1> %mask) {
; SKX-NEXT: vpmovw2m %xmm0, %k0 # sched: [1:1.00]
; SKX-NEXT: movb $85, %al # sched: [1:0.25]
; SKX-NEXT: kmovd %eax, %k1 # sched: [1:1.00]
-; SKX-NEXT: korb %k1, %k0, %k0 # sched: [1:1.00]
-; SKX-NEXT: kortestb %k0, %k0 # sched: [3:1.00]
+; SKX-NEXT: kortestb %k1, %k0 # sched: [3:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
allocas:
%a= or <8 x i1> %mask, <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>
@@ -7683,8 +7681,7 @@ define void @ktest_2(<32 x float> %in, float * %base) {
; GENERIC-NEXT: vcmpltps %zmm3, %zmm0, %k1 # sched: [3:1.00]
; GENERIC-NEXT: vcmpltps %zmm2, %zmm1, %k2 # sched: [3:1.00]
; GENERIC-NEXT: kunpckwd %k1, %k2, %k1 # sched: [1:1.00]
-; GENERIC-NEXT: kord %k1, %k0, %k0 # sched: [1:1.00]
-; GENERIC-NEXT: kortestd %k0, %k0 # sched: [1:1.00]
+; GENERIC-NEXT: kortestd %k1, %k0 # sched: [1:1.00]
; GENERIC-NEXT: je .LBB411_2 # sched: [1:1.00]
; GENERIC-NEXT: # %bb.1: # %L1
; GENERIC-NEXT: vmovaps %zmm0, (%rdi) # sched: [1:1.00]
@@ -7703,14 +7700,13 @@ define void @ktest_2(<32 x float> %in, float * %base) {
; SKX-NEXT: vmovups 64(%rdi), %zmm3 # sched: [8:0.50]
; SKX-NEXT: vcmpltps %zmm0, %zmm2, %k1 # sched: [3:1.00]
; SKX-NEXT: vcmpltps %zmm1, %zmm3, %k2 # sched: [3:1.00]
+; SKX-NEXT: kunpckwd %k1, %k2, %k0 # sched: [3:1.00]
; SKX-NEXT: vmovups 68(%rdi), %zmm2 {%k2} {z} # sched: [8:0.50]
; SKX-NEXT: vmovups 4(%rdi), %zmm3 {%k1} {z} # sched: [8:0.50]
-; SKX-NEXT: kunpckwd %k1, %k2, %k0 # sched: [3:1.00]
; SKX-NEXT: vcmpltps %zmm3, %zmm0, %k1 # sched: [3:1.00]
; SKX-NEXT: vcmpltps %zmm2, %zmm1, %k2 # sched: [3:1.00]
; SKX-NEXT: kunpckwd %k1, %k2, %k1 # sched: [3:1.00]
-; SKX-NEXT: kord %k1, %k0, %k0 # sched: [1:1.00]
-; SKX-NEXT: kortestd %k0, %k0 # sched: [3:1.00]
+; SKX-NEXT: kortestd %k1, %k0 # sched: [3:1.00]
; SKX-NEXT: je .LBB411_2 # sched: [1:0.50]
; SKX-NEXT: # %bb.1: # %L1
; SKX-NEXT: vmovaps %zmm0, (%rdi) # sched: [1:1.00]
OpenPOWER on IntegriCloud