summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--llvm/lib/Target/X86/X86InstrAVX512.td90
-rw-r--r--llvm/test/CodeGen/X86/avx512-mask-op.ll10
-rw-r--r--llvm/test/CodeGen/X86/avx512-select.ll7
3 files changed, 36 insertions, 71 deletions
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index cef834ba506..104dc2659d5 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -2458,7 +2458,7 @@ multiclass avx512_mask_unop_all<bits<8> opc, string OpcodeStr,
HasBWI>, VEX, PS, VEX_W;
}
-defm KNOT : avx512_mask_unop_all<0x44, "knot", not>;
+defm KNOT : avx512_mask_unop_all<0x44, "knot", vnot>;
multiclass avx512_mask_unop_int<string IntName, string InstName> {
let Predicates = [HasAVX512] in
@@ -2469,27 +2469,15 @@ multiclass avx512_mask_unop_int<string IntName, string InstName> {
}
defm : avx512_mask_unop_int<"knot", "KNOT">;
-let Predicates = [HasDQI] in
-def : Pat<(xor VK8:$src1, (v8i1 immAllOnesV)), (KNOTBrr VK8:$src1)>;
-let Predicates = [HasAVX512] in
-def : Pat<(xor VK16:$src1, (v16i1 immAllOnesV)), (KNOTWrr VK16:$src1)>;
-let Predicates = [HasBWI] in
-def : Pat<(xor VK32:$src1, (v32i1 immAllOnesV)), (KNOTDrr VK32:$src1)>;
-let Predicates = [HasBWI] in
-def : Pat<(xor VK64:$src1, (v64i1 immAllOnesV)), (KNOTQrr VK64:$src1)>;
-
// KNL does not support KMOVB, 8-bit mask is promoted to 16-bit
-let Predicates = [HasAVX512, NoDQI] in {
-def : Pat<(xor VK8:$src1, (v8i1 immAllOnesV)),
- (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$src1, VK16)), VK8)>;
-def : Pat<(not VK8:$src),
- (COPY_TO_REGCLASS
- (KNOTWrr (COPY_TO_REGCLASS VK8:$src, VK16)), VK8)>;
-}
-def : Pat<(xor VK4:$src1, (v4i1 immAllOnesV)),
- (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK4:$src1, VK16)), VK4)>;
-def : Pat<(xor VK2:$src1, (v2i1 immAllOnesV)),
- (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK2:$src1, VK16)), VK2)>;
+let Predicates = [HasAVX512, NoDQI] in
+def : Pat<(vnot VK8:$src),
+ (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$src, VK16)), VK8)>;
+
+def : Pat<(vnot VK4:$src),
+ (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK4:$src, VK16)), VK4)>;
+def : Pat<(vnot VK2:$src),
+ (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK2:$src, VK16)), VK2)>;
// Mask binary operation
// - KAND, KANDN, KOR, KXNOR, KXOR
@@ -2518,13 +2506,16 @@ multiclass avx512_mask_binop_all<bits<8> opc, string OpcodeStr,
def andn : PatFrag<(ops node:$i0, node:$i1), (and (not node:$i0), node:$i1)>;
def xnor : PatFrag<(ops node:$i0, node:$i1), (not (xor node:$i0, node:$i1))>;
+// These nodes use 'vnot' instead of 'not' to support vectors.
+def vandn : PatFrag<(ops node:$i0, node:$i1), (and (vnot node:$i0), node:$i1)>;
+def vxnor : PatFrag<(ops node:$i0, node:$i1), (vnot (xor node:$i0, node:$i1))>;
-defm KAND : avx512_mask_binop_all<0x41, "kand", and, 1>;
-defm KOR : avx512_mask_binop_all<0x45, "kor", or, 1>;
-defm KXNOR : avx512_mask_binop_all<0x46, "kxnor", xnor, 1>;
-defm KXOR : avx512_mask_binop_all<0x47, "kxor", xor, 1>;
-defm KANDN : avx512_mask_binop_all<0x42, "kandn", andn, 0>;
-defm KADD : avx512_mask_binop_all<0x4A, "kadd", add, 1, HasDQI>;
+defm KAND : avx512_mask_binop_all<0x41, "kand", and, 1>;
+defm KOR : avx512_mask_binop_all<0x45, "kor", or, 1>;
+defm KXNOR : avx512_mask_binop_all<0x46, "kxnor", vxnor, 1>;
+defm KXOR : avx512_mask_binop_all<0x47, "kxor", xor, 1>;
+defm KANDN : avx512_mask_binop_all<0x42, "kandn", vandn, 0>;
+defm KADD : avx512_mask_binop_all<0x4A, "kadd", add, 1, HasDQI>;
multiclass avx512_mask_binop_int<string IntName, string InstName> {
let Predicates = [HasAVX512] in
@@ -2541,11 +2532,12 @@ defm : avx512_mask_binop_int<"kor", "KOR">;
defm : avx512_mask_binop_int<"kxnor", "KXNOR">;
defm : avx512_mask_binop_int<"kxor", "KXOR">;
-multiclass avx512_binop_pat<SDPatternOperator OpNode, Instruction Inst> {
+multiclass avx512_binop_pat<SDPatternOperator VOpNode, SDPatternOperator OpNode,
+ Instruction Inst> {
// With AVX512F, 8-bit mask is promoted to 16-bit mask,
// for the DQI set, this type is legal and KxxxB instruction is used
let Predicates = [NoDQI] in
- def : Pat<(OpNode VK8:$src1, VK8:$src2),
+ def : Pat<(VOpNode VK8:$src1, VK8:$src2),
(COPY_TO_REGCLASS
(Inst (COPY_TO_REGCLASS VK8:$src1, VK16),
(COPY_TO_REGCLASS VK8:$src2, VK16)), VK8)>;
@@ -2555,47 +2547,21 @@ multiclass avx512_binop_pat<SDPatternOperator OpNode, Instruction Inst> {
(COPY_TO_REGCLASS (Inst
(COPY_TO_REGCLASS VK1:$src1, VK16),
(COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>;
- def : Pat<(OpNode VK2:$src1, VK2:$src2),
+ def : Pat<(VOpNode VK2:$src1, VK2:$src2),
(COPY_TO_REGCLASS (Inst
(COPY_TO_REGCLASS VK2:$src1, VK16),
(COPY_TO_REGCLASS VK2:$src2, VK16)), VK1)>;
- def : Pat<(OpNode VK4:$src1, VK4:$src2),
+ def : Pat<(VOpNode VK4:$src1, VK4:$src2),
(COPY_TO_REGCLASS (Inst
(COPY_TO_REGCLASS VK4:$src1, VK16),
(COPY_TO_REGCLASS VK4:$src2, VK16)), VK1)>;
}
-defm : avx512_binop_pat<and, KANDWrr>;
-defm : avx512_binop_pat<andn, KANDNWrr>;
-defm : avx512_binop_pat<or, KORWrr>;
-defm : avx512_binop_pat<xnor, KXNORWrr>;
-defm : avx512_binop_pat<xor, KXORWrr>;
-
-def : Pat<(xor (xor VK16:$src1, VK16:$src2), (v16i1 immAllOnesV)),
- (KXNORWrr VK16:$src1, VK16:$src2)>;
-def : Pat<(xor (xor VK8:$src1, VK8:$src2), (v8i1 immAllOnesV)),
- (KXNORBrr VK8:$src1, VK8:$src2)>, Requires<[HasDQI]>;
-def : Pat<(xor (xor VK32:$src1, VK32:$src2), (v32i1 immAllOnesV)),
- (KXNORDrr VK32:$src1, VK32:$src2)>, Requires<[HasBWI]>;
-def : Pat<(xor (xor VK64:$src1, VK64:$src2), (v64i1 immAllOnesV)),
- (KXNORQrr VK64:$src1, VK64:$src2)>, Requires<[HasBWI]>;
-
-let Predicates = [NoDQI] in
-def : Pat<(xor (xor VK8:$src1, VK8:$src2), (v8i1 immAllOnesV)),
- (COPY_TO_REGCLASS (KXNORWrr (COPY_TO_REGCLASS VK8:$src1, VK16),
- (COPY_TO_REGCLASS VK8:$src2, VK16)), VK8)>;
-
-def : Pat<(xor (xor VK4:$src1, VK4:$src2), (v4i1 immAllOnesV)),
- (COPY_TO_REGCLASS (KXNORWrr (COPY_TO_REGCLASS VK4:$src1, VK16),
- (COPY_TO_REGCLASS VK4:$src2, VK16)), VK4)>;
-
-def : Pat<(xor (xor VK2:$src1, VK2:$src2), (v2i1 immAllOnesV)),
- (COPY_TO_REGCLASS (KXNORWrr (COPY_TO_REGCLASS VK2:$src1, VK16),
- (COPY_TO_REGCLASS VK2:$src2, VK16)), VK2)>;
-
-def : Pat<(xor (xor VK1:$src1, VK1:$src2), (i1 1)),
- (COPY_TO_REGCLASS (KXNORWrr (COPY_TO_REGCLASS VK1:$src1, VK16),
- (COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>;
+defm : avx512_binop_pat<and, and, KANDWrr>;
+defm : avx512_binop_pat<vandn, andn, KANDNWrr>;
+defm : avx512_binop_pat<or, or, KORWrr>;
+defm : avx512_binop_pat<vxnor, xnor, KXNORWrr>;
+defm : avx512_binop_pat<xor, xor, KXORWrr>;
// Mask unpacking
multiclass avx512_mask_unpck<string Suffix,RegisterClass KRC, ValueType VT,
diff --git a/llvm/test/CodeGen/X86/avx512-mask-op.ll b/llvm/test/CodeGen/X86/avx512-mask-op.ll
index 353f9ad42a6..96ae5f4633f 100644
--- a/llvm/test/CodeGen/X86/avx512-mask-op.ll
+++ b/llvm/test/CodeGen/X86/avx512-mask-op.ll
@@ -257,9 +257,9 @@ define <4 x i32> @test4(<4 x i64> %x, <4 x i64> %y, <4 x i64> %x1, <4 x i64> %y1
;
; SKX-LABEL: test4:
; SKX: ## BB#0:
-; SKX-NEXT: vpcmpgtq %ymm3, %ymm2, %k0
-; SKX-NEXT: knotw %k0, %k1
-; SKX-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 {%k1}
+; SKX-NEXT: vpcmpgtq %ymm1, %ymm0, %k0
+; SKX-NEXT: vpcmpgtq %ymm3, %ymm2, %k1
+; SKX-NEXT: kandnw %k0, %k1, %k0
; SKX-NEXT: vpmovm2d %k0, %xmm0
; SKX-NEXT: retq
%x_gt_y = icmp sgt <4 x i64> %x, %y
@@ -280,8 +280,8 @@ define <2 x i64> @test5(<2 x i64> %x, <2 x i64> %y, <2 x i64> %x1, <2 x i64> %y1
; SKX-LABEL: test5:
; SKX: ## BB#0:
; SKX-NEXT: vpcmpgtq %xmm0, %xmm1, %k0
-; SKX-NEXT: knotw %k0, %k1
-; SKX-NEXT: vpcmpgtq %xmm3, %xmm2, %k0 {%k1}
+; SKX-NEXT: vpcmpgtq %xmm3, %xmm2, %k1
+; SKX-NEXT: kandnw %k1, %k0, %k0
; SKX-NEXT: vpmovm2q %k0, %xmm0
; SKX-NEXT: retq
%x_gt_y = icmp slt <2 x i64> %x, %y
diff --git a/llvm/test/CodeGen/X86/avx512-select.ll b/llvm/test/CodeGen/X86/avx512-select.ll
index 4a3695fab18..42579377ef3 100644
--- a/llvm/test/CodeGen/X86/avx512-select.ll
+++ b/llvm/test/CodeGen/X86/avx512-select.ll
@@ -133,10 +133,9 @@ define i8 @select07(i8 %a.0, i8 %b.0, i8 %m) {
; CHECK-NEXT: kmovw %edx, %k0
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: kmovw %esi, %k2
-; CHECK-NEXT: kandw %k0, %k1, %k1
-; CHECK-NEXT: knotw %k0, %k0
-; CHECK-NEXT: kandw %k0, %k2, %k0
-; CHECK-NEXT: korw %k0, %k1, %k0
+; CHECK-NEXT: kandnw %k2, %k0, %k2
+; CHECK-NEXT: kandw %k0, %k1, %k0
+; CHECK-NEXT: korw %k2, %k0, %k0
; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: retq
%mask = bitcast i8 %m to <8 x i1>
OpenPOWER on IntegriCloud