diff options

 -rw-r--r-- llvm/lib/Target/X86/X86InstrAVX512.td   | 90
 -rw-r--r-- llvm/test/CodeGen/X86/avx512-mask-op.ll | 10
 -rw-r--r-- llvm/test/CodeGen/X86/avx512-select.ll  |  7
 3 files changed, 36 insertions(+), 71 deletions(-)
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index cef834ba506..104dc2659d5 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -2458,7 +2458,7 @@ multiclass avx512_mask_unop_all<bits<8> opc, string OpcodeStr, HasBWI>, VEX, PS, VEX_W; } -defm KNOT : avx512_mask_unop_all<0x44, "knot", not>; +defm KNOT : avx512_mask_unop_all<0x44, "knot", vnot>; multiclass avx512_mask_unop_int<string IntName, string InstName> { let Predicates = [HasAVX512] in @@ -2469,27 +2469,15 @@ multiclass avx512_mask_unop_int<string IntName, string InstName> { } defm : avx512_mask_unop_int<"knot", "KNOT">; -let Predicates = [HasDQI] in -def : Pat<(xor VK8:$src1, (v8i1 immAllOnesV)), (KNOTBrr VK8:$src1)>; -let Predicates = [HasAVX512] in -def : Pat<(xor VK16:$src1, (v16i1 immAllOnesV)), (KNOTWrr VK16:$src1)>; -let Predicates = [HasBWI] in -def : Pat<(xor VK32:$src1, (v32i1 immAllOnesV)), (KNOTDrr VK32:$src1)>; -let Predicates = [HasBWI] in -def : Pat<(xor VK64:$src1, (v64i1 immAllOnesV)), (KNOTQrr VK64:$src1)>; - // KNL does not support KMOVB, 8-bit mask is promoted to 16-bit -let Predicates = [HasAVX512, NoDQI] in { -def : Pat<(xor VK8:$src1, (v8i1 immAllOnesV)), - (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$src1, VK16)), VK8)>; -def : Pat<(not VK8:$src), - (COPY_TO_REGCLASS - (KNOTWrr (COPY_TO_REGCLASS VK8:$src, VK16)), VK8)>; -} -def : Pat<(xor VK4:$src1, (v4i1 immAllOnesV)), - (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK4:$src1, VK16)), VK4)>; -def : Pat<(xor VK2:$src1, (v2i1 immAllOnesV)), - (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK2:$src1, VK16)), VK2)>; +let Predicates = [HasAVX512, NoDQI] in +def : Pat<(vnot VK8:$src), + (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$src, VK16)), VK8)>; + +def : Pat<(vnot VK4:$src), + (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK4:$src, VK16)), VK4)>; +def : Pat<(vnot VK2:$src), + (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK2:$src, VK16)), 
VK2)>; // Mask binary operation // - KAND, KANDN, KOR, KXNOR, KXOR @@ -2518,13 +2506,16 @@ multiclass avx512_mask_binop_all<bits<8> opc, string OpcodeStr, def andn : PatFrag<(ops node:$i0, node:$i1), (and (not node:$i0), node:$i1)>; def xnor : PatFrag<(ops node:$i0, node:$i1), (not (xor node:$i0, node:$i1))>; +// These nodes use 'vnot' instead of 'not' to support vectors. +def vandn : PatFrag<(ops node:$i0, node:$i1), (and (vnot node:$i0), node:$i1)>; +def vxnor : PatFrag<(ops node:$i0, node:$i1), (vnot (xor node:$i0, node:$i1))>; -defm KAND : avx512_mask_binop_all<0x41, "kand", and, 1>; -defm KOR : avx512_mask_binop_all<0x45, "kor", or, 1>; -defm KXNOR : avx512_mask_binop_all<0x46, "kxnor", xnor, 1>; -defm KXOR : avx512_mask_binop_all<0x47, "kxor", xor, 1>; -defm KANDN : avx512_mask_binop_all<0x42, "kandn", andn, 0>; -defm KADD : avx512_mask_binop_all<0x4A, "kadd", add, 1, HasDQI>; +defm KAND : avx512_mask_binop_all<0x41, "kand", and, 1>; +defm KOR : avx512_mask_binop_all<0x45, "kor", or, 1>; +defm KXNOR : avx512_mask_binop_all<0x46, "kxnor", vxnor, 1>; +defm KXOR : avx512_mask_binop_all<0x47, "kxor", xor, 1>; +defm KANDN : avx512_mask_binop_all<0x42, "kandn", vandn, 0>; +defm KADD : avx512_mask_binop_all<0x4A, "kadd", add, 1, HasDQI>; multiclass avx512_mask_binop_int<string IntName, string InstName> { let Predicates = [HasAVX512] in @@ -2541,11 +2532,12 @@ defm : avx512_mask_binop_int<"kor", "KOR">; defm : avx512_mask_binop_int<"kxnor", "KXNOR">; defm : avx512_mask_binop_int<"kxor", "KXOR">; -multiclass avx512_binop_pat<SDPatternOperator OpNode, Instruction Inst> { +multiclass avx512_binop_pat<SDPatternOperator VOpNode, SDPatternOperator OpNode, + Instruction Inst> { // With AVX512F, 8-bit mask is promoted to 16-bit mask, // for the DQI set, this type is legal and KxxxB instruction is used let Predicates = [NoDQI] in - def : Pat<(OpNode VK8:$src1, VK8:$src2), + def : Pat<(VOpNode VK8:$src1, VK8:$src2), (COPY_TO_REGCLASS (Inst (COPY_TO_REGCLASS VK8:$src1, VK16), 
(COPY_TO_REGCLASS VK8:$src2, VK16)), VK8)>; @@ -2555,47 +2547,21 @@ multiclass avx512_binop_pat<SDPatternOperator OpNode, Instruction Inst> { (COPY_TO_REGCLASS (Inst (COPY_TO_REGCLASS VK1:$src1, VK16), (COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>; - def : Pat<(OpNode VK2:$src1, VK2:$src2), + def : Pat<(VOpNode VK2:$src1, VK2:$src2), (COPY_TO_REGCLASS (Inst (COPY_TO_REGCLASS VK2:$src1, VK16), (COPY_TO_REGCLASS VK2:$src2, VK16)), VK1)>; - def : Pat<(OpNode VK4:$src1, VK4:$src2), + def : Pat<(VOpNode VK4:$src1, VK4:$src2), (COPY_TO_REGCLASS (Inst (COPY_TO_REGCLASS VK4:$src1, VK16), (COPY_TO_REGCLASS VK4:$src2, VK16)), VK1)>; } -defm : avx512_binop_pat<and, KANDWrr>; -defm : avx512_binop_pat<andn, KANDNWrr>; -defm : avx512_binop_pat<or, KORWrr>; -defm : avx512_binop_pat<xnor, KXNORWrr>; -defm : avx512_binop_pat<xor, KXORWrr>; - -def : Pat<(xor (xor VK16:$src1, VK16:$src2), (v16i1 immAllOnesV)), - (KXNORWrr VK16:$src1, VK16:$src2)>; -def : Pat<(xor (xor VK8:$src1, VK8:$src2), (v8i1 immAllOnesV)), - (KXNORBrr VK8:$src1, VK8:$src2)>, Requires<[HasDQI]>; -def : Pat<(xor (xor VK32:$src1, VK32:$src2), (v32i1 immAllOnesV)), - (KXNORDrr VK32:$src1, VK32:$src2)>, Requires<[HasBWI]>; -def : Pat<(xor (xor VK64:$src1, VK64:$src2), (v64i1 immAllOnesV)), - (KXNORQrr VK64:$src1, VK64:$src2)>, Requires<[HasBWI]>; - -let Predicates = [NoDQI] in -def : Pat<(xor (xor VK8:$src1, VK8:$src2), (v8i1 immAllOnesV)), - (COPY_TO_REGCLASS (KXNORWrr (COPY_TO_REGCLASS VK8:$src1, VK16), - (COPY_TO_REGCLASS VK8:$src2, VK16)), VK8)>; - -def : Pat<(xor (xor VK4:$src1, VK4:$src2), (v4i1 immAllOnesV)), - (COPY_TO_REGCLASS (KXNORWrr (COPY_TO_REGCLASS VK4:$src1, VK16), - (COPY_TO_REGCLASS VK4:$src2, VK16)), VK4)>; - -def : Pat<(xor (xor VK2:$src1, VK2:$src2), (v2i1 immAllOnesV)), - (COPY_TO_REGCLASS (KXNORWrr (COPY_TO_REGCLASS VK2:$src1, VK16), - (COPY_TO_REGCLASS VK2:$src2, VK16)), VK2)>; - -def : Pat<(xor (xor VK1:$src1, VK1:$src2), (i1 1)), - (COPY_TO_REGCLASS (KXNORWrr (COPY_TO_REGCLASS VK1:$src1, VK16), 
- (COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>; +defm : avx512_binop_pat<and, and, KANDWrr>; +defm : avx512_binop_pat<vandn, andn, KANDNWrr>; +defm : avx512_binop_pat<or, or, KORWrr>; +defm : avx512_binop_pat<vxnor, xnor, KXNORWrr>; +defm : avx512_binop_pat<xor, xor, KXORWrr>; // Mask unpacking multiclass avx512_mask_unpck<string Suffix,RegisterClass KRC, ValueType VT, diff --git a/llvm/test/CodeGen/X86/avx512-mask-op.ll b/llvm/test/CodeGen/X86/avx512-mask-op.ll index 353f9ad42a6..96ae5f4633f 100644 --- a/llvm/test/CodeGen/X86/avx512-mask-op.ll +++ b/llvm/test/CodeGen/X86/avx512-mask-op.ll @@ -257,9 +257,9 @@ define <4 x i32> @test4(<4 x i64> %x, <4 x i64> %y, <4 x i64> %x1, <4 x i64> %y1 ; ; SKX-LABEL: test4: ; SKX: ## BB#0: -; SKX-NEXT: vpcmpgtq %ymm3, %ymm2, %k0 -; SKX-NEXT: knotw %k0, %k1 -; SKX-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 {%k1} +; SKX-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 +; SKX-NEXT: vpcmpgtq %ymm3, %ymm2, %k1 +; SKX-NEXT: kandnw %k0, %k1, %k0 ; SKX-NEXT: vpmovm2d %k0, %xmm0 ; SKX-NEXT: retq %x_gt_y = icmp sgt <4 x i64> %x, %y @@ -280,8 +280,8 @@ define <2 x i64> @test5(<2 x i64> %x, <2 x i64> %y, <2 x i64> %x1, <2 x i64> %y1 ; SKX-LABEL: test5: ; SKX: ## BB#0: ; SKX-NEXT: vpcmpgtq %xmm0, %xmm1, %k0 -; SKX-NEXT: knotw %k0, %k1 -; SKX-NEXT: vpcmpgtq %xmm3, %xmm2, %k0 {%k1} +; SKX-NEXT: vpcmpgtq %xmm3, %xmm2, %k1 +; SKX-NEXT: kandnw %k1, %k0, %k0 ; SKX-NEXT: vpmovm2q %k0, %xmm0 ; SKX-NEXT: retq %x_gt_y = icmp slt <2 x i64> %x, %y diff --git a/llvm/test/CodeGen/X86/avx512-select.ll b/llvm/test/CodeGen/X86/avx512-select.ll index 4a3695fab18..42579377ef3 100644 --- a/llvm/test/CodeGen/X86/avx512-select.ll +++ b/llvm/test/CodeGen/X86/avx512-select.ll @@ -133,10 +133,9 @@ define i8 @select07(i8 %a.0, i8 %b.0, i8 %m) { ; CHECK-NEXT: kmovw %edx, %k0 ; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: kmovw %esi, %k2 -; CHECK-NEXT: kandw %k0, %k1, %k1 -; CHECK-NEXT: knotw %k0, %k0 -; CHECK-NEXT: kandw %k0, %k2, %k0 -; CHECK-NEXT: korw %k0, %k1, %k0 +; CHECK-NEXT: kandnw %k2, 
%k0, %k2 +; CHECK-NEXT: kandw %k0, %k1, %k0 +; CHECK-NEXT: korw %k2, %k0, %k0 ; CHECK-NEXT: kmovw %k0, %eax ; CHECK-NEXT: retq %mask = bitcast i8 %m to <8 x i1>

