summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target/X86/X86InstrAVX512.td
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/X86/X86InstrAVX512.td')
-rw-r--r--llvm/lib/Target/X86/X86InstrAVX512.td309
1 files changed, 197 insertions, 112 deletions
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 2aee9e0977d..80eca2487c6 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -2160,7 +2160,7 @@ multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr, PatFrag OpNode,
(outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
- (_.VT (bitconvert (_.LdFrag addr:$src2)))))]>,
+ (_.VT (bitconvert (_.LdFrag addr:$src2)))))]>,
EVEX_4V, Sched<[sched.Folded, ReadAfterLd]>;
let isCommutable = IsCommutable in
def rrk : AVX512BI<opc, MRMSrcReg,
@@ -2240,11 +2240,15 @@ multiclass avx512_icmp_packed_rmb_vl<bits<8> opc, string OpcodeStr,
// This fragment treats X86cmpm as commutable to help match loads in both
// operands for PCMPEQ.
+def X86setcc_commute : SDNode<"ISD::SETCC", SDTSetCC, [SDNPCommutative]>;
def X86pcmpeqm_c : PatFrag<(ops node:$src1, node:$src2),
- (X86cmpm_c node:$src1, node:$src2, (i8 0))>;
+ (X86setcc_commute node:$src1, node:$src2, SETEQ)>;
def X86pcmpgtm : PatFrag<(ops node:$src1, node:$src2),
- (X86cmpm node:$src1, node:$src2, (i8 6))>;
+ (setcc node:$src1, node:$src2, SETGT)>;
+// AddedComplexity is needed because the explicit SETEQ/SETGT CondCode doesn't
+// increase the pattern complexity the way an immediate would.
+let AddedComplexity = 2 in {
// FIXME: Is there a better scheduler class for VPCMP?
defm VPCMPEQB : avx512_icmp_packed_vl<0x74, "vpcmpeqb", X86pcmpeqm_c,
SchedWriteVecALU, avx512vl_i8_info, HasBWI, 1>,
@@ -2277,33 +2281,29 @@ defm VPCMPGTD : avx512_icmp_packed_rmb_vl<0x66, "vpcmpgtd", X86pcmpgtm,
defm VPCMPGTQ : avx512_icmp_packed_rmb_vl<0x37, "vpcmpgtq", X86pcmpgtm,
SchedWriteVecALU, avx512vl_i64_info, HasAVX512>,
T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
+}
-// Transforms to swizzle an immediate to help matching memory operand in first
-// operand.
-def CommutePCMPCC : SDNodeXForm<imm, [{
- uint8_t Imm = N->getZExtValue() & 0x7;
- Imm = X86::getSwappedVPCMPImm(Imm);
- return getI8Imm(Imm, SDLoc(N));
-}]>;
-
-multiclass avx512_icmp_cc<bits<8> opc, string Suffix, SDNode OpNode,
- X86FoldableSchedWrite sched, X86VectorVTInfo _,
- string Name> {
+multiclass avx512_icmp_cc<bits<8> opc, string Suffix, PatFrag Frag,
+ PatFrag CommFrag, X86FoldableSchedWrite sched,
+ X86VectorVTInfo _, string Name> {
let isCommutable = 1 in
def rri : AVX512AIi8<opc, MRMSrcReg,
(outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, AVX512ICC:$cc),
!strconcat("vpcmp${cc}", Suffix,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
- imm:$cc))]>,
+ [(set _.KRC:$dst, (_.KVT (Frag:$cc (_.VT _.RC:$src1),
+ (_.VT _.RC:$src2),
+ cond)))]>,
EVEX_4V, Sched<[sched]>;
def rmi : AVX512AIi8<opc, MRMSrcMem,
(outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, AVX512ICC:$cc),
!strconcat("vpcmp${cc}", Suffix,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
- (_.VT (bitconvert (_.LdFrag addr:$src2))),
- imm:$cc))]>,
+ [(set _.KRC:$dst, (_.KVT
+ (Frag:$cc
+ (_.VT _.RC:$src1),
+ (_.VT (bitconvert (_.LdFrag addr:$src2))),
+ cond)))]>,
EVEX_4V, Sched<[sched.Folded, ReadAfterLd]>;
let isCommutable = 1 in
def rrik : AVX512AIi8<opc, MRMSrcReg,
@@ -2313,8 +2313,9 @@ multiclass avx512_icmp_cc<bits<8> opc, string Suffix, SDNode OpNode,
"\t{$src2, $src1, $dst {${mask}}|",
"$dst {${mask}}, $src1, $src2}"),
[(set _.KRC:$dst, (and _.KRCWM:$mask,
- (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
- imm:$cc)))]>,
+ (_.KVT (Frag:$cc (_.VT _.RC:$src1),
+ (_.VT _.RC:$src2),
+ cond))))]>,
EVEX_4V, EVEX_K, Sched<[sched]>;
def rmik : AVX512AIi8<opc, MRMSrcMem,
(outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2,
@@ -2323,9 +2324,12 @@ multiclass avx512_icmp_cc<bits<8> opc, string Suffix, SDNode OpNode,
"\t{$src2, $src1, $dst {${mask}}|",
"$dst {${mask}}, $src1, $src2}"),
[(set _.KRC:$dst, (and _.KRCWM:$mask,
- (OpNode (_.VT _.RC:$src1),
- (_.VT (bitconvert (_.LdFrag addr:$src2))),
- imm:$cc)))]>,
+ (_.KVT
+ (Frag:$cc
+ (_.VT _.RC:$src1),
+ (_.VT (bitconvert
+ (_.LdFrag addr:$src2))),
+ cond))))]>,
EVEX_4V, EVEX_K, Sched<[sched.Folded, ReadAfterLd]>;
// Accept explicit immediate argument form instead of comparison code.
@@ -2359,31 +2363,34 @@ multiclass avx512_icmp_cc<bits<8> opc, string Suffix, SDNode OpNode,
NotMemoryFoldable;
}
- def : Pat<(OpNode (bitconvert (_.LdFrag addr:$src2)),
- (_.VT _.RC:$src1), imm:$cc),
- (!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2,
- (CommutePCMPCC imm:$cc))>;
+ def : Pat<(_.KVT (CommFrag:$cc (bitconvert (_.LdFrag addr:$src2)),
+ (_.VT _.RC:$src1), cond)),
+ (!cast<Instruction>(Name#_.ZSuffix#"rmi")
+ _.RC:$src1, addr:$src2, (CommFrag.OperandTransform $cc))>;
- def : Pat<(and _.KRCWM:$mask, (OpNode (bitconvert (_.LdFrag addr:$src2)),
- (_.VT _.RC:$src1), imm:$cc)),
- (!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask,
- _.RC:$src1, addr:$src2,
- (CommutePCMPCC imm:$cc))>;
+ def : Pat<(and _.KRCWM:$mask,
+ (_.KVT (CommFrag:$cc (bitconvert (_.LdFrag addr:$src2)),
+ (_.VT _.RC:$src1), cond))),
+ (!cast<Instruction>(Name#_.ZSuffix#"rmik")
+ _.KRCWM:$mask, _.RC:$src1, addr:$src2,
+ (CommFrag.OperandTransform $cc))>;
}
-multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, SDNode OpNode,
- X86FoldableSchedWrite sched, X86VectorVTInfo _,
- string Name> :
- avx512_icmp_cc<opc, Suffix, OpNode, sched, _, Name> {
+multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, PatFrag Frag,
+ PatFrag CommFrag, X86FoldableSchedWrite sched,
+ X86VectorVTInfo _, string Name> :
+ avx512_icmp_cc<opc, Suffix, Frag, CommFrag, sched, _, Name> {
def rmib : AVX512AIi8<opc, MRMSrcMem,
(outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2,
AVX512ICC:$cc),
!strconcat("vpcmp${cc}", Suffix,
"\t{${src2}", _.BroadcastStr, ", $src1, $dst|",
"$dst, $src1, ${src2}", _.BroadcastStr, "}"),
- [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
- (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
- imm:$cc))]>,
+ [(set _.KRC:$dst, (_.KVT (Frag:$cc
+ (_.VT _.RC:$src1),
+ (X86VBroadcast
+ (_.ScalarLdFrag addr:$src2)),
+ cond)))]>,
EVEX_4V, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
def rmibk : AVX512AIi8<opc, MRMSrcMem,
(outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
@@ -2392,9 +2399,11 @@ multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, SDNode OpNode,
"\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
"$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
[(set _.KRC:$dst, (and _.KRCWM:$mask,
- (OpNode (_.VT _.RC:$src1),
- (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
- imm:$cc)))]>,
+ (_.KVT (Frag:$cc
+ (_.VT _.RC:$src1),
+ (X86VBroadcast
+ (_.ScalarLdFrag addr:$src2)),
+ cond))))]>,
EVEX_4V, EVEX_K, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
// Accept explicit immediate argument form instead of comparison code.
@@ -2417,77 +2426,118 @@ multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, SDNode OpNode,
NotMemoryFoldable;
}
- def : Pat<(OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
- (_.VT _.RC:$src1), imm:$cc),
- (!cast<Instruction>(Name#_.ZSuffix#"rmib") _.RC:$src1, addr:$src2,
- (CommutePCMPCC imm:$cc))>;
+ def : Pat<(_.KVT (CommFrag:$cc (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
+ (_.VT _.RC:$src1), cond)),
+ (!cast<Instruction>(Name#_.ZSuffix#"rmib")
+ _.RC:$src1, addr:$src2, (CommFrag.OperandTransform $cc))>;
- def : Pat<(and _.KRCWM:$mask, (OpNode (X86VBroadcast
- (_.ScalarLdFrag addr:$src2)),
- (_.VT _.RC:$src1), imm:$cc)),
- (!cast<Instruction>(Name#_.ZSuffix#"rmibk") _.KRCWM:$mask,
- _.RC:$src1, addr:$src2,
- (CommutePCMPCC imm:$cc))>;
+ def : Pat<(and _.KRCWM:$mask,
+ (_.KVT (CommFrag:$cc (X86VBroadcast
+ (_.ScalarLdFrag addr:$src2)),
+ (_.VT _.RC:$src1), cond))),
+ (!cast<Instruction>(Name#_.ZSuffix#"rmibk")
+ _.KRCWM:$mask, _.RC:$src1, addr:$src2,
+ (CommFrag.OperandTransform $cc))>;
}
-multiclass avx512_icmp_cc_vl<bits<8> opc, string Suffix, SDNode OpNode,
- X86SchedWriteWidths sched,
+multiclass avx512_icmp_cc_vl<bits<8> opc, string Suffix, PatFrag Frag,
+ PatFrag CommFrag, X86SchedWriteWidths sched,
AVX512VLVectorVTInfo VTInfo, Predicate prd> {
let Predicates = [prd] in
- defm Z : avx512_icmp_cc<opc, Suffix, OpNode, sched.ZMM, VTInfo.info512, NAME>,
- EVEX_V512;
+ defm Z : avx512_icmp_cc<opc, Suffix, Frag, CommFrag, sched.ZMM,
+ VTInfo.info512, NAME>, EVEX_V512;
let Predicates = [prd, HasVLX] in {
- defm Z256 : avx512_icmp_cc<opc, Suffix, OpNode, sched.YMM, VTInfo.info256,
- NAME>,
- EVEX_V256;
- defm Z128 : avx512_icmp_cc<opc, Suffix, OpNode, sched.XMM, VTInfo.info128,
- NAME>,
- EVEX_V128;
+ defm Z256 : avx512_icmp_cc<opc, Suffix, Frag, CommFrag, sched.YMM,
+ VTInfo.info256, NAME>, EVEX_V256;
+ defm Z128 : avx512_icmp_cc<opc, Suffix, Frag, CommFrag, sched.XMM,
+ VTInfo.info128, NAME>, EVEX_V128;
}
}
-multiclass avx512_icmp_cc_rmb_vl<bits<8> opc, string Suffix, SDNode OpNode,
- X86SchedWriteWidths sched,
+multiclass avx512_icmp_cc_rmb_vl<bits<8> opc, string Suffix, PatFrag Frag,
+ PatFrag CommFrag, X86SchedWriteWidths sched,
AVX512VLVectorVTInfo VTInfo, Predicate prd> {
let Predicates = [prd] in
- defm Z : avx512_icmp_cc_rmb<opc, Suffix, OpNode, sched.ZMM,
+ defm Z : avx512_icmp_cc_rmb<opc, Suffix, Frag, CommFrag, sched.ZMM,
VTInfo.info512, NAME>, EVEX_V512;
let Predicates = [prd, HasVLX] in {
- defm Z256 : avx512_icmp_cc_rmb<opc, Suffix, OpNode, sched.YMM,
+ defm Z256 : avx512_icmp_cc_rmb<opc, Suffix, Frag, CommFrag, sched.YMM,
VTInfo.info256, NAME>, EVEX_V256;
- defm Z128 : avx512_icmp_cc_rmb<opc, Suffix, OpNode, sched.XMM,
+ defm Z128 : avx512_icmp_cc_rmb<opc, Suffix, Frag, CommFrag, sched.XMM,
VTInfo.info128, NAME>, EVEX_V128;
}
}
-// FIXME: Is there a better scheduler class for VPCMP/VPCMPU?
-defm VPCMPB : avx512_icmp_cc_vl<0x3F, "b", X86cmpm, SchedWriteVecALU,
- avx512vl_i8_info, HasBWI>, EVEX_CD8<8, CD8VF>;
-defm VPCMPUB : avx512_icmp_cc_vl<0x3E, "ub", X86cmpmu, SchedWriteVecALU,
- avx512vl_i8_info, HasBWI>, EVEX_CD8<8, CD8VF>;
+def X86pcmpm_imm : SDNodeXForm<setcc, [{
+ ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
+ uint8_t SSECC = X86::getVPCMPImmForCond(CC);
+ return getI8Imm(SSECC, SDLoc(N));
+}]>;
+
+// Swapped operand version of the above.
+def X86pcmpm_imm_commute : SDNodeXForm<setcc, [{
+ ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
+ uint8_t SSECC = X86::getVPCMPImmForCond(CC);
+ SSECC = X86::getSwappedVPCMPImm(SSECC);
+ return getI8Imm(SSECC, SDLoc(N));
+}]>;
+
+def X86pcmpm : PatFrag<(ops node:$src1, node:$src2, node:$cc),
+ (setcc node:$src1, node:$src2, node:$cc), [{
+ ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
+ return !ISD::isUnsignedIntSetCC(CC);
+}], X86pcmpm_imm>;
+
+// Same as above, but commutes immediate. Use for load folding.
+def X86pcmpm_commute : PatFrag<(ops node:$src1, node:$src2, node:$cc),
+ (setcc node:$src1, node:$src2, node:$cc), [{
+ ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
+ return !ISD::isUnsignedIntSetCC(CC);
+}], X86pcmpm_imm_commute>;
+
+def X86pcmpum : PatFrag<(ops node:$src1, node:$src2, node:$cc),
+ (setcc node:$src1, node:$src2, node:$cc), [{
+ ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
+ return ISD::isUnsignedIntSetCC(CC);
+}], X86pcmpm_imm>;
+
+// Same as above, but commutes immediate. Use for load folding.
+def X86pcmpum_commute : PatFrag<(ops node:$src1, node:$src2, node:$cc),
+ (setcc node:$src1, node:$src2, node:$cc), [{
+ ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
+ return ISD::isUnsignedIntSetCC(CC);
+}], X86pcmpm_imm_commute>;
-defm VPCMPW : avx512_icmp_cc_vl<0x3F, "w", X86cmpm, SchedWriteVecALU,
- avx512vl_i16_info, HasBWI>,
+// FIXME: Is there a better scheduler class for VPCMP/VPCMPU?
+defm VPCMPB : avx512_icmp_cc_vl<0x3F, "b", X86pcmpm, X86pcmpm_commute,
+ SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
+ EVEX_CD8<8, CD8VF>;
+defm VPCMPUB : avx512_icmp_cc_vl<0x3E, "ub", X86pcmpum, X86pcmpum_commute,
+ SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
+ EVEX_CD8<8, CD8VF>;
+
+defm VPCMPW : avx512_icmp_cc_vl<0x3F, "w", X86pcmpm, X86pcmpm_commute,
+ SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
VEX_W, EVEX_CD8<16, CD8VF>;
-defm VPCMPUW : avx512_icmp_cc_vl<0x3E, "uw", X86cmpmu, SchedWriteVecALU,
- avx512vl_i16_info, HasBWI>,
+defm VPCMPUW : avx512_icmp_cc_vl<0x3E, "uw", X86pcmpum, X86pcmpum_commute,
+ SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
VEX_W, EVEX_CD8<16, CD8VF>;
-defm VPCMPD : avx512_icmp_cc_rmb_vl<0x1F, "d", X86cmpm, SchedWriteVecALU,
- avx512vl_i32_info, HasAVX512>,
- EVEX_CD8<32, CD8VF>;
-defm VPCMPUD : avx512_icmp_cc_rmb_vl<0x1E, "ud", X86cmpmu, SchedWriteVecALU,
- avx512vl_i32_info, HasAVX512>,
- EVEX_CD8<32, CD8VF>;
+defm VPCMPD : avx512_icmp_cc_rmb_vl<0x1F, "d", X86pcmpm, X86pcmpm_commute,
+ SchedWriteVecALU, avx512vl_i32_info,
+ HasAVX512>, EVEX_CD8<32, CD8VF>;
+defm VPCMPUD : avx512_icmp_cc_rmb_vl<0x1E, "ud", X86pcmpum, X86pcmpum_commute,
+ SchedWriteVecALU, avx512vl_i32_info,
+ HasAVX512>, EVEX_CD8<32, CD8VF>;
-defm VPCMPQ : avx512_icmp_cc_rmb_vl<0x1F, "q", X86cmpm, SchedWriteVecALU,
- avx512vl_i64_info, HasAVX512>,
- VEX_W, EVEX_CD8<64, CD8VF>;
-defm VPCMPUQ : avx512_icmp_cc_rmb_vl<0x1E, "uq", X86cmpmu, SchedWriteVecALU,
- avx512vl_i64_info, HasAVX512>,
- VEX_W, EVEX_CD8<64, CD8VF>;
+defm VPCMPQ : avx512_icmp_cc_rmb_vl<0x1F, "q", X86pcmpm, X86pcmpm_commute,
+ SchedWriteVecALU, avx512vl_i64_info,
+ HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>;
+defm VPCMPUQ : avx512_icmp_cc_rmb_vl<0x1E, "uq", X86pcmpum, X86pcmpum_commute,
+ SchedWriteVecALU, avx512vl_i64_info,
+ HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>;
multiclass avx512_vcmp_common<X86FoldableSchedWrite sched, X86VectorVTInfo _,
string Name> {
@@ -3085,6 +3135,7 @@ multiclass avx512_mask_shiftop_w<bits<8> opc1, bits<8> opc2, string OpcodeStr,
defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86kshiftl, WriteShuffle>;
defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86kshiftr, WriteShuffle>;
+// Patterns for comparing 128/256-bit integer vectors using 512-bit instruction.
multiclass axv512_icmp_packed_no_vlx_lowering<PatFrag Frag, string InstStr,
X86VectorVTInfo Narrow,
X86VectorVTInfo Wide> {
@@ -3107,9 +3158,34 @@ multiclass axv512_icmp_packed_no_vlx_lowering<PatFrag Frag, string InstStr,
Narrow.KRC)>;
}
-multiclass axv512_icmp_packed_cc_no_vlx_lowering<SDNode OpNode, string InstStr,
+// Patterns for comparing 128/256-bit integer vectors using 512-bit instruction.
+multiclass axv512_icmp_packed_cc_no_vlx_lowering<PatFrag Frag,
+ string InstStr,
X86VectorVTInfo Narrow,
X86VectorVTInfo Wide> {
+def : Pat<(Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1),
+ (Narrow.VT Narrow.RC:$src2), cond)),
+ (COPY_TO_REGCLASS
+ (!cast<Instruction>(InstStr##Zrri)
+ (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
+ (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
+ (Frag.OperandTransform $cc)), Narrow.KRC)>;
+
+def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
+ (Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1),
+ (Narrow.VT Narrow.RC:$src2),
+ cond)))),
+ (COPY_TO_REGCLASS (!cast<Instruction>(InstStr##Zrrik)
+ (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
+ (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
+ (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
+ (Frag.OperandTransform $cc)), Narrow.KRC)>;
+}
+
+// Same as above, but for fp types which don't use PatFrags.
+multiclass axv512_cmp_packed_cc_no_vlx_lowering<SDNode OpNode, string InstStr,
+ X86VectorVTInfo Narrow,
+ X86VectorVTInfo Wide> {
def : Pat<(Narrow.KVT (OpNode (Narrow.VT Narrow.RC:$src1),
(Narrow.VT Narrow.RC:$src2), imm:$cc)),
(COPY_TO_REGCLASS
@@ -3129,6 +3205,9 @@ def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
}
let Predicates = [HasAVX512, NoVLX] in {
+ // AddedComplexity is needed because the explicit SETEQ/SETGT CondCode doesn't
+ // increase the pattern complexity the way an immediate would.
+ let AddedComplexity = 2 in {
defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTD", v8i32x_info, v16i32_info>;
defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQD", v8i32x_info, v16i32_info>;
@@ -3140,25 +3219,30 @@ let Predicates = [HasAVX512, NoVLX] in {
defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTQ", v2i64x_info, v8i64_info>;
defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQQ", v2i64x_info, v8i64_info>;
+ }
- defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpm, "VCMPPS", v8f32x_info, v16f32_info>;
- defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpm, "VPCMPD", v8i32x_info, v16i32_info>;
- defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpmu, "VPCMPUD", v8i32x_info, v16i32_info>;
+ defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, "VPCMPD", v8i32x_info, v16i32_info>;
+ defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, "VPCMPUD", v8i32x_info, v16i32_info>;
- defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpm, "VCMPPS", v4f32x_info, v16f32_info>;
- defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpm, "VPCMPD", v4i32x_info, v16i32_info>;
- defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpmu, "VPCMPUD", v4i32x_info, v16i32_info>;
+ defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, "VPCMPD", v4i32x_info, v16i32_info>;
+ defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, "VPCMPUD", v4i32x_info, v16i32_info>;
- defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpm, "VCMPPD", v4f64x_info, v8f64_info>;
- defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpm, "VPCMPQ", v4i64x_info, v8i64_info>;
- defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpmu, "VPCMPUQ", v4i64x_info, v8i64_info>;
+ defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, "VPCMPQ", v4i64x_info, v8i64_info>;
+ defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, "VPCMPUQ", v4i64x_info, v8i64_info>;
- defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpm, "VCMPPD", v2f64x_info, v8f64_info>;
- defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpm, "VPCMPQ", v2i64x_info, v8i64_info>;
- defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpmu, "VPCMPUQ", v2i64x_info, v8i64_info>;
+ defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, "VPCMPQ", v2i64x_info, v8i64_info>;
+ defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, "VPCMPUQ", v2i64x_info, v8i64_info>;
+
+ defm : axv512_cmp_packed_cc_no_vlx_lowering<X86cmpm, "VCMPPS", v8f32x_info, v16f32_info>;
+ defm : axv512_cmp_packed_cc_no_vlx_lowering<X86cmpm, "VCMPPS", v4f32x_info, v16f32_info>;
+ defm : axv512_cmp_packed_cc_no_vlx_lowering<X86cmpm, "VCMPPD", v4f64x_info, v8f64_info>;
+ defm : axv512_cmp_packed_cc_no_vlx_lowering<X86cmpm, "VCMPPD", v2f64x_info, v8f64_info>;
}
let Predicates = [HasBWI, NoVLX] in {
+ // AddedComplexity is needed because the explicit SETEQ/SETGT CondCode doesn't
+ // increase the pattern complexity the way an immediate would.
+ let AddedComplexity = 2 in {
defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTB", v32i8x_info, v64i8_info>;
defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQB", v32i8x_info, v64i8_info>;
@@ -3170,18 +3254,19 @@ let Predicates = [HasBWI, NoVLX] in {
defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTW", v8i16x_info, v32i16_info>;
defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQW", v8i16x_info, v32i16_info>;
+ }
- defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpm, "VPCMPB", v32i8x_info, v64i8_info>;
- defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpmu, "VPCMPUB", v32i8x_info, v64i8_info>;
+ defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, "VPCMPB", v32i8x_info, v64i8_info>;
+ defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, "VPCMPUB", v32i8x_info, v64i8_info>;
- defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpm, "VPCMPB", v16i8x_info, v64i8_info>;
- defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpmu, "VPCMPUB", v16i8x_info, v64i8_info>;
+ defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, "VPCMPB", v16i8x_info, v64i8_info>;
+ defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, "VPCMPUB", v16i8x_info, v64i8_info>;
- defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpm, "VPCMPW", v16i16x_info, v32i16_info>;
- defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpmu, "VPCMPUW", v16i16x_info, v32i16_info>;
+ defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, "VPCMPW", v16i16x_info, v32i16_info>;
+ defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, "VPCMPUW", v16i16x_info, v32i16_info>;
- defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpm, "VPCMPW", v8i16x_info, v32i16_info>;
- defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpmu, "VPCMPUW", v8i16x_info, v32i16_info>;
+ defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, "VPCMPW", v8i16x_info, v32i16_info>;
+ defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, "VPCMPUW", v8i16x_info, v32i16_info>;
}
// Mask setting all 0s or 1s
@@ -5701,9 +5786,9 @@ multiclass avx512_vptest_wb<bits<8> opc, string OpcodeStr,
// as commutable here because we already canonicalized all zeros vectors to the
// RHS during lowering.
def X86pcmpeqm : PatFrag<(ops node:$src1, node:$src2),
- (X86cmpm node:$src1, node:$src2, (i8 0))>;
+ (setcc node:$src1, node:$src2, SETEQ)>;
def X86pcmpnem : PatFrag<(ops node:$src1, node:$src2),
- (X86cmpm node:$src1, node:$src2, (i8 4))>;
+ (setcc node:$src1, node:$src2, SETNE)>;
multiclass avx512_vptest_all_forms<bits<8> opc_wb, bits<8> opc_dq, string OpcodeStr,
PatFrag OpNode, X86SchedWriteWidths sched> :
OpenPOWER on IntegriCloud