Diffstat (limited to 'llvm/lib')
-rw-r--r--  llvm/lib/Target/X86/AsmParser/X86Operand.h                   | 49
-rw-r--r--  llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp         |  1
-rw-r--r--  llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp  |  4
-rw-r--r--  llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h    |  7
-rw-r--r--  llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp    | 25
-rw-r--r--  llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.h      |  1
-rw-r--r--  llvm/lib/Target/X86/X86.td                                   |  4
-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.cpp                      | 23
-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.h                        |  3
-rw-r--r--  llvm/lib/Target/X86/X86InstrAVX512.td                        | 57
-rw-r--r--  llvm/lib/Target/X86/X86InstrFragmentsSIMD.td                 |  4
-rw-r--r--  llvm/lib/Target/X86/X86InstrInfo.cpp                         |  8
-rw-r--r--  llvm/lib/Target/X86/X86InstrInfo.td                          | 28
-rw-r--r--  llvm/lib/Target/X86/X86MCInstLower.cpp                       | 71
-rw-r--r--  llvm/lib/Target/X86/X86RegisterInfo.td                       | 12
-rw-r--r--  llvm/lib/Target/X86/X86Subtarget.h                           |  4
16 files changed, 301 insertions, 0 deletions
diff --git a/llvm/lib/Target/X86/AsmParser/X86Operand.h b/llvm/lib/Target/X86/AsmParser/X86Operand.h
index 24715f7070d..a771ba36631 100644
--- a/llvm/lib/Target/X86/AsmParser/X86Operand.h
+++ b/llvm/lib/Target/X86/AsmParser/X86Operand.h
@@ -451,6 +451,31 @@ struct X86Operand final : public MCParsedAsmOperand {
X86MCRegisterClasses[X86::GR64RegClassID].contains(getReg()));
}
+ bool isVK1Pair() const {
+ return Kind == Register &&
+ X86MCRegisterClasses[X86::VK1RegClassID].contains(getReg());
+ }
+
+ bool isVK2Pair() const {
+ return Kind == Register &&
+ X86MCRegisterClasses[X86::VK2RegClassID].contains(getReg());
+ }
+
+ bool isVK4Pair() const {
+ return Kind == Register &&
+ X86MCRegisterClasses[X86::VK4RegClassID].contains(getReg());
+ }
+
+ bool isVK8Pair() const {
+ return Kind == Register &&
+ X86MCRegisterClasses[X86::VK8RegClassID].contains(getReg());
+ }
+
+ bool isVK16Pair() const {
+ return Kind == Register &&
+ X86MCRegisterClasses[X86::VK16RegClassID].contains(getReg());
+ }
+
void addExpr(MCInst &Inst, const MCExpr *Expr) const {
// Add as immediates when possible.
if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr))
@@ -482,6 +507,30 @@ struct X86Operand final : public MCParsedAsmOperand {
addExpr(Inst, getImm());
}
+ void addMaskPairOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ unsigned Reg = getReg();
+ switch (Reg) {
+ case X86::K0:
+ case X86::K1:
+ Reg = X86::K0_K1;
+ break;
+ case X86::K2:
+ case X86::K3:
+ Reg = X86::K2_K3;
+ break;
+ case X86::K4:
+ case X86::K5:
+ Reg = X86::K4_K5;
+ break;
+ case X86::K6:
+ case X86::K7:
+ Reg = X86::K6_K7;
+ break;
+ }
+ Inst.addOperand(MCOperand::createReg(Reg));
+ }
+
void addMemOperands(MCInst &Inst, unsigned N) const {
assert((N == 5) && "Invalid number of operands!");
Inst.addOperand(MCOperand::createReg(getMemBaseReg()));
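
A minimal C++ sketch of the pairing rule that addMaskPairOperands encodes: each pair covers one even-numbered mask register and the odd register that follows it, so either member of a pair selects the same pair operand. This is an illustration only, working on plain 0-7 indices; the real code has to switch over the X86:: register enum, whose values are not arranged for this kind of arithmetic.

  // Either member of {k2, k3} denotes the K2_K3 pair, and so on.
  // KIndex is the numeric suffix of the parsed mask register, 0..7.
  unsigned pairBaseIndex(unsigned KIndex) {
    return KIndex & ~1u;  // 0/1 -> 0, 2/3 -> 2, 4/5 -> 4, 6/7 -> 6
  }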
diff --git a/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp b/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp
index 9fa4aabab77..9a635bbe5f8 100644
--- a/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp
+++ b/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp
@@ -694,6 +694,7 @@ static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand,
case TYPE_XMM:
case TYPE_YMM:
case TYPE_ZMM:
+ case TYPE_VK_PAIR:
case TYPE_VK:
case TYPE_DEBUGREG:
case TYPE_CONTROLREG:
diff --git a/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp b/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp
index b5789b5d837..76aadc79402 100644
--- a/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp
+++ b/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp
@@ -1468,6 +1468,10 @@ static int readModRM(struct InternalInstruction* insn) {
if (index > 7) \
*valid = 0; \
return prefix##_K0 + index; \
+ case TYPE_VK_PAIR: \
+ if (index > 7) \
+ *valid = 0; \
+ return prefix##_K0_K1 + (index / 2); \
case TYPE_MM64: \
return prefix##_MM0 + (index & 0x7); \
case TYPE_SEGMENTREG: \
diff --git a/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h b/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
index 8e0749d10cf..7c0a42c019e 100644
--- a/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
+++ b/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
@@ -324,6 +324,12 @@ namespace X86Disassembler {
ENTRY(K6) \
ENTRY(K7)
+#define REGS_MASK_PAIRS \
+ ENTRY(K0_K1) \
+ ENTRY(K2_K3) \
+ ENTRY(K4_K5) \
+ ENTRY(K6_K7)
+
#define REGS_SEGMENT \
ENTRY(ES) \
ENTRY(CS) \
@@ -393,6 +399,7 @@ namespace X86Disassembler {
REGS_YMM \
REGS_ZMM \
REGS_MASKS \
+ REGS_MASK_PAIRS \
REGS_SEGMENT \
REGS_DEBUG \
REGS_CONTROL \
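
The four REGS_MASK_PAIRS entries end up consecutive in the disassembler's register enum, and that adjacency is what makes the "prefix##_K0_K1 + (index / 2)" computation in readModRM valid. A hedged, trimmed C++ sketch of the idea, using local macro and enum names rather than the full definitions from this header:

  #define MASK_PAIR_REGS \
    ENTRY(K0_K1)         \
    ENTRY(K2_K3)         \
    ENTRY(K4_K5)         \
    ENTRY(K6_K7)

  enum PairReg {
  #define ENTRY(x) MODRM_REG_##x,  // consecutive values 0, 1, 2, 3
    MASK_PAIR_REGS
  #undef ENTRY
  };

  // A ModRM register index 0..7 selects a pair by halving.
  inline PairReg decodeMaskPair(unsigned Index) {
    return PairReg(MODRM_REG_K0_K1 + (Index / 2));
  }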
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp
index 878ce590550..a2155507697 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp
@@ -335,3 +335,28 @@ void X86InstPrinterCommon::printInstFlags(const MCInst *MI, raw_ostream &O) {
else if (Flags & X86::IP_HAS_REPEAT)
O << "\trep\t";
}
+
+void X86InstPrinterCommon::printVKPair(const MCInst *MI, unsigned OpNo,
+ raw_ostream &OS) {
+ // In assembly listings a pair is represented by one of its two members;
+ // here we pick k0, k2, k4, k6, though printing K2_K3 as "k3" would be
+ // just as valid. It would arguably be clearer if the assembly looked
+ // something like:
+ //   "vp2intersect %zmm5, %zmm7, {%k2, %k3}"
+ // but this convention works too.
+ switch (MI->getOperand(OpNo).getReg()) {
+ case X86::K0_K1:
+ printRegName(OS, X86::K0);
+ return;
+ case X86::K2_K3:
+ printRegName(OS, X86::K2);
+ return;
+ case X86::K4_K5:
+ printRegName(OS, X86::K4);
+ return;
+ case X86::K6_K7:
+ printRegName(OS, X86::K6);
+ return;
+ }
+ llvm_unreachable("Unknown mask pair register name");
+}
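
The printed name is the inverse of the asm-parser mapping above: the even member stands in for the pair, and feeding that text back through addMaskPairOperands reproduces the same pair register, so a destination of K2_K3 is listed as "%k2" even though both %k2 and %k3 are written. A small hedged sketch of the convention as a plain function (illustration only, not the real printRegName plumbing):

  // Pair 0..3 (K0_K1 .. K6_K7) is displayed as its even member.
  const char *pairDisplayName(unsigned PairIdx) {
    static const char *const Names[] = {"%k0", "%k2", "%k4", "%k6"};
    return PairIdx < 4 ? Names[PairIdx] : nullptr;
  }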
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.h b/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.h
index 03c761dbcb7..8e28f24b619 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.h
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.h
@@ -33,6 +33,7 @@ public:
protected:
void printInstFlags(const MCInst *MI, raw_ostream &O);
void printOptionalSegReg(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printVKPair(const MCInst *MI, unsigned OpNo, raw_ostream &OS);
};
} // end namespace llvm
diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td
index 4148a445825..52d90d711c0 100644
--- a/llvm/lib/Target/X86/X86.td
+++ b/llvm/lib/Target/X86/X86.td
@@ -173,6 +173,10 @@ def FeatureBF16 : SubtargetFeature<"avx512bf16", "HasBF16", "true",
def FeatureBITALG : SubtargetFeature<"avx512bitalg", "HasBITALG", "true",
"Enable AVX-512 Bit Algorithms",
[FeatureBWI]>;
+def FeatureVP2INTERSECT : SubtargetFeature<"avx512vp2intersect",
+ "HasVP2INTERSECT", "true",
+ "Enable AVX-512 vp2intersect",
+ [FeatureAVX512]>;
def FeaturePCLMUL : SubtargetFeature<"pclmul", "HasPCLMUL", "true",
"Enable packed carry-less multiplication instructions",
[FeatureSSE2]>;
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index f97907fb7e5..253f4487976 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -22944,6 +22944,28 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
}
return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT);
}
+
+ case Intrinsic::x86_avx512_vp2intersect_q_512:
+ case Intrinsic::x86_avx512_vp2intersect_q_256:
+ case Intrinsic::x86_avx512_vp2intersect_q_128:
+ case Intrinsic::x86_avx512_vp2intersect_d_512:
+ case Intrinsic::x86_avx512_vp2intersect_d_256:
+ case Intrinsic::x86_avx512_vp2intersect_d_128: {
+ MVT MaskVT = Op.getSimpleValueType();
+
+ SDVTList VTs = DAG.getVTList(MVT::Untyped, MVT::Other);
+ SDLoc DL(Op);
+
+ SDValue Operation =
+ DAG.getNode(X86ISD::VP2INTERSECT, DL, VTs,
+ Op->getOperand(1), Op->getOperand(2));
+
+ SDValue Result0 = DAG.getTargetExtractSubreg(X86::sub_mask_0, DL,
+ MaskVT, Operation);
+ SDValue Result1 = DAG.getTargetExtractSubreg(X86::sub_mask_1, DL,
+ MaskVT, Operation);
+ return DAG.getMergeValues({Result0, Result1}, DL);
+ }
}
}
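
For context, this lowering is what the user-level intrinsics sit on top of: a single call returns two masks, the intrinsic becomes one X86ISD::VP2INTERSECT node that defines an untyped mask-pair register, and the two results are then pulled out through sub_mask_0 and sub_mask_1. A minimal C++ usage sketch, assuming the usual Intel intrinsic name _mm512_2intersect_epi32 from <immintrin.h> and a flag along the lines of -mavx512vp2intersect:

  #include <immintrin.h>

  // ka gets a bit per lane of va that matches some lane of vb;
  // kb gets a bit per lane of vb that matches some lane of va.
  void intersect16(const void *a, const void *b,
                   __mmask16 *ka, __mmask16 *kb) {
    __m512i va = _mm512_loadu_si512(a);
    __m512i vb = _mm512_loadu_si512(b);
    _mm512_2intersect_epi32(va, vb, ka, kb);
  }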
@@ -28284,6 +28306,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::TPAUSE: return "X86ISD::TPAUSE";
case X86ISD::ENQCMD: return "X86ISD:ENQCMD";
case X86ISD::ENQCMDS: return "X86ISD:ENQCMDS";
+ case X86ISD::VP2INTERSECT: return "X86ISD::VP2INTERSECT";
}
return nullptr;
}
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index a3ebe1001e3..7eed866614a 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -592,6 +592,9 @@ namespace llvm {
// Enqueue Stores Instructions
ENQCMD, ENQCMDS,
+ // For avx512-vp2intersect
+ VP2INTERSECT,
+
// Compare and swap.
LCMPXCHG_DAG = ISD::FIRST_TARGET_MEMORY_OPCODE,
LCMPXCHG8_DAG,
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 97e696981b1..20380bb8448 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -26,6 +26,10 @@ class X86VectorVTInfo<int numelts, ValueType eltvt, RegisterClass rc,
// Corresponding mask register class.
RegisterClass KRC = !cast<RegisterClass>("VK" # NumElts);
+ // Corresponding mask register pair class.
+ RegisterOperand KRPC = !if (!gt(NumElts, 16), ?,
+ !cast<RegisterOperand>("VK" # NumElts # "Pair"));
+
// Corresponding write-mask register class.
RegisterClass KRCWM = !cast<RegisterClass>("VK" # NumElts # "WM");
@@ -12556,6 +12560,59 @@ defm VP4DPWSSDSrm : AVX512_maskable_3src_in_asm<0x53, MRMSrcMem, v16i32_info,
Sched<[SchedWriteFMA.ZMM.Folded]>;
}
+let hasSideEffects = 0 in {
+ def MASKPAIR16STORE : PseudoI<(outs), (ins VK16PAIR:$src, anymem:$dst),
+ [(store VK16PAIR:$src, addr:$dst)]>;
+ def MASKPAIR16LOAD : PseudoI<(outs VK16PAIR:$dst), (ins anymem:$src),
+ [(set VK16PAIR:$dst, (load addr:$src))]>;
+}
+
+//===----------------------------------------------------------------------===//
+// VP2INTERSECT
+//===----------------------------------------------------------------------===//
+
+multiclass avx512_vp2intersect_modes<X86VectorVTInfo _> {
+ def rr : I<0x68, MRMSrcReg,
+ (outs _.KRPC:$dst),
+ (ins _.RC:$src1, _.RC:$src2),
+ !strconcat("vp2intersect", _.Suffix,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set _.KRPC:$dst, (X86vp2intersect
+ _.RC:$src1, (_.VT _.RC:$src2)))]>,
+ EVEX_4V, T8XD;
+
+ def rm : I<0x68, MRMSrcMem,
+ (outs _.KRPC:$dst),
+ (ins _.RC:$src1, _.MemOp:$src2),
+ !strconcat("vp2intersect", _.Suffix,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set _.KRPC:$dst, (X86vp2intersect
+ _.RC:$src1, (_.VT (bitconvert (_.LdFrag addr:$src2)))))]>,
+ EVEX_4V, T8XD, EVEX_CD8<_.EltSize, CD8VF>;
+
+ def rmb : I<0x68, MRMSrcMem,
+ (outs _.KRPC:$dst),
+ (ins _.RC:$src1, _.ScalarMemOp:$src2),
+ !strconcat("vp2intersect", _.Suffix, "\t{${src2}", _.BroadcastStr,
+ ", $src1, $dst|$dst, $src1, ${src2}", _.BroadcastStr ,"}"),
+ [(set _.KRPC:$dst, (X86vp2intersect
+ _.RC:$src1, (_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src2)))))]>,
+ EVEX_4V, T8XD, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>;
+}
+
+multiclass avx512_vp2intersect<AVX512VLVectorVTInfo _> {
+ let Predicates = [HasAVX512, HasVP2INTERSECT] in
+ defm Z : avx512_vp2intersect_modes<_.info512>, EVEX_V512;
+
+ let Predicates = [HasAVX512, HasVP2INTERSECT, HasVLX] in {
+ defm Z256 : avx512_vp2intersect_modes<_.info256>, EVEX_V256;
+ defm Z128 : avx512_vp2intersect_modes<_.info128>, EVEX_V128;
+ }
+}
+
+defm VP2INTERSECTD : avx512_vp2intersect<avx512vl_i32_info>;
+defm VP2INTERSECTQ : avx512_vp2intersect<avx512vl_i64_info>, VEX_W;
+
multiclass avx512_binop_all2<bits<8> opc, string OpcodeStr,
X86SchedWriteWidths sched,
AVX512VLVectorVTInfo _SrcVTInfo,
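
The multiclass above generates register, memory, and embedded-broadcast forms at 512-bit width, with 256-bit and 128-bit variants gated on VLX. A hedged C++ sketch of how the narrower forms are expected to surface to users, assuming the Intel intrinsics _mm256_2intersect_epi32 and _mm_2intersect_epi64 (avx512vp2intersect together with avx512vl):

  #include <immintrin.h>

  void vl_forms(__m256i a, __m256i b, __m128i c, __m128i d,
                __mmask8 *m0, __mmask8 *m1, __mmask8 *m2, __mmask8 *m3) {
    _mm256_2intersect_epi32(a, b, m0, m1);  // expected to use the Z256 defs
    _mm_2intersect_epi64(c, d, m2, m3);     // expected to use the Z128 defs
  }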
diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
index 8a6f09f5cac..50d81fcaf83 100644
--- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -505,6 +505,10 @@ def X86FnmsubRnd : SDNode<"X86ISD::FNMSUB_RND", SDTFmaRound, [SDNPCommutat
def X86FmaddsubRnd : SDNode<"X86ISD::FMADDSUB_RND", SDTFmaRound, [SDNPCommutative]>;
def X86FmsubaddRnd : SDNode<"X86ISD::FMSUBADD_RND", SDTFmaRound, [SDNPCommutative]>;
+def X86vp2intersect : SDNode<"X86ISD::VP2INTERSECT",
+ SDTypeProfile<1, 2, [SDTCisVT<0, untyped>,
+ SDTCisVec<1>, SDTCisSameAs<1, 2>]>>;
+
def SDTIFma : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<0,1>,
SDTCisSameAs<1,2>, SDTCisSameAs<1,3>]>;
def x86vpmadd52l : SDNode<"X86ISD::VPMADD52L", SDTIFma, [SDNPCommutative]>;
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index 20d3cf0d927..124ad5dfdf4 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -2877,6 +2877,14 @@ static unsigned getLoadStoreRegOpcode(unsigned Reg,
assert(STI.hasBWI() && "KMOVD requires BWI");
return load ? X86::KMOVDkm : X86::KMOVDmk;
}
+ // All of these mask pair classes have the same spill size; the same kind
+ // of kmov instructions can be used with all of them.
+ if (X86::VK1PAIRRegClass.hasSubClassEq(RC) ||
+ X86::VK2PAIRRegClass.hasSubClassEq(RC) ||
+ X86::VK4PAIRRegClass.hasSubClassEq(RC) ||
+ X86::VK8PAIRRegClass.hasSubClassEq(RC) ||
+ X86::VK16PAIRRegClass.hasSubClassEq(RC))
+ return load ? X86::MASKPAIR16LOAD : X86::MASKPAIR16STORE;
llvm_unreachable("Unknown 4-byte regclass");
case 8:
if (X86::GR64RegClass.hasSubClassEq(RC))
diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td
index 98af217ebcf..85255096a7d 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.td
+++ b/llvm/lib/Target/X86/X86InstrInfo.td
@@ -757,6 +757,33 @@ def lea64mem : Operand<i64> {
let ParserMatchClass = X86MemAsmOperand;
}
+let RenderMethod = "addMaskPairOperands" in {
+ def VK1PairAsmOperand : AsmOperandClass { let Name = "VK1Pair"; }
+ def VK2PairAsmOperand : AsmOperandClass { let Name = "VK2Pair"; }
+ def VK4PairAsmOperand : AsmOperandClass { let Name = "VK4Pair"; }
+ def VK8PairAsmOperand : AsmOperandClass { let Name = "VK8Pair"; }
+ def VK16PairAsmOperand : AsmOperandClass { let Name = "VK16Pair"; }
+}
+
+def VK1Pair : RegisterOperand<VK1PAIR, "printVKPair"> {
+ let ParserMatchClass = VK1PairAsmOperand;
+}
+
+def VK2Pair : RegisterOperand<VK2PAIR, "printVKPair"> {
+ let ParserMatchClass = VK2PairAsmOperand;
+}
+
+def VK4Pair : RegisterOperand<VK4PAIR, "printVKPair"> {
+ let ParserMatchClass = VK4PairAsmOperand;
+}
+
+def VK8Pair : RegisterOperand<VK8PAIR, "printVKPair"> {
+ let ParserMatchClass = VK8PairAsmOperand;
+}
+
+def VK16Pair : RegisterOperand<VK16PAIR, "printVKPair"> {
+ let ParserMatchClass = VK16PairAsmOperand;
+}
//===----------------------------------------------------------------------===//
// X86 Complex Pattern Definitions.
@@ -843,6 +870,7 @@ def NoVLX_Or_NoBWI : Predicate<"!Subtarget->hasVLX() || !Subtarget->hasBWI()">;
def NoVLX_Or_NoDQI : Predicate<"!Subtarget->hasVLX() || !Subtarget->hasDQI()">;
def PKU : Predicate<"Subtarget->hasPKU()">;
def HasVNNI : Predicate<"Subtarget->hasVNNI()">;
+def HasVP2INTERSECT : Predicate<"Subtarget->hasVP2INTERSECT()">;
def HasBF16 : Predicate<"Subtarget->hasBF16()">;
def HasBITALG : Predicate<"Subtarget->hasBITALG()">;
diff --git a/llvm/lib/Target/X86/X86MCInstLower.cpp b/llvm/lib/Target/X86/X86MCInstLower.cpp
index 6ed9a533d51..9de2d18e0be 100644
--- a/llvm/lib/Target/X86/X86MCInstLower.cpp
+++ b/llvm/lib/Target/X86/X86MCInstLower.cpp
@@ -1680,6 +1680,77 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
case X86::TLS_base_addr64:
return LowerTlsAddr(MCInstLowering, *MI);
+ // Loading or storing a mask pair requires two kmov operations. The second
+ // one needs a 2-byte displacement relative to the specified address (the
+ // spill size is 32 bits). Pairs of masks from 1 bit up to 16 bits all share
+ // the same spill size, so they are all stored with MASKPAIR16STORE and
+ // loaded with MASKPAIR16LOAD.
+ //
+ // In theory the displacement could wrap around, hence the asserts in both
+ // cases.
+ case X86::MASKPAIR16LOAD: {
+ int64_t Disp = MI->getOperand(1 + X86::AddrDisp).getImm();
+ assert(Disp >= 0 && Disp <= INT32_MAX - 2 && "Unexpected displacement");
+ const X86RegisterInfo *RI =
+ MF->getSubtarget<X86Subtarget>().getRegisterInfo();
+ unsigned Reg = MI->getOperand(0).getReg();
+ unsigned Reg0 = RI->getSubReg(Reg, X86::sub_mask_0);
+ unsigned Reg1 = RI->getSubReg(Reg, X86::sub_mask_1);
+
+ // Load the first mask register
+ MCInstBuilder MIB = MCInstBuilder(X86::KMOVWkm);
+ MIB.addReg(Reg0);
+ for (int i = 0; i < X86::AddrNumOperands; ++i) {
+ auto Op = MCInstLowering.LowerMachineOperand(MI, MI->getOperand(1 + i));
+ MIB.addOperand(Op.getValue());
+ }
+ EmitAndCountInstruction(MIB);
+
+ // Load the second mask register of the pair
+ MIB = MCInstBuilder(X86::KMOVWkm);
+ MIB.addReg(Reg1);
+ for (int i = 0; i < X86::AddrNumOperands; ++i) {
+ if (i == X86::AddrDisp) {
+ MIB.addImm(Disp + 2);
+ } else {
+ auto Op = MCInstLowering.LowerMachineOperand(MI, MI->getOperand(1 + i));
+ MIB.addOperand(Op.getValue());
+ }
+ }
+ EmitAndCountInstruction(MIB);
+ return;
+ }
+
+ case X86::MASKPAIR16STORE: {
+ int64_t Disp = MI->getOperand(X86::AddrDisp).getImm();
+ assert(Disp >= 0 && Disp <= INT32_MAX - 2 && "Unexpected displacement");
+ const X86RegisterInfo *RI =
+ MF->getSubtarget<X86Subtarget>().getRegisterInfo();
+ unsigned Reg = MI->getOperand(X86::AddrNumOperands).getReg();
+ unsigned Reg0 = RI->getSubReg(Reg, X86::sub_mask_0);
+ unsigned Reg1 = RI->getSubReg(Reg, X86::sub_mask_1);
+
+ // Store the first mask register
+ MCInstBuilder MIB = MCInstBuilder(X86::KMOVWmk);
+ for (int i = 0; i < X86::AddrNumOperands; ++i)
+ MIB.addOperand(MCInstLowering.LowerMachineOperand(MI, MI->getOperand(i)).getValue());
+ MIB.addReg(Reg0);
+ EmitAndCountInstruction(MIB);
+
+ // Store the second mask register of the pair
+ MIB = MCInstBuilder(X86::KMOVWmk);
+ for (int i = 0; i < X86::AddrNumOperands; ++i) {
+ if (i == X86::AddrDisp) {
+ MIB.addImm(Disp + 2);
+ } else {
+ auto Op = MCInstLowering.LowerMachineOperand(MI, MI->getOperand(0 + i));
+ MIB.addOperand(Op.getValue());
+ }
+ }
+ MIB.addReg(Reg1);
+ EmitAndCountInstruction(MIB);
+ return;
+ }
+
case X86::MOVPC32r: {
// This is a pseudo op for a two instruction sequence with a label, which
// looks like:
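
Concretely, the expansion above turns one pseudo into two KMOVW instructions whose memory operands differ only by a +2 displacement for the odd member of the pair. As a hedged illustration, assuming a spill slot for the K0_K1 pair at 16(%rsp), the store expands to roughly

  kmovw  %k0, 16(%rsp)
  kmovw  %k1, 18(%rsp)

and the reload emits the two matching kmovw loads from the same addresses.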
diff --git a/llvm/lib/Target/X86/X86RegisterInfo.td b/llvm/lib/Target/X86/X86RegisterInfo.td
index c0acff9c8c3..0528b90c1fd 100644
--- a/llvm/lib/Target/X86/X86RegisterInfo.td
+++ b/llvm/lib/Target/X86/X86RegisterInfo.td
@@ -28,6 +28,8 @@ let Namespace = "X86" in {
def sub_32bit : SubRegIndex<32>;
def sub_xmm : SubRegIndex<128>;
def sub_ymm : SubRegIndex<256>;
+ def sub_mask_0 : SubRegIndex<-1>;
+ def sub_mask_1 : SubRegIndex<-1, -1>;
}
//===----------------------------------------------------------------------===//
@@ -594,6 +596,16 @@ def VK16 : RegisterClass<"X86", [v16i1], 16, (add VK8)> {let Size = 16;}
def VK32 : RegisterClass<"X86", [v32i1], 32, (add VK16)> {let Size = 32;}
def VK64 : RegisterClass<"X86", [v64i1], 64, (add VK32)> {let Size = 64;}
+// Mask register pairs
+def KPAIRS : RegisterTuples<[sub_mask_0, sub_mask_1],
+ [(add K0, K2, K4, K6), (add K1, K3, K5, K7)]>;
+
+def VK1PAIR : RegisterClass<"X86", [untyped], 16, (add KPAIRS)> {let Size = 32;}
+def VK2PAIR : RegisterClass<"X86", [untyped], 16, (add KPAIRS)> {let Size = 32;}
+def VK4PAIR : RegisterClass<"X86", [untyped], 16, (add KPAIRS)> {let Size = 32;}
+def VK8PAIR : RegisterClass<"X86", [untyped], 16, (add KPAIRS)> {let Size = 32;}
+def VK16PAIR : RegisterClass<"X86", [untyped], 16, (add KPAIRS)> {let Size = 32;}
+
def VK1WM : RegisterClass<"X86", [v1i1], 16, (sub VK1, K0)> {let Size = 16;}
def VK2WM : RegisterClass<"X86", [v2i1], 16, (sub VK2, K0)> {let Size = 16;}
def VK4WM : RegisterClass<"X86", [v4i1], 16, (sub VK4, K0)> {let Size = 16;}
diff --git a/llvm/lib/Target/X86/X86Subtarget.h b/llvm/lib/Target/X86/X86Subtarget.h
index 6fefe23182f..9e5613654af 100644
--- a/llvm/lib/Target/X86/X86Subtarget.h
+++ b/llvm/lib/Target/X86/X86Subtarget.h
@@ -362,6 +362,9 @@ protected:
/// Processor has AVX-512 Bit Algorithms instructions
bool HasBITALG = false;
+ /// Processor has AVX-512 vp2intersect instructions
+ bool HasVP2INTERSECT = false;
+
/// Processor supports MPX - Memory Protection Extensions
bool HasMPX = false;
@@ -679,6 +682,7 @@ public:
bool hasPKU() const { return HasPKU; }
bool hasVNNI() const { return HasVNNI; }
bool hasBF16() const { return HasBF16; }
+ bool hasVP2INTERSECT() const { return HasVP2INTERSECT; }
bool hasBITALG() const { return HasBITALG; }
bool hasMPX() const { return HasMPX; }
bool hasSHSTK() const { return HasSHSTK; }