diff options
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Target/X86/AsmParser/X86Operand.h | 49 | ||||
-rw-r--r-- | llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp | 1 | ||||
-rw-r--r-- | llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp | 4 | ||||
-rw-r--r-- | llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h | 7 | ||||
-rw-r--r-- | llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp | 25 | ||||
-rw-r--r-- | llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.h | 1 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86.td | 4 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 23 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.h | 3 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86InstrAVX512.td | 57 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86InstrFragmentsSIMD.td | 4 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86InstrInfo.cpp | 8 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86InstrInfo.td | 28 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86MCInstLower.cpp | 71 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86RegisterInfo.td | 12 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86Subtarget.h | 4 |
16 files changed, 301 insertions, 0 deletions
diff --git a/llvm/lib/Target/X86/AsmParser/X86Operand.h b/llvm/lib/Target/X86/AsmParser/X86Operand.h index 24715f7070d..a771ba36631 100644 --- a/llvm/lib/Target/X86/AsmParser/X86Operand.h +++ b/llvm/lib/Target/X86/AsmParser/X86Operand.h @@ -451,6 +451,31 @@ struct X86Operand final : public MCParsedAsmOperand { X86MCRegisterClasses[X86::GR64RegClassID].contains(getReg())); } + bool isVK1Pair() const { + return Kind == Register && + X86MCRegisterClasses[X86::VK1RegClassID].contains(getReg()); + } + + bool isVK2Pair() const { + return Kind == Register && + X86MCRegisterClasses[X86::VK2RegClassID].contains(getReg()); + } + + bool isVK4Pair() const { + return Kind == Register && + X86MCRegisterClasses[X86::VK4RegClassID].contains(getReg()); + } + + bool isVK8Pair() const { + return Kind == Register && + X86MCRegisterClasses[X86::VK8RegClassID].contains(getReg()); + } + + bool isVK16Pair() const { + return Kind == Register && + X86MCRegisterClasses[X86::VK16RegClassID].contains(getReg()); + } + void addExpr(MCInst &Inst, const MCExpr *Expr) const { // Add as immediates when possible. 
if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr)) @@ -482,6 +507,30 @@ struct X86Operand final : public MCParsedAsmOperand { addExpr(Inst, getImm()); } + void addMaskPairOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + unsigned Reg = getReg(); + switch (Reg) { + case X86::K0: + case X86::K1: + Reg = X86::K0_K1; + break; + case X86::K2: + case X86::K3: + Reg = X86::K2_K3; + break; + case X86::K4: + case X86::K5: + Reg = X86::K4_K5; + break; + case X86::K6: + case X86::K7: + Reg = X86::K6_K7; + break; + } + Inst.addOperand(MCOperand::createReg(Reg)); + } + void addMemOperands(MCInst &Inst, unsigned N) const { assert((N == 5) && "Invalid number of operands!"); Inst.addOperand(MCOperand::createReg(getMemBaseReg())); diff --git a/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp b/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp index 9fa4aabab77..9a635bbe5f8 100644 --- a/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp +++ b/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp @@ -694,6 +694,7 @@ static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand, case TYPE_XMM: case TYPE_YMM: case TYPE_ZMM: + case TYPE_VK_PAIR: case TYPE_VK: case TYPE_DEBUGREG: case TYPE_CONTROLREG: diff --git a/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp b/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp index b5789b5d837..76aadc79402 100644 --- a/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp +++ b/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp @@ -1468,6 +1468,10 @@ static int readModRM(struct InternalInstruction* insn) { if (index > 7) \ *valid = 0; \ return prefix##_K0 + index; \ + case TYPE_VK_PAIR: \ + if (index > 7) \ + *valid = 0; \ + return prefix##_K0_K1 + (index / 2); \ case TYPE_MM64: \ return prefix##_MM0 + (index & 0x7); \ case TYPE_SEGMENTREG: \ diff --git a/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h 
b/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h index 8e0749d10cf..7c0a42c019e 100644 --- a/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h +++ b/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h @@ -324,6 +324,12 @@ namespace X86Disassembler { ENTRY(K6) \ ENTRY(K7) +#define REGS_MASK_PAIRS \ + ENTRY(K0_K1) \ + ENTRY(K2_K3) \ + ENTRY(K4_K5) \ + ENTRY(K6_K7) + #define REGS_SEGMENT \ ENTRY(ES) \ ENTRY(CS) \ @@ -393,6 +399,7 @@ namespace X86Disassembler { REGS_YMM \ REGS_ZMM \ REGS_MASKS \ + REGS_MASK_PAIRS \ REGS_SEGMENT \ REGS_DEBUG \ REGS_CONTROL \ diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp index 878ce590550..a2155507697 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp +++ b/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp @@ -335,3 +335,28 @@ void X86InstPrinterCommon::printInstFlags(const MCInst *MI, raw_ostream &O) { else if (Flags & X86::IP_HAS_REPEAT) O << "\trep\t"; } + +void X86InstPrinterCommon::printVKPair(const MCInst *MI, unsigned OpNo, + raw_ostream &OS) { + // In assembly listings, a pair is represented by either one of its + // members. Here, we pick k0, k2, k4, k6, but we could just as well + // print K2_K3 as "k3". It would probably make more sense if + // the assembly looked something like: + // "vp2intersect %zmm5, %zmm7, {%k2, %k3}" + // but this works too. 
+ switch (MI->getOperand(OpNo).getReg()) { + case X86::K0_K1: + printRegName(OS, X86::K0); + return; + case X86::K2_K3: + printRegName(OS, X86::K2); + return; + case X86::K4_K5: + printRegName(OS, X86::K4); + return; + case X86::K6_K7: + printRegName(OS, X86::K6); + return; + } + llvm_unreachable("Unknown mask pair register name"); +} diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.h b/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.h index 03c761dbcb7..8e28f24b619 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.h +++ b/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.h @@ -33,6 +33,7 @@ public: protected: void printInstFlags(const MCInst *MI, raw_ostream &O); void printOptionalSegReg(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printVKPair(const MCInst *MI, unsigned OpNo, raw_ostream &OS); }; } // end namespace llvm diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td index 4148a445825..52d90d711c0 100644 --- a/llvm/lib/Target/X86/X86.td +++ b/llvm/lib/Target/X86/X86.td @@ -173,6 +173,10 @@ def FeatureBF16 : SubtargetFeature<"avx512bf16", "HasBF16", "true", def FeatureBITALG : SubtargetFeature<"avx512bitalg", "HasBITALG", "true", "Enable AVX-512 Bit Algorithms", [FeatureBWI]>; +def FeatureVP2INTERSECT : SubtargetFeature<"avx512vp2intersect", + "HasVP2INTERSECT", "true", + "Enable AVX-512 vp2intersect", + [FeatureAVX512]>; def FeaturePCLMUL : SubtargetFeature<"pclmul", "HasPCLMUL", "true", "Enable packed carry-less multiplication instructions", [FeatureSSE2]>; diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index f97907fb7e5..253f4487976 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -22944,6 +22944,28 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, } return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT); } + + case Intrinsic::x86_avx512_vp2intersect_q_512: + case 
Intrinsic::x86_avx512_vp2intersect_q_256: + case Intrinsic::x86_avx512_vp2intersect_q_128: + case Intrinsic::x86_avx512_vp2intersect_d_512: + case Intrinsic::x86_avx512_vp2intersect_d_256: + case Intrinsic::x86_avx512_vp2intersect_d_128: { + MVT MaskVT = Op.getSimpleValueType(); + + SDVTList VTs = DAG.getVTList(MVT::Untyped, MVT::Other); + SDLoc DL(Op); + + SDValue Operation = + DAG.getNode(X86ISD::VP2INTERSECT, DL, VTs, + Op->getOperand(1), Op->getOperand(2)); + + SDValue Result0 = DAG.getTargetExtractSubreg(X86::sub_mask_0, DL, + MaskVT, Operation); + SDValue Result1 = DAG.getTargetExtractSubreg(X86::sub_mask_1, DL, + MaskVT, Operation); + return DAG.getMergeValues({Result0, Result1}, DL); + } } } @@ -28284,6 +28306,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::TPAUSE: return "X86ISD::TPAUSE"; case X86ISD::ENQCMD: return "X86ISD:ENQCMD"; case X86ISD::ENQCMDS: return "X86ISD:ENQCMDS"; + case X86ISD::VP2INTERSECT: return "X86ISD::VP2INTERSECT"; } return nullptr; } diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h index a3ebe1001e3..7eed866614a 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -592,6 +592,9 @@ namespace llvm { // Enqueue Stores Instructions ENQCMD, ENQCMDS, + // For avx512-vp2intersect + VP2INTERSECT, + // Compare and swap. LCMPXCHG_DAG = ISD::FIRST_TARGET_MEMORY_OPCODE, LCMPXCHG8_DAG, diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index 97e696981b1..20380bb8448 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -26,6 +26,10 @@ class X86VectorVTInfo<int numelts, ValueType eltvt, RegisterClass rc, // Corresponding mask register class. RegisterClass KRC = !cast<RegisterClass>("VK" # NumElts); + // Corresponding mask register pair class. 
+ RegisterOperand KRPC = !if (!gt(NumElts, 16), ?, + !cast<RegisterOperand>("VK" # NumElts # "Pair")); + // Corresponding write-mask register class. RegisterClass KRCWM = !cast<RegisterClass>("VK" # NumElts # "WM"); @@ -12556,6 +12560,59 @@ defm VP4DPWSSDSrm : AVX512_maskable_3src_in_asm<0x53, MRMSrcMem, v16i32_info, Sched<[SchedWriteFMA.ZMM.Folded]>; } +let hasSideEffects = 0 in { + def MASKPAIR16STORE : PseudoI<(outs), (ins VK16PAIR:$src, anymem:$dst), + [(store VK16PAIR:$src, addr:$dst)]>; + def MASKPAIR16LOAD : PseudoI<(outs VK16PAIR:$dst), (ins anymem:$src), + [(set VK16PAIR:$dst, (load addr:$src))]>; +} + +//===----------------------------------------------------------------------===// +// VP2INTERSECT +//===----------------------------------------------------------------------===// + +multiclass avx512_vp2intersect_modes<X86VectorVTInfo _> { + def rr : I<0x68, MRMSrcReg, + (outs _.KRPC:$dst), + (ins _.RC:$src1, _.RC:$src2), + !strconcat("vp2intersect", _.Suffix, + "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + [(set _.KRPC:$dst, (X86vp2intersect + _.RC:$src1, (_.VT _.RC:$src2)))]>, + EVEX_4V, T8XD; + + def rm : I<0x68, MRMSrcMem, + (outs _.KRPC:$dst), + (ins _.RC:$src1, _.MemOp:$src2), + !strconcat("vp2intersect", _.Suffix, + "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + [(set _.KRPC:$dst, (X86vp2intersect + _.RC:$src1, (_.VT (bitconvert (_.LdFrag addr:$src2)))))]>, + EVEX_4V, T8XD, EVEX_CD8<_.EltSize, CD8VF>; + + def rmb : I<0x68, MRMSrcMem, + (outs _.KRPC:$dst), + (ins _.RC:$src1, _.ScalarMemOp:$src2), + !strconcat("vp2intersect", _.Suffix, "\t{${src2}", _.BroadcastStr, + ", $src1, $dst|$dst, $src1, ${src2}", _.BroadcastStr ,"}"), + [(set _.KRPC:$dst, (X86vp2intersect + _.RC:$src1, (_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src2)))))]>, + EVEX_4V, T8XD, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>; +} + +multiclass avx512_vp2intersect<AVX512VLVectorVTInfo _> { + let Predicates = [HasAVX512, HasVP2INTERSECT] in + defm Z : avx512_vp2intersect_modes<_.info512>, 
EVEX_V512; + + let Predicates = [HasAVX512, HasVP2INTERSECT, HasVLX] in { + defm Z256 : avx512_vp2intersect_modes<_.info256>, EVEX_V256; + defm Z128 : avx512_vp2intersect_modes<_.info128>, EVEX_V128; + } +} + +defm VP2INTERSECTD : avx512_vp2intersect<avx512vl_i32_info>; +defm VP2INTERSECTQ : avx512_vp2intersect<avx512vl_i64_info>, VEX_W; + multiclass avx512_binop_all2<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched, AVX512VLVectorVTInfo _SrcVTInfo, diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td index 8a6f09f5cac..50d81fcaf83 100644 --- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -505,6 +505,10 @@ def X86FnmsubRnd : SDNode<"X86ISD::FNMSUB_RND", SDTFmaRound, [SDNPCommutat def X86FmaddsubRnd : SDNode<"X86ISD::FMADDSUB_RND", SDTFmaRound, [SDNPCommutative]>; def X86FmsubaddRnd : SDNode<"X86ISD::FMSUBADD_RND", SDTFmaRound, [SDNPCommutative]>; +def X86vp2intersect : SDNode<"X86ISD::VP2INTERSECT", + SDTypeProfile<1, 2, [SDTCisVT<0, untyped>, + SDTCisVec<1>, SDTCisSameAs<1, 2>]>>; + def SDTIFma : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<0,1>, SDTCisSameAs<1,2>, SDTCisSameAs<1,3>]>; def x86vpmadd52l : SDNode<"X86ISD::VPMADD52L", SDTIFma, [SDNPCommutative]>; diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp index 20d3cf0d927..124ad5dfdf4 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -2877,6 +2877,14 @@ static unsigned getLoadStoreRegOpcode(unsigned Reg, assert(STI.hasBWI() && "KMOVD requires BWI"); return load ? X86::KMOVDkm : X86::KMOVDmk; } + // All of these mask pair classes have the same spill size, the same kind + // of kmov instructions can be used with all of them. 
+ if (X86::VK1PAIRRegClass.hasSubClassEq(RC) || + X86::VK2PAIRRegClass.hasSubClassEq(RC) || + X86::VK4PAIRRegClass.hasSubClassEq(RC) || + X86::VK8PAIRRegClass.hasSubClassEq(RC) || + X86::VK16PAIRRegClass.hasSubClassEq(RC)) + return load ? X86::MASKPAIR16LOAD : X86::MASKPAIR16STORE; llvm_unreachable("Unknown 4-byte regclass"); case 8: if (X86::GR64RegClass.hasSubClassEq(RC)) diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td index 98af217ebcf..85255096a7d 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.td +++ b/llvm/lib/Target/X86/X86InstrInfo.td @@ -757,6 +757,33 @@ def lea64mem : Operand<i64> { let ParserMatchClass = X86MemAsmOperand; } +let RenderMethod = "addMaskPairOperands" in { + def VK1PairAsmOperand : AsmOperandClass { let Name = "VK1Pair"; } + def VK2PairAsmOperand : AsmOperandClass { let Name = "VK2Pair"; } + def VK4PairAsmOperand : AsmOperandClass { let Name = "VK4Pair"; } + def VK8PairAsmOperand : AsmOperandClass { let Name = "VK8Pair"; } + def VK16PairAsmOperand : AsmOperandClass { let Name = "VK16Pair"; } +} + +def VK1Pair : RegisterOperand<VK1PAIR, "printVKPair"> { + let ParserMatchClass = VK1PairAsmOperand; +} + +def VK2Pair : RegisterOperand<VK2PAIR, "printVKPair"> { + let ParserMatchClass = VK2PairAsmOperand; +} + +def VK4Pair : RegisterOperand<VK4PAIR, "printVKPair"> { + let ParserMatchClass = VK4PairAsmOperand; +} + +def VK8Pair : RegisterOperand<VK8PAIR, "printVKPair"> { + let ParserMatchClass = VK8PairAsmOperand; +} + +def VK16Pair : RegisterOperand<VK16PAIR, "printVKPair"> { + let ParserMatchClass = VK16PairAsmOperand; +} //===----------------------------------------------------------------------===// // X86 Complex Pattern Definitions. 
@@ -843,6 +870,7 @@ def NoVLX_Or_NoBWI : Predicate<"!Subtarget->hasVLX() || !Subtarget->hasBWI()">; def NoVLX_Or_NoDQI : Predicate<"!Subtarget->hasVLX() || !Subtarget->hasDQI()">; def PKU : Predicate<"Subtarget->hasPKU()">; def HasVNNI : Predicate<"Subtarget->hasVNNI()">; +def HasVP2INTERSECT : Predicate<"Subtarget->hasVP2INTERSECT()">; def HasBF16 : Predicate<"Subtarget->hasBF16()">; def HasBITALG : Predicate<"Subtarget->hasBITALG()">; diff --git a/llvm/lib/Target/X86/X86MCInstLower.cpp b/llvm/lib/Target/X86/X86MCInstLower.cpp index 6ed9a533d51..9de2d18e0be 100644 --- a/llvm/lib/Target/X86/X86MCInstLower.cpp +++ b/llvm/lib/Target/X86/X86MCInstLower.cpp @@ -1680,6 +1680,77 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { case X86::TLS_base_addr64: return LowerTlsAddr(MCInstLowering, *MI); + // Loading/storing mask pairs requires two kmov operations. The second one of these + // needs a 2-byte displacement relative to the specified address (with a 32-bit spill + // size). The pairs of 1-bit masks up to 16-bit masks all use the same spill size, so + // they are all stored using MASKPAIR16STORE and loaded using MASKPAIR16LOAD. + // + // The displacement value might wrap around in theory, hence the asserts in both + // cases. 
+ case X86::MASKPAIR16LOAD: { + int64_t Disp = MI->getOperand(1 + X86::AddrDisp).getImm(); + assert(Disp >= 0 && Disp <= INT32_MAX - 2 && "Unexpected displacement"); + const X86RegisterInfo *RI = + MF->getSubtarget<X86Subtarget>().getRegisterInfo(); + unsigned Reg = MI->getOperand(0).getReg(); + unsigned Reg0 = RI->getSubReg(Reg, X86::sub_mask_0); + unsigned Reg1 = RI->getSubReg(Reg, X86::sub_mask_1); + + // Load the first mask register + MCInstBuilder MIB = MCInstBuilder(X86::KMOVWkm); + MIB.addReg(Reg0); + for (int i = 0; i < X86::AddrNumOperands; ++i) { + auto Op = MCInstLowering.LowerMachineOperand(MI, MI->getOperand(1 + i)); + MIB.addOperand(Op.getValue()); + } + EmitAndCountInstruction(MIB); + + // Load the second mask register of the pair + MIB = MCInstBuilder(X86::KMOVWkm); + MIB.addReg(Reg1); + for (int i = 0; i < X86::AddrNumOperands; ++i) { + if (i == X86::AddrDisp) { + MIB.addImm(Disp + 2); + } else { + auto Op = MCInstLowering.LowerMachineOperand(MI, MI->getOperand(1 + i)); + MIB.addOperand(Op.getValue()); + } + } + EmitAndCountInstruction(MIB); + return; + } + + case X86::MASKPAIR16STORE: { + int64_t Disp = MI->getOperand(X86::AddrDisp).getImm(); + assert(Disp >= 0 && Disp <= INT32_MAX - 2 && "Unexpected displacement"); + const X86RegisterInfo *RI = + MF->getSubtarget<X86Subtarget>().getRegisterInfo(); + unsigned Reg = MI->getOperand(X86::AddrNumOperands).getReg(); + unsigned Reg0 = RI->getSubReg(Reg, X86::sub_mask_0); + unsigned Reg1 = RI->getSubReg(Reg, X86::sub_mask_1); + + // Store the first mask register + MCInstBuilder MIB = MCInstBuilder(X86::KMOVWmk); + for (int i = 0; i < X86::AddrNumOperands; ++i) + MIB.addOperand(MCInstLowering.LowerMachineOperand(MI, MI->getOperand(i)).getValue()); + MIB.addReg(Reg0); + EmitAndCountInstruction(MIB); + + // Store the second mask register of the pair + MIB = MCInstBuilder(X86::KMOVWmk); + for (int i = 0; i < X86::AddrNumOperands; ++i) { + if (i == X86::AddrDisp) { + MIB.addImm(Disp + 2); + } else { + auto Op 
= MCInstLowering.LowerMachineOperand(MI, MI->getOperand(0 + i)); + MIB.addOperand(Op.getValue()); + } + } + MIB.addReg(Reg1); + EmitAndCountInstruction(MIB); + return; + } + case X86::MOVPC32r: { // This is a pseudo op for a two instruction sequence with a label, which // looks like: diff --git a/llvm/lib/Target/X86/X86RegisterInfo.td b/llvm/lib/Target/X86/X86RegisterInfo.td index c0acff9c8c3..0528b90c1fd 100644 --- a/llvm/lib/Target/X86/X86RegisterInfo.td +++ b/llvm/lib/Target/X86/X86RegisterInfo.td @@ -28,6 +28,8 @@ let Namespace = "X86" in { def sub_32bit : SubRegIndex<32>; def sub_xmm : SubRegIndex<128>; def sub_ymm : SubRegIndex<256>; + def sub_mask_0 : SubRegIndex<-1>; + def sub_mask_1 : SubRegIndex<-1, -1>; } //===----------------------------------------------------------------------===// @@ -594,6 +596,16 @@ def VK16 : RegisterClass<"X86", [v16i1], 16, (add VK8)> {let Size = 16;} def VK32 : RegisterClass<"X86", [v32i1], 32, (add VK16)> {let Size = 32;} def VK64 : RegisterClass<"X86", [v64i1], 64, (add VK32)> {let Size = 64;} +// Mask register pairs +def KPAIRS : RegisterTuples<[sub_mask_0, sub_mask_1], + [(add K0, K2, K4, K6), (add K1, K3, K5, K7)]>; + +def VK1PAIR : RegisterClass<"X86", [untyped], 16, (add KPAIRS)> {let Size = 32;} +def VK2PAIR : RegisterClass<"X86", [untyped], 16, (add KPAIRS)> {let Size = 32;} +def VK4PAIR : RegisterClass<"X86", [untyped], 16, (add KPAIRS)> {let Size = 32;} +def VK8PAIR : RegisterClass<"X86", [untyped], 16, (add KPAIRS)> {let Size = 32;} +def VK16PAIR : RegisterClass<"X86", [untyped], 16, (add KPAIRS)> {let Size = 32;} + def VK1WM : RegisterClass<"X86", [v1i1], 16, (sub VK1, K0)> {let Size = 16;} def VK2WM : RegisterClass<"X86", [v2i1], 16, (sub VK2, K0)> {let Size = 16;} def VK4WM : RegisterClass<"X86", [v4i1], 16, (sub VK4, K0)> {let Size = 16;} diff --git a/llvm/lib/Target/X86/X86Subtarget.h b/llvm/lib/Target/X86/X86Subtarget.h index 6fefe23182f..9e5613654af 100644 --- a/llvm/lib/Target/X86/X86Subtarget.h +++ 
b/llvm/lib/Target/X86/X86Subtarget.h @@ -362,6 +362,9 @@ protected: /// Processor has AVX-512 Bit Algorithms instructions bool HasBITALG = false; + /// Processor has AVX-512 vp2intersect instructions + bool HasVP2INTERSECT = false; + /// Processor supports MPX - Memory Protection Extensions bool HasMPX = false; @@ -679,6 +682,7 @@ public: bool hasPKU() const { return HasPKU; } bool hasVNNI() const { return HasVNNI; } bool hasBF16() const { return HasBF16; } + bool hasVP2INTERSECT() const { return HasVP2INTERSECT; } bool hasBITALG() const { return HasBITALG; } bool hasMPX() const { return HasMPX; } bool hasSHSTK() const { return HasSHSTK; } |