Diffstat (limited to 'llvm/lib/Target/AMDGPU')
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPU.td | 20
-rw-r--r-- | llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp | 83
-rw-r--r-- | llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h | 12
-rw-r--r-- | llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp | 42
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIDefines.h | 12
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrInfo.td | 169
-rw-r--r-- | llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 11
-rw-r--r-- | llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h | 4
-rw-r--r-- | llvm/lib/Target/AMDGPU/VOP1Instructions.td | 33
-rw-r--r-- | llvm/lib/Target/AMDGPU/VOP2Instructions.td | 70
-rw-r--r-- | llvm/lib/Target/AMDGPU/VOP3Instructions.td | 9
-rw-r--r-- | llvm/lib/Target/AMDGPU/VOPCInstructions.td | 37
-rw-r--r-- | llvm/lib/Target/AMDGPU/VOPInstructions.td | 114
13 files changed, 552 insertions, 64 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index b279bd61e18..e7ebb37a9d6 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -425,7 +425,7 @@ def FeatureGFX9 : SubtargetFeatureGeneration<"GFX9", FeatureGCN3Encoding, FeatureCIInsts, Feature16BitInsts, FeatureSMemRealTime, FeatureScalarStores, FeatureInv2PiInlineImm, FeatureApertureRegs, FeatureGFX9Insts, FeatureVOP3P, FeatureVGPRIndexMode, - FeatureFastFMAF32, FeatureDPP, + FeatureFastFMAF32, FeatureSDWA, FeatureDPP, FeatureFlatInstOffsets, FeatureFlatGlobalInsts, FeatureFlatScratchInsts ] >; @@ -534,10 +534,12 @@ def AMDGPUAsmVariants { int VOP3_ID = 1; string SDWA = "SDWA"; int SDWA_ID = 2; + string SDWA9 = "SDWA9"; + int SDWA9_ID = 3; string DPP = "DPP"; - int DPP_ID = 3; + int DPP_ID = 4; string Disable = "Disable"; - int Disable_ID = 4; + int Disable_ID = 5; } def DefaultAMDGPUAsmParserVariant : AsmParserVariant { @@ -555,6 +557,12 @@ def SDWAAsmParserVariant : AsmParserVariant { let Name = AMDGPUAsmVariants.SDWA; } +def SDWA9AsmParserVariant : AsmParserVariant { + let Variant = AMDGPUAsmVariants.SDWA9_ID; + let Name = AMDGPUAsmVariants.SDWA9; +} + + def DPPAsmParserVariant : AsmParserVariant { let Variant = AMDGPUAsmVariants.DPP_ID; let Name = AMDGPUAsmVariants.DPP; @@ -567,6 +575,7 @@ def AMDGPU : Target { let AssemblyParserVariants = [DefaultAMDGPUAsmParserVariant, VOP3AsmParserVariant, SDWAAsmParserVariant, + SDWA9AsmParserVariant, DPPAsmParserVariant]; let AssemblyWriters = [AMDGPUAsmWriter]; } @@ -607,7 +616,10 @@ def HasVOP3PInsts : Predicate<"Subtarget->hasVOP3PInsts()">, AssemblerPredicate<"FeatureVOP3P">; def HasSDWA : Predicate<"Subtarget->hasSDWA()">, - AssemblerPredicate<"FeatureSDWA">; + AssemblerPredicate<"FeatureSDWA,FeatureVolcanicIslands">; + +def HasSDWA9 : Predicate<"Subtarget->hasSDWA()">, + AssemblerPredicate<"FeatureSDWA,FeatureGFX9">; def HasDPP : Predicate<"Subtarget->hasDPP()">, AssemblerPredicate<"FeatureDPP">; diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index b52ea2b3a2c..f5541e08e1b 100644 --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -881,6 +881,10 @@ public: return AMDGPU::isVI(getSTI()); } + bool isGFX9() const { + return AMDGPU::isGFX9(getSTI()); + } + bool hasInv2PiInlineImm() const { return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm]; } @@ -989,7 +993,6 @@ private: bool usesConstantBus(const MCInst &Inst, unsigned OpIdx); bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const; unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const; - bool isSGPR(unsigned Reg); public: OperandMatchResultTy parseOptionalOperand(OperandVector &Operands); @@ -1042,9 +1045,10 @@ public: OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands); void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands); void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands); + void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands); void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands); void cvtSDWA(MCInst &Inst, const OperandVector &Operands, - uint64_t BasicInstType); + uint64_t BasicInstType, bool skipVcc = false); }; struct OptionalOperand { @@ -1966,7 +1970,8 @@ ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const { } if (isForcedSDWA()) { - static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA}; + static const unsigned Variants[] = 
{AMDGPUAsmVariants::SDWA, + AMDGPUAsmVariants::SDWA9}; return makeArrayRef(Variants); } @@ -1977,7 +1982,7 @@ ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const { static const unsigned Variants[] = { AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3, - AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::DPP + AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP }; return makeArrayRef(Variants); @@ -2000,14 +2005,6 @@ unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const { return AMDGPU::NoRegister; } -bool AMDGPUAsmParser::isSGPR(unsigned Reg) { - const MCRegisterInfo *TRI = getContext().getRegisterInfo(); - const MCRegisterClass SGPRClass = TRI->getRegClass(AMDGPU::SReg_32RegClassID); - const unsigned FirstSubReg = TRI->getSubReg(Reg, 1); - return SGPRClass.contains(FirstSubReg != 0 ? FirstSubReg : Reg) || - Reg == AMDGPU::SCC; -} - // NB: This code is correct only when used to check constant // bus limitations because GFX7 support no f16 inline constants. // Note that there are no cases when a GFX7 opcode violates @@ -2049,7 +2046,8 @@ bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) { if (MO.isImm()) { return !isInlineConstant(Inst, OpIdx); } - return !MO.isReg() || isSGPR(mc2PseudoReg(MO.getReg())); + return !MO.isReg() || + isSGPR(mc2PseudoReg(MO.getReg()), getContext().getRegisterInfo()); } bool AMDGPUAsmParser::validateOperandLimitations(const MCInst &Inst) { @@ -2060,7 +2058,8 @@ bool AMDGPUAsmParser::validateOperandLimitations(const MCInst &Inst) { if (Desc.TSFlags & (SIInstrFlags::VOPC | SIInstrFlags::VOP1 | SIInstrFlags::VOP2 | - SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) { + SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | + SIInstrFlags::SDWA)) { // Check special imm operands (used by madmk, etc) if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) { @@ -4151,14 +4150,19 @@ void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) { cvtSDWA(Inst, Operands, SIInstrFlags::VOP2); } +void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) { + cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true); +} + void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) { - cvtSDWA(Inst, Operands, SIInstrFlags::VOPC); + cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI()); } void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands, - uint64_t BasicInstType) { + uint64_t BasicInstType, bool skipVcc) { using namespace llvm::AMDGPU::SDWA; OptionalImmIndexMap OptionalIdx; + bool skippedVcc = false; unsigned I = 1; const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); @@ -4168,15 +4172,22 @@ void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands, for (unsigned E = Operands.size(); I != E; ++I) { AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); - // Add the register arguments - if ((BasicInstType == SIInstrFlags::VOPC || - BasicInstType == SIInstrFlags::VOP2)&& - Op.isReg() && - Op.Reg.RegNo == AMDGPU::VCC) { - // VOPC and VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst. - // Skip it. - continue; - } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { + if (skipVcc && !skippedVcc && Op.isReg() && Op.Reg.RegNo == AMDGPU::VCC) { + // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst. + // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3) + // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand. + // Skip VCC only if we didn't skip it on previous iteration. 
+ if (BasicInstType == SIInstrFlags::VOP2 && + (Inst.getNumOperands() == 1 || Inst.getNumOperands() == 5)) { + skippedVcc = true; + continue; + } else if (BasicInstType == SIInstrFlags::VOPC && + Inst.getNumOperands() == 0) { + skippedVcc = true; + continue; + } + } + if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { Op.addRegWithInputModsOperands(Inst, 2); } else if (Op.isImm()) { // Handle optional arguments @@ -4184,20 +4195,30 @@ void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands, } else { llvm_unreachable("Invalid operand type"); } + skippedVcc = false; } - addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); - - if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) { + if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 && + Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) { // V_NOP_sdwa_vi has no optional sdwa arguments switch (BasicInstType) { case SIInstrFlags::VOP1: + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); + if (isGFX9() && + AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); + } addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE); addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); break; case SIInstrFlags::VOP2: + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); + if (isGFX9() && + AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); + } addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE); addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); @@ -4205,6 +4226,9 @@ void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands, break; case SIInstrFlags::VOPC: + if (isVI()) { + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); + } addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD); break; @@ -4220,10 +4244,9 @@ void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands, Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { auto it = Inst.begin(); std::advance( - it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2)); + it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2)); Inst.insert(it, Inst.getOperand(0)); // src2 = dst } - } /// Force static initialization. 
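To restate the vcc-skipping rule that cvtSdwaVOP2b/cvtSdwaVOPC feed into cvtSDWA above: the textual "vcc" of a VOP2b SDWA instruction (the carry-out right after the destination, and the trailing carry-in of v_addc_u32_sdwa) and the "vcc" destination of a VI-style VOPC SDWA are only tokens, so they are dropped before operands are added to the MCInst; on GFX9 the VOPC form names an explicit $sdst and nothing is dropped. A minimal stand-alone sketch of that rule, working only on the positional register operands of the assembly string (the function, the enum and the simplification to positional indices are illustrative, not the LLVM MC API):

    #include <cstddef>
    #include <string>
    #include <vector>

    enum class InstKind { VOP1, VOP2b, VOPC_VI };

    // Returns the operands that survive into the encoded instruction.
    std::vector<std::string> dropVccTokens(InstKind Kind,
                                           const std::vector<std::string> &Ops) {
      std::vector<std::string> Kept;
      bool SkippedPrev = false; // never drop two adjacent "vcc" tokens
      for (std::size_t I = 0; I < Ops.size(); ++I) {
        bool Skip = false;
        if (!SkippedPrev && Ops[I] == "vcc") {
          if (Kind == InstKind::VOP2b)
            // carry-out right after the dst, or the trailing carry-in
            Skip = (I == 1 || I + 1 == Ops.size());
          else if (Kind == InstKind::VOPC_VI)
            // "vcc" names the implicit compare destination on VI
            Skip = (I == 0);
        }
        SkippedPrev = Skip;
        if (!Skip)
          Kept.push_back(Ops[I]);
      }
      return Kept;
    }

    // e.g. dropVccTokens(InstKind::VOP2b, {"v1", "vcc", "v2", "v3"})
    // keeps {"v1", "v2", "v3"}.

The skippedVcc flag in the real converter plays the same role as SkippedPrev here: a "vcc" is only skipped if the previous operand was not already skipped.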
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h index 3d3858ab47e..a856b17a228 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h @@ -52,6 +52,18 @@ public: return 0; } + virtual unsigned getSDWA9SrcEncoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { + return 0; + } + + virtual unsigned getSDWA9VopcDstEncoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { + return 0; + } + protected: uint64_t computeAvailableFeatures(const FeatureBitset &FB) const; void verifyInstructionPredicates(const MCInst &MI, diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp index bda0928036f..247fead86f6 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp @@ -69,6 +69,14 @@ public: unsigned getSOPPBrEncoding(const MCInst &MI, unsigned OpNo, SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI) const override; + + unsigned getSDWA9SrcEncoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const override; + + unsigned getSDWA9VopcDstEncoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const override; }; } // end anonymous namespace @@ -319,6 +327,40 @@ unsigned SIMCCodeEmitter::getSOPPBrEncoding(const MCInst &MI, unsigned OpNo, return getMachineOpValue(MI, MO, Fixups, STI); } +unsigned +SIMCCodeEmitter::getSDWA9SrcEncoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { + uint64_t RegEnc = 0; + + const MCOperand &MO = MI.getOperand(OpNo); + + unsigned Reg = MO.getReg(); + RegEnc |= MRI.getEncodingValue(Reg); + RegEnc &= SDWA9_SRC_REG_MASK; + if (AMDGPU::isSGPR(Reg, &MRI)) { + RegEnc |= SDWA9_SRC_SGPR_MASK; + } + return RegEnc; +} + +unsigned +SIMCCodeEmitter::getSDWA9VopcDstEncoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { + uint64_t RegEnc = 0; + + const MCOperand &MO = MI.getOperand(OpNo); + + unsigned Reg = MO.getReg(); + if (Reg != AMDGPU::VCC) { + RegEnc |= MRI.getEncodingValue(Reg); + RegEnc &= SDWA9_VOPC_DST_REG_MASK; + RegEnc |= SDWA9_VOPC_DST_VCC_MASK; + } + return RegEnc; +} + uint64_t SIMCCodeEmitter::getMachineOpValue(const MCInst &MI, const MCOperand &MO, SmallVectorImpl<MCFixup> &Fixups, diff --git a/llvm/lib/Target/AMDGPU/SIDefines.h b/llvm/lib/Target/AMDGPU/SIDefines.h index a01330cb917..1e765e4c166 100644 --- a/llvm/lib/Target/AMDGPU/SIDefines.h +++ b/llvm/lib/Target/AMDGPU/SIDefines.h @@ -118,6 +118,10 @@ namespace AMDGPU { // Operand for source modifiers for VOP instructions OPERAND_INPUT_MODS, + // Operand for GFX9 SDWA instructions + OPERAND_SDWA9_SRC, + OPERAND_SDWA9_VOPC_DST, + /// Operand with 32-bit immediate that uses the constant bus. 
OPERAND_KIMM32, OPERAND_KIMM16 @@ -160,7 +164,8 @@ namespace AMDGPUAsmVariants { DEFAULT = 0, VOP3 = 1, SDWA = 2, - DPP = 3 + SDWA9 = 3, + DPP = 4 }; } @@ -294,6 +299,11 @@ enum DstUnused { UNUSED_PRESERVE = 2, }; +#define SDWA9_SRC_SGPR_MASK 0x100 +#define SDWA9_SRC_REG_MASK 0xFF +#define SDWA9_VOPC_DST_VCC_MASK 0x80 +#define SDWA9_VOPC_DST_REG_MASK 0x7F + } // namespace SDWA } // namespace AMDGPU diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index 7b052844f17..17a31601fc1 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -439,6 +439,18 @@ def ExpSrc3 : RegisterOperand<VGPR_32> { let ParserMatchClass = VReg32OrOffClass; } +def SDWA9Src : RegisterOperand<VS_32> { + let OperandNamespace = "AMDGPU"; + let OperandType = "OPERAND_SDWA9_SRC"; + let EncoderMethod = "getSDWA9SrcEncoding"; +} + +def SDWA9VopcDst : VOPDstOperand<SReg_64> { + let OperandNamespace = "AMDGPU"; + let OperandType = "OPERAND_SDWA9_VOPC_DST"; + let EncoderMethod = "getSDWA9VopcDstEncoding"; +} + class NamedMatchClass<string CName, bit Optional = 1> : AsmOperandClass { let Name = "Imm"#CName; let PredicateMethod = "is"#CName; @@ -588,6 +600,16 @@ class IntInputMods <IntInputModsMatchClass matchClass> : InputMods <matchClass> def Int32InputMods : IntInputMods<Int32InputModsMatchClass>; def Int64InputMods : IntInputMods<Int64InputModsMatchClass>; +def FPRegInputModsMatchClass : AsmOperandClass { + let Name = "RegWithFPInputMods"; + let ParserMethod = "parseRegWithFPInputMods"; + let PredicateMethod = "isRegKind"; +} + +def FPRegInputMods : InputMods <FPRegInputModsMatchClass> { + let PrintMethod = "printOperandAndFPInputMods"; +} + def FPVRegInputModsMatchClass : AsmOperandClass { let Name = "VRegWithFPInputMods"; let ParserMethod = "parseRegWithFPInputMods"; @@ -598,6 +620,17 @@ def FPVRegInputMods : InputMods <FPVRegInputModsMatchClass> { let PrintMethod = "printOperandAndFPInputMods"; } + +def IntRegInputModsMatchClass : AsmOperandClass { + let Name = "RegWithIntInputMods"; + let ParserMethod = "parseRegWithIntInputMods"; + let PredicateMethod = "isRegKind"; +} + +def IntRegInputMods : InputMods <IntRegInputModsMatchClass> { + let PrintMethod = "printOperandAndIntInputMods"; +} + def IntVRegInputModsMatchClass : AsmOperandClass { let Name = "VRegWithIntInputMods"; let ParserMethod = "parseRegWithIntInputMods"; @@ -783,6 +816,14 @@ class getVALUDstForVT<ValueType VT> { VOPDstOperand<SReg_64>)))); // else VT == i1 } +// Returns the register class to use for the destination of VOP[12C] +// instructions with GFX9 SDWA extension +class getSDWA9DstForVT<ValueType VT> { + RegisterOperand ret = !if(!eq(VT.Size, 1), + SDWA9VopcDst, // VOPC + VOPDstOperand<VGPR_32>); // VOP1/2 32-bit dst +} + // Returns the register class to use for source 0 of VOP[12C] // instructions for the given VT. class getVOPSrc0ForVT<ValueType VT> { @@ -823,7 +864,6 @@ class getVregSrcForVT<ValueType VT> { !if(!eq(VT.Size, 64), VReg_64, VGPR_32)); } - // Returns the register class to use for sources of VOP3 instructions for the // given VT. 
class getVOP3SrcForVT<ValueType VT> { @@ -926,6 +966,15 @@ class getSrcModExt <ValueType VT> { Operand ret = !if(isFP, FPVRegInputMods, IntVRegInputMods); } +// Return type of input modifiers operand specified input operand for SDWA 9 +class getSrcModSDWA9 <ValueType VT> { + bit isFP = !if(!eq(VT.Value, f16.Value), 1, + !if(!eq(VT.Value, f32.Value), 1, + !if(!eq(VT.Value, f64.Value), 1, + 0))); + Operand ret = !if(isFP, FPRegInputMods, IntRegInputMods); +} + // Returns the input arguments for VOP[12C] instructions for the given SrcVT. class getIns32 <RegisterOperand Src0RC, RegisterClass Src1RC, int NumSrcArgs> { dag ret = !if(!eq(NumSrcArgs, 1), (ins Src0RC:$src0), // VOP1 @@ -1062,6 +1111,7 @@ class getInsSDWA <RegisterClass Src0RC, RegisterClass Src1RC, int NumSrcArgs, // VOP1 without input operands (V_NOP) (ins), !if(!eq(NumSrcArgs, 1), + // VOP1_SDWA (ins Src0Mod:$src0_modifiers, Src0RC:$src0, clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused, src0_sel:$src0_sel), @@ -1071,7 +1121,7 @@ class getInsSDWA <RegisterClass Src0RC, RegisterClass Src1RC, int NumSrcArgs, (ins Src0Mod:$src0_modifiers, Src0RC:$src0, Src1Mod:$src1_modifiers, Src1RC:$src1, clampmod:$clamp, src0_sel:$src0_sel, src1_sel:$src1_sel), - // VOP2_SDWA or VOPC_SDWA with modifiers + // VOP2_SDWA with modifiers (ins Src0Mod:$src0_modifiers, Src0RC:$src0, Src1Mod:$src1_modifiers, Src1RC:$src1, clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused, @@ -1079,12 +1129,65 @@ class getInsSDWA <RegisterClass Src0RC, RegisterClass Src1RC, int NumSrcArgs, (ins)/* endif */))); } +// Ins for GFX9 SDWA +class getInsSDWA9 <RegisterOperand Src0RC, RegisterOperand Src1RC, int NumSrcArgs, + bit HasSDWAOMod, Operand Src0Mod, Operand Src1Mod, + ValueType DstVT> { + + dag ret = !if(!eq(NumSrcArgs, 0), + // VOP1 without input operands (V_NOP) + (ins), + !if(!eq(NumSrcArgs, 1), + // VOP1 + !if(!eq(HasSDWAOMod, 0), + // VOP1_SDWA9 without omod + (ins Src0Mod:$src0_modifiers, Src0RC:$src0, + clampmod:$clamp, + dst_sel:$dst_sel, dst_unused:$dst_unused, + src0_sel:$src0_sel), + // VOP1_SDWA9 with omod + (ins Src0Mod:$src0_modifiers, Src0RC:$src0, + clampmod:$clamp, omod:$omod, + dst_sel:$dst_sel, dst_unused:$dst_unused, + src0_sel:$src0_sel)), + !if(!eq(NumSrcArgs, 2), + !if(!eq(DstVT.Size, 1), + // VOPC_SDWA9 + (ins Src0Mod:$src0_modifiers, Src0RC:$src0, + Src1Mod:$src1_modifiers, Src1RC:$src1, + src0_sel:$src0_sel, src1_sel:$src1_sel), + // VOP2_SDWA9 + !if(!eq(HasSDWAOMod, 0), + // VOP2_SDWA9 without omod + (ins Src0Mod:$src0_modifiers, Src0RC:$src0, + Src1Mod:$src1_modifiers, Src1RC:$src1, + clampmod:$clamp, + dst_sel:$dst_sel, dst_unused:$dst_unused, + src0_sel:$src0_sel, src1_sel:$src1_sel), + // VOP1_SDWA9 with omod + (ins Src0Mod:$src0_modifiers, Src0RC:$src0, + Src1Mod:$src1_modifiers, Src1RC:$src1, + clampmod:$clamp, omod:$omod, + dst_sel:$dst_sel, dst_unused:$dst_unused, + src0_sel:$src0_sel, src1_sel:$src1_sel))), + (ins)/* endif */))); +} + // Outs for DPP and SDWA -class getOutsExt <bit HasDst, ValueType DstVT, RegisterOperand DstRCDPP> { +class getOutsExt <bit HasDst, ValueType DstVT, RegisterOperand DstRCExt> { dag ret = !if(HasDst, !if(!eq(DstVT.Size, 1), (outs), // no dst for VOPC, we use "vcc"-token as dst in SDWA VOPC instructions - (outs DstRCDPP:$vdst)), + (outs DstRCExt:$vdst)), + (outs)); // V_NOP +} + +// Outs for GFX9 SDWA +class getOutsSDWA9 <bit HasDst, ValueType DstVT, RegisterOperand DstRCSDWA9> { + dag ret = !if(HasDst, + !if(!eq(DstVT.Size, 1), + (outs DstRCSDWA9:$sdst), + (outs DstRCSDWA9:$vdst)), 
(outs)); // V_NOP } @@ -1153,8 +1256,7 @@ class getAsmDPP <bit HasDst, int NumSrcArgs, bit HasModifiers, ValueType DstVT = string ret = dst#args#" $dpp_ctrl$row_mask$bank_mask$bound_ctrl"; } -class getAsmSDWA <bit HasDst, int NumSrcArgs, bit HasFloatModifiers, - ValueType DstVT = i32> { +class getAsmSDWA <bit HasDst, int NumSrcArgs, ValueType DstVT = i32> { string dst = !if(HasDst, !if(!eq(DstVT.Size, 1), " vcc", // use vcc token as dst for VOPC instructioins @@ -1182,6 +1284,35 @@ class getAsmSDWA <bit HasDst, int NumSrcArgs, bit HasFloatModifiers, string ret = dst#args#sdwa; } +class getAsmSDWA9 <bit HasDst, bit HasOMod, int NumSrcArgs, + ValueType DstVT = i32> { + string dst = !if(HasDst, + !if(!eq(DstVT.Size, 1), + "$sdst", // VOPC + "$vdst"), // VOP1/2 + ""); + string src0 = "$src0_modifiers"; + string src1 = "$src1_modifiers"; + string out_mods = !if(!eq(HasOMod, 0), "$clamp", "$clamp$omod"); + string args = !if(!eq(NumSrcArgs, 0), "", + !if(!eq(NumSrcArgs, 1), + ", "#src0, + ", "#src0#", "#src1 + ) + ); + string sdwa = !if(!eq(NumSrcArgs, 0), "", + !if(!eq(NumSrcArgs, 1), + out_mods#" $dst_sel $dst_unused $src0_sel", + !if(!eq(DstVT.Size, 1), + " $src0_sel $src1_sel", // No dst_sel, dst_unused and output modifiers for VOPC + out_mods#" $dst_sel $dst_unused $src0_sel $src1_sel" + ) + ) + ); + string ret = dst#args#sdwa; +} + + // Function that checks if instruction supports DPP and SDWA class getHasExt <int NumSrcArgs, ValueType DstVT = i32, ValueType Src0VT = i32, ValueType Src1VT = i32> { @@ -1219,6 +1350,7 @@ class VOPProfile <list<ValueType> _ArgVT> { field RegisterOperand DstRC = getVALUDstForVT<DstVT>.ret; field RegisterOperand DstRCDPP = getVALUDstForVT<DstVT>.ret; field RegisterOperand DstRCSDWA = getVALUDstForVT<DstVT>.ret; + field RegisterOperand DstRCSDWA9 = getSDWA9DstForVT<DstVT>.ret; field RegisterOperand Src0RC32 = getVOPSrc0ForVT<Src0VT>.ret; field RegisterClass Src1RC32 = getVregSrcForVT<Src1VT>.ret; field RegisterOperand Src0RC64 = getVOP3SrcForVT<Src0VT>.ret; @@ -1228,6 +1360,8 @@ class VOPProfile <list<ValueType> _ArgVT> { field RegisterClass Src1DPP = getVregSrcForVT<Src1VT>.ret; field RegisterClass Src0SDWA = getVregSrcForVT<Src0VT>.ret; field RegisterClass Src1SDWA = getVregSrcForVT<Src1VT>.ret; + field RegisterOperand Src0SDWA9 = SDWA9Src; + field RegisterOperand Src1SDWA9 = SDWA9Src; field Operand Src0Mod = getSrcMod<Src0VT>.ret; field Operand Src1Mod = getSrcMod<Src1VT>.ret; field Operand Src2Mod = getSrcMod<Src2VT>.ret; @@ -1235,6 +1369,8 @@ class VOPProfile <list<ValueType> _ArgVT> { field Operand Src1ModDPP = getSrcModExt<Src1VT>.ret; field Operand Src0ModSDWA = getSrcModExt<Src0VT>.ret; field Operand Src1ModSDWA = getSrcModExt<Src1VT>.ret; + field Operand Src0ModSDWA9 = getSrcModSDWA9<Src0VT>.ret; + field Operand Src1ModSDWA9 = getSrcModSDWA9<Src1VT>.ret; field bit HasDst = !if(!eq(DstVT.Value, untyped.Value), 0, 1); @@ -1261,14 +1397,16 @@ class VOPProfile <list<ValueType> _ArgVT> { field bit HasSrc2Mods = !if(HasModifiers, BitOr<HasSrc2FloatMods, HasSrc2IntMods>.ret, 0); field bit HasClamp = HasModifiers; - field bit HasSDWAClamp = HasSrc0; + field bit HasSDWAClamp = EmitDst; field bit HasFPClamp = BitAnd<isFloatType<DstVT>.ret, HasClamp>.ret; field bit IsPacked = isPackedType<Src0VT>.ret; field bit HasOpSel = IsPacked; field bit HasOMod = !if(HasOpSel, 0, HasModifiers); + field bit HasSDWAOMod = isFloatType<DstVT>.ret; field bit HasExt = getHasExt<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret; + field bit HasSDWA9 = HasExt; field Operand Src0PackedMod = 
!if(HasSrc0FloatMods, PackedF16InputMods, PackedI16InputMods); field Operand Src1PackedMod = !if(HasSrc1FloatMods, PackedF16InputMods, PackedI16InputMods); @@ -1282,6 +1420,7 @@ class VOPProfile <list<ValueType> _ArgVT> { field dag Outs64 = Outs; field dag OutsDPP = getOutsExt<HasDst, DstVT, DstRCDPP>.ret; field dag OutsSDWA = getOutsExt<HasDst, DstVT, DstRCSDWA>.ret; + field dag OutsSDWA9 = getOutsSDWA9<HasDst, DstVT, DstRCSDWA9>.ret; field dag Ins32 = getIns32<Src0RC32, Src1RC32, NumSrcArgs>.ret; field dag Ins64 = getIns64<Src0RC64, Src1RC64, Src2RC64, NumSrcArgs, @@ -1296,16 +1435,21 @@ class VOPProfile <list<ValueType> _ArgVT> { field dag InsSDWA = getInsSDWA<Src0SDWA, Src1SDWA, NumSrcArgs, HasModifiers, Src0ModSDWA, Src1ModSDWA, DstVT>.ret; + field dag InsSDWA9 = getInsSDWA9<Src0SDWA9, Src1SDWA9, NumSrcArgs, + HasSDWAOMod, Src0ModSDWA9, Src1ModSDWA9, + DstVT>.ret; field string Asm32 = getAsm32<HasDst, NumSrcArgs, DstVT>.ret; field string Asm64 = getAsm64<HasDst, NumSrcArgs, HasModifiers, HasOMod, DstVT>.ret; field string AsmVOP3P = getAsmVOP3P<HasDst, NumSrcArgs, HasModifiers, HasClamp, DstVT>.ret; field string AsmDPP = getAsmDPP<HasDst, NumSrcArgs, HasModifiers, DstVT>.ret; - field string AsmSDWA = getAsmSDWA<HasDst, NumSrcArgs, HasModifiers, DstVT>.ret; + field string AsmSDWA = getAsmSDWA<HasDst, NumSrcArgs, DstVT>.ret; + field string AsmSDWA9 = getAsmSDWA9<HasDst, HasSDWAOMod, NumSrcArgs, DstVT>.ret; } class VOP_NO_EXT <VOPProfile p> : VOPProfile <p.ArgVT> { let HasExt = 0; + let HasSDWA9 = 0; } def VOP_F16_F16 : VOPProfile <[f16, f16, untyped, untyped]>; @@ -1446,6 +1590,15 @@ def getSDWAOp : InstrMapping { let ValueCols = [["SDWA"]]; } +// Maps ordinary instructions to their SDWA GFX9 counterparts +def getSDWA9Op : InstrMapping { + let FilterClass = "VOP"; + let RowFields = ["OpName"]; + let ColFields = ["AsmVariantName"]; + let KeyCol = ["Default"]; + let ValueCols = [["SDWA9"]]; +} + def getMaskedMIMGOp : InstrMapping { let FilterClass = "MIMG_Mask"; let RowFields = ["Op"]; diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index 2abd4afad3b..630f469eabf 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -544,6 +544,17 @@ bool isVI(const MCSubtargetInfo &STI) { return STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands]; } +bool isGFX9(const MCSubtargetInfo &STI) { + return STI.getFeatureBits()[AMDGPU::FeatureGFX9]; +} + +bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI) { + const MCRegisterClass SGPRClass = TRI->getRegClass(AMDGPU::SReg_32RegClassID); + const unsigned FirstSubReg = TRI->getSubReg(Reg, 1); + return SGPRClass.contains(FirstSubReg != 0 ? FirstSubReg : Reg) || + Reg == AMDGPU::SCC; +} + unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) { switch(Reg) { diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h index 8e74aa2cc9a..19888ad7556 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -273,6 +273,10 @@ inline bool isKernel(CallingConv::ID CC) { bool isSI(const MCSubtargetInfo &STI); bool isCI(const MCSubtargetInfo &STI); bool isVI(const MCSubtargetInfo &STI); +bool isGFX9(const MCSubtargetInfo &STI); + +/// \brief Is Reg - scalar register +bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI); /// If \p Reg is a pseudo reg, return the correct hardware register given /// \p STI otherwise return \p Reg. 
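The getSDWA9SrcEncoding / getSDWA9VopcDstEncoding hooks above reduce to mask arithmetic with the SDWA9_* constants added to SIDefines.h. A hedged sketch of that packing (the helper names and the RegEnc/IsSGPR/IsVCC inputs stand in for MCRegisterInfo queries; they are not LLVM API):

    constexpr unsigned SDWA9_SRC_SGPR_MASK     = 0x100; // bit 8: source is an SGPR
    constexpr unsigned SDWA9_SRC_REG_MASK      = 0xFF;  // bits 7-0: register encoding
    constexpr unsigned SDWA9_VOPC_DST_VCC_MASK = 0x80;  // SD bit: 1 = write SDST
    constexpr unsigned SDWA9_VOPC_DST_REG_MASK = 0x7F;  // bits 6-0: SGPR destination

    // 9-bit source field: low 8 bits of the register encoding plus an SGPR flag.
    unsigned encodeSDWA9Src(unsigned RegEnc, bool IsSGPR) {
      unsigned Enc = RegEnc & SDWA9_SRC_REG_MASK;
      if (IsSGPR)
        Enc |= SDWA9_SRC_SGPR_MASK;
      return Enc;
    }

    // 8-bit SDWA-B destination field: all zeros selects VCC (SD=0); otherwise
    // the SD bit is set and bits 6-0 carry the SGPR destination encoding.
    unsigned encodeSDWA9VopcDst(bool IsVCC, unsigned SGPRRegEnc) {
      if (IsVCC)
        return 0;
      return SDWA9_VOPC_DST_VCC_MASK | (SGPRRegEnc & SDWA9_VOPC_DST_REG_MASK);
    }

Leaving the VOPC destination field all zero therefore selects VCC, which matches the SD=0 case described in the VOPInstructions.td comment further down.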
diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td index 1febc6bf8ec..95b5ef0a49d 100644 --- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td @@ -30,6 +30,15 @@ class VOP1_SDWAe <bits<8> op, VOPProfile P> : VOP_SDWAe <P> { let Inst{31-25} = 0x3f; // encoding } +class VOP1_SDWA9Ae <bits<8> op, VOPProfile P> : VOP_SDWA9Ae <P> { + bits<8> vdst; + + let Inst{8-0} = 0xf9; // sdwa + let Inst{16-9} = op; + let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0); + let Inst{31-25} = 0x3f; // encoding +} + class VOP1_Pseudo <string opName, VOPProfile P, list<dag> pattern=[], bit VOP1Only = 0> : InstSI <P.Outs32, P.Ins32, "", pattern>, VOP <opName>, @@ -84,6 +93,11 @@ class VOP1_SDWA_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> : let AsmMatchConverter = "cvtSdwaVOP1"; } +class VOP1_SDWA9_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> : + VOP_SDWA9_Pseudo <OpName, P, pattern> { + let AsmMatchConverter = "cvtSdwaVOP1"; +} + class getVOP1Pat64 <SDPatternOperator node, VOPProfile P> : LetDummies { list<dag> ret = !if(P.HasModifiers, @@ -103,6 +117,7 @@ multiclass VOP1Inst <string opName, VOPProfile P, def _e32 : VOP1_Pseudo <opName, P>; def _e64 : VOP3_Pseudo <opName, P, getVOP1Pat64<node, P>.ret>; def _sdwa : VOP1_SDWA_Pseudo <opName, P>; + def _sdwa9 : VOP1_SDWA9_Pseudo <opName, P>; } // Special profile for instructions which have clamp @@ -243,6 +258,7 @@ def VOP_I32_VI32_NO_EXT : VOPProfile<[i32, i32, untyped, untyped]> { let Src0RC64 = VRegSrc_32; let HasExt = 0; + let HasSDWA9 = 0; } // Special case because there are no true output operands. Hack vdst @@ -258,16 +274,21 @@ def VOP_MOVRELD : VOPProfile<[untyped, i32, untyped, untyped]> { let Ins64 = (ins Src0RC64:$vdst, VSrc_b32:$src0); let InsDPP = (ins Src0RC32:$vdst, Src0RC32:$src0, dpp_ctrl:$dpp_ctrl, row_mask:$row_mask, bank_mask:$bank_mask, bound_ctrl:$bound_ctrl); - let InsSDWA = (ins Src0RC32:$vdst, Src0ModSDWA:$src0_modifiers, VCSrc_b32:$src0, + let InsSDWA = (ins Src0RC32:$vdst, Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0, clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused, src0_sel:$src0_sel); + let InsSDWA9 = (ins Src0RC32:$vdst, Src0ModSDWA9:$src0_modifiers, Src0SDWA9:$src0, + clampmod:$clamp, omod:$omod, dst_sel:$dst_sel, dst_unused:$dst_unused, + src0_sel:$src0_sel); let Asm32 = getAsm32<1, 1>.ret; let Asm64 = getAsm64<1, 1, 0, 1>.ret; let AsmDPP = getAsmDPP<1, 1, 0>.ret; - let AsmSDWA = getAsmSDWA<1, 1, 0>.ret; + let AsmSDWA = getAsmSDWA<1, 1>.ret; + let AsmSDWA9 = getAsmSDWA9<1, 0, 1>.ret; let HasExt = 0; + let HasSDWA9 = 0; let HasDst = 0; let EmitDst = 1; // force vdst emission } @@ -324,7 +345,7 @@ defm V_EXP_LEGACY_F32 : VOP1Inst <"v_exp_legacy_f32", VOP_F32_F32>; } // End SubtargetPredicate = isCIVI -let SubtargetPredicate = isVI in { +let SubtargetPredicate = Has16BitInsts in { defm V_CVT_F16_U16 : VOP1Inst <"v_cvt_f16_u16", VOP1_F16_I16, uint_to_fp>; defm V_CVT_F16_I16 : VOP1Inst <"v_cvt_f16_i16", VOP1_F16_I16, sint_to_fp>; @@ -347,7 +368,7 @@ defm V_COS_F16 : VOP1Inst <"v_cos_f16", VOP_F16_F16, AMDGPUcos>; } -let Predicates = [isVI] in { +let Predicates = [Has16BitInsts] in { def : Pat< (f32 (f16_to_fp i16:$src)), @@ -523,6 +544,10 @@ multiclass VOP1_Real_vi <bits<10> op> { VOP_SDWA_Real <!cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa")>, VOP1_SDWAe <op{7-0}, !cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa").Pfl>; + def _sdwa_gfx9 : + VOP_SDWA9_Real <!cast<VOP1_SDWA9_Pseudo>(NAME#"_sdwa9")>, + VOP1_SDWA9Ae <op{7-0}, 
!cast<VOP1_SDWA9_Pseudo>(NAME#"_sdwa9").Pfl>; + // For now left dpp only for asm/dasm // TODO: add corresponding pseudo def _dpp : VOP1_DPP<op{7-0}, !cast<VOP1_Pseudo>(NAME#"_e32")>; diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td index 4a11d9471f1..657cacaa792 100644 --- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td @@ -48,6 +48,18 @@ class VOP2_SDWAe <bits<6> op, VOPProfile P> : VOP_SDWAe <P> { let Inst{31} = 0x0; // encoding } +class VOP2_SDWA9Ae <bits<6> op, VOPProfile P> : VOP_SDWA9Ae <P> { + bits<8> vdst; + bits<9> src1; + + let Inst{8-0} = 0xf9; // sdwa + let Inst{16-9} = !if(P.HasSrc1, src1{7-0}, 0); + let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0); + let Inst{30-25} = op; + let Inst{31} = 0x0; // encoding + let Inst{63} = !if(P.HasSrc1, src1{8}, 0); // src1_sgpr +} + class VOP2_Pseudo <string opName, VOPProfile P, list<dag> pattern=[], string suffix = "_e32"> : InstSI <P.Outs32, P.Ins32, "", pattern>, VOP <opName>, @@ -102,6 +114,11 @@ class VOP2_SDWA_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> : let AsmMatchConverter = "cvtSdwaVOP2"; } +class VOP2_SDWA9_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> : + VOP_SDWA9_Pseudo <OpName, P, pattern> { + let AsmMatchConverter = "cvtSdwaVOP2"; +} + class getVOP2Pat64 <SDPatternOperator node, VOPProfile P> : LetDummies { list<dag> ret = !if(P.HasModifiers, [(set P.DstVT:$vdst, @@ -121,10 +138,10 @@ multiclass VOP2Inst <string opName, def _e64 : VOP3_Pseudo <opName, P, getVOP2Pat64<node, P>.ret>, Commutable_REV<revOp#"_e64", !eq(revOp, opName)>; - def _sdwa : VOP2_SDWA_Pseudo <opName, P>; + def _sdwa : VOP2_SDWA_Pseudo <opName, P>; + def _sdwa9 : VOP2_SDWA9_Pseudo <opName, P>; } -// TODO: add SDWA pseudo instructions for VOP2bInst and VOP2eInst multiclass VOP2bInst <string opName, VOPProfile P, SDPatternOperator node = null_frag, @@ -136,7 +153,13 @@ multiclass VOP2bInst <string opName, def _e32 : VOP2_Pseudo <opName, P>, Commutable_REV<revOp#"_e32", !eq(revOp, opName)>; - def _sdwa : VOP2_SDWA_Pseudo <opName, P>; + def _sdwa : VOP2_SDWA_Pseudo <opName, P> { + let AsmMatchConverter = "cvtSdwaVOP2b"; + } + + def _sdwa9 : VOP2_SDWA9_Pseudo <opName, P> { + let AsmMatchConverter = "cvtSdwaVOP2b"; + } } def _e64 : VOP3_Pseudo <opName, P, getVOP2Pat64<node, P>.ret>, @@ -203,13 +226,21 @@ class VOP_MAC <ValueType vt> : VOPProfile <[vt, vt, vt, vt]> { VGPR_32:$src2, // stub argument clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused, src0_sel:$src0_sel, src1_sel:$src1_sel); + let InsSDWA9 = (ins Src0ModSDWA9:$src0_modifiers, Src0SDWA9:$src0, + Src1ModSDWA9:$src1_modifiers, Src1SDWA9:$src1, + VGPR_32:$src2, // stub argument + clampmod:$clamp, omod:$omod, + dst_sel:$dst_sel, dst_unused:$dst_unused, + src0_sel:$src0_sel, src1_sel:$src1_sel); let Asm32 = getAsm32<1, 2, vt>.ret; let Asm64 = getAsm64<1, 2, HasModifiers, HasOMod, vt>.ret; let AsmDPP = getAsmDPP<1, 2, HasModifiers, vt>.ret; - let AsmSDWA = getAsmSDWA<1, 2, HasModifiers, vt>.ret; + let AsmSDWA = getAsmSDWA<1, 2, vt>.ret; + let AsmSDWA9 = getAsmSDWA9<1, 1, 2, vt>.ret; let HasSrc2 = 0; let HasSrc2Mods = 0; let HasExt = 1; + let HasSDWA9 = 0; } def VOP_MAC_F16 : VOP_MAC <f16> { @@ -229,6 +260,7 @@ def VOP2b_I32_I1_I32_I32 : VOPProfile<[i32, i32, i32, untyped]> { let Asm32 = "$vdst, vcc, $src0, $src1"; let Asm64 = "$vdst, $sdst, $src0, $src1"; let AsmSDWA = "$vdst, vcc, $src0_modifiers, $src1_modifiers$clamp $dst_sel $dst_unused $src0_sel $src1_sel"; + let 
AsmSDWA9 = "$vdst, vcc, $src0_modifiers, $src1_modifiers$clamp $dst_sel $dst_unused $src0_sel $src1_sel"; let AsmDPP = "$vdst, vcc, $src0, $src1 $dpp_ctrl$row_mask$bank_mask$bound_ctrl"; let Outs32 = (outs DstRC:$vdst); let Outs64 = (outs DstRC:$vdst, SReg_64:$sdst); @@ -246,6 +278,7 @@ def VOP2b_I32_I1_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1]> { let Asm32 = "$vdst, vcc, $src0, $src1, vcc"; let Asm64 = "$vdst, $sdst, $src0, $src1, $src2"; let AsmSDWA = "$vdst, vcc, $src0_modifiers, $src1_modifiers, vcc $clamp $dst_sel $dst_unused $src0_sel $src1_sel"; + let AsmSDWA9 = "$vdst, vcc, $src0_modifiers, $src1_modifiers, vcc $clamp $dst_sel $dst_unused $src0_sel $src1_sel"; let AsmDPP = "$vdst, vcc, $src0, $src1, vcc $dpp_ctrl$row_mask$bank_mask$bound_ctrl"; let Outs32 = (outs DstRC:$vdst); let Outs64 = (outs DstRC:$vdst, SReg_64:$sdst); @@ -254,16 +287,23 @@ def VOP2b_I32_I1_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1]> { // implicit VCC use. let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1); - let InsSDWA = (ins Src0Mod:$src0_modifiers, Src0SDWA:$src0, - Src1Mod:$src1_modifiers, Src1SDWA:$src1, + let InsSDWA = (ins Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0, + Src1ModSDWA:$src1_modifiers, Src1SDWA:$src1, clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused, src0_sel:$src0_sel, src1_sel:$src1_sel); + let InsSDWA9 = (ins Src0ModSDWA9:$src0_modifiers, Src0SDWA9:$src0, + Src1ModSDWA9:$src1_modifiers, Src1SDWA9:$src1, + clampmod:$clamp, omod:$omod, + dst_sel:$dst_sel, dst_unused:$dst_unused, + src0_sel:$src0_sel, src1_sel:$src1_sel); + let InsDPP = (ins Src0Mod:$src0_modifiers, Src0DPP:$src0, Src1Mod:$src1_modifiers, Src1DPP:$src1, dpp_ctrl:$dpp_ctrl, row_mask:$row_mask, bank_mask:$bank_mask, bound_ctrl:$bound_ctrl); let HasExt = 1; + let HasSDWA9 = 1; } // Read in from vcc or arbitrary SGPR @@ -387,7 +427,7 @@ defm V_LSHL_B32 : VOP2Inst <"v_lshl_b32", VOP_I32_I32_I32>; } // End let SubtargetPredicate = SICI -let SubtargetPredicate = isVI in { +let SubtargetPredicate = Has16BitInsts in { def V_MADMK_F16 : VOP2_Pseudo <"v_madmk_f16", VOP_MADMK_F16, [], "">; defm V_LSHLREV_B16 : VOP2Inst <"v_lshlrev_b16", VOP_I16_I16_I16>; @@ -418,7 +458,7 @@ defm V_MAC_F16 : VOP2Inst <"v_mac_f16", VOP_MAC_F16>; } } // End isCommutable = 1 -} // End SubtargetPredicate = isVI +} // End SubtargetPredicate = Has16BitInsts // Note: 16-bit instructions produce a 0 result in the high 16-bits. 
multiclass Arithmetic_i16_Pats <SDPatternOperator op, Instruction inst> { @@ -468,7 +508,7 @@ class ZExt_i16_i1_Pat <SDNode ext> : Pat < (V_CNDMASK_B32_e64 (i32 0), (i32 1), $src) >; -let Predicates = [isVI] in { +let Predicates = [Has16BitInsts] in { defm : Arithmetic_i16_Pats<add, V_ADD_U16_e64>; defm : Arithmetic_i16_Pats<mul, V_MUL_LO_U16_e64>; @@ -513,7 +553,7 @@ def : Pat< (V_SUB_U16_e64 $src0, NegSubInlineConst16:$src1) >; -} // End Predicates = [isVI] +} // End Predicates = [Has16BitInsts] //===----------------------------------------------------------------------===// // SI @@ -686,15 +726,21 @@ multiclass VOP2_SDWA_Real <bits<6> op> { VOP2_SDWAe <op{5-0}, !cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa").Pfl>; } +multiclass VOP2_SDWA9_Real <bits<6> op> { + def _sdwa_gfx9 : + VOP_SDWA9_Real <!cast<VOP2_SDWA9_Pseudo>(NAME#"_sdwa9")>, + VOP2_SDWA9Ae <op{5-0}, !cast<VOP2_SDWA9_Pseudo>(NAME#"_sdwa9").Pfl>; +} + multiclass VOP2be_Real_e32e64_vi <bits<6> op> : - Base_VOP2be_Real_e32e64_vi<op>, VOP2_SDWA_Real<op> { + Base_VOP2be_Real_e32e64_vi<op>, VOP2_SDWA_Real<op>, VOP2_SDWA9_Real<op> { // For now left dpp only for asm/dasm // TODO: add corresponding pseudo def _dpp : VOP2_DPP<op, !cast<VOP2_Pseudo>(NAME#"_e32")>; } multiclass VOP2_Real_e32e64_vi <bits<6> op> : - Base_VOP2_Real_e32e64_vi<op>, VOP2_SDWA_Real<op> { + Base_VOP2_Real_e32e64_vi<op>, VOP2_SDWA_Real<op>, VOP2_SDWA9_Real<op> { // For now left dpp only for asm/dasm // TODO: add corresponding pseudo def _dpp : VOP2_DPP<op, !cast<VOP2_Pseudo>(NAME#"_e32")>; diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td index c0b5069948f..001fc960b22 100644 --- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td @@ -243,7 +243,7 @@ def V_MAD_I64_I32 : VOP3Inst <"v_mad_i64_i32", VOP3b_I64_I1_I32_I32_I64>; } // End SubtargetPredicate = isCIVI -let SubtargetPredicate = isVI in { +let SubtargetPredicate = Has16BitInsts in { let isCommutable = 1 in { @@ -258,12 +258,13 @@ def V_MAD_U16 : VOP3Inst <"v_mad_u16", VOP3_Profile<VOP_I16_I16_I16_I16>>; def V_MAD_I16 : VOP3Inst <"v_mad_i16", VOP3_Profile<VOP_I16_I16_I16_I16>>; } // End isCommutable = 1 +} // End SubtargetPredicate = Has16BitInsts +let SubtargetPredicate = isVI in { def V_PERM_B32 : VOP3Inst <"v_perm_b32", VOP3_Profile<VOP_I32_I32_I32_I32>>; - } // End SubtargetPredicate = isVI -let Predicates = [isVI] in { +let Predicates = [Has16BitInsts] in { multiclass Ternary_i16_Pats <SDPatternOperator op1, SDPatternOperator op2, Instruction inst, SDPatternOperator op3> { @@ -288,7 +289,7 @@ def : Pat< defm: Ternary_i16_Pats<mul, add, V_MAD_U16, zext>; defm: Ternary_i16_Pats<mul, add, V_MAD_I16, sext>; -} // End Predicates = [isVI] +} // End Predicates = [Has16BitInsts] let SubtargetPredicate = isGFX9 in { def V_PACK_B32_F16 : VOP3Inst <"v_pack_b32_f16", VOP3_Profile<VOP_B32_F16_F16>>; diff --git a/llvm/lib/Target/AMDGPU/VOPCInstructions.td b/llvm/lib/Target/AMDGPU/VOPCInstructions.td index a3550a63677..cd347b86d30 100644 --- a/llvm/lib/Target/AMDGPU/VOPCInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOPCInstructions.td @@ -34,6 +34,17 @@ class VOPC_SDWAe <bits<8> op, VOPProfile P> : VOP_SDWAe <P> { let Inst{44-43} = SDWA.UNUSED_PRESERVE; } +class VOPC_SDWA9e <bits<8> op, VOPProfile P> : VOP_SDWA9Be <P> { + bits<9> src1; + + let Inst{8-0} = 0xf9; // sdwa + let Inst{16-9} = !if(P.HasSrc1, src1{7-0}, 0); + let Inst{24-17} = op; + let Inst{31-25} = 0x3e; // encoding + let Inst{63} = !if(P.HasSrc1, src1{8}, 0); // src1_sgpr +} 
+ + //===----------------------------------------------------------------------===// // VOPC classes //===----------------------------------------------------------------------===// @@ -102,6 +113,11 @@ class VOPC_SDWA_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> : let AsmMatchConverter = "cvtSdwaVOPC"; } +class VOPC_SDWA9_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> : + VOP_SDWA9_Pseudo <OpName, P, pattern> { + let AsmMatchConverter = "cvtSdwaVOPC"; +} + // This class is used only with VOPC instructions. Use $sdst for out operand class VOPCInstAlias <VOP3_Pseudo ps, Instruction inst, VOPProfile p = ps.Pfl> : InstAlias <ps.OpName#" "#p.Asm32, (inst)>, PredicateControl { @@ -173,6 +189,13 @@ multiclass VOPC_Pseudos <string opName, let isConvergent = DefExec; let isCompare = 1; } + + def _sdwa9 : VOPC_SDWA9_Pseudo <opName, P> { + let Defs = !if(DefExec, [VCC, EXEC], [VCC]); + let SchedRW = P.Schedule; + let isConvergent = DefExec; + let isCompare = 1; + } } def VOPC_I1_F16_F16 : VOPC_Profile<[Write32Bit], f16>; @@ -520,7 +543,11 @@ class VOPC_Class_Profile<list<SchedReadWrite> sched, ValueType vt> : let InsSDWA = (ins Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0, Src1ModSDWA:$src1_modifiers, Src1SDWA:$src1, clampmod:$clamp, src0_sel:$src0_sel, src1_sel:$src1_sel); + let InsSDWA9 = (ins Src0ModSDWA9:$src0_modifiers, Src0SDWA9:$src0, + Src1ModSDWA9:$src1_modifiers, Src1SDWA9:$src1, + src0_sel:$src0_sel, src1_sel:$src1_sel); let AsmSDWA = " vcc, $src0_modifiers, $src1_modifiers$clamp $src0_sel $src1_sel"; + //let AsmSDWA9 = " $sdst, $src0_modifiers, $src1_modifiers $src0_sel $src1_sel"; let HasSrc1Mods = 0; let HasClamp = 0; let HasOMod = 0; @@ -553,6 +580,12 @@ multiclass VOPC_Class_Pseudos <string opName, VOPC_Profile p, bit DefExec> { let SchedRW = p.Schedule; let isConvergent = DefExec; } + + def _sdwa9 : VOPC_SDWA9_Pseudo <opName, p> { + let Defs = !if(DefExec, [VCC, EXEC], [VCC]); + let SchedRW = p.Schedule; + let isConvergent = DefExec; + } } def VOPC_I1_F16_I32 : VOPC_Class_Profile<[Write32Bit], f16>; @@ -920,6 +953,10 @@ multiclass VOPC_Real_vi <bits<10> op> { VOP_SDWA_Real <!cast<VOPC_SDWA_Pseudo>(NAME#"_sdwa")>, VOPC_SDWAe <op{7-0}, !cast<VOPC_SDWA_Pseudo>(NAME#"_sdwa").Pfl>; + def _sdwa_gfx9 : + VOP_SDWA9_Real <!cast<VOPC_SDWA9_Pseudo>(NAME#"_sdwa9")>, + VOPC_SDWA9e <op{7-0}, !cast<VOPC_SDWA9_Pseudo>(NAME#"_sdwa9").Pfl>; + def : VOPCInstAlias <!cast<VOP3_Pseudo>(NAME#"_e64"), !cast<Instruction>(NAME#"_e32_vi")> { let AssemblerPredicate = isVI; diff --git a/llvm/lib/Target/AMDGPU/VOPInstructions.td b/llvm/lib/Target/AMDGPU/VOPInstructions.td index 69906c419db..4da654f84f9 100644 --- a/llvm/lib/Target/AMDGPU/VOPInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOPInstructions.td @@ -293,11 +293,52 @@ class VOP_SDWAe<VOPProfile P> : Enc64 { let Inst{44-43} = !if(P.EmitDst, dst_unused{1-0}, SDWA.UNUSED_PRESERVE); let Inst{45} = !if(P.HasSDWAClamp, clamp{0}, 0); let Inst{50-48} = !if(P.HasSrc0, src0_sel{2-0}, SDWA.DWORD); - let Inst{53-52} = !if(P.HasSrc0FloatMods, src0_modifiers{1-0}, 0); let Inst{51} = !if(P.HasSrc0IntMods, src0_modifiers{0}, 0); + let Inst{53-52} = !if(P.HasSrc0FloatMods, src0_modifiers{1-0}, 0); let Inst{58-56} = !if(P.HasSrc1, src1_sel{2-0}, SDWA.DWORD); + let Inst{59} = !if(P.HasSrc1IntMods, src1_modifiers{0}, 0); let Inst{61-60} = !if(P.HasSrc1FloatMods, src1_modifiers{1-0}, 0); +} + +// gfx9 SDWA basic encoding +class VOP_SDWA9e<VOPProfile P> : Enc64 { + bits<9> src0; // {src0_sgpr{0}, src0{7-0}} + bits<3> src0_sel; + bits<2> src0_modifiers; 
// float: {abs,neg}, int {sext} + bits<3> src1_sel; + bits<2> src1_modifiers; + bits<1> src1_sgpr; + + let Inst{39-32} = !if(P.HasSrc0, src0{7-0}, 0); + let Inst{50-48} = !if(P.HasSrc0, src0_sel{2-0}, SDWA.DWORD); + let Inst{51} = !if(P.HasSrc0IntMods, src0_modifiers{0}, 0); + let Inst{53-52} = !if(P.HasSrc0FloatMods, src0_modifiers{1-0}, 0); + let Inst{55} = !if(P.HasSrc0, src0{8}, 0); + let Inst{58-56} = !if(P.HasSrc1, src1_sel{2-0}, SDWA.DWORD); let Inst{59} = !if(P.HasSrc1IntMods, src1_modifiers{0}, 0); + let Inst{61-60} = !if(P.HasSrc1FloatMods, src1_modifiers{1-0}, 0); + let Inst{63} = 0; // src1_sgpr - should be specified in subclass +} + +// gfx9 SDWA-A +class VOP_SDWA9Ae<VOPProfile P> : VOP_SDWA9e<P> { + bits<3> dst_sel; + bits<2> dst_unused; + bits<1> clamp; + bits<2> omod; + + let Inst{42-40} = !if(P.EmitDst, dst_sel{2-0}, SDWA.DWORD); + let Inst{44-43} = !if(P.EmitDst, dst_unused{1-0}, SDWA.UNUSED_PRESERVE); + let Inst{45} = !if(P.HasSDWAClamp, clamp{0}, 0); + let Inst{47-46} = !if(P.HasSDWAOMod, omod{1-0}, 0); +} + +// gfx9 SDWA-B +class VOP_SDWA9Be<VOPProfile P> : VOP_SDWA9e<P> { + bits<8> sdst; // {vcc_sdst{0}, sdst{6-0}} + + let Inst{46-40} = !if(P.EmitDst, sdst{6-0}, 0); + let Inst{47} = !if(P.EmitDst, sdst{7}, 0); } class VOP_SDWA_Pseudo <string opName, VOPProfile P, list<dag> pattern=[]> : @@ -331,6 +372,50 @@ class VOP_SDWA_Pseudo <string opName, VOPProfile P, list<dag> pattern=[]> : VOPProfile Pfl = P; } +// GFX9 adds two features to SDWA: +// 1. Add 3 fields to the SDWA microcode word: S0, S1 and OMOD. +// a. S0 and S1 indicate that source 0 and 1 respectively are SGPRs rather +// than VGPRs (at most 1 can be an SGPR); +// b. OMOD is the standard output modifier (result *2, *4, /2) +// 2. Add a new version of the SDWA microcode word for VOPC: SDWAB. This +// replaces OMOD and the dest fields with SD and SDST (SGPR destination) +// field. +// a. When SD=1, the SDST is used as the destination for the compare result; +// b.when SD=0, VCC is used. 
+// +// In GFX9, V_MAC_F16, V_MAC_F32 opcodes cannot be used with SDWA + +class VOP_SDWA9_Pseudo <string opName, VOPProfile P, list<dag> pattern=[]> : + InstSI <P.OutsSDWA9, P.InsSDWA9, "", pattern>, + VOP <opName>, + SIMCInstr <opName#"_sdwa9", SIEncodingFamily.NONE>, + MnemonicAlias <opName#"_sdwa9", opName> { + + let isPseudo = 1; + let isCodeGenOnly = 1; + let UseNamedOperandTable = 1; + + string Mnemonic = opName; + string AsmOperands = P.AsmSDWA9; + + let Size = 8; + let mayLoad = 0; + let mayStore = 0; + let hasSideEffects = 0; + + let VALU = 1; + let SDWA = 1; + let Uses = [EXEC]; + + let SubtargetPredicate = !if(P.HasSDWA9, HasSDWA9, DisableInst); + let AssemblerPredicate = !if(P.HasSDWA9, HasSDWA9, DisableInst); + let AsmVariantName = !if(P.HasSDWA9, AMDGPUAsmVariants.SDWA9, + AMDGPUAsmVariants.Disable); + let DecoderNamespace = "SDWA9"; + + VOPProfile Pfl = P; +} + class VOP_SDWA_Real <VOP_SDWA_Pseudo ps> : InstSI <ps.OutOperandList, ps.InOperandList, ps.Mnemonic # ps.AsmOperands, []>, SIMCInstr <ps.PseudoInstr, SIEncodingFamily.VI> { @@ -358,6 +443,33 @@ class VOP_SDWA_Real <VOP_SDWA_Pseudo ps> : let TSFlags = ps.TSFlags; } +class VOP_SDWA9_Real <VOP_SDWA9_Pseudo ps> : + InstSI <ps.OutOperandList, ps.InOperandList, ps.Mnemonic # ps.AsmOperands, []>, + SIMCInstr <ps.PseudoInstr, SIEncodingFamily.VI> { + + let isPseudo = 0; + let isCodeGenOnly = 0; + + let Defs = ps.Defs; + let Uses = ps.Uses; + let SchedRW = ps.SchedRW; + let hasSideEffects = ps.hasSideEffects; + + let Constraints = ps.Constraints; + let DisableEncoding = ps.DisableEncoding; + + // Copy relevant pseudo op flags + let SubtargetPredicate = ps.SubtargetPredicate; + let AssemblerPredicate = ps.AssemblerPredicate; + let AsmMatchConverter = ps.AsmMatchConverter; + let AsmVariantName = ps.AsmVariantName; + let UseNamedOperandTable = ps.UseNamedOperandTable; + let DecoderNamespace = ps.DecoderNamespace; + let Constraints = ps.Constraints; + let DisableEncoding = ps.DisableEncoding; + let TSFlags = ps.TSFlags; +} + class VOP_DPPe<VOPProfile P> : Enc64 { bits<2> src0_modifiers; bits<8> src0; |
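The let Inst{...} assignments in VOP_SDWA9e / VOP_SDWA9Ae above can be summarized as a packer for the second dword of the GFX9 SDWA word. A rough sketch, assuming float-style source modifiers (an integer sext would use bits 51/59 instead) and with the struct and function names invented for illustration:

    #include <cstdint>

    struct SDWA9AFields {
      uint16_t Src0;      // 9 bits: {sgpr-flag, reg-enc[7:0]}
      uint8_t  DstSel;    // 3 bits
      uint8_t  DstUnused; // 2 bits
      bool     Clamp;
      uint8_t  OMod;      // 2 bits
      uint8_t  Src0Sel;   // 3 bits
      uint8_t  Src0Mods;  // 2 bits, float: {abs, neg}
      uint8_t  Src1Sel;   // 3 bits
      uint8_t  Src1Mods;  // 2 bits
      bool     Src1SGPR;
    };

    // Returns bits 63..32 of the SDWA-A word, shifted down to 32 bits.
    uint32_t packSDWA9A(const SDWA9AFields &F) {
      uint64_t Inst = 0;
      Inst |= uint64_t(F.Src0 & 0xFF)      << 32; // Inst{39-32} src0[7:0]
      Inst |= uint64_t(F.DstSel & 0x7)     << 40; // Inst{42-40}
      Inst |= uint64_t(F.DstUnused & 0x3)  << 43; // Inst{44-43}
      Inst |= uint64_t(F.Clamp ? 1 : 0)    << 45; // Inst{45}
      Inst |= uint64_t(F.OMod & 0x3)       << 46; // Inst{47-46}
      Inst |= uint64_t(F.Src0Sel & 0x7)    << 48; // Inst{50-48}
      Inst |= uint64_t(F.Src0Mods & 0x3)   << 52; // Inst{53-52}
      Inst |= uint64_t((F.Src0 >> 8) & 1)  << 55; // Inst{55} src0 is an SGPR
      Inst |= uint64_t(F.Src1Sel & 0x7)    << 56; // Inst{58-56}
      Inst |= uint64_t(F.Src1Mods & 0x3)   << 60; // Inst{61-60}
      Inst |= uint64_t(F.Src1SGPR ? 1 : 0) << 63; // Inst{63}
      return uint32_t(Inst >> 32);
    }

Variant SDWA-B (VOP_SDWA9Be) keeps the same src0/src1 fields but replaces dst_sel/dst_unused/clamp/omod at bits 47-40 with the SD bit and the 7-bit SDST encoding, which is exactly what getSDWA9VopcDstEncoding emits.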