diff options
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp | 59 |
-rw-r--r-- | llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp | 452 |
-rw-r--r-- | llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp | 154 |
-rw-r--r-- | llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h | 25 |
-rw-r--r-- | llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 58 |
-rw-r--r-- | llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h | 25 |
6 files changed, 698 insertions, 75 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp index 8af37fa1615..c277bb4187b 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -237,7 +237,14 @@ void AMDGPUAsmPrinter::EmitFunctionBodyEnd() { SmallString<128> KernelName; getNameWithPrefix(KernelName, &MF->getFunction()); getTargetStreamer()->EmitAmdhsaKernelDescriptor( - KernelName, getAmdhsaKernelDescriptor(*MF, CurrentProgramInfo)); + *getSTI(), KernelName, getAmdhsaKernelDescriptor(*MF, CurrentProgramInfo), + CurrentProgramInfo.NumVGPRsForWavesPerEU, + CurrentProgramInfo.NumSGPRsForWavesPerEU - + IsaInfo::getNumExtraSGPRs(getSTI()->getFeatureBits(), + CurrentProgramInfo.VCCUsed, + CurrentProgramInfo.FlatUsed), + CurrentProgramInfo.VCCUsed, CurrentProgramInfo.FlatUsed, + hasXNACK(*getSTI())); Streamer.PopSection(); } @@ -559,30 +566,10 @@ static bool hasAnyNonFlatUseOfReg(const MachineRegisterInfo &MRI, return false; } -static unsigned getNumExtraSGPRs(const SISubtarget &ST, - bool VCCUsed, - bool FlatScrUsed) { - unsigned ExtraSGPRs = 0; - if (VCCUsed) - ExtraSGPRs = 2; - - if (ST.getGeneration() < SISubtarget::VOLCANIC_ISLANDS) { - if (FlatScrUsed) - ExtraSGPRs = 4; - } else { - if (ST.isXNACKEnabled()) - ExtraSGPRs = 4; - - if (FlatScrUsed) - ExtraSGPRs = 6; - } - - return ExtraSGPRs; -} - int32_t AMDGPUAsmPrinter::SIFunctionResourceInfo::getTotalNumSGPRs( const SISubtarget &ST) const { - return NumExplicitSGPR + getNumExtraSGPRs(ST, UsesVCC, UsesFlatScratch); + return NumExplicitSGPR + IsaInfo::getNumExtraSGPRs(ST.getFeatureBits(), + UsesVCC, UsesFlatScratch); } AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage( @@ -777,8 +764,9 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage( // conservative guesses. 
// 48 SGPRs - vcc, - flat_scr, -xnack - int MaxSGPRGuess = 47 - getNumExtraSGPRs(ST, true, - ST.hasFlatAddressSpace()); + int MaxSGPRGuess = + 47 - IsaInfo::getNumExtraSGPRs(ST.getFeatureBits(), true, + ST.hasFlatAddressSpace()); MaxSGPR = std::max(MaxSGPR, MaxSGPRGuess); MaxVGPR = std::max(MaxVGPR, 23); @@ -838,9 +826,11 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo, const SIInstrInfo *TII = STM.getInstrInfo(); const SIRegisterInfo *RI = &TII->getRegisterInfo(); - unsigned ExtraSGPRs = getNumExtraSGPRs(STM, - ProgInfo.VCCUsed, - ProgInfo.FlatUsed); + // TODO(scott.linder): The calculations related to SGPR/VGPR blocks are + // duplicated in part in AMDGPUAsmParser::calculateGPRBlocks, and could be + // unified. + unsigned ExtraSGPRs = IsaInfo::getNumExtraSGPRs( + STM.getFeatureBits(), ProgInfo.VCCUsed, ProgInfo.FlatUsed); unsigned ExtraVGPRs = STM.getReservedNumVGPRs(MF); // Check the addressable register limit before we add ExtraSGPRs. @@ -923,15 +913,10 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo, Ctx.diagnose(Diag); } - // SGPRBlocks is actual number of SGPR blocks minus 1. - ProgInfo.SGPRBlocks = alignTo(ProgInfo.NumSGPRsForWavesPerEU, - STM.getSGPREncodingGranule()); - ProgInfo.SGPRBlocks = ProgInfo.SGPRBlocks / STM.getSGPREncodingGranule() - 1; - - // VGPRBlocks is actual number of VGPR blocks minus 1. - ProgInfo.VGPRBlocks = alignTo(ProgInfo.NumVGPRsForWavesPerEU, - STM.getVGPREncodingGranule()); - ProgInfo.VGPRBlocks = ProgInfo.VGPRBlocks / STM.getVGPREncodingGranule() - 1; + ProgInfo.SGPRBlocks = IsaInfo::getNumSGPRBlocks( + STM.getFeatureBits(), ProgInfo.NumSGPRsForWavesPerEU); + ProgInfo.VGPRBlocks = IsaInfo::getNumVGPRBlocks( + STM.getFeatureBits(), ProgInfo.NumVGPRsForWavesPerEU); // Record first reserved VGPR and number of reserved VGPRs. ProgInfo.ReservedVGPRFirst = STM.debuggerReserveRegs() ? 
ProgInfo.NumVGPR : 0; diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index 6ae561d12ff..31e2885c833 100644 --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -42,6 +42,7 @@ #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Support/AMDGPUMetadata.h" +#include "llvm/Support/AMDHSAKernelDescriptor.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/ErrorHandling.h" @@ -61,6 +62,7 @@ using namespace llvm; using namespace llvm::AMDGPU; +using namespace llvm::amdhsa; namespace { @@ -845,6 +847,27 @@ class AMDGPUAsmParser : public MCTargetAsmParser { private: bool ParseAsAbsoluteExpression(uint32_t &Ret); + bool OutOfRangeError(SMRange Range); + /// Calculate VGPR/SGPR blocks required for given target, reserved + /// registers, and user-specified NextFreeXGPR values. + /// + /// \param Features [in] Target features, used for bug corrections. + /// \param VCCUsed [in] Whether VCC special SGPR is reserved. + /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved. + /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved. + /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one. + /// \param VGPRRange [in] Token range, used for VGPR diagnostics. + /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one. + /// \param SGPRRange [in] Token range, used for SGPR diagnostics. + /// \param VGPRBlocks [out] Result VGPR block count. + /// \param SGPRBlocks [out] Result SGPR block count. 
+ bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed, + bool FlatScrUsed, bool XNACKUsed, + unsigned NextFreeVGPR, SMRange VGPRRange, + unsigned NextFreeSGPR, SMRange SGPRRange, + unsigned &VGPRBlocks, unsigned &SGPRBlocks); + bool ParseDirectiveAMDGCNTarget(); + bool ParseDirectiveAMDHSAKernel(); bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor); bool ParseDirectiveHSACodeObjectVersion(); bool ParseDirectiveHSACodeObjectISA(); @@ -863,6 +886,10 @@ private: bool ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg, unsigned& RegNum, unsigned& RegWidth, unsigned *DwordRegIndex); + Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind); + void initializeGprCountSymbol(RegisterKind RegKind); + bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex, + unsigned RegWidth); void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands, bool IsAtomic, bool IsAtomicReturn, bool IsLds = false); void cvtDSImpl(MCInst &Inst, const OperandVector &Operands, @@ -896,15 +923,25 @@ public: AMDGPU::IsaInfo::IsaVersion ISA = AMDGPU::IsaInfo::getIsaVersion(getFeatureBits()); MCContext &Ctx = getContext(); - MCSymbol *Sym = - Ctx.getOrCreateSymbol(Twine(".option.machine_version_major")); - Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx)); - Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor")); - Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx)); - Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping")); - Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx)); + if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { + MCSymbol *Sym = + Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number")); + Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx)); + } else { + MCSymbol *Sym = + Ctx.getOrCreateSymbol(Twine(".option.machine_version_major")); + Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx)); + Sym = 
Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor")); + Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx)); + Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping")); + Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx)); + } + if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { + initializeGprCountSymbol(IS_VGPR); + initializeGprCountSymbol(IS_SGPR); + } else + KernelScope.initialize(getContext()); } - KernelScope.initialize(getContext()); } bool hasXNACK() const { @@ -1769,6 +1806,54 @@ bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, return true; } +Optional<StringRef> +AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) { + switch (RegKind) { + case IS_VGPR: + return StringRef(".amdgcn.next_free_vgpr"); + case IS_SGPR: + return StringRef(".amdgcn.next_free_sgpr"); + default: + return None; + } +} + +void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) { + auto SymbolName = getGprCountSymbolName(RegKind); + assert(SymbolName && "initializing invalid register kind"); + MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); + Sym->setVariableValue(MCConstantExpr::create(0, getContext())); +} + +bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind, + unsigned DwordRegIndex, + unsigned RegWidth) { + // Symbols are only defined for GCN targets + if (AMDGPU::IsaInfo::getIsaVersion(getFeatureBits()).Major < 6) + return true; + + auto SymbolName = getGprCountSymbolName(RegKind); + if (!SymbolName) + return true; + MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); + + int64_t NewMax = DwordRegIndex + RegWidth - 1; + int64_t OldCount; + + if (!Sym->isVariable()) + return !Error(getParser().getTok().getLoc(), + ".amdgcn.next_free_{v,s}gpr symbols must be variable"); + if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount)) + return !Error( + getParser().getTok().getLoc(), + ".amdgcn.next_free_{v,s}gpr symbols must be 
absolute expressions"); + + if (OldCount <= NewMax) + Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext())); + + return true; +} + std::unique_ptr<AMDGPUOperand> AMDGPUAsmParser::parseRegister() { const auto &Tok = Parser.getTok(); SMLoc StartLoc = Tok.getLoc(); @@ -1779,7 +1864,11 @@ std::unique_ptr<AMDGPUOperand> AMDGPUAsmParser::parseRegister() { if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, &DwordRegIndex)) { return nullptr; } - KernelScope.usesRegister(RegKind, DwordRegIndex, RegWidth); + if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { + if (!updateGprCountSymbols(RegKind, DwordRegIndex, RegWidth)) + return nullptr; + } else + KernelScope.usesRegister(RegKind, DwordRegIndex, RegWidth); return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc, false); } @@ -2538,6 +2627,320 @@ bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major, return false; } +bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() { + if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) + return TokError("directive only supported for amdgcn architecture"); + + std::string Target; + + SMLoc TargetStart = getTok().getLoc(); + if (getParser().parseEscapedString(Target)) + return true; + SMRange TargetRange = SMRange(TargetStart, getTok().getLoc()); + + std::string ExpectedTarget; + raw_string_ostream ExpectedTargetOS(ExpectedTarget); + IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS); + + if (Target != ExpectedTargetOS.str()) + return getParser().Error(TargetRange.Start, "target must match options", + TargetRange); + + getTargetStreamer().EmitDirectiveAMDGCNTarget(Target); + return false; +} + +bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) { + return getParser().Error(Range.Start, "value out of range", Range); +} + +bool AMDGPUAsmParser::calculateGPRBlocks( + const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed, + bool XNACKUsed, unsigned NextFreeVGPR, SMRange VGPRRange, + unsigned NextFreeSGPR, SMRange SGPRRange, unsigned 
&VGPRBlocks, + unsigned &SGPRBlocks) { + // TODO(scott.linder): These calculations are duplicated from + // AMDGPUAsmPrinter::getSIProgramInfo and could be unified. + IsaInfo::IsaVersion Version = IsaInfo::getIsaVersion(Features); + + unsigned NumVGPRs = NextFreeVGPR; + unsigned NumSGPRs = NextFreeSGPR; + unsigned MaxAddressableNumSGPRs = IsaInfo::getAddressableNumSGPRs(Features); + + if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) && + NumSGPRs > MaxAddressableNumSGPRs) + return OutOfRangeError(SGPRRange); + + NumSGPRs += + IsaInfo::getNumExtraSGPRs(Features, VCCUsed, FlatScrUsed, XNACKUsed); + + if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) && + NumSGPRs > MaxAddressableNumSGPRs) + return OutOfRangeError(SGPRRange); + + if (Features.test(FeatureSGPRInitBug)) + NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG; + + VGPRBlocks = IsaInfo::getNumVGPRBlocks(Features, NumVGPRs); + SGPRBlocks = IsaInfo::getNumSGPRBlocks(Features, NumSGPRs); + + return false; +} + +bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { + if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) + return TokError("directive only supported for amdgcn architecture"); + + if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) + return TokError("directive only supported for amdhsa OS"); + + StringRef KernelName; + if (getParser().parseIdentifier(KernelName)) + return true; + + kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(); + + StringSet<> Seen; + + IsaInfo::IsaVersion IVersion = + IsaInfo::getIsaVersion(getSTI().getFeatureBits()); + + SMRange VGPRRange; + uint64_t NextFreeVGPR = 0; + SMRange SGPRRange; + uint64_t NextFreeSGPR = 0; + unsigned UserSGPRCount = 0; + bool ReserveVCC = true; + bool ReserveFlatScr = true; + bool ReserveXNACK = hasXNACK(); + + while (true) { + while (getLexer().is(AsmToken::EndOfStatement)) + Lex(); + + if (getLexer().isNot(AsmToken::Identifier)) + return TokError("expected .amdhsa_ directive or .end_amdhsa_kernel"); + + 
StringRef ID = getTok().getIdentifier(); + SMRange IDRange = getTok().getLocRange(); + Lex(); + + if (ID == ".end_amdhsa_kernel") + break; + + if (Seen.find(ID) != Seen.end()) + return TokError(".amdhsa_ directives cannot be repeated"); + Seen.insert(ID); + + SMLoc ValStart = getTok().getLoc(); + int64_t IVal; + if (getParser().parseAbsoluteExpression(IVal)) + return true; + SMLoc ValEnd = getTok().getLoc(); + SMRange ValRange = SMRange(ValStart, ValEnd); + + if (IVal < 0) + return OutOfRangeError(ValRange); + + uint64_t Val = IVal; + +#define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \ + if (!isUInt<ENTRY##_WIDTH>(VALUE)) \ + return OutOfRangeError(RANGE); \ + AMDHSA_BITS_SET(FIELD, ENTRY, VALUE); + + if (ID == ".amdhsa_group_segment_fixed_size") { + if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val)) + return OutOfRangeError(ValRange); + KD.group_segment_fixed_size = Val; + } else if (ID == ".amdhsa_private_segment_fixed_size") { + if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val)) + return OutOfRangeError(ValRange); + KD.private_segment_fixed_size = Val; + } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") { + PARSE_BITS_ENTRY(KD.kernel_code_properties, + KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, + Val, ValRange); + UserSGPRCount++; + } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") { + PARSE_BITS_ENTRY(KD.kernel_code_properties, + KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val, + ValRange); + UserSGPRCount++; + } else if (ID == ".amdhsa_user_sgpr_queue_ptr") { + PARSE_BITS_ENTRY(KD.kernel_code_properties, + KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val, + ValRange); + UserSGPRCount++; + } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") { + PARSE_BITS_ENTRY(KD.kernel_code_properties, + KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR, + Val, ValRange); + UserSGPRCount++; + } else if (ID == ".amdhsa_user_sgpr_dispatch_id") { + PARSE_BITS_ENTRY(KD.kernel_code_properties, + 
KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val, + ValRange); + UserSGPRCount++; + } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") { + PARSE_BITS_ENTRY(KD.kernel_code_properties, + KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val, + ValRange); + UserSGPRCount++; + } else if (ID == ".amdhsa_user_sgpr_private_segment_size") { + PARSE_BITS_ENTRY(KD.kernel_code_properties, + KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, + Val, ValRange); + UserSGPRCount++; + } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") { + PARSE_BITS_ENTRY( + KD.compute_pgm_rsrc2, + COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET, Val, + ValRange); + } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") { + PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, + COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val, + ValRange); + } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") { + PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, + COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val, + ValRange); + } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") { + PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, + COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val, + ValRange); + } else if (ID == ".amdhsa_system_sgpr_workgroup_info") { + PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, + COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val, + ValRange); + } else if (ID == ".amdhsa_system_vgpr_workitem_id") { + PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, + COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val, + ValRange); + } else if (ID == ".amdhsa_next_free_vgpr") { + VGPRRange = ValRange; + NextFreeVGPR = Val; + } else if (ID == ".amdhsa_next_free_sgpr") { + SGPRRange = ValRange; + NextFreeSGPR = Val; + } else if (ID == ".amdhsa_reserve_vcc") { + if (!isUInt<1>(Val)) + return OutOfRangeError(ValRange); + ReserveVCC = Val; + } else if (ID == ".amdhsa_reserve_flat_scratch") { + if (IVersion.Major < 7) + return getParser().Error(IDRange.Start, "directive requires gfx7+", + IDRange); + if 
(!isUInt<1>(Val)) + return OutOfRangeError(ValRange); + ReserveFlatScr = Val; + } else if (ID == ".amdhsa_reserve_xnack_mask") { + if (IVersion.Major < 8) + return getParser().Error(IDRange.Start, "directive requires gfx8+", + IDRange); + if (!isUInt<1>(Val)) + return OutOfRangeError(ValRange); + ReserveXNACK = Val; + } else if (ID == ".amdhsa_float_round_mode_32") { + PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, + COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange); + } else if (ID == ".amdhsa_float_round_mode_16_64") { + PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, + COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange); + } else if (ID == ".amdhsa_float_denorm_mode_32") { + PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, + COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange); + } else if (ID == ".amdhsa_float_denorm_mode_16_64") { + PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, + COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val, + ValRange); + } else if (ID == ".amdhsa_dx10_clamp") { + PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, + COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange); + } else if (ID == ".amdhsa_ieee_mode") { + PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, + Val, ValRange); + } else if (ID == ".amdhsa_fp16_overflow") { + if (IVersion.Major < 9) + return getParser().Error(IDRange.Start, "directive requires gfx9+", + IDRange); + PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val, + ValRange); + } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") { + PARSE_BITS_ENTRY( + KD.compute_pgm_rsrc2, + COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val, + ValRange); + } else if (ID == ".amdhsa_exception_fp_denorm_src") { + PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, + COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, + Val, ValRange); + } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") { + PARSE_BITS_ENTRY( + KD.compute_pgm_rsrc2, + COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val, + 
ValRange); + } else if (ID == ".amdhsa_exception_fp_ieee_overflow") { + PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, + COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, + Val, ValRange); + } else if (ID == ".amdhsa_exception_fp_ieee_underflow") { + PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, + COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, + Val, ValRange); + } else if (ID == ".amdhsa_exception_fp_ieee_inexact") { + PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, + COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, + Val, ValRange); + } else if (ID == ".amdhsa_exception_int_div_zero") { + PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, + COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, + Val, ValRange); + } else { + return getParser().Error(IDRange.Start, + "unknown .amdhsa_kernel directive", IDRange); + } + +#undef PARSE_BITS_ENTRY + } + + if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end()) + return TokError(".amdhsa_next_free_vgpr directive is required"); + + if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end()) + return TokError(".amdhsa_next_free_sgpr directive is required"); + + unsigned VGPRBlocks; + unsigned SGPRBlocks; + if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr, + ReserveXNACK, NextFreeVGPR, VGPRRange, NextFreeSGPR, + SGPRRange, VGPRBlocks, SGPRBlocks)) + return true; + + if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>( + VGPRBlocks)) + return OutOfRangeError(VGPRRange); + AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, + COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks); + + if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>( + SGPRBlocks)) + return OutOfRangeError(SGPRRange); + AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, + COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, + SGPRBlocks); + + if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount)) + return TokError("too many user SGPRs enabled"); + AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, + 
UserSGPRCount); + + getTargetStreamer().EmitAmdhsaKernelDescriptor( + getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC, + ReserveFlatScr, ReserveXNACK); + return false; +} + bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() { uint32_t Major; uint32_t Minor; @@ -2657,7 +3060,8 @@ bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() { getTargetStreamer().EmitAMDGPUSymbolType(KernelName, ELF::STT_AMDGPU_HSA_KERNEL); Lex(); - KernelScope.initialize(getContext()); + if (!AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) + KernelScope.initialize(getContext()); return false; } @@ -2761,20 +3165,28 @@ bool AMDGPUAsmParser::ParseDirectivePALMetadata() { bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { StringRef IDVal = DirectiveID.getString(); - if (IDVal == ".hsa_code_object_version") - return ParseDirectiveHSACodeObjectVersion(); + if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { + if (IDVal == ".amdgcn_target") + return ParseDirectiveAMDGCNTarget(); + + if (IDVal == ".amdhsa_kernel") + return ParseDirectiveAMDHSAKernel(); + } else { + if (IDVal == ".hsa_code_object_version") + return ParseDirectiveHSACodeObjectVersion(); - if (IDVal == ".hsa_code_object_isa") - return ParseDirectiveHSACodeObjectISA(); + if (IDVal == ".hsa_code_object_isa") + return ParseDirectiveHSACodeObjectISA(); - if (IDVal == ".amd_kernel_code_t") - return ParseDirectiveAMDKernelCodeT(); + if (IDVal == ".amd_kernel_code_t") + return ParseDirectiveAMDKernelCodeT(); - if (IDVal == ".amdgpu_hsa_kernel") - return ParseDirectiveAMDGPUHsaKernel(); + if (IDVal == ".amdgpu_hsa_kernel") + return ParseDirectiveAMDGPUHsaKernel(); - if (IDVal == ".amd_amdgpu_isa") - return ParseDirectiveISAVersion(); + if (IDVal == ".amd_amdgpu_isa") + return ParseDirectiveISAVersion(); + } if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin) return ParseDirectiveHSAMetadata(); diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp 
b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp index 38dd0630159..6a41e3f650b 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp @@ -133,9 +133,12 @@ AMDGPUTargetAsmStreamer::AMDGPUTargetAsmStreamer(MCStreamer &S, formatted_raw_ostream &OS) : AMDGPUTargetStreamer(S), OS(OS) { } -void -AMDGPUTargetAsmStreamer::EmitDirectiveHSACodeObjectVersion(uint32_t Major, - uint32_t Minor) { +void AMDGPUTargetAsmStreamer::EmitDirectiveAMDGCNTarget(StringRef Target) { + OS << "\t.amdgcn_target \"" << Target << "\"\n"; +} + +void AMDGPUTargetAsmStreamer::EmitDirectiveHSACodeObjectVersion( + uint32_t Major, uint32_t Minor) { OS << "\t.hsa_code_object_version " << Twine(Major) << "," << Twine(Minor) << '\n'; } @@ -197,9 +200,135 @@ bool AMDGPUTargetAsmStreamer::EmitPALMetadata( } void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor( - StringRef KernelName, - const amdhsa::kernel_descriptor_t &KernelDescriptor) { - // FIXME: not supported yet. 
+ const MCSubtargetInfo &STI, StringRef KernelName, + const amdhsa::kernel_descriptor_t &KD, uint64_t NextVGPR, uint64_t NextSGPR, + bool ReserveVCC, bool ReserveFlatScr, bool ReserveXNACK) { + amdhsa::kernel_descriptor_t DefaultKD = getDefaultAmdhsaKernelDescriptor(); + + IsaInfo::IsaVersion IVersion = IsaInfo::getIsaVersion(STI.getFeatureBits()); + + OS << "\t.amdhsa_kernel " << KernelName << '\n'; + +#define PRINT_IF_NOT_DEFAULT(STREAM, DIRECTIVE, KERNEL_DESC, \ + DEFAULT_KERNEL_DESC, MEMBER_NAME, FIELD_NAME) \ + if (AMDHSA_BITS_GET(KERNEL_DESC.MEMBER_NAME, FIELD_NAME) != \ + AMDHSA_BITS_GET(DEFAULT_KERNEL_DESC.MEMBER_NAME, FIELD_NAME)) \ + STREAM << "\t\t" << DIRECTIVE << " " \ + << AMDHSA_BITS_GET(KERNEL_DESC.MEMBER_NAME, FIELD_NAME) << '\n'; + + if (KD.group_segment_fixed_size != DefaultKD.group_segment_fixed_size) + OS << "\t\t.amdhsa_group_segment_fixed_size " << KD.group_segment_fixed_size + << '\n'; + if (KD.private_segment_fixed_size != DefaultKD.private_segment_fixed_size) + OS << "\t\t.amdhsa_private_segment_fixed_size " + << KD.private_segment_fixed_size << '\n'; + + PRINT_IF_NOT_DEFAULT( + OS, ".amdhsa_user_sgpr_private_segment_buffer", KD, DefaultKD, + kernel_code_properties, + amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER); + PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_user_sgpr_dispatch_ptr", KD, DefaultKD, + kernel_code_properties, + amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR); + PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_user_sgpr_queue_ptr", KD, DefaultKD, + kernel_code_properties, + amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR); + PRINT_IF_NOT_DEFAULT( + OS, ".amdhsa_user_sgpr_kernarg_segment_ptr", KD, DefaultKD, + kernel_code_properties, + amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR); + PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_user_sgpr_dispatch_id", KD, DefaultKD, + kernel_code_properties, + amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID); + PRINT_IF_NOT_DEFAULT( + OS, ".amdhsa_user_sgpr_flat_scratch_init", 
KD, DefaultKD, + kernel_code_properties, + amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT); + PRINT_IF_NOT_DEFAULT( + OS, ".amdhsa_user_sgpr_private_segment_size", KD, DefaultKD, + kernel_code_properties, + amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE); + PRINT_IF_NOT_DEFAULT( + OS, ".amdhsa_system_sgpr_private_segment_wavefront_offset", KD, DefaultKD, + compute_pgm_rsrc2, + amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET); + PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_system_sgpr_workgroup_id_x", KD, DefaultKD, + compute_pgm_rsrc2, + amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X); + PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_system_sgpr_workgroup_id_y", KD, DefaultKD, + compute_pgm_rsrc2, + amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y); + PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_system_sgpr_workgroup_id_z", KD, DefaultKD, + compute_pgm_rsrc2, + amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z); + PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_system_sgpr_workgroup_info", KD, DefaultKD, + compute_pgm_rsrc2, + amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO); + PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_system_vgpr_workitem_id", KD, DefaultKD, + compute_pgm_rsrc2, + amdhsa::COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID); + + // These directives are required. 
+ OS << "\t\t.amdhsa_next_free_vgpr " << NextVGPR << '\n'; + OS << "\t\t.amdhsa_next_free_sgpr " << NextSGPR << '\n'; + + if (!ReserveVCC) + OS << "\t\t.amdhsa_reserve_vcc " << ReserveVCC << '\n'; + if (IVersion.Major >= 7 && !ReserveFlatScr) + OS << "\t\t.amdhsa_reserve_flat_scratch " << ReserveFlatScr << '\n'; + if (IVersion.Major >= 8 && ReserveXNACK != hasXNACK(STI)) + OS << "\t\t.amdhsa_reserve_xnack_mask " << ReserveXNACK << '\n'; + + PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_float_round_mode_32", KD, DefaultKD, + compute_pgm_rsrc1, + amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32); + PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_float_round_mode_16_64", KD, DefaultKD, + compute_pgm_rsrc1, + amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64); + PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_float_denorm_mode_32", KD, DefaultKD, + compute_pgm_rsrc1, + amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32); + PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_float_denorm_mode_16_64", KD, DefaultKD, + compute_pgm_rsrc1, + amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64); + PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_dx10_clamp", KD, DefaultKD, + compute_pgm_rsrc1, + amdhsa::COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP); + PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_ieee_mode", KD, DefaultKD, + compute_pgm_rsrc1, + amdhsa::COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE); + if (IVersion.Major >= 9) + PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_fp16_overflow", KD, DefaultKD, + compute_pgm_rsrc1, + amdhsa::COMPUTE_PGM_RSRC1_FP16_OVFL); + PRINT_IF_NOT_DEFAULT( + OS, ".amdhsa_exception_fp_ieee_invalid_op", KD, DefaultKD, + compute_pgm_rsrc2, + amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION); + PRINT_IF_NOT_DEFAULT( + OS, ".amdhsa_exception_fp_denorm_src", KD, DefaultKD, compute_pgm_rsrc2, + amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE); + PRINT_IF_NOT_DEFAULT( + OS, ".amdhsa_exception_fp_ieee_div_zero", KD, DefaultKD, + compute_pgm_rsrc2, + amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO); + 
PRINT_IF_NOT_DEFAULT( + OS, ".amdhsa_exception_fp_ieee_overflow", KD, DefaultKD, + compute_pgm_rsrc2, + amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW); + PRINT_IF_NOT_DEFAULT( + OS, ".amdhsa_exception_fp_ieee_underflow", KD, DefaultKD, + compute_pgm_rsrc2, + amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW); + PRINT_IF_NOT_DEFAULT( + OS, ".amdhsa_exception_fp_ieee_inexact", KD, DefaultKD, compute_pgm_rsrc2, + amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT); + PRINT_IF_NOT_DEFAULT( + OS, ".amdhsa_exception_int_div_zero", KD, DefaultKD, compute_pgm_rsrc2, + amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO); +#undef PRINT_IF_NOT_DEFAULT + + OS << "\t.end_amdhsa_kernel\n"; } //===----------------------------------------------------------------------===// @@ -247,9 +376,10 @@ void AMDGPUTargetELFStreamer::EmitAMDGPUNote( S.PopSection(); } -void -AMDGPUTargetELFStreamer::EmitDirectiveHSACodeObjectVersion(uint32_t Major, - uint32_t Minor) { +void AMDGPUTargetELFStreamer::EmitDirectiveAMDGCNTarget(StringRef Target) {} + +void AMDGPUTargetELFStreamer::EmitDirectiveHSACodeObjectVersion( + uint32_t Major, uint32_t Minor) { EmitAMDGPUNote( MCConstantExpr::create(8, getContext()), @@ -370,8 +500,10 @@ bool AMDGPUTargetELFStreamer::EmitPALMetadata( } void AMDGPUTargetELFStreamer::EmitAmdhsaKernelDescriptor( - StringRef KernelName, - const amdhsa::kernel_descriptor_t &KernelDescriptor) { + const MCSubtargetInfo &STI, StringRef KernelName, + const amdhsa::kernel_descriptor_t &KernelDescriptor, uint64_t NextVGPR, + uint64_t NextSGPR, bool ReserveVCC, bool ReserveFlatScr, + bool ReserveXNACK) { auto &Streamer = getStreamer(); auto &Context = Streamer.getContext(); diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h index 4267b553af4..472da1b7359 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h +++ 
b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h @@ -40,6 +40,8 @@ public: AMDGPUTargetStreamer(MCStreamer &S) : MCTargetStreamer(S) {} + virtual void EmitDirectiveAMDGCNTarget(StringRef Target) = 0; + virtual void EmitDirectiveHSACodeObjectVersion(uint32_t Major, uint32_t Minor) = 0; @@ -65,14 +67,19 @@ public: virtual bool EmitPALMetadata(const AMDGPU::PALMD::Metadata &PALMetadata) = 0; virtual void EmitAmdhsaKernelDescriptor( - StringRef KernelName, - const amdhsa::kernel_descriptor_t &KernelDescriptor) = 0; + const MCSubtargetInfo &STI, StringRef KernelName, + const amdhsa::kernel_descriptor_t &KernelDescriptor, uint64_t NextVGPR, + uint64_t NextSGPR, bool ReserveVCC, bool ReserveFlatScr, + bool ReserveXNACK) = 0; }; class AMDGPUTargetAsmStreamer final : public AMDGPUTargetStreamer { formatted_raw_ostream &OS; public: AMDGPUTargetAsmStreamer(MCStreamer &S, formatted_raw_ostream &OS); + + void EmitDirectiveAMDGCNTarget(StringRef Target) override; + void EmitDirectiveHSACodeObjectVersion(uint32_t Major, uint32_t Minor) override; @@ -94,8 +101,10 @@ public: bool EmitPALMetadata(const AMDGPU::PALMD::Metadata &PALMetadata) override; void EmitAmdhsaKernelDescriptor( - StringRef KernelName, - const amdhsa::kernel_descriptor_t &KernelDescriptor) override; + const MCSubtargetInfo &STI, StringRef KernelName, + const amdhsa::kernel_descriptor_t &KernelDescriptor, uint64_t NextVGPR, + uint64_t NextSGPR, bool ReserveVCC, bool ReserveFlatScr, + bool ReserveXNACK) override; }; class AMDGPUTargetELFStreamer final : public AMDGPUTargetStreamer { @@ -109,6 +118,8 @@ public: MCELFStreamer &getStreamer(); + void EmitDirectiveAMDGCNTarget(StringRef Target) override; + void EmitDirectiveHSACodeObjectVersion(uint32_t Major, uint32_t Minor) override; @@ -130,8 +141,10 @@ public: bool EmitPALMetadata(const AMDGPU::PALMD::Metadata &PALMetadata) override; void EmitAmdhsaKernelDescriptor( - StringRef KernelName, - const amdhsa::kernel_descriptor_t &KernelDescriptor) override; 
+ const MCSubtargetInfo &STI, StringRef KernelName, + const amdhsa::kernel_descriptor_t &KernelDescriptor, uint64_t NextVGPR, + uint64_t NextSGPR, bool ReserveVCC, bool ReserveFlatScr, + bool ReserveXNACK) override; }; } diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index f7bd27ab04f..08b7a71cbcc 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -198,6 +198,10 @@ void streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream) { << ISAVersion.Major << ISAVersion.Minor << ISAVersion.Stepping; + + if (hasXNACK(*STI)) + Stream << "+xnack"; + Stream.flush(); } @@ -334,6 +338,39 @@ unsigned getMaxNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU, return std::min(MaxNumSGPRs, AddressableNumSGPRs); } +/* Returns 0, 2, 4 or 6 extra SGPRs. VCCUsed gives 2. When the ISA major version from Features is below 8, FlatScrUsed overrides that with 4. Otherwise XNACKUsed overrides it with 4 and FlatScrUsed then overrides it with 6. The checks run in that order, so the last one that applies decides the result. */unsigned getNumExtraSGPRs(const FeatureBitset &Features, bool VCCUsed, + bool FlatScrUsed, bool XNACKUsed) { + unsigned ExtraSGPRs = 0; + if (VCCUsed) + ExtraSGPRs = 2; + + IsaVersion Version = getIsaVersion(Features); + if (Version.Major < 8) { + if (FlatScrUsed) + ExtraSGPRs = 4; + } else { + if (XNACKUsed) + ExtraSGPRs = 4; + + if (FlatScrUsed) + ExtraSGPRs = 6; + } + + return ExtraSGPRs; +} + +/* Overload that forwards to the four-argument form, passing Features[AMDGPU::FeatureXNACK] as XNACKUsed. */unsigned getNumExtraSGPRs(const FeatureBitset &Features, bool VCCUsed, + bool FlatScrUsed) { + return getNumExtraSGPRs(Features, VCCUsed, FlatScrUsed, + Features[AMDGPU::FeatureXNACK]); +} + +/* Rounds max(1, NumSGPRs) up to a multiple of getSGPREncodingGranule(Features), divides by that granule and subtracts one. */unsigned getNumSGPRBlocks(const FeatureBitset &Features, unsigned NumSGPRs) { + NumSGPRs = alignTo(std::max(1u, NumSGPRs), getSGPREncodingGranule(Features)); + // SGPRBlocks is actual number of SGPR blocks minus 1. 
+ return NumSGPRs / getSGPREncodingGranule(Features) - 1; +} + unsigned getVGPRAllocGranule(const FeatureBitset &Features) { return 4; } @@ -370,6 +407,12 @@ unsigned getMaxNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU) { return std::min(MaxNumVGPRs, AddressableNumVGPRs); } +/* Rounds max(1, NumVGPRs) up to a multiple of getVGPREncodingGranule(Features), divides by that granule and subtracts one. */unsigned getNumVGPRBlocks(const FeatureBitset &Features, unsigned NumVGPRs) { + NumVGPRs = alignTo(std::max(1u, NumVGPRs), getVGPREncodingGranule(Features)); + // VGPRBlocks is actual number of VGPR blocks minus 1. + return NumVGPRs / getVGPREncodingGranule(Features) - 1; +} + } // end namespace IsaInfo void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header, @@ -399,6 +442,21 @@ void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header, Header.private_segment_alignment = 4; } +/* Builds a kernel descriptor that is zero-filled with memset, then sets four fields: FLOAT_DENORM_MODE_16_64 to FLOAT_DENORM_MODE_FLUSH_NONE, and ENABLE_DX10_CLAMP, ENABLE_IEEE_MODE and ENABLE_SGPR_WORKGROUP_ID_X to 1. Returns the descriptor by value. */amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor() { + amdhsa::kernel_descriptor_t KD; + memset(&KD, 0, sizeof(KD)); + AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, + amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, + amdhsa::FLOAT_DENORM_MODE_FLUSH_NONE); + AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, + amdhsa::COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, 1); + AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, + amdhsa::COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 1); + AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, + amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, 1); + return KD; +} + bool isGroupSegment(const GlobalValue *GV) { return GV->getType()->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS; } diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h index a59571c49c6..2ee19741acc 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -16,6 +16,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/IR/CallingConv.h" #include "llvm/MC/MCInstrDesc.h" +#include "llvm/Support/AMDHSAKernelDescriptor.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/ErrorHandling.h" #include <cstdint> @@ -28,12 +29,12 @@ class Argument; 
class FeatureBitset; class Function; class GlobalValue; -class MachineMemOperand; class MCContext; class MCRegisterClass; class MCRegisterInfo; class MCSection; class MCSubtargetInfo; +class MachineMemOperand; class Triple; namespace AMDGPU { @@ -138,6 +139,22 @@ unsigned getMinNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU); unsigned getMaxNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU, bool Addressable); +/// \returns Number of extra SGPRs implicitly required by given subtarget \p +/// Features when the given special registers are used. +unsigned getNumExtraSGPRs(const FeatureBitset &Features, bool VCCUsed, + bool FlatScrUsed, bool XNACKUsed); + +/// \returns Number of extra SGPRs implicitly required by given subtarget \p +/// Features when the given special registers are used. XNACK is inferred from +/// \p Features. +unsigned getNumExtraSGPRs(const FeatureBitset &Features, bool VCCUsed, + bool FlatScrUsed); + +/// \returns Number of SGPR blocks needed for given subtarget \p Features when +/// \p NumSGPRs are used. \p NumSGPRs should already include any special +/// register counts. +unsigned getNumSGPRBlocks(const FeatureBitset &Features, unsigned NumSGPRs); + /// \returns VGPR allocation granularity for given subtarget \p Features. unsigned getVGPRAllocGranule(const FeatureBitset &Features); @@ -158,6 +175,10 @@ unsigned getMinNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU); /// execution unit requirement for given subtarget \p Features. unsigned getMaxNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU); +/// \returns Number of VGPR blocks needed for given subtarget \p Features when +/// \p NumVGPRs are used. 
+unsigned getNumVGPRBlocks(const FeatureBitset &Features, unsigned NumVGPRs); + } // end namespace IsaInfo LLVM_READONLY @@ -203,6 +224,8 @@ int getMCOpcode(uint16_t Opcode, unsigned Gen); void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header, const FeatureBitset &Features); +/* \returns A kernel descriptor that is zero-filled apart from the few fields its definition sets explicitly. */ amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor(); + bool isGroupSegment(const GlobalValue *GV); bool isGlobalSegment(const GlobalValue *GV); bool isReadOnlySegment(const GlobalValue *GV); |