summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp')
-rw-r--r--llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp452
1 files changed, 432 insertions, 20 deletions
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 6ae561d12ff..31e2885c833 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -42,6 +42,7 @@
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/AMDGPUMetadata.h"
+#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
@@ -61,6 +62,7 @@
using namespace llvm;
using namespace llvm::AMDGPU;
+using namespace llvm::amdhsa;
namespace {
@@ -845,6 +847,27 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
private:
bool ParseAsAbsoluteExpression(uint32_t &Ret);
+ bool OutOfRangeError(SMRange Range);
+ /// Calculate VGPR/SGPR blocks required for given target, reserved
+ /// registers, and user-specified NextFreeXGPR values.
+ ///
+ /// \param Features [in] Target features, used for bug corrections.
+ /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
+ /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
+ /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
+ /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
+ /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
+ /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
+ /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
+ /// \param VGPRBlocks [out] Result VGPR block count.
+ /// \param SGPRBlocks [out] Result SGPR block count.
+ bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
+ bool FlatScrUsed, bool XNACKUsed,
+ unsigned NextFreeVGPR, SMRange VGPRRange,
+ unsigned NextFreeSGPR, SMRange SGPRRange,
+ unsigned &VGPRBlocks, unsigned &SGPRBlocks);
+ bool ParseDirectiveAMDGCNTarget();
+ bool ParseDirectiveAMDHSAKernel();
bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
bool ParseDirectiveHSACodeObjectVersion();
bool ParseDirectiveHSACodeObjectISA();
@@ -863,6 +886,10 @@ private:
bool ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg,
unsigned& RegNum, unsigned& RegWidth,
unsigned *DwordRegIndex);
+ Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
+ void initializeGprCountSymbol(RegisterKind RegKind);
+ bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
+ unsigned RegWidth);
void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
bool IsAtomic, bool IsAtomicReturn, bool IsLds = false);
void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
@@ -896,15 +923,25 @@ public:
AMDGPU::IsaInfo::IsaVersion ISA =
AMDGPU::IsaInfo::getIsaVersion(getFeatureBits());
MCContext &Ctx = getContext();
- MCSymbol *Sym =
- Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
- Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
- Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
- Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
- Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
- Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
+ if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
+ MCSymbol *Sym =
+ Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
+ Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
+ } else {
+ MCSymbol *Sym =
+ Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
+ Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
+ Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
+ Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
+ Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
+ Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
+ }
+ if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
+ initializeGprCountSymbol(IS_VGPR);
+ initializeGprCountSymbol(IS_SGPR);
+ } else
+ KernelScope.initialize(getContext());
}
- KernelScope.initialize(getContext());
}
bool hasXNACK() const {
@@ -1769,6 +1806,54 @@ bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
return true;
}
+Optional<StringRef>
+AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
+ switch (RegKind) {
+ case IS_VGPR:
+ return StringRef(".amdgcn.next_free_vgpr");
+ case IS_SGPR:
+ return StringRef(".amdgcn.next_free_sgpr");
+ default:
+ return None;
+ }
+}
+
+void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
+ auto SymbolName = getGprCountSymbolName(RegKind);
+ assert(SymbolName && "initializing invalid register kind");
+ MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
+ Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
+}
+
+bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
+ unsigned DwordRegIndex,
+ unsigned RegWidth) {
+ // Symbols are only defined for GCN targets
+ if (AMDGPU::IsaInfo::getIsaVersion(getFeatureBits()).Major < 6)
+ return true;
+
+ auto SymbolName = getGprCountSymbolName(RegKind);
+ if (!SymbolName)
+ return true;
+ MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
+
+ int64_t NewMax = DwordRegIndex + RegWidth - 1;
+ int64_t OldCount;
+
+ if (!Sym->isVariable())
+ return !Error(getParser().getTok().getLoc(),
+ ".amdgcn.next_free_{v,s}gpr symbols must be variable");
+ if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
+ return !Error(
+ getParser().getTok().getLoc(),
+ ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
+
+ if (OldCount <= NewMax)
+ Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
+
+ return true;
+}
+
std::unique_ptr<AMDGPUOperand> AMDGPUAsmParser::parseRegister() {
const auto &Tok = Parser.getTok();
SMLoc StartLoc = Tok.getLoc();
@@ -1779,7 +1864,11 @@ std::unique_ptr<AMDGPUOperand> AMDGPUAsmParser::parseRegister() {
if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, &DwordRegIndex)) {
return nullptr;
}
- KernelScope.usesRegister(RegKind, DwordRegIndex, RegWidth);
+ if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
+ if (!updateGprCountSymbols(RegKind, DwordRegIndex, RegWidth))
+ return nullptr;
+ } else
+ KernelScope.usesRegister(RegKind, DwordRegIndex, RegWidth);
return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc, false);
}
@@ -2538,6 +2627,320 @@ bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
return false;
}
+bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
+ if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
+ return TokError("directive only supported for amdgcn architecture");
+
+ std::string Target;
+
+ SMLoc TargetStart = getTok().getLoc();
+ if (getParser().parseEscapedString(Target))
+ return true;
+ SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
+
+ std::string ExpectedTarget;
+ raw_string_ostream ExpectedTargetOS(ExpectedTarget);
+ IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS);
+
+ if (Target != ExpectedTargetOS.str())
+ return getParser().Error(TargetRange.Start, "target must match options",
+ TargetRange);
+
+ getTargetStreamer().EmitDirectiveAMDGCNTarget(Target);
+ return false;
+}
+
+bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
+ return getParser().Error(Range.Start, "value out of range", Range);
+}
+
+bool AMDGPUAsmParser::calculateGPRBlocks(
+ const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
+ bool XNACKUsed, unsigned NextFreeVGPR, SMRange VGPRRange,
+ unsigned NextFreeSGPR, SMRange SGPRRange, unsigned &VGPRBlocks,
+ unsigned &SGPRBlocks) {
+ // TODO(scott.linder): These calculations are duplicated from
+ // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
+ IsaInfo::IsaVersion Version = IsaInfo::getIsaVersion(Features);
+
+ unsigned NumVGPRs = NextFreeVGPR;
+ unsigned NumSGPRs = NextFreeSGPR;
+ unsigned MaxAddressableNumSGPRs = IsaInfo::getAddressableNumSGPRs(Features);
+
+ if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
+ NumSGPRs > MaxAddressableNumSGPRs)
+ return OutOfRangeError(SGPRRange);
+
+ NumSGPRs +=
+ IsaInfo::getNumExtraSGPRs(Features, VCCUsed, FlatScrUsed, XNACKUsed);
+
+ if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
+ NumSGPRs > MaxAddressableNumSGPRs)
+ return OutOfRangeError(SGPRRange);
+
+ if (Features.test(FeatureSGPRInitBug))
+ NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
+
+ VGPRBlocks = IsaInfo::getNumVGPRBlocks(Features, NumVGPRs);
+ SGPRBlocks = IsaInfo::getNumSGPRBlocks(Features, NumSGPRs);
+
+ return false;
+}
+
+bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
+ if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
+ return TokError("directive only supported for amdgcn architecture");
+
+ if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
+ return TokError("directive only supported for amdhsa OS");
+
+ StringRef KernelName;
+ if (getParser().parseIdentifier(KernelName))
+ return true;
+
+ kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor();
+
+ StringSet<> Seen;
+
+ IsaInfo::IsaVersion IVersion =
+ IsaInfo::getIsaVersion(getSTI().getFeatureBits());
+
+ SMRange VGPRRange;
+ uint64_t NextFreeVGPR = 0;
+ SMRange SGPRRange;
+ uint64_t NextFreeSGPR = 0;
+ unsigned UserSGPRCount = 0;
+ bool ReserveVCC = true;
+ bool ReserveFlatScr = true;
+ bool ReserveXNACK = hasXNACK();
+
+ while (true) {
+ while (getLexer().is(AsmToken::EndOfStatement))
+ Lex();
+
+ if (getLexer().isNot(AsmToken::Identifier))
+ return TokError("expected .amdhsa_ directive or .end_amdhsa_kernel");
+
+ StringRef ID = getTok().getIdentifier();
+ SMRange IDRange = getTok().getLocRange();
+ Lex();
+
+ if (ID == ".end_amdhsa_kernel")
+ break;
+
+ if (Seen.find(ID) != Seen.end())
+ return TokError(".amdhsa_ directives cannot be repeated");
+ Seen.insert(ID);
+
+ SMLoc ValStart = getTok().getLoc();
+ int64_t IVal;
+ if (getParser().parseAbsoluteExpression(IVal))
+ return true;
+ SMLoc ValEnd = getTok().getLoc();
+ SMRange ValRange = SMRange(ValStart, ValEnd);
+
+ if (IVal < 0)
+ return OutOfRangeError(ValRange);
+
+ uint64_t Val = IVal;
+
+#define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \
+ if (!isUInt<ENTRY##_WIDTH>(VALUE)) \
+ return OutOfRangeError(RANGE); \
+ AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
+
+ if (ID == ".amdhsa_group_segment_fixed_size") {
+ if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
+ return OutOfRangeError(ValRange);
+ KD.group_segment_fixed_size = Val;
+ } else if (ID == ".amdhsa_private_segment_fixed_size") {
+ if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
+ return OutOfRangeError(ValRange);
+ KD.private_segment_fixed_size = Val;
+ } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
+ PARSE_BITS_ENTRY(KD.kernel_code_properties,
+ KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
+ Val, ValRange);
+ UserSGPRCount++;
+ } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
+ PARSE_BITS_ENTRY(KD.kernel_code_properties,
+ KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
+ ValRange);
+ UserSGPRCount++;
+ } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
+ PARSE_BITS_ENTRY(KD.kernel_code_properties,
+ KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
+ ValRange);
+ UserSGPRCount++;
+ } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
+ PARSE_BITS_ENTRY(KD.kernel_code_properties,
+ KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
+ Val, ValRange);
+ UserSGPRCount++;
+ } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
+ PARSE_BITS_ENTRY(KD.kernel_code_properties,
+ KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
+ ValRange);
+ UserSGPRCount++;
+ } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
+ PARSE_BITS_ENTRY(KD.kernel_code_properties,
+ KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
+ ValRange);
+ UserSGPRCount++;
+ } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
+ PARSE_BITS_ENTRY(KD.kernel_code_properties,
+ KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
+ Val, ValRange);
+ UserSGPRCount++;
+ } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
+ PARSE_BITS_ENTRY(
+ KD.compute_pgm_rsrc2,
+ COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET, Val,
+ ValRange);
+ } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
+ PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
+ COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
+ ValRange);
+ } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
+ PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
+ COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
+ ValRange);
+ } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
+ PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
+ COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
+ ValRange);
+ } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
+ PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
+ COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
+ ValRange);
+ } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
+ PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
+ COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
+ ValRange);
+ } else if (ID == ".amdhsa_next_free_vgpr") {
+ VGPRRange = ValRange;
+ NextFreeVGPR = Val;
+ } else if (ID == ".amdhsa_next_free_sgpr") {
+ SGPRRange = ValRange;
+ NextFreeSGPR = Val;
+ } else if (ID == ".amdhsa_reserve_vcc") {
+ if (!isUInt<1>(Val))
+ return OutOfRangeError(ValRange);
+ ReserveVCC = Val;
+ } else if (ID == ".amdhsa_reserve_flat_scratch") {
+ if (IVersion.Major < 7)
+ return getParser().Error(IDRange.Start, "directive requires gfx7+",
+ IDRange);
+ if (!isUInt<1>(Val))
+ return OutOfRangeError(ValRange);
+ ReserveFlatScr = Val;
+ } else if (ID == ".amdhsa_reserve_xnack_mask") {
+ if (IVersion.Major < 8)
+ return getParser().Error(IDRange.Start, "directive requires gfx8+",
+ IDRange);
+ if (!isUInt<1>(Val))
+ return OutOfRangeError(ValRange);
+ ReserveXNACK = Val;
+ } else if (ID == ".amdhsa_float_round_mode_32") {
+ PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
+ COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
+ } else if (ID == ".amdhsa_float_round_mode_16_64") {
+ PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
+ COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
+ } else if (ID == ".amdhsa_float_denorm_mode_32") {
+ PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
+ COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
+ } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
+ PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
+ COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
+ ValRange);
+ } else if (ID == ".amdhsa_dx10_clamp") {
+ PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
+ COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
+ } else if (ID == ".amdhsa_ieee_mode") {
+ PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
+ Val, ValRange);
+ } else if (ID == ".amdhsa_fp16_overflow") {
+ if (IVersion.Major < 9)
+ return getParser().Error(IDRange.Start, "directive requires gfx9+",
+ IDRange);
+ PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
+ ValRange);
+ } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
+ PARSE_BITS_ENTRY(
+ KD.compute_pgm_rsrc2,
+ COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
+ ValRange);
+ } else if (ID == ".amdhsa_exception_fp_denorm_src") {
+ PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
+ COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
+ Val, ValRange);
+ } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
+ PARSE_BITS_ENTRY(
+ KD.compute_pgm_rsrc2,
+ COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
+ ValRange);
+ } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
+ PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
+ COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
+ Val, ValRange);
+ } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
+ PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
+ COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
+ Val, ValRange);
+ } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
+ PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
+ COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
+ Val, ValRange);
+ } else if (ID == ".amdhsa_exception_int_div_zero") {
+ PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
+ COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
+ Val, ValRange);
+ } else {
+ return getParser().Error(IDRange.Start,
+ "unknown .amdhsa_kernel directive", IDRange);
+ }
+
+#undef PARSE_BITS_ENTRY
+ }
+
+ if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
+ return TokError(".amdhsa_next_free_vgpr directive is required");
+
+ if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
+ return TokError(".amdhsa_next_free_sgpr directive is required");
+
+ unsigned VGPRBlocks;
+ unsigned SGPRBlocks;
+ if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
+ ReserveXNACK, NextFreeVGPR, VGPRRange, NextFreeSGPR,
+ SGPRRange, VGPRBlocks, SGPRBlocks))
+ return true;
+
+ if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
+ VGPRBlocks))
+ return OutOfRangeError(VGPRRange);
+ AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
+ COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
+
+ if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
+ SGPRBlocks))
+ return OutOfRangeError(SGPRRange);
+ AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
+ COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
+ SGPRBlocks);
+
+ if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
+ return TokError("too many user SGPRs enabled");
+ AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
+ UserSGPRCount);
+
+ getTargetStreamer().EmitAmdhsaKernelDescriptor(
+ getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
+ ReserveFlatScr, ReserveXNACK);
+ return false;
+}
+
bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
uint32_t Major;
uint32_t Minor;
@@ -2657,7 +3060,8 @@ bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
ELF::STT_AMDGPU_HSA_KERNEL);
Lex();
- KernelScope.initialize(getContext());
+ if (!AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI()))
+ KernelScope.initialize(getContext());
return false;
}
@@ -2761,20 +3165,28 @@ bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
StringRef IDVal = DirectiveID.getString();
- if (IDVal == ".hsa_code_object_version")
- return ParseDirectiveHSACodeObjectVersion();
+ if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
+ if (IDVal == ".amdgcn_target")
+ return ParseDirectiveAMDGCNTarget();
+
+ if (IDVal == ".amdhsa_kernel")
+ return ParseDirectiveAMDHSAKernel();
+ } else {
+ if (IDVal == ".hsa_code_object_version")
+ return ParseDirectiveHSACodeObjectVersion();
- if (IDVal == ".hsa_code_object_isa")
- return ParseDirectiveHSACodeObjectISA();
+ if (IDVal == ".hsa_code_object_isa")
+ return ParseDirectiveHSACodeObjectISA();
- if (IDVal == ".amd_kernel_code_t")
- return ParseDirectiveAMDKernelCodeT();
+ if (IDVal == ".amd_kernel_code_t")
+ return ParseDirectiveAMDKernelCodeT();
- if (IDVal == ".amdgpu_hsa_kernel")
- return ParseDirectiveAMDGPUHsaKernel();
+ if (IDVal == ".amdgpu_hsa_kernel")
+ return ParseDirectiveAMDGPUHsaKernel();
- if (IDVal == ".amd_amdgpu_isa")
- return ParseDirectiveISAVersion();
+ if (IDVal == ".amd_amdgpu_isa")
+ return ParseDirectiveISAVersion();
+ }
if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
return ParseDirectiveHSAMetadata();
OpenPOWER on IntegriCloud