Diffstat (limited to 'llvm/lib/Target/AMDGPU')
 llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp                  |  59
 llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp         | 452
 llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp | 154
 llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h   |  25
 llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp              |  58
 llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h                |  25
 6 files changed, 698 insertions(+), 75 deletions(-)
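
In short, the patch adds assembler and streamer support for the code object v3 kernel descriptor: a .amdgcn_target directive, a .amdhsa_kernel/.end_amdhsa_kernel directive block, and .amdgcn.next_free_{v,s}gpr counter symbols. A hedged sketch of the surface syntax these changes accept, written as the kind of string an MC-based tool would feed to the assembler (directive names come from this patch; the target string follows streamIsaVersion's arch-vendor-os-env-gfxNNN format, while the kernel name and register counts are invented for the example):

    // Illustrative input only; kernel name and counts are hypothetical.
    static const char ExampleInput[] =
        ".amdgcn_target \"amdgcn-amd-amdhsa--gfx900+xnack\"\n" // must match the subtarget
        ".amdhsa_kernel minimal_kernel\n"
        "  .amdhsa_user_sgpr_kernarg_segment_ptr 1\n"
        "  .amdhsa_next_free_vgpr 8\n"  // required by ParseDirectiveAMDHSAKernel
        "  .amdhsa_next_free_sgpr 16\n" // required by ParseDirectiveAMDHSAKernel
        ".end_amdhsa_kernel\n";
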
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index 8af37fa1615..c277bb4187b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -237,7 +237,14 @@ void AMDGPUAsmPrinter::EmitFunctionBodyEnd() {
SmallString<128> KernelName;
getNameWithPrefix(KernelName, &MF->getFunction());
getTargetStreamer()->EmitAmdhsaKernelDescriptor(
- KernelName, getAmdhsaKernelDescriptor(*MF, CurrentProgramInfo));
+ *getSTI(), KernelName, getAmdhsaKernelDescriptor(*MF, CurrentProgramInfo),
+ CurrentProgramInfo.NumVGPRsForWavesPerEU,
+ CurrentProgramInfo.NumSGPRsForWavesPerEU -
+ IsaInfo::getNumExtraSGPRs(getSTI()->getFeatureBits(),
+ CurrentProgramInfo.VCCUsed,
+ CurrentProgramInfo.FlatUsed),
+ CurrentProgramInfo.VCCUsed, CurrentProgramInfo.FlatUsed,
+ hasXNACK(*getSTI()));
Streamer.PopSection();
}
@@ -559,30 +566,10 @@ static bool hasAnyNonFlatUseOfReg(const MachineRegisterInfo &MRI,
return false;
}
-static unsigned getNumExtraSGPRs(const SISubtarget &ST,
- bool VCCUsed,
- bool FlatScrUsed) {
- unsigned ExtraSGPRs = 0;
- if (VCCUsed)
- ExtraSGPRs = 2;
-
- if (ST.getGeneration() < SISubtarget::VOLCANIC_ISLANDS) {
- if (FlatScrUsed)
- ExtraSGPRs = 4;
- } else {
- if (ST.isXNACKEnabled())
- ExtraSGPRs = 4;
-
- if (FlatScrUsed)
- ExtraSGPRs = 6;
- }
-
- return ExtraSGPRs;
-}
-
int32_t AMDGPUAsmPrinter::SIFunctionResourceInfo::getTotalNumSGPRs(
const SISubtarget &ST) const {
- return NumExplicitSGPR + getNumExtraSGPRs(ST, UsesVCC, UsesFlatScratch);
+ return NumExplicitSGPR + IsaInfo::getNumExtraSGPRs(ST.getFeatureBits(),
+ UsesVCC, UsesFlatScratch);
}
AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
@@ -777,8 +764,9 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
// conservative guesses.
// 48 SGPRs - vcc, - flat_scr, -xnack
- int MaxSGPRGuess = 47 - getNumExtraSGPRs(ST, true,
- ST.hasFlatAddressSpace());
+ int MaxSGPRGuess =
+ 47 - IsaInfo::getNumExtraSGPRs(ST.getFeatureBits(), true,
+ ST.hasFlatAddressSpace());
MaxSGPR = std::max(MaxSGPR, MaxSGPRGuess);
MaxVGPR = std::max(MaxVGPR, 23);
@@ -838,9 +826,11 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
const SIInstrInfo *TII = STM.getInstrInfo();
const SIRegisterInfo *RI = &TII->getRegisterInfo();
- unsigned ExtraSGPRs = getNumExtraSGPRs(STM,
- ProgInfo.VCCUsed,
- ProgInfo.FlatUsed);
+ // TODO(scott.linder): The calculations related to SGPR/VGPR blocks are
+ // duplicated in part in AMDGPUAsmParser::calculateGPRBlocks, and could be
+ // unified.
+ unsigned ExtraSGPRs = IsaInfo::getNumExtraSGPRs(
+ STM.getFeatureBits(), ProgInfo.VCCUsed, ProgInfo.FlatUsed);
unsigned ExtraVGPRs = STM.getReservedNumVGPRs(MF);
// Check the addressable register limit before we add ExtraSGPRs.
@@ -923,15 +913,10 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
Ctx.diagnose(Diag);
}
- // SGPRBlocks is actual number of SGPR blocks minus 1.
- ProgInfo.SGPRBlocks = alignTo(ProgInfo.NumSGPRsForWavesPerEU,
- STM.getSGPREncodingGranule());
- ProgInfo.SGPRBlocks = ProgInfo.SGPRBlocks / STM.getSGPREncodingGranule() - 1;
-
- // VGPRBlocks is actual number of VGPR blocks minus 1.
- ProgInfo.VGPRBlocks = alignTo(ProgInfo.NumVGPRsForWavesPerEU,
- STM.getVGPREncodingGranule());
- ProgInfo.VGPRBlocks = ProgInfo.VGPRBlocks / STM.getVGPREncodingGranule() - 1;
+ ProgInfo.SGPRBlocks = IsaInfo::getNumSGPRBlocks(
+ STM.getFeatureBits(), ProgInfo.NumSGPRsForWavesPerEU);
+ ProgInfo.VGPRBlocks = IsaInfo::getNumVGPRBlocks(
+ STM.getFeatureBits(), ProgInfo.NumVGPRsForWavesPerEU);
// Record first reserved VGPR and number of reserved VGPRs.
ProgInfo.ReservedVGPRFirst = STM.debuggerReserveRegs() ? ProgInfo.NumVGPR : 0;
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 6ae561d12ff..31e2885c833 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -42,6 +42,7 @@
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/AMDGPUMetadata.h"
+#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
@@ -61,6 +62,7 @@
using namespace llvm;
using namespace llvm::AMDGPU;
+using namespace llvm::amdhsa;
namespace {
@@ -845,6 +847,27 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
private:
bool ParseAsAbsoluteExpression(uint32_t &Ret);
+ bool OutOfRangeError(SMRange Range);
+ /// Calculate VGPR/SGPR blocks required for given target, reserved
+ /// registers, and user-specified NextFreeXGPR values.
+ ///
+ /// \param Features [in] Target features, used for bug corrections.
+ /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
+ /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
+ /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
+ /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
+ /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
+ /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
+ /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
+ /// \param VGPRBlocks [out] Result VGPR block count.
+ /// \param SGPRBlocks [out] Result SGPR block count.
+ bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
+ bool FlatScrUsed, bool XNACKUsed,
+ unsigned NextFreeVGPR, SMRange VGPRRange,
+ unsigned NextFreeSGPR, SMRange SGPRRange,
+ unsigned &VGPRBlocks, unsigned &SGPRBlocks);
+ bool ParseDirectiveAMDGCNTarget();
+ bool ParseDirectiveAMDHSAKernel();
bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
bool ParseDirectiveHSACodeObjectVersion();
bool ParseDirectiveHSACodeObjectISA();
@@ -863,6 +886,10 @@ private:
bool ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg,
unsigned& RegNum, unsigned& RegWidth,
unsigned *DwordRegIndex);
+ Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
+ void initializeGprCountSymbol(RegisterKind RegKind);
+ bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
+ unsigned RegWidth);
void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
bool IsAtomic, bool IsAtomicReturn, bool IsLds = false);
void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
@@ -896,15 +923,25 @@ public:
AMDGPU::IsaInfo::IsaVersion ISA =
AMDGPU::IsaInfo::getIsaVersion(getFeatureBits());
MCContext &Ctx = getContext();
- MCSymbol *Sym =
- Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
- Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
- Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
- Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
- Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
- Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
+ if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
+ MCSymbol *Sym =
+ Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
+ Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
+ } else {
+ MCSymbol *Sym =
+ Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
+ Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
+ Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
+ Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
+ Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
+ Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
+ }
+ if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
+ initializeGprCountSymbol(IS_VGPR);
+ initializeGprCountSymbol(IS_SGPR);
+ } else
+ KernelScope.initialize(getContext());
}
- KernelScope.initialize(getContext());
}
bool hasXNACK() const {
@@ -1769,6 +1806,54 @@ bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
return true;
}
+Optional<StringRef>
+AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
+ switch (RegKind) {
+ case IS_VGPR:
+ return StringRef(".amdgcn.next_free_vgpr");
+ case IS_SGPR:
+ return StringRef(".amdgcn.next_free_sgpr");
+ default:
+ return None;
+ }
+}
+
+void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
+ auto SymbolName = getGprCountSymbolName(RegKind);
+ assert(SymbolName && "initializing invalid register kind");
+ MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
+ Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
+}
+
+bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
+ unsigned DwordRegIndex,
+ unsigned RegWidth) {
+ // Symbols are only defined for GCN targets
+ if (AMDGPU::IsaInfo::getIsaVersion(getFeatureBits()).Major < 6)
+ return true;
+
+ auto SymbolName = getGprCountSymbolName(RegKind);
+ if (!SymbolName)
+ return true;
+ MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
+
+ int64_t NewMax = DwordRegIndex + RegWidth - 1;
+ int64_t OldCount;
+
+ if (!Sym->isVariable())
+ return !Error(getParser().getTok().getLoc(),
+ ".amdgcn.next_free_{v,s}gpr symbols must be variable");
+ if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
+ return !Error(
+ getParser().getTok().getLoc(),
+ ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
+
+ if (OldCount <= NewMax)
+ Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
+
+ return true;
+}
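
The two symbols act as running maxima: every parsed register operand bumps them through updateGprCountSymbols, so a source file can hand them straight to the required count directives. A minimal sketch of the update rule, with an invented register operand as the example:

    #include <cstdint>

    // Sketch of the rule in updateGprCountSymbols: after each register
    // operand the symbol holds max(old value, highest dword index + 1).
    static int64_t nextCount(int64_t OldCount, unsigned DwordRegIndex,
                             unsigned RegWidth) {
      int64_t NewMax = DwordRegIndex + RegWidth - 1; // last dword touched
      return OldCount <= NewMax ? NewMax + 1 : OldCount;
    }
    // e.g. an operand v[8:11] gives DwordRegIndex = 8, RegWidth = 4, so
    // ".amdgcn.next_free_vgpr" becomes 12 and can then be used verbatim in
    // ".amdhsa_next_free_vgpr .amdgcn.next_free_vgpr".
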
+
std::unique_ptr<AMDGPUOperand> AMDGPUAsmParser::parseRegister() {
const auto &Tok = Parser.getTok();
SMLoc StartLoc = Tok.getLoc();
@@ -1779,7 +1864,11 @@ std::unique_ptr<AMDGPUOperand> AMDGPUAsmParser::parseRegister() {
if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, &DwordRegIndex)) {
return nullptr;
}
- KernelScope.usesRegister(RegKind, DwordRegIndex, RegWidth);
+ if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
+ if (!updateGprCountSymbols(RegKind, DwordRegIndex, RegWidth))
+ return nullptr;
+ } else
+ KernelScope.usesRegister(RegKind, DwordRegIndex, RegWidth);
return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc, false);
}
@@ -2538,6 +2627,320 @@ bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
return false;
}
+bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
+ if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
+ return TokError("directive only supported for amdgcn architecture");
+
+ std::string Target;
+
+ SMLoc TargetStart = getTok().getLoc();
+ if (getParser().parseEscapedString(Target))
+ return true;
+ SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
+
+ std::string ExpectedTarget;
+ raw_string_ostream ExpectedTargetOS(ExpectedTarget);
+ IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS);
+
+ if (Target != ExpectedTargetOS.str())
+ return getParser().Error(TargetRange.Start, "target must match options",
+ TargetRange);
+
+ getTargetStreamer().EmitDirectiveAMDGCNTarget(Target);
+ return false;
+}
+
+bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
+ return getParser().Error(Range.Start, "value out of range", Range);
+}
+
+bool AMDGPUAsmParser::calculateGPRBlocks(
+ const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
+ bool XNACKUsed, unsigned NextFreeVGPR, SMRange VGPRRange,
+ unsigned NextFreeSGPR, SMRange SGPRRange, unsigned &VGPRBlocks,
+ unsigned &SGPRBlocks) {
+ // TODO(scott.linder): These calculations are duplicated from
+ // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
+ IsaInfo::IsaVersion Version = IsaInfo::getIsaVersion(Features);
+
+ unsigned NumVGPRs = NextFreeVGPR;
+ unsigned NumSGPRs = NextFreeSGPR;
+ unsigned MaxAddressableNumSGPRs = IsaInfo::getAddressableNumSGPRs(Features);
+
+ if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
+ NumSGPRs > MaxAddressableNumSGPRs)
+ return OutOfRangeError(SGPRRange);
+
+ NumSGPRs +=
+ IsaInfo::getNumExtraSGPRs(Features, VCCUsed, FlatScrUsed, XNACKUsed);
+
+ if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
+ NumSGPRs > MaxAddressableNumSGPRs)
+ return OutOfRangeError(SGPRRange);
+
+ if (Features.test(FeatureSGPRInitBug))
+ NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
+
+ VGPRBlocks = IsaInfo::getNumVGPRBlocks(Features, NumVGPRs);
+ SGPRBlocks = IsaInfo::getNumSGPRBlocks(Features, NumSGPRs);
+
+ return false;
+}
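
A worked pass through calculateGPRBlocks, under assumed gfx900 numbers (102 addressable SGPRs and an SGPR encoding granule of 8 are properties of the target, not stated in this diff):

    #include <cassert>

    int main() {
      unsigned NumSGPRs = 100; // .amdhsa_next_free_sgpr 100
      assert(NumSGPRs <= 102); // gfx9: pre-check against addressable SGPRs
      NumSGPRs += 6;           // VCC + FLAT_SCRATCH + XNACK reserve 6 extras
      unsigned Blocks = (NumSGPRs + 7) / 8 - 1; // align to granule, minus one
      assert(Blocks == 13);    // packed into GRANULATED_WAVEFRONT_SGPR_COUNT
      return 0;
    }
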
+
+bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
+ if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
+ return TokError("directive only supported for amdgcn architecture");
+
+ if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
+ return TokError("directive only supported for amdhsa OS");
+
+ StringRef KernelName;
+ if (getParser().parseIdentifier(KernelName))
+ return true;
+
+ kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor();
+
+ StringSet<> Seen;
+
+ IsaInfo::IsaVersion IVersion =
+ IsaInfo::getIsaVersion(getSTI().getFeatureBits());
+
+ SMRange VGPRRange;
+ uint64_t NextFreeVGPR = 0;
+ SMRange SGPRRange;
+ uint64_t NextFreeSGPR = 0;
+ unsigned UserSGPRCount = 0;
+ bool ReserveVCC = true;
+ bool ReserveFlatScr = true;
+ bool ReserveXNACK = hasXNACK();
+
+ while (true) {
+ while (getLexer().is(AsmToken::EndOfStatement))
+ Lex();
+
+ if (getLexer().isNot(AsmToken::Identifier))
+ return TokError("expected .amdhsa_ directive or .end_amdhsa_kernel");
+
+ StringRef ID = getTok().getIdentifier();
+ SMRange IDRange = getTok().getLocRange();
+ Lex();
+
+ if (ID == ".end_amdhsa_kernel")
+ break;
+
+ if (Seen.find(ID) != Seen.end())
+ return TokError(".amdhsa_ directives cannot be repeated");
+ Seen.insert(ID);
+
+ SMLoc ValStart = getTok().getLoc();
+ int64_t IVal;
+ if (getParser().parseAbsoluteExpression(IVal))
+ return true;
+ SMLoc ValEnd = getTok().getLoc();
+ SMRange ValRange = SMRange(ValStart, ValEnd);
+
+ if (IVal < 0)
+ return OutOfRangeError(ValRange);
+
+ uint64_t Val = IVal;
+
+#define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \
+ if (!isUInt<ENTRY##_WIDTH>(VALUE)) \
+ return OutOfRangeError(RANGE); \
+ AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
+
+ if (ID == ".amdhsa_group_segment_fixed_size") {
+ if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
+ return OutOfRangeError(ValRange);
+ KD.group_segment_fixed_size = Val;
+ } else if (ID == ".amdhsa_private_segment_fixed_size") {
+ if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
+ return OutOfRangeError(ValRange);
+ KD.private_segment_fixed_size = Val;
+ } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
+ PARSE_BITS_ENTRY(KD.kernel_code_properties,
+ KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
+ Val, ValRange);
+ UserSGPRCount++;
+ } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
+ PARSE_BITS_ENTRY(KD.kernel_code_properties,
+ KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
+ ValRange);
+ UserSGPRCount++;
+ } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
+ PARSE_BITS_ENTRY(KD.kernel_code_properties,
+ KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
+ ValRange);
+ UserSGPRCount++;
+ } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
+ PARSE_BITS_ENTRY(KD.kernel_code_properties,
+ KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
+ Val, ValRange);
+ UserSGPRCount++;
+ } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
+ PARSE_BITS_ENTRY(KD.kernel_code_properties,
+ KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
+ ValRange);
+ UserSGPRCount++;
+ } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
+ PARSE_BITS_ENTRY(KD.kernel_code_properties,
+ KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
+ ValRange);
+ UserSGPRCount++;
+ } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
+ PARSE_BITS_ENTRY(KD.kernel_code_properties,
+ KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
+ Val, ValRange);
+ UserSGPRCount++;
+ } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
+ PARSE_BITS_ENTRY(
+ KD.compute_pgm_rsrc2,
+ COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET, Val,
+ ValRange);
+ } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
+ PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
+ COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
+ ValRange);
+ } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
+ PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
+ COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
+ ValRange);
+ } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
+ PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
+ COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
+ ValRange);
+ } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
+ PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
+ COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
+ ValRange);
+ } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
+ PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
+ COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
+ ValRange);
+ } else if (ID == ".amdhsa_next_free_vgpr") {
+ VGPRRange = ValRange;
+ NextFreeVGPR = Val;
+ } else if (ID == ".amdhsa_next_free_sgpr") {
+ SGPRRange = ValRange;
+ NextFreeSGPR = Val;
+ } else if (ID == ".amdhsa_reserve_vcc") {
+ if (!isUInt<1>(Val))
+ return OutOfRangeError(ValRange);
+ ReserveVCC = Val;
+ } else if (ID == ".amdhsa_reserve_flat_scratch") {
+ if (IVersion.Major < 7)
+ return getParser().Error(IDRange.Start, "directive requires gfx7+",
+ IDRange);
+ if (!isUInt<1>(Val))
+ return OutOfRangeError(ValRange);
+ ReserveFlatScr = Val;
+ } else if (ID == ".amdhsa_reserve_xnack_mask") {
+ if (IVersion.Major < 8)
+ return getParser().Error(IDRange.Start, "directive requires gfx8+",
+ IDRange);
+ if (!isUInt<1>(Val))
+ return OutOfRangeError(ValRange);
+ ReserveXNACK = Val;
+ } else if (ID == ".amdhsa_float_round_mode_32") {
+ PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
+ COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
+ } else if (ID == ".amdhsa_float_round_mode_16_64") {
+ PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
+ COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
+ } else if (ID == ".amdhsa_float_denorm_mode_32") {
+ PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
+ COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
+ } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
+ PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
+ COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
+ ValRange);
+ } else if (ID == ".amdhsa_dx10_clamp") {
+ PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
+ COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
+ } else if (ID == ".amdhsa_ieee_mode") {
+ PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
+ Val, ValRange);
+ } else if (ID == ".amdhsa_fp16_overflow") {
+ if (IVersion.Major < 9)
+ return getParser().Error(IDRange.Start, "directive requires gfx9+",
+ IDRange);
+ PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
+ ValRange);
+ } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
+ PARSE_BITS_ENTRY(
+ KD.compute_pgm_rsrc2,
+ COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
+ ValRange);
+ } else if (ID == ".amdhsa_exception_fp_denorm_src") {
+ PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
+ COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
+ Val, ValRange);
+ } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
+ PARSE_BITS_ENTRY(
+ KD.compute_pgm_rsrc2,
+ COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
+ ValRange);
+ } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
+ PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
+ COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
+ Val, ValRange);
+ } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
+ PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
+ COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
+ Val, ValRange);
+ } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
+ PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
+ COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
+ Val, ValRange);
+ } else if (ID == ".amdhsa_exception_int_div_zero") {
+ PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
+ COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
+ Val, ValRange);
+ } else {
+ return getParser().Error(IDRange.Start,
+ "unknown .amdhsa_kernel directive", IDRange);
+ }
+
+#undef PARSE_BITS_ENTRY
+ }
+
+ if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
+ return TokError(".amdhsa_next_free_vgpr directive is required");
+
+ if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
+ return TokError(".amdhsa_next_free_sgpr directive is required");
+
+ unsigned VGPRBlocks;
+ unsigned SGPRBlocks;
+ if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
+ ReserveXNACK, NextFreeVGPR, VGPRRange, NextFreeSGPR,
+ SGPRRange, VGPRBlocks, SGPRBlocks))
+ return true;
+
+ if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
+ VGPRBlocks))
+ return OutOfRangeError(VGPRRange);
+ AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
+ COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
+
+ if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
+ SGPRBlocks))
+ return OutOfRangeError(SGPRRange);
+ AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
+ COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
+ SGPRBlocks);
+
+ if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
+ return TokError("too many user SGPRs enabled");
+ AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
+ UserSGPRCount);
+
+ getTargetStreamer().EmitAmdhsaKernelDescriptor(
+ getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
+ ReserveFlatScr, ReserveXNACK);
+ return false;
+}
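
Two of the rejection paths above are easy to trip; hedged sketches of inputs that fail, with the diagnostics taken verbatim from the code (kernel names invented):

    // Duplicate directive: ".amdhsa_ directives cannot be repeated".
    static const char RejectedDuplicate[] =
        ".amdhsa_kernel k\n"
        "  .amdhsa_next_free_vgpr 4\n"
        "  .amdhsa_next_free_vgpr 4\n"
        ".end_amdhsa_kernel\n";

    // Missing required count: ".amdhsa_next_free_vgpr directive is required".
    static const char RejectedMissingCount[] =
        ".amdhsa_kernel k\n"
        "  .amdhsa_next_free_sgpr 8\n"
        ".end_amdhsa_kernel\n";
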
+
bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
uint32_t Major;
uint32_t Minor;
@@ -2657,7 +3060,8 @@ bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
ELF::STT_AMDGPU_HSA_KERNEL);
Lex();
- KernelScope.initialize(getContext());
+ if (!AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI()))
+ KernelScope.initialize(getContext());
return false;
}
@@ -2761,20 +3165,28 @@ bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
StringRef IDVal = DirectiveID.getString();
- if (IDVal == ".hsa_code_object_version")
- return ParseDirectiveHSACodeObjectVersion();
+ if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
+ if (IDVal == ".amdgcn_target")
+ return ParseDirectiveAMDGCNTarget();
+
+ if (IDVal == ".amdhsa_kernel")
+ return ParseDirectiveAMDHSAKernel();
+ } else {
+ if (IDVal == ".hsa_code_object_version")
+ return ParseDirectiveHSACodeObjectVersion();
- if (IDVal == ".hsa_code_object_isa")
- return ParseDirectiveHSACodeObjectISA();
+ if (IDVal == ".hsa_code_object_isa")
+ return ParseDirectiveHSACodeObjectISA();
- if (IDVal == ".amd_kernel_code_t")
- return ParseDirectiveAMDKernelCodeT();
+ if (IDVal == ".amd_kernel_code_t")
+ return ParseDirectiveAMDKernelCodeT();
- if (IDVal == ".amdgpu_hsa_kernel")
- return ParseDirectiveAMDGPUHsaKernel();
+ if (IDVal == ".amdgpu_hsa_kernel")
+ return ParseDirectiveAMDGPUHsaKernel();
- if (IDVal == ".amd_amdgpu_isa")
- return ParseDirectiveISAVersion();
+ if (IDVal == ".amd_amdgpu_isa")
+ return ParseDirectiveISAVersion();
+ }
if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
return ParseDirectiveHSAMetadata();
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
index 38dd0630159..6a41e3f650b 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
@@ -133,9 +133,12 @@ AMDGPUTargetAsmStreamer::AMDGPUTargetAsmStreamer(MCStreamer &S,
formatted_raw_ostream &OS)
: AMDGPUTargetStreamer(S), OS(OS) { }
-void
-AMDGPUTargetAsmStreamer::EmitDirectiveHSACodeObjectVersion(uint32_t Major,
- uint32_t Minor) {
+void AMDGPUTargetAsmStreamer::EmitDirectiveAMDGCNTarget(StringRef Target) {
+ OS << "\t.amdgcn_target \"" << Target << "\"\n";
+}
+
+void AMDGPUTargetAsmStreamer::EmitDirectiveHSACodeObjectVersion(
+ uint32_t Major, uint32_t Minor) {
OS << "\t.hsa_code_object_version " <<
Twine(Major) << "," << Twine(Minor) << '\n';
}
@@ -197,9 +200,135 @@ bool AMDGPUTargetAsmStreamer::EmitPALMetadata(
}
void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor(
- StringRef KernelName,
- const amdhsa::kernel_descriptor_t &KernelDescriptor) {
- // FIXME: not supported yet.
+ const MCSubtargetInfo &STI, StringRef KernelName,
+ const amdhsa::kernel_descriptor_t &KD, uint64_t NextVGPR, uint64_t NextSGPR,
+ bool ReserveVCC, bool ReserveFlatScr, bool ReserveXNACK) {
+ amdhsa::kernel_descriptor_t DefaultKD = getDefaultAmdhsaKernelDescriptor();
+
+ IsaInfo::IsaVersion IVersion = IsaInfo::getIsaVersion(STI.getFeatureBits());
+
+ OS << "\t.amdhsa_kernel " << KernelName << '\n';
+
+#define PRINT_IF_NOT_DEFAULT(STREAM, DIRECTIVE, KERNEL_DESC, \
+ DEFAULT_KERNEL_DESC, MEMBER_NAME, FIELD_NAME) \
+ if (AMDHSA_BITS_GET(KERNEL_DESC.MEMBER_NAME, FIELD_NAME) != \
+ AMDHSA_BITS_GET(DEFAULT_KERNEL_DESC.MEMBER_NAME, FIELD_NAME)) \
+ STREAM << "\t\t" << DIRECTIVE << " " \
+ << AMDHSA_BITS_GET(KERNEL_DESC.MEMBER_NAME, FIELD_NAME) << '\n';
+
+ if (KD.group_segment_fixed_size != DefaultKD.group_segment_fixed_size)
+ OS << "\t\t.amdhsa_group_segment_fixed_size " << KD.group_segment_fixed_size
+ << '\n';
+ if (KD.private_segment_fixed_size != DefaultKD.private_segment_fixed_size)
+ OS << "\t\t.amdhsa_private_segment_fixed_size "
+ << KD.private_segment_fixed_size << '\n';
+
+ PRINT_IF_NOT_DEFAULT(
+ OS, ".amdhsa_user_sgpr_private_segment_buffer", KD, DefaultKD,
+ kernel_code_properties,
+ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER);
+ PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_user_sgpr_dispatch_ptr", KD, DefaultKD,
+ kernel_code_properties,
+ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR);
+ PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_user_sgpr_queue_ptr", KD, DefaultKD,
+ kernel_code_properties,
+ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR);
+ PRINT_IF_NOT_DEFAULT(
+ OS, ".amdhsa_user_sgpr_kernarg_segment_ptr", KD, DefaultKD,
+ kernel_code_properties,
+ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR);
+ PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_user_sgpr_dispatch_id", KD, DefaultKD,
+ kernel_code_properties,
+ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID);
+ PRINT_IF_NOT_DEFAULT(
+ OS, ".amdhsa_user_sgpr_flat_scratch_init", KD, DefaultKD,
+ kernel_code_properties,
+ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT);
+ PRINT_IF_NOT_DEFAULT(
+ OS, ".amdhsa_user_sgpr_private_segment_size", KD, DefaultKD,
+ kernel_code_properties,
+ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE);
+ PRINT_IF_NOT_DEFAULT(
+ OS, ".amdhsa_system_sgpr_private_segment_wavefront_offset", KD, DefaultKD,
+ compute_pgm_rsrc2,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET);
+ PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_system_sgpr_workgroup_id_x", KD, DefaultKD,
+ compute_pgm_rsrc2,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X);
+ PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_system_sgpr_workgroup_id_y", KD, DefaultKD,
+ compute_pgm_rsrc2,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y);
+ PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_system_sgpr_workgroup_id_z", KD, DefaultKD,
+ compute_pgm_rsrc2,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z);
+ PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_system_sgpr_workgroup_info", KD, DefaultKD,
+ compute_pgm_rsrc2,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO);
+ PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_system_vgpr_workitem_id", KD, DefaultKD,
+ compute_pgm_rsrc2,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID);
+
+ // These directives are required.
+ OS << "\t\t.amdhsa_next_free_vgpr " << NextVGPR << '\n';
+ OS << "\t\t.amdhsa_next_free_sgpr " << NextSGPR << '\n';
+
+ if (!ReserveVCC)
+ OS << "\t\t.amdhsa_reserve_vcc " << ReserveVCC << '\n';
+ if (IVersion.Major >= 7 && !ReserveFlatScr)
+ OS << "\t\t.amdhsa_reserve_flat_scratch " << ReserveFlatScr << '\n';
+ if (IVersion.Major >= 8 && ReserveXNACK != hasXNACK(STI))
+ OS << "\t\t.amdhsa_reserve_xnack_mask " << ReserveXNACK << '\n';
+
+ PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_float_round_mode_32", KD, DefaultKD,
+ compute_pgm_rsrc1,
+ amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32);
+ PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_float_round_mode_16_64", KD, DefaultKD,
+ compute_pgm_rsrc1,
+ amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64);
+ PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_float_denorm_mode_32", KD, DefaultKD,
+ compute_pgm_rsrc1,
+ amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32);
+ PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_float_denorm_mode_16_64", KD, DefaultKD,
+ compute_pgm_rsrc1,
+ amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64);
+ PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_dx10_clamp", KD, DefaultKD,
+ compute_pgm_rsrc1,
+ amdhsa::COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP);
+ PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_ieee_mode", KD, DefaultKD,
+ compute_pgm_rsrc1,
+ amdhsa::COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE);
+ if (IVersion.Major >= 9)
+ PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_fp16_overflow", KD, DefaultKD,
+ compute_pgm_rsrc1,
+ amdhsa::COMPUTE_PGM_RSRC1_FP16_OVFL);
+ PRINT_IF_NOT_DEFAULT(
+ OS, ".amdhsa_exception_fp_ieee_invalid_op", KD, DefaultKD,
+ compute_pgm_rsrc2,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION);
+ PRINT_IF_NOT_DEFAULT(
+ OS, ".amdhsa_exception_fp_denorm_src", KD, DefaultKD, compute_pgm_rsrc2,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE);
+ PRINT_IF_NOT_DEFAULT(
+ OS, ".amdhsa_exception_fp_ieee_div_zero", KD, DefaultKD,
+ compute_pgm_rsrc2,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO);
+ PRINT_IF_NOT_DEFAULT(
+ OS, ".amdhsa_exception_fp_ieee_overflow", KD, DefaultKD,
+ compute_pgm_rsrc2,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW);
+ PRINT_IF_NOT_DEFAULT(
+ OS, ".amdhsa_exception_fp_ieee_underflow", KD, DefaultKD,
+ compute_pgm_rsrc2,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW);
+ PRINT_IF_NOT_DEFAULT(
+ OS, ".amdhsa_exception_fp_ieee_inexact", KD, DefaultKD, compute_pgm_rsrc2,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT);
+ PRINT_IF_NOT_DEFAULT(
+ OS, ".amdhsa_exception_int_div_zero", KD, DefaultKD, compute_pgm_rsrc2,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO);
+#undef PRINT_IF_NOT_DEFAULT
+
+ OS << "\t.end_amdhsa_kernel\n";
}
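
Because every optional field is compared against getDefaultAmdhsaKernelDescriptor, the printer round-trips only what the user set; the two register-count directives are emitted unconditionally. A sketch of the expected output for a descriptor that only enables the kernarg segment pointer (kernel name and counts invented, defaults assumed elsewhere):

    static const char ExpectedText[] =
        "\t.amdhsa_kernel minimal_kernel\n"
        "\t\t.amdhsa_user_sgpr_kernarg_segment_ptr 1\n"
        "\t\t.amdhsa_next_free_vgpr 8\n"
        "\t\t.amdhsa_next_free_sgpr 16\n"
        "\t.end_amdhsa_kernel\n";
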
//===----------------------------------------------------------------------===//
@@ -247,9 +376,10 @@ void AMDGPUTargetELFStreamer::EmitAMDGPUNote(
S.PopSection();
}
-void
-AMDGPUTargetELFStreamer::EmitDirectiveHSACodeObjectVersion(uint32_t Major,
- uint32_t Minor) {
+void AMDGPUTargetELFStreamer::EmitDirectiveAMDGCNTarget(StringRef Target) {}
+
+void AMDGPUTargetELFStreamer::EmitDirectiveHSACodeObjectVersion(
+ uint32_t Major, uint32_t Minor) {
EmitAMDGPUNote(
MCConstantExpr::create(8, getContext()),
@@ -370,8 +500,10 @@ bool AMDGPUTargetELFStreamer::EmitPALMetadata(
}
void AMDGPUTargetELFStreamer::EmitAmdhsaKernelDescriptor(
- StringRef KernelName,
- const amdhsa::kernel_descriptor_t &KernelDescriptor) {
+ const MCSubtargetInfo &STI, StringRef KernelName,
+ const amdhsa::kernel_descriptor_t &KernelDescriptor, uint64_t NextVGPR,
+ uint64_t NextSGPR, bool ReserveVCC, bool ReserveFlatScr,
+ bool ReserveXNACK) {
auto &Streamer = getStreamer();
auto &Context = Streamer.getContext();
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
index 4267b553af4..472da1b7359 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
@@ -40,6 +40,8 @@ public:
AMDGPUTargetStreamer(MCStreamer &S) : MCTargetStreamer(S) {}
+ virtual void EmitDirectiveAMDGCNTarget(StringRef Target) = 0;
+
virtual void EmitDirectiveHSACodeObjectVersion(uint32_t Major,
uint32_t Minor) = 0;
@@ -65,14 +67,19 @@ public:
virtual bool EmitPALMetadata(const AMDGPU::PALMD::Metadata &PALMetadata) = 0;
virtual void EmitAmdhsaKernelDescriptor(
- StringRef KernelName,
- const amdhsa::kernel_descriptor_t &KernelDescriptor) = 0;
+ const MCSubtargetInfo &STI, StringRef KernelName,
+ const amdhsa::kernel_descriptor_t &KernelDescriptor, uint64_t NextVGPR,
+ uint64_t NextSGPR, bool ReserveVCC, bool ReserveFlatScr,
+ bool ReserveXNACK) = 0;
};
class AMDGPUTargetAsmStreamer final : public AMDGPUTargetStreamer {
formatted_raw_ostream &OS;
public:
AMDGPUTargetAsmStreamer(MCStreamer &S, formatted_raw_ostream &OS);
+
+ void EmitDirectiveAMDGCNTarget(StringRef Target) override;
+
void EmitDirectiveHSACodeObjectVersion(uint32_t Major,
uint32_t Minor) override;
@@ -94,8 +101,10 @@ public:
bool EmitPALMetadata(const AMDGPU::PALMD::Metadata &PALMetadata) override;
void EmitAmdhsaKernelDescriptor(
- StringRef KernelName,
- const amdhsa::kernel_descriptor_t &KernelDescriptor) override;
+ const MCSubtargetInfo &STI, StringRef KernelName,
+ const amdhsa::kernel_descriptor_t &KernelDescriptor, uint64_t NextVGPR,
+ uint64_t NextSGPR, bool ReserveVCC, bool ReserveFlatScr,
+ bool ReserveXNACK) override;
};
class AMDGPUTargetELFStreamer final : public AMDGPUTargetStreamer {
@@ -109,6 +118,8 @@ public:
MCELFStreamer &getStreamer();
+ void EmitDirectiveAMDGCNTarget(StringRef Target) override;
+
void EmitDirectiveHSACodeObjectVersion(uint32_t Major,
uint32_t Minor) override;
@@ -130,8 +141,10 @@ public:
bool EmitPALMetadata(const AMDGPU::PALMD::Metadata &PALMetadata) override;
void EmitAmdhsaKernelDescriptor(
- StringRef KernelName,
- const amdhsa::kernel_descriptor_t &KernelDescriptor) override;
+ const MCSubtargetInfo &STI, StringRef KernelName,
+ const amdhsa::kernel_descriptor_t &KernelDescriptor, uint64_t NextVGPR,
+ uint64_t NextSGPR, bool ReserveVCC, bool ReserveFlatScr,
+ bool ReserveXNACK) override;
};
}
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index f7bd27ab04f..08b7a71cbcc 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -198,6 +198,10 @@ void streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream) {
<< ISAVersion.Major
<< ISAVersion.Minor
<< ISAVersion.Stepping;
+
+ if (hasXNACK(*STI))
+ Stream << "+xnack";
+
Stream.flush();
}
@@ -334,6 +338,39 @@ unsigned getMaxNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU,
return std::min(MaxNumSGPRs, AddressableNumSGPRs);
}
+unsigned getNumExtraSGPRs(const FeatureBitset &Features, bool VCCUsed,
+ bool FlatScrUsed, bool XNACKUsed) {
+ unsigned ExtraSGPRs = 0;
+ if (VCCUsed)
+ ExtraSGPRs = 2;
+
+ IsaVersion Version = getIsaVersion(Features);
+ if (Version.Major < 8) {
+ if (FlatScrUsed)
+ ExtraSGPRs = 4;
+ } else {
+ if (XNACKUsed)
+ ExtraSGPRs = 4;
+
+ if (FlatScrUsed)
+ ExtraSGPRs = 6;
+ }
+
+ return ExtraSGPRs;
+}
+
+unsigned getNumExtraSGPRs(const FeatureBitset &Features, bool VCCUsed,
+ bool FlatScrUsed) {
+ return getNumExtraSGPRs(Features, VCCUsed, FlatScrUsed,
+ Features[AMDGPU::FeatureXNACK]);
+}
+
+unsigned getNumSGPRBlocks(const FeatureBitset &Features, unsigned NumSGPRs) {
+ NumSGPRs = alignTo(std::max(1u, NumSGPRs), getSGPREncodingGranule(Features));
+ // SGPRBlocks is actual number of SGPR blocks minus 1.
+ return NumSGPRs / getSGPREncodingGranule(Features) - 1;
+}
+
unsigned getVGPRAllocGranule(const FeatureBitset &Features) {
return 4;
}
@@ -370,6 +407,12 @@ unsigned getMaxNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
return std::min(MaxNumVGPRs, AddressableNumVGPRs);
}
+unsigned getNumVGPRBlocks(const FeatureBitset &Features, unsigned NumVGPRs) {
+ NumVGPRs = alignTo(std::max(1u, NumVGPRs), getVGPREncodingGranule(Features));
+ // VGPRBlocks is actual number of VGPR blocks minus 1.
+ return NumVGPRs / getVGPREncodingGranule(Features) - 1;
+}
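
Both helpers share the same granule math: round the count up to the encoding granule and store one less than the granule count, with std::max(1u, ...) ensuring that even zero registers still occupy one granule. A standalone restatement (the granule values 8 and 4 used in the examples are assumptions about the era's targets):

    static unsigned numBlocks(unsigned NumRegs, unsigned Granule) {
      if (NumRegs == 0)
        NumRegs = 1;                                    // std::max(1u, NumRegs)
      unsigned Aligned = (NumRegs + Granule - 1) / Granule * Granule; // alignTo
      return Aligned / Granule - 1;                     // field stores blocks - 1
    }
    // numBlocks(0, 4) == 0, numBlocks(24, 4) == 5, numBlocks(102, 8) == 12.
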
+
} // end namespace IsaInfo
void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
@@ -399,6 +442,21 @@ void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
Header.private_segment_alignment = 4;
}
+amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor() {
+ amdhsa::kernel_descriptor_t KD;
+ memset(&KD, 0, sizeof(KD));
+ AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
+ amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64,
+ amdhsa::FLOAT_DENORM_MODE_FLUSH_NONE);
+ AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
+ amdhsa::COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, 1);
+ AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
+ amdhsa::COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 1);
+ AMDHSA_BITS_SET(KD.compute_pgm_rsrc2,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, 1);
+ return KD;
+}
+
bool isGroupSegment(const GlobalValue *GV) {
return GV->getType()->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
}
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index a59571c49c6..2ee19741acc 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -16,6 +16,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include <cstdint>
@@ -28,12 +29,12 @@ class Argument;
class FeatureBitset;
class Function;
class GlobalValue;
-class MachineMemOperand;
class MCContext;
class MCRegisterClass;
class MCRegisterInfo;
class MCSection;
class MCSubtargetInfo;
+class MachineMemOperand;
class Triple;
namespace AMDGPU {
@@ -138,6 +139,22 @@ unsigned getMinNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU);
unsigned getMaxNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU,
bool Addressable);
+/// \returns Number of extra SGPRs implicitly required by given subtarget \p
+/// Features when the given special registers are used.
+unsigned getNumExtraSGPRs(const FeatureBitset &Features, bool VCCUsed,
+ bool FlatScrUsed, bool XNACKUsed);
+
+/// \returns Number of extra SGPRs implicitly required by given subtarget \p
+/// Features when the given special registers are used. XNACK is inferred from
+/// \p Features.
+unsigned getNumExtraSGPRs(const FeatureBitset &Features, bool VCCUsed,
+ bool FlatScrUsed);
+
+/// \returns Number of SGPR blocks needed for given subtarget \p Features when
+/// \p NumSGPRs are used. \p NumSGPRs should already include any special
+/// register counts.
+unsigned getNumSGPRBlocks(const FeatureBitset &Features, unsigned NumSGPRs);
+
/// \returns VGPR allocation granularity for given subtarget \p Features.
unsigned getVGPRAllocGranule(const FeatureBitset &Features);
@@ -158,6 +175,10 @@ unsigned getMinNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU);
/// execution unit requirement for given subtarget \p Features.
unsigned getMaxNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU);
+/// \returns Number of VGPR blocks needed for given subtarget \p Features when
+/// \p NumVGPRs are used.
+unsigned getNumVGPRBlocks(const FeatureBitset &Features, unsigned NumVGPRs);
+
} // end namespace IsaInfo
LLVM_READONLY
@@ -203,6 +224,8 @@ int getMCOpcode(uint16_t Opcode, unsigned Gen);
void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
const FeatureBitset &Features);
+amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor();
+
bool isGroupSegment(const GlobalValue *GV);
bool isGlobalSegment(const GlobalValue *GV);
bool isReadOnlySegment(const GlobalValue *GV);