summary refs log tree commit diff stats
path: root/llvm/lib
diff options
context:
space:
mode:
author Tim Renouf <tpr.llvm@botech.co.uk> 2017-10-03 19:03:52 +0000
committer Tim Renouf <tpr.llvm@botech.co.uk> 2017-10-03 19:03:52 +0000
commit 72800f0436e8420a96f54f48b187a58c161317db (patch)
tree 4df7e409ed68a6c30a0493e15f8d6badd62b9f5c /llvm/lib
parent 46513965842fc117f3afb0c8ee2fb0fd329d78ae (diff)
download bcm5719-llvm-72800f0436e8420a96f54f48b187a58c161317db.tar.gz
bcm5719-llvm-72800f0436e8420a96f54f48b187a58c161317db.zip
[AMDGPU] implemented pal metadata
Summary: For the amdpal OS type: We write an AMDGPU_PAL_METADATA record in the .note section in the ELF (or as an assembler directive). It contains key=value pairs of 32-bit ints. It is a merge of metadata from codegen of the shaders, and metadata provided by the frontend as amdgpu.pal.metadata IR metadata. Where both sources have a key=value with the same key, the two values are ORed together. This .note record is part of the amdpal ABI and will be documented in docs/AMDGPUUsage.rst in a future commit. Eventually the amdpal OS type will stop generating the .AMDGPU.config section once the frontend has safely moved over to using the .note records above instead of .AMDGPU.config. | Reviewers: arsenm, nhaehnle, dstuttard | Subscribers: kzhuravl, wdng, yaxunl, llvm-commits, t-tye | Differential Revision: https://reviews.llvm.org/D37753 | llvm-svn: 314829
Diffstat (limited to 'llvm/lib')
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp 114
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h 3
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUPTNote.h 33
-rw-r--r-- llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp 19
-rw-r--r-- llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp 21
-rw-r--r-- llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h 6
6 files changed, 193 insertions, 3 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index 6b22136fdeb..97b47fdddfc 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -110,12 +110,18 @@ AMDGPUTargetStreamer& AMDGPUAsmPrinter::getTargetStreamer() const {
}
void AMDGPUAsmPrinter::EmitStartOfAsmFile(Module &M) {
- if (TM.getTargetTriple().getOS() != Triple::AMDHSA)
- return;
-
AMDGPU::IsaInfo::IsaVersion ISA =
AMDGPU::IsaInfo::getIsaVersion(getSTI()->getFeatureBits());
+ if (TM.getTargetTriple().getOS() == Triple::AMDPAL) {
+ readPalMetadata(M);
+ // AMDPAL wants an HSA_ISA .note.
+ getTargetStreamer().EmitDirectiveHSACodeObjectISA(
+ ISA.Major, ISA.Minor, ISA.Stepping, "AMD", "AMDGPU");
+ }
+ if (TM.getTargetTriple().getOS() != Triple::AMDHSA)
+ return;
+
getTargetStreamer().EmitDirectiveHSACodeObjectVersion(2, 1);
getTargetStreamer().EmitDirectiveHSACodeObjectISA(
ISA.Major, ISA.Minor, ISA.Stepping, "AMD", "AMDGPU");
@@ -123,6 +129,17 @@ void AMDGPUAsmPrinter::EmitStartOfAsmFile(Module &M) {
}
void AMDGPUAsmPrinter::EmitEndOfAsmFile(Module &M) {
+ if (TM.getTargetTriple().getOS() == Triple::AMDPAL) {
+ // Copy the PAL metadata from the map where we collected it into a vector,
+ // then write it as a .note.
+ std::vector<uint32_t> Data;
+ for (auto i : PalMetadata) {
+ Data.push_back(i.first);
+ Data.push_back(i.second);
+ }
+ getTargetStreamer().EmitPalMetadata(Data);
+ }
+
if (TM.getTargetTriple().getOS() != Triple::AMDHSA)
return;
@@ -190,6 +207,27 @@ bool AMDGPUAsmPrinter::doFinalization(Module &M) {
return AsmPrinter::doFinalization(M);
}
+// For the amdpal OS type, read the "amdgpu.pal.metadata" named metadata
+// supplied by the frontend into our PalMetadata map, ready for per-function
+// modification. Only operand 0 of the named metadata is read, and only if it
+// is an MDTuple; its operands are taken two at a time as constant-integer
+// key=value pairs, each stored as PalMetadata[key]=value in the map.
+void AMDGPUAsmPrinter::readPalMetadata(Module &M) {
+ auto NamedMD = M.getNamedMetadata("amdgpu.pal.metadata");
+ if (!NamedMD || !NamedMD->getNumOperands())
+ return; // Metadata absent or empty: PalMetadata is left untouched.
+ auto Tuple = dyn_cast<MDTuple>(NamedMD->getOperand(0));
+ if (!Tuple)
+ return; // Operand 0 is not an MDTuple: nothing is read.
+ for (unsigned I = 0, E = Tuple->getNumOperands() & -2; I != E; I += 2) { // "& -2" rounds the count down to even, dropping an unpaired last operand.
+ auto Key = mdconst::dyn_extract<ConstantInt>(Tuple->getOperand(I));
+ auto Val = mdconst::dyn_extract<ConstantInt>(Tuple->getOperand(I + 1));
+ if (!Key || !Val)
+ continue; // Skip a pair if either operand is not a ConstantInt.
+ PalMetadata[Key->getZExtValue()] = Val->getZExtValue(); // Assignment: replaces any existing entry for this key.
+ }
+}
+
// Print comments that apply to both callable functions and entry points.
void AMDGPUAsmPrinter::emitCommonFunctionComments(
uint32_t NumVGPR,
@@ -232,6 +270,8 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
Info = analyzeResourceUsage(MF);
}
+ if (STM.isAmdPalOS())
+ EmitPalMetadata(MF, CurrentProgramInfo);
if (!STM.isAmdHsaOS()) {
EmitProgramInfoSI(MF, CurrentProgramInfo);
}
@@ -923,6 +963,74 @@ void AMDGPUAsmPrinter::EmitProgramInfoSI(const MachineFunction &MF,
OutStreamer->EmitIntValue(MFI->getNumSpilledVGPRs(), 4);
}
+// This is the equivalent of EmitProgramInfoSI above, but for when the OS type
+// is AMDPAL. It stores each compute/SPI register setting and other PAL
+// metadata items into the PalMetadata map, merging with any provided by the
+// frontend as LLVM metadata. Once all functions have been processed,
+// EmitEndOfAsmFile passes PalMetadata to the target streamer in a single call.
+void AMDGPUAsmPrinter::EmitPalMetadata(const MachineFunction &MF,
+ const SIProgramInfo &CurrentProgramInfo) {
+ const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+ // Given the calling convention, calculate the register number for rsrc1. In
+ // principle the register number could change in future hardware, but we know
+ // it is the same for gfx6-9 (except that LS and ES don't exist on gfx9), so
+ // we can use the same fixed value that .AMDGPU.config has for Mesa. Note
+ // that we use a register number rather than a byte offset, so we need to
+ // divide by 4.
+ unsigned Rsrc1Reg = getRsrcReg(MF.getFunction()->getCallingConv()) / 4;
+ unsigned Rsrc2Reg = Rsrc1Reg + 1; // rsrc2 is taken to be the register immediately after rsrc1.
+ // Also calculate the PAL metadata key for *S_SCRATCH_SIZE, starting from the
+ // CS key and overriding it per calling convention in the switch. It can be
+ // used with a constant offset to access any non-register shader-specific key.
+ unsigned ScratchSizeKey = AMDGPU::ElfNote::AMDGPU_PAL_METADATA_CS_SCRATCH_SIZE;
+ switch (MF.getFunction()->getCallingConv()) { // No default case: unlisted calling conventions keep the CS key.
+ case CallingConv::AMDGPU_PS:
+ ScratchSizeKey = AMDGPU::ElfNote::AMDGPU_PAL_METADATA_PS_SCRATCH_SIZE;
+ break;
+ case CallingConv::AMDGPU_VS:
+ ScratchSizeKey = AMDGPU::ElfNote::AMDGPU_PAL_METADATA_VS_SCRATCH_SIZE;
+ break;
+ case CallingConv::AMDGPU_GS:
+ ScratchSizeKey = AMDGPU::ElfNote::AMDGPU_PAL_METADATA_GS_SCRATCH_SIZE;
+ break;
+ case CallingConv::AMDGPU_ES:
+ ScratchSizeKey = AMDGPU::ElfNote::AMDGPU_PAL_METADATA_ES_SCRATCH_SIZE;
+ break;
+ case CallingConv::AMDGPU_HS:
+ ScratchSizeKey = AMDGPU::ElfNote::AMDGPU_PAL_METADATA_HS_SCRATCH_SIZE;
+ break;
+ case CallingConv::AMDGPU_LS:
+ ScratchSizeKey = AMDGPU::ElfNote::AMDGPU_PAL_METADATA_LS_SCRATCH_SIZE;
+ break;
+ }
+ unsigned NumUsedVgprsKey = ScratchSizeKey // Offset by (VS used-VGPR key - VS scratch key); relies on every key group listing the stages in the same order.
+ + AMDGPU::ElfNote::AMDGPU_PAL_METADATA_VS_NUM_USED_VGPRS
+ - AMDGPU::ElfNote::AMDGPU_PAL_METADATA_VS_SCRATCH_SIZE;
+ unsigned NumUsedSgprsKey = ScratchSizeKey // Same offsetting scheme, using the VS used-SGPR key.
+ + AMDGPU::ElfNote::AMDGPU_PAL_METADATA_VS_NUM_USED_SGPRS
+ - AMDGPU::ElfNote::AMDGPU_PAL_METADATA_VS_SCRATCH_SIZE;
+ PalMetadata[NumUsedVgprsKey] = CurrentProgramInfo.NumVGPRsForWavesPerEU; // Assigned, not ORed: replaces any value already in the map.
+ PalMetadata[NumUsedSgprsKey] = CurrentProgramInfo.NumSGPRsForWavesPerEU; // Assigned, not ORed: replaces any value already in the map.
+ if (AMDGPU::isCompute(MF.getFunction()->getCallingConv())) { // Compute calling convention: OR in the ComputePGMRSrc1/2 words from CurrentProgramInfo.
+ PalMetadata[Rsrc1Reg] |= CurrentProgramInfo.ComputePGMRSrc1;
+ PalMetadata[Rsrc2Reg] |= CurrentProgramInfo.ComputePGMRSrc2;
+ // ScratchSize is in bytes, 16 aligned.
+ PalMetadata[ScratchSizeKey] |= alignTo(CurrentProgramInfo.ScratchSize, 16);
+ } else { // Non-compute calling convention: rsrc1 is built from the VGPR/SGPR block counts.
+ PalMetadata[Rsrc1Reg] |= S_00B028_VGPRS(CurrentProgramInfo.VGPRBlocks)
+ | S_00B028_SGPRS(CurrentProgramInfo.SGPRBlocks);
+ if (CurrentProgramInfo.ScratchBlocks > 0) // The SCRATCH_EN field is ORed in only when scratch blocks are in use.
+ PalMetadata[Rsrc2Reg] |= S_00B84C_SCRATCH_EN(1);
+ // ScratchSize is in bytes, 16 aligned.
+ PalMetadata[ScratchSizeKey] |= alignTo(CurrentProgramInfo.ScratchSize, 16);
+ }
+ if (MF.getFunction()->getCallingConv() == CallingConv::AMDGPU_PS) { // AMDGPU_PS only: also record EXTRA_LDS_SIZE and the SPI_PS_INPUT_ENA/ADDR register values.
+ PalMetadata[Rsrc2Reg] |= S_00B02C_EXTRA_LDS_SIZE(CurrentProgramInfo.LDSBlocks);
+ PalMetadata[R_0286CC_SPI_PS_INPUT_ENA / 4] |= MFI->getPSInputEnable();
+ PalMetadata[R_0286D0_SPI_PS_INPUT_ADDR / 4] |= MFI->getPSInputAddr();
+ }
+}
+
// This is supposed to be log2(Size)
static amd_element_byte_size_t getElementByteSizeValue(unsigned Size) {
switch (Size) {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
index 0a58ce06704..abfc6d070f5 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
@@ -112,10 +112,12 @@ private:
SIProgramInfo CurrentProgramInfo;
DenseMap<const Function *, SIFunctionResourceInfo> CallGraphResourceInfo;
+ std::map<uint32_t, uint32_t> PalMetadata;
uint64_t getFunctionCodeSize(const MachineFunction &MF) const;
SIFunctionResourceInfo analyzeResourceUsage(const MachineFunction &MF) const;
+ void readPalMetadata(Module &M);
void getSIProgramInfo(SIProgramInfo &Out, const MachineFunction &MF);
void getAmdKernelCode(amd_kernel_code_t &Out, const SIProgramInfo &KernelInfo,
const MachineFunction &MF) const;
@@ -127,6 +129,7 @@ private:
/// can correctly setup the GPU state.
void EmitProgramInfoR600(const MachineFunction &MF);
void EmitProgramInfoSI(const MachineFunction &MF, const SIProgramInfo &KernelInfo);
+ void EmitPalMetadata(const MachineFunction &MF, const SIProgramInfo &KernelInfo);
void emitCommonFunctionComments(uint32_t NumVGPR,
uint32_t NumSGPR,
uint32_t ScratchSize,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPTNote.h b/llvm/lib/Target/AMDGPU/AMDGPUPTNote.h
index 71b9ab699b9..6dd0829cf22 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPTNote.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPTNote.h
@@ -27,16 +27,49 @@ const char NoteName[] = "AMD";
// TODO: Move this enum to include/llvm/Support so it can be used in tools?
enum NoteType{ // Note type identifiers; the ELF target streamer passes one of these to EmitAMDGPUNote.
+ NT_AMDGPU_HSA_RESERVED_0 = 0,
NT_AMDGPU_HSA_CODE_OBJECT_VERSION = 1,
NT_AMDGPU_HSA_HSAIL = 2,
NT_AMDGPU_HSA_ISA = 3,
NT_AMDGPU_HSA_PRODUCER = 4,
NT_AMDGPU_HSA_PRODUCER_OPTIONS = 5,
NT_AMDGPU_HSA_EXTENSION = 6,
+ NT_AMDGPU_HSA_RESERVED_7 = 7, // 7-9 are named as reserved so the numbering has no unnamed gap before 10.
+ NT_AMDGPU_HSA_RESERVED_8 = 8,
+ NT_AMDGPU_HSA_RESERVED_9 = 9,
NT_AMDGPU_HSA_CODE_OBJECT_METADATA = 10,
+ NT_AMD_AMDGPU_ISA = 11,
+ NT_AMDGPU_PAL_METADATA = 12, // Type used by AMDGPUTargetELFStreamer::EmitPalMetadata for the PAL key=value record.
NT_AMDGPU_HSA_HLDEBUG_DEBUG = 101,
NT_AMDGPU_HSA_HLDEBUG_TARGET = 102
};
+
+enum NoteAmdGpuPalMetadataKey { // Non-register keys for the PAL metadata key=value record.
+ AMDGPU_PAL_METADATA_LS_NUM_USED_VGPRS = 0x10000015, // Used-VGPR count keys, one per stage in the order LS, HS, ES, GS, VS, PS, CS.
+ AMDGPU_PAL_METADATA_HS_NUM_USED_VGPRS = 0x10000016,
+ AMDGPU_PAL_METADATA_ES_NUM_USED_VGPRS = 0x10000017,
+ AMDGPU_PAL_METADATA_GS_NUM_USED_VGPRS = 0x10000018,
+ AMDGPU_PAL_METADATA_VS_NUM_USED_VGPRS = 0x10000019,
+ AMDGPU_PAL_METADATA_PS_NUM_USED_VGPRS = 0x1000001a,
+ AMDGPU_PAL_METADATA_CS_NUM_USED_VGPRS = 0x1000001b,
+
+ AMDGPU_PAL_METADATA_LS_NUM_USED_SGPRS = 0x1000001c, // Used-SGPR count keys, same stage order.
+ AMDGPU_PAL_METADATA_HS_NUM_USED_SGPRS = 0x1000001d,
+ AMDGPU_PAL_METADATA_ES_NUM_USED_SGPRS = 0x1000001e,
+ AMDGPU_PAL_METADATA_GS_NUM_USED_SGPRS = 0x1000001f,
+ AMDGPU_PAL_METADATA_VS_NUM_USED_SGPRS = 0x10000020,
+ AMDGPU_PAL_METADATA_PS_NUM_USED_SGPRS = 0x10000021,
+ AMDGPU_PAL_METADATA_CS_NUM_USED_SGPRS = 0x10000022,
+
+ AMDGPU_PAL_METADATA_LS_SCRATCH_SIZE = 0x10000038, // Scratch size keys, same stage order; AMDGPUAsmPrinter::EmitPalMetadata derives the other two groups' keys from these by a constant offset, so the order must stay parallel.
+ AMDGPU_PAL_METADATA_HS_SCRATCH_SIZE = 0x10000039,
+ AMDGPU_PAL_METADATA_ES_SCRATCH_SIZE = 0x1000003a,
+ AMDGPU_PAL_METADATA_GS_SCRATCH_SIZE = 0x1000003b,
+ AMDGPU_PAL_METADATA_VS_SCRATCH_SIZE = 0x1000003c,
+ AMDGPU_PAL_METADATA_PS_SCRATCH_SIZE = 0x1000003d,
+ AMDGPU_PAL_METADATA_CS_SCRATCH_SIZE = 0x1000003e,
+};
+
}
}
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 6b5e4da50f5..fa7157bb645 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -833,6 +833,7 @@ private:
bool ParseDirectiveAMDKernelCodeT();
bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
bool ParseDirectiveAMDGPUHsaKernel();
+ bool ParseDirectivePalMetadata();
bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
RegisterKind RegKind, unsigned Reg1,
unsigned RegNum);
@@ -2493,6 +2494,21 @@ bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
return false;
}
+bool AMDGPUAsmParser::ParseDirectivePalMetadata() { // Parses the operands of .amdgpu_pal_metadata (a comma-separated list of values) and forwards them to the target streamer.
+ std::vector<uint32_t> Data;
+ for (;;) { // A value is parsed before any comma check, so at least one value is required.
+ uint32_t Value;
+ if (ParseAsAbsoluteExpression(Value))
+ return TokError("invalid value in .amdgpu_pal_metadata");
+ Data.push_back(Value);
+ if (getLexer().isNot(AsmToken::Comma))
+ break; // No comma follows: the list is complete.
+ Lex(); // Advance past the comma to the next value.
+ }
+ getTargetStreamer().EmitPalMetadata(Data); // NOTE(review): the value count is not checked to be even (key/value pairs) - confirm this is intended.
+ return false;
+}
+
bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
StringRef IDVal = DirectiveID.getString();
@@ -2511,6 +2527,9 @@ bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
if (IDVal == ".amdgpu_hsa_kernel")
return ParseDirectiveAMDGPUHsaKernel();
+ if (IDVal == ".amdgpu_pal_metadata")
+ return ParseDirectivePalMetadata();
+
return true;
}
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
index 2a0032fc9ad..6b8d318ecf6 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
@@ -112,6 +112,14 @@ bool AMDGPUTargetAsmStreamer::EmitCodeObjectMetadata(StringRef YamlString) {
return true;
}
+bool AMDGPUTargetAsmStreamer::EmitPalMetadata(ArrayRef<uint32_t> Data) { // Writes Data as one .amdgpu_pal_metadata directive line of hex values; always returns true.
+ OS << "\t.amdgpu_pal_metadata";
+ for (auto I = Data.begin(), E = Data.end(); I != E; ++I)
+ OS << (I == Data.begin() ? " 0x" : ",0x") << Twine::utohexstr(*I); // The first value follows a space; each later value follows a comma.
+ OS << "\n"; // NOTE(review): an empty Data prints the directive with no operands, which ParseDirectivePalMetadata appears to reject - confirm.
+ return true;
+}
+
//===----------------------------------------------------------------------===//
// AMDGPUTargetELFStreamer
//===----------------------------------------------------------------------===//
@@ -230,3 +238,16 @@ bool AMDGPUTargetELFStreamer::EmitCodeObjectMetadata(StringRef YamlString) {
return true;
}
+
+bool AMDGPUTargetELFStreamer::EmitPalMetadata(ArrayRef<uint32_t> Data) { // Emits Data through EmitAMDGPUNote as a note of type NT_AMDGPU_PAL_METADATA; always returns true.
+ EmitAMDGPUNote(
+ MCConstantExpr::create(Data.size() * sizeof(uint32_t), getContext()), // Total byte size of the values, as a constant expression.
+ ElfNote::NT_AMDGPU_PAL_METADATA,
+ [&](MCELFStreamer &OS){ // Callback that writes each value, in order, as a 4-byte integer.
+ for (auto I : Data)
+ OS.EmitIntValue(I, sizeof(uint32_t));
+ }
+ );
+ return true;
+}
+
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
index 968128e94d0..db82c191a7c 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
@@ -53,6 +53,8 @@ public:
/// \returns True on success, false on failure.
virtual bool EmitCodeObjectMetadata(StringRef YamlString) = 0;
+
+ virtual bool EmitPalMetadata(ArrayRef<uint32_t> Data) = 0;
};
class AMDGPUTargetAsmStreamer final : public AMDGPUTargetStreamer {
@@ -72,6 +74,8 @@ public:
/// \returns True on success, false on failure.
bool EmitCodeObjectMetadata(StringRef YamlString) override;
+
+ bool EmitPalMetadata(ArrayRef<uint32_t> data) override;
};
class AMDGPUTargetELFStreamer final : public AMDGPUTargetStreamer {
@@ -99,6 +103,8 @@ public:
/// \returns True on success, false on failure.
bool EmitCodeObjectMetadata(StringRef YamlString) override;
+
+ bool EmitPalMetadata(ArrayRef<uint32_t> data) override;
};
}
OpenPOWER on IntegriCloud