diff options
author | Tim Renouf <tpr.llvm@botech.co.uk> | 2017-10-03 19:03:52 +0000 |
---|---|---|
committer | Tim Renouf <tpr.llvm@botech.co.uk> | 2017-10-03 19:03:52 +0000 |
commit | 72800f0436e8420a96f54f48b187a58c161317db (patch) | |
tree | 4df7e409ed68a6c30a0493e15f8d6badd62b9f5c /llvm/lib | |
parent | 46513965842fc117f3afb0c8ee2fb0fd329d78ae (diff) | |
download | bcm5719-llvm-72800f0436e8420a96f54f48b187a58c161317db.tar.gz bcm5719-llvm-72800f0436e8420a96f54f48b187a58c161317db.zip |
[AMDGPU] implemented pal metadata
Summary:
For the amdpal OS type:
We write an AMDGPU_PAL_METADATA record in the .note section in the ELF
(or as an assembler directive). It contains key=value pairs of 32-bit
integers. It is a merge of metadata from codegen of the shaders, and
metadata provided by the frontend as amdgpu.pal.metadata IR named metadata.
Where both sources have a key=value with the same key, the two values
are ORed together.
This .note record is part of the amdpal ABI and will be documented in
docs/AMDGPUUsage.rst in a future commit.
Eventually the amdpal OS type will stop generating the .AMDGPU.config
section once the frontend has safely moved over to using the .note
records above instead of .AMDGPU.config.
Reviewers: arsenm, nhaehnle, dstuttard
Subscribers: kzhuravl, wdng, yaxunl, llvm-commits, t-tye
Differential Revision: https://reviews.llvm.org/D37753
llvm-svn: 314829
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp | 114 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h | 3 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUPTNote.h | 33 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp | 19 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp | 21 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h | 6 |
6 files changed, 193 insertions, 3 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp index 6b22136fdeb..97b47fdddfc 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -110,12 +110,18 @@ AMDGPUTargetStreamer& AMDGPUAsmPrinter::getTargetStreamer() const { } void AMDGPUAsmPrinter::EmitStartOfAsmFile(Module &M) { - if (TM.getTargetTriple().getOS() != Triple::AMDHSA) - return; - AMDGPU::IsaInfo::IsaVersion ISA = AMDGPU::IsaInfo::getIsaVersion(getSTI()->getFeatureBits()); + if (TM.getTargetTriple().getOS() == Triple::AMDPAL) { + readPalMetadata(M); + // AMDPAL wants an HSA_ISA .note. + getTargetStreamer().EmitDirectiveHSACodeObjectISA( + ISA.Major, ISA.Minor, ISA.Stepping, "AMD", "AMDGPU"); + } + if (TM.getTargetTriple().getOS() != Triple::AMDHSA) + return; + getTargetStreamer().EmitDirectiveHSACodeObjectVersion(2, 1); getTargetStreamer().EmitDirectiveHSACodeObjectISA( ISA.Major, ISA.Minor, ISA.Stepping, "AMD", "AMDGPU"); @@ -123,6 +129,17 @@ void AMDGPUAsmPrinter::EmitStartOfAsmFile(Module &M) { } void AMDGPUAsmPrinter::EmitEndOfAsmFile(Module &M) { + if (TM.getTargetTriple().getOS() == Triple::AMDPAL) { + // Copy the PAL metadata from the map where we collected it into a vector, + // then write it as a .note. + std::vector<uint32_t> Data; + for (auto i : PalMetadata) { + Data.push_back(i.first); + Data.push_back(i.second); + } + getTargetStreamer().EmitPalMetadata(Data); + } + if (TM.getTargetTriple().getOS() != Triple::AMDHSA) return; @@ -190,6 +207,27 @@ bool AMDGPUAsmPrinter::doFinalization(Module &M) { return AsmPrinter::doFinalization(M); } +// For the amdpal OS type, read the amdgpu.pal.metadata supplied by the +// frontend into our PalMetadata map, ready for per-function modification. 
It +// is a NamedMD containing an MDTuple containing a number of MDNodes each of +// which is an integer value, and each two integer values forms a key=value +// pair that we store as PalMetadata[key]=value in the map. +void AMDGPUAsmPrinter::readPalMetadata(Module &M) { + auto NamedMD = M.getNamedMetadata("amdgpu.pal.metadata"); + if (!NamedMD || !NamedMD->getNumOperands()) + return; + auto Tuple = dyn_cast<MDTuple>(NamedMD->getOperand(0)); + if (!Tuple) + return; + for (unsigned I = 0, E = Tuple->getNumOperands() & -2; I != E; I += 2) { + auto Key = mdconst::dyn_extract<ConstantInt>(Tuple->getOperand(I)); + auto Val = mdconst::dyn_extract<ConstantInt>(Tuple->getOperand(I + 1)); + if (!Key || !Val) + continue; + PalMetadata[Key->getZExtValue()] = Val->getZExtValue(); + } +} + // Print comments that apply to both callable functions and entry points. void AMDGPUAsmPrinter::emitCommonFunctionComments( uint32_t NumVGPR, @@ -232,6 +270,8 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) { Info = analyzeResourceUsage(MF); } + if (STM.isAmdPalOS()) + EmitPalMetadata(MF, CurrentProgramInfo); if (!STM.isAmdHsaOS()) { EmitProgramInfoSI(MF, CurrentProgramInfo); } @@ -923,6 +963,74 @@ void AMDGPUAsmPrinter::EmitProgramInfoSI(const MachineFunction &MF, OutStreamer->EmitIntValue(MFI->getNumSpilledVGPRs(), 4); } +// This is the equivalent of EmitProgramInfoSI above, but for when the OS type +// is AMDPAL. It stores each compute/SPI register setting and other PAL +// metadata items into the PalMetadata map, combining with any provided by the +// frontend as LLVM metadata. Once all functions are written, PalMetadata is +// then written as a single block in the .note section. +void AMDGPUAsmPrinter::EmitPalMetadata(const MachineFunction &MF, + const SIProgramInfo &CurrentProgramInfo) { + const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); + // Given the calling convention, calculate the register number for rsrc1. 
In + // principle the register number could change in future hardware, but we know + // it is the same for gfx6-9 (except that LS and ES don't exist on gfx9), so + // we can use the same fixed value that .AMDGPU.config has for Mesa. Note + // that we use a register number rather than a byte offset, so we need to + // divide by 4. + unsigned Rsrc1Reg = getRsrcReg(MF.getFunction()->getCallingConv()) / 4; + unsigned Rsrc2Reg = Rsrc1Reg + 1; + // Also calculate the PAL metadata key for *S_SCRATCH_SIZE. It can be used + // with a constant offset to access any non-register shader-specific PAL + // metadata key. + unsigned ScratchSizeKey = AMDGPU::ElfNote::AMDGPU_PAL_METADATA_CS_SCRATCH_SIZE; + switch (MF.getFunction()->getCallingConv()) { + case CallingConv::AMDGPU_PS: + ScratchSizeKey = AMDGPU::ElfNote::AMDGPU_PAL_METADATA_PS_SCRATCH_SIZE; + break; + case CallingConv::AMDGPU_VS: + ScratchSizeKey = AMDGPU::ElfNote::AMDGPU_PAL_METADATA_VS_SCRATCH_SIZE; + break; + case CallingConv::AMDGPU_GS: + ScratchSizeKey = AMDGPU::ElfNote::AMDGPU_PAL_METADATA_GS_SCRATCH_SIZE; + break; + case CallingConv::AMDGPU_ES: + ScratchSizeKey = AMDGPU::ElfNote::AMDGPU_PAL_METADATA_ES_SCRATCH_SIZE; + break; + case CallingConv::AMDGPU_HS: + ScratchSizeKey = AMDGPU::ElfNote::AMDGPU_PAL_METADATA_HS_SCRATCH_SIZE; + break; + case CallingConv::AMDGPU_LS: + ScratchSizeKey = AMDGPU::ElfNote::AMDGPU_PAL_METADATA_LS_SCRATCH_SIZE; + break; + } + unsigned NumUsedVgprsKey = ScratchSizeKey + + AMDGPU::ElfNote::AMDGPU_PAL_METADATA_VS_NUM_USED_VGPRS + - AMDGPU::ElfNote::AMDGPU_PAL_METADATA_VS_SCRATCH_SIZE; + unsigned NumUsedSgprsKey = ScratchSizeKey + + AMDGPU::ElfNote::AMDGPU_PAL_METADATA_VS_NUM_USED_SGPRS + - AMDGPU::ElfNote::AMDGPU_PAL_METADATA_VS_SCRATCH_SIZE; + PalMetadata[NumUsedVgprsKey] = CurrentProgramInfo.NumVGPRsForWavesPerEU; + PalMetadata[NumUsedSgprsKey] = CurrentProgramInfo.NumSGPRsForWavesPerEU; + if (AMDGPU::isCompute(MF.getFunction()->getCallingConv())) { + PalMetadata[Rsrc1Reg] |= 
CurrentProgramInfo.ComputePGMRSrc1; + PalMetadata[Rsrc2Reg] |= CurrentProgramInfo.ComputePGMRSrc2; + // ScratchSize is in bytes, 16 aligned. + PalMetadata[ScratchSizeKey] |= alignTo(CurrentProgramInfo.ScratchSize, 16); + } else { + PalMetadata[Rsrc1Reg] |= S_00B028_VGPRS(CurrentProgramInfo.VGPRBlocks) + | S_00B028_SGPRS(CurrentProgramInfo.SGPRBlocks); + if (CurrentProgramInfo.ScratchBlocks > 0) + PalMetadata[Rsrc2Reg] |= S_00B84C_SCRATCH_EN(1); + // ScratchSize is in bytes, 16 aligned. + PalMetadata[ScratchSizeKey] |= alignTo(CurrentProgramInfo.ScratchSize, 16); + } + if (MF.getFunction()->getCallingConv() == CallingConv::AMDGPU_PS) { + PalMetadata[Rsrc2Reg] |= S_00B02C_EXTRA_LDS_SIZE(CurrentProgramInfo.LDSBlocks); + PalMetadata[R_0286CC_SPI_PS_INPUT_ENA / 4] |= MFI->getPSInputEnable(); + PalMetadata[R_0286D0_SPI_PS_INPUT_ADDR / 4] |= MFI->getPSInputAddr(); + } +} + // This is supposed to be log2(Size) static amd_element_byte_size_t getElementByteSizeValue(unsigned Size) { switch (Size) { diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h index 0a58ce06704..abfc6d070f5 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h @@ -112,10 +112,12 @@ private: SIProgramInfo CurrentProgramInfo; DenseMap<const Function *, SIFunctionResourceInfo> CallGraphResourceInfo; + std::map<uint32_t, uint32_t> PalMetadata; uint64_t getFunctionCodeSize(const MachineFunction &MF) const; SIFunctionResourceInfo analyzeResourceUsage(const MachineFunction &MF) const; + void readPalMetadata(Module &M); void getSIProgramInfo(SIProgramInfo &Out, const MachineFunction &MF); void getAmdKernelCode(amd_kernel_code_t &Out, const SIProgramInfo &KernelInfo, const MachineFunction &MF) const; @@ -127,6 +129,7 @@ private: /// can correctly setup the GPU state. 
void EmitProgramInfoR600(const MachineFunction &MF); void EmitProgramInfoSI(const MachineFunction &MF, const SIProgramInfo &KernelInfo); + void EmitPalMetadata(const MachineFunction &MF, const SIProgramInfo &KernelInfo); void emitCommonFunctionComments(uint32_t NumVGPR, uint32_t NumSGPR, uint32_t ScratchSize, diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPTNote.h b/llvm/lib/Target/AMDGPU/AMDGPUPTNote.h index 71b9ab699b9..6dd0829cf22 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUPTNote.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUPTNote.h @@ -27,16 +27,49 @@ const char NoteName[] = "AMD"; // TODO: Move this enum to include/llvm/Support so it can be used in tools? enum NoteType{ + NT_AMDGPU_HSA_RESERVED_0 = 0, NT_AMDGPU_HSA_CODE_OBJECT_VERSION = 1, NT_AMDGPU_HSA_HSAIL = 2, NT_AMDGPU_HSA_ISA = 3, NT_AMDGPU_HSA_PRODUCER = 4, NT_AMDGPU_HSA_PRODUCER_OPTIONS = 5, NT_AMDGPU_HSA_EXTENSION = 6, + NT_AMDGPU_HSA_RESERVED_7 = 7, + NT_AMDGPU_HSA_RESERVED_8 = 8, + NT_AMDGPU_HSA_RESERVED_9 = 9, NT_AMDGPU_HSA_CODE_OBJECT_METADATA = 10, + NT_AMD_AMDGPU_ISA = 11, + NT_AMDGPU_PAL_METADATA = 12, NT_AMDGPU_HSA_HLDEBUG_DEBUG = 101, NT_AMDGPU_HSA_HLDEBUG_TARGET = 102 }; + +enum NoteAmdGpuPalMetadataKey { + AMDGPU_PAL_METADATA_LS_NUM_USED_VGPRS = 0x10000015, + AMDGPU_PAL_METADATA_HS_NUM_USED_VGPRS = 0x10000016, + AMDGPU_PAL_METADATA_ES_NUM_USED_VGPRS = 0x10000017, + AMDGPU_PAL_METADATA_GS_NUM_USED_VGPRS = 0x10000018, + AMDGPU_PAL_METADATA_VS_NUM_USED_VGPRS = 0x10000019, + AMDGPU_PAL_METADATA_PS_NUM_USED_VGPRS = 0x1000001a, + AMDGPU_PAL_METADATA_CS_NUM_USED_VGPRS = 0x1000001b, + + AMDGPU_PAL_METADATA_LS_NUM_USED_SGPRS = 0x1000001c, + AMDGPU_PAL_METADATA_HS_NUM_USED_SGPRS = 0x1000001d, + AMDGPU_PAL_METADATA_ES_NUM_USED_SGPRS = 0x1000001e, + AMDGPU_PAL_METADATA_GS_NUM_USED_SGPRS = 0x1000001f, + AMDGPU_PAL_METADATA_VS_NUM_USED_SGPRS = 0x10000020, + AMDGPU_PAL_METADATA_PS_NUM_USED_SGPRS = 0x10000021, + AMDGPU_PAL_METADATA_CS_NUM_USED_SGPRS = 0x10000022, + + AMDGPU_PAL_METADATA_LS_SCRATCH_SIZE = 0x10000038, + 
AMDGPU_PAL_METADATA_HS_SCRATCH_SIZE = 0x10000039, + AMDGPU_PAL_METADATA_ES_SCRATCH_SIZE = 0x1000003a, + AMDGPU_PAL_METADATA_GS_SCRATCH_SIZE = 0x1000003b, + AMDGPU_PAL_METADATA_VS_SCRATCH_SIZE = 0x1000003c, + AMDGPU_PAL_METADATA_PS_SCRATCH_SIZE = 0x1000003d, + AMDGPU_PAL_METADATA_CS_SCRATCH_SIZE = 0x1000003e, +}; + } } diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index 6b5e4da50f5..fa7157bb645 100644 --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -833,6 +833,7 @@ private: bool ParseDirectiveAMDKernelCodeT(); bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const; bool ParseDirectiveAMDGPUHsaKernel(); + bool ParseDirectivePalMetadata(); bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth, RegisterKind RegKind, unsigned Reg1, unsigned RegNum); @@ -2493,6 +2494,21 @@ bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() { return false; } +bool AMDGPUAsmParser::ParseDirectivePalMetadata() { + std::vector<uint32_t> Data; + for (;;) { + uint32_t Value; + if (ParseAsAbsoluteExpression(Value)) + return TokError("invalid value in .amdgpu_pal_metadata"); + Data.push_back(Value); + if (getLexer().isNot(AsmToken::Comma)) + break; + Lex(); + } + getTargetStreamer().EmitPalMetadata(Data); + return false; +} + bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { StringRef IDVal = DirectiveID.getString(); @@ -2511,6 +2527,9 @@ bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { if (IDVal == ".amdgpu_hsa_kernel") return ParseDirectiveAMDGPUHsaKernel(); + if (IDVal == ".amdgpu_pal_metadata") + return ParseDirectivePalMetadata(); + return true; } diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp index 2a0032fc9ad..6b8d318ecf6 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp +++ 
b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp @@ -112,6 +112,14 @@ bool AMDGPUTargetAsmStreamer::EmitCodeObjectMetadata(StringRef YamlString) { return true; } +bool AMDGPUTargetAsmStreamer::EmitPalMetadata(ArrayRef<uint32_t> Data) { + OS << "\t.amdgpu_pal_metadata"; + for (auto I = Data.begin(), E = Data.end(); I != E; ++I) + OS << (I == Data.begin() ? " 0x" : ",0x") << Twine::utohexstr(*I); + OS << "\n"; + return true; +} + //===----------------------------------------------------------------------===// // AMDGPUTargetELFStreamer //===----------------------------------------------------------------------===// @@ -230,3 +238,16 @@ bool AMDGPUTargetELFStreamer::EmitCodeObjectMetadata(StringRef YamlString) { return true; } + +bool AMDGPUTargetELFStreamer::EmitPalMetadata(ArrayRef<uint32_t> Data) { + EmitAMDGPUNote( + MCConstantExpr::create(Data.size() * sizeof(uint32_t), getContext()), + ElfNote::NT_AMDGPU_PAL_METADATA, + [&](MCELFStreamer &OS){ + for (auto I : Data) + OS.EmitIntValue(I, sizeof(uint32_t)); + } + ); + return true; +} + diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h index 968128e94d0..db82c191a7c 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h @@ -53,6 +53,8 @@ public: /// \returns True on success, false on failure. virtual bool EmitCodeObjectMetadata(StringRef YamlString) = 0; + + virtual bool EmitPalMetadata(ArrayRef<uint32_t> Data) = 0; }; class AMDGPUTargetAsmStreamer final : public AMDGPUTargetStreamer { @@ -72,6 +74,8 @@ public: /// \returns True on success, false on failure. bool EmitCodeObjectMetadata(StringRef YamlString) override; + + bool EmitPalMetadata(ArrayRef<uint32_t> data) override; }; class AMDGPUTargetELFStreamer final : public AMDGPUTargetStreamer { @@ -99,6 +103,8 @@ public: /// \returns True on success, false on failure. 
bool EmitCodeObjectMetadata(StringRef YamlString) override; + + bool EmitPalMetadata(ArrayRef<uint32_t> data) override; }; } |