diff options
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp | 103 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h | 10 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp | 60 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h | 13 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 4 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h | 4 |
6 files changed, 184 insertions, 10 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp index 0cb9cc0ff0c..8e56ec60eff 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -116,6 +116,10 @@ AMDGPUTargetStreamer* AMDGPUAsmPrinter::getTargetStreamer() const { } void AMDGPUAsmPrinter::EmitStartOfAsmFile(Module &M) { + if (IsaInfo::hasCodeObjectV3(getSTI()) && + TM.getTargetTriple().getOS() == Triple::AMDHSA) + return; + if (TM.getTargetTriple().getOS() != Triple::AMDHSA && TM.getTargetTriple().getOS() != Triple::AMDPAL) return; @@ -126,10 +130,6 @@ void AMDGPUAsmPrinter::EmitStartOfAsmFile(Module &M) { if (TM.getTargetTriple().getOS() == Triple::AMDPAL) readPALMetadata(M); - // Deprecated notes are not emitted for code object v3. - if (IsaInfo::hasCodeObjectV3(getSTI()->getFeatureBits())) - return; - // HSA emits NT_AMDGPU_HSA_CODE_OBJECT_VERSION for code objects v2. if (TM.getTargetTriple().getOS() == Triple::AMDHSA) getTargetStreamer()->EmitDirectiveHSACodeObjectVersion(2, 1); @@ -141,6 +141,10 @@ void AMDGPUAsmPrinter::EmitStartOfAsmFile(Module &M) { } void AMDGPUAsmPrinter::EmitEndOfAsmFile(Module &M) { + // TODO: Add metadata to code object v3. + if (IsaInfo::hasCodeObjectV3(getSTI()) && + TM.getTargetTriple().getOS() == Triple::AMDHSA) + return; // Following code requires TargetStreamer to be present. if (!getTargetStreamer()) @@ -186,8 +190,11 @@ bool AMDGPUAsmPrinter::isBlockOnlyReachableByFallthrough( } void AMDGPUAsmPrinter::EmitFunctionBodyStart() { - const AMDGPUMachineFunction *MFI = MF->getInfo<AMDGPUMachineFunction>(); - if (!MFI->isEntryFunction()) + const SIMachineFunctionInfo &MFI = *MF->getInfo<SIMachineFunctionInfo>(); + if (!MFI.isEntryFunction()) + return; + if (IsaInfo::hasCodeObjectV3(getSTI()) && + TM.getTargetTriple().getOS() == Triple::AMDHSA) return; const AMDGPUSubtarget &STM = MF->getSubtarget<AMDGPUSubtarget>(); @@ -205,7 +212,27 @@ void AMDGPUAsmPrinter::EmitFunctionBodyStart() { getHSADebugProps(*MF, CurrentProgramInfo)); } +void AMDGPUAsmPrinter::EmitFunctionBodyEnd() { + const SIMachineFunctionInfo &MFI = *MF->getInfo<SIMachineFunctionInfo>(); + if (!MFI.isEntryFunction()) + return; + if (!IsaInfo::hasCodeObjectV3(getSTI()) || + TM.getTargetTriple().getOS() != Triple::AMDHSA) + return; + + SmallString<128> KernelName; + getNameWithPrefix(KernelName, &MF->getFunction()); + getTargetStreamer()->EmitAmdhsaKernelDescriptor( + KernelName, getAmdhsaKernelDescriptor(*MF, CurrentProgramInfo)); +} + void AMDGPUAsmPrinter::EmitFunctionEntryLabel() { + if (IsaInfo::hasCodeObjectV3(getSTI()) && + TM.getTargetTriple().getOS() == Triple::AMDHSA) { + AsmPrinter::EmitFunctionEntryLabel(); + return; + } + const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>(); const AMDGPUSubtarget &STM = MF->getSubtarget<AMDGPUSubtarget>(); if (MFI->isEntryFunction() && STM.isAmdCodeObjectV2(MF->getFunction())) { @@ -288,6 +315,70 @@ void AMDGPUAsmPrinter::emitCommonFunctionComments( false); } +uint16_t AMDGPUAsmPrinter::getAmdhsaKernelCodeProperties( + const MachineFunction &MF) const { + const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>(); + uint16_t KernelCodeProperties = 0; + + if (MFI.hasPrivateSegmentBuffer()) { + KernelCodeProperties |= + amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER; + } + if (MFI.hasDispatchPtr()) { + KernelCodeProperties |= + amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR; + } + if (MFI.hasQueuePtr()) { + KernelCodeProperties |= + amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR; + } + if (MFI.hasKernargSegmentPtr()) { + KernelCodeProperties |= + amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR; + } + if (MFI.hasDispatchID()) { + KernelCodeProperties |= + amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID; + } + if (MFI.hasFlatScratchInit()) { + KernelCodeProperties |= + amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT; + } + if (MFI.hasGridWorkgroupCountX()) { + KernelCodeProperties |= + amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_X; + } + if (MFI.hasGridWorkgroupCountY()) { + KernelCodeProperties |= + amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Y; + } + if (MFI.hasGridWorkgroupCountZ()) { + KernelCodeProperties |= + amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z; + } + + return KernelCodeProperties; +} + +amdhsa::kernel_descriptor_t AMDGPUAsmPrinter::getAmdhsaKernelDescriptor( + const MachineFunction &MF, + const SIProgramInfo &PI) const { + amdhsa::kernel_descriptor_t KernelDescriptor; + memset(&KernelDescriptor, 0x0, sizeof(KernelDescriptor)); + + assert(isUInt<32>(PI.ScratchSize)); + assert(isUInt<32>(PI.ComputePGMRSrc1)); + assert(isUInt<32>(PI.ComputePGMRSrc2)); + + KernelDescriptor.group_segment_fixed_size = PI.LDSSize; + KernelDescriptor.private_segment_fixed_size = PI.ScratchSize; + KernelDescriptor.compute_pgm_rsrc1 = PI.ComputePGMRSrc1; + KernelDescriptor.compute_pgm_rsrc2 = PI.ComputePGMRSrc2; + KernelDescriptor.kernel_code_properties = getAmdhsaKernelCodeProperties(MF); + + return KernelDescriptor; +} + bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) { CurrentProgramInfo = SIProgramInfo(); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h index bc2cb4b608a..e1a95595aa6 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h @@ -20,6 +20,7 @@ #include "MCTargetDesc/AMDGPUHSAMetadataStreamer.h" #include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/Support/AMDHSAKernelDescriptor.h" #include <cstddef> #include <cstdint> #include <limits> @@ -148,6 +149,13 @@ private: uint64_t CodeSize, const AMDGPUMachineFunction* MFI); + uint16_t getAmdhsaKernelCodeProperties( + const MachineFunction &MF) const; + + amdhsa::kernel_descriptor_t getAmdhsaKernelDescriptor( + const MachineFunction &MF, + const SIProgramInfo &PI) const; + public: explicit AMDGPUAsmPrinter(TargetMachine &TM, std::unique_ptr<MCStreamer> Streamer); @@ -180,6 +188,8 @@ public: void EmitFunctionBodyStart() override; + void EmitFunctionBodyEnd() override; + void EmitFunctionEntryLabel() override; void EmitBasicBlockStart(const MachineBasicBlock &MBB) const override; diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp index 2455a937c86..6a4b2055468 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp @@ -196,6 +196,12 @@ bool AMDGPUTargetAsmStreamer::EmitPALMetadata( return true; } +void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor( + StringRef KernelName, + const amdhsa::kernel_descriptor_t &KernelDescriptor) { + // FIXME: not supported yet. +} + //===----------------------------------------------------------------------===// // AMDGPUTargetELFStreamer //===----------------------------------------------------------------------===// @@ -362,3 +368,57 @@ bool AMDGPUTargetELFStreamer::EmitPALMetadata( ); return true; } + +void AMDGPUTargetELFStreamer::EmitAmdhsaKernelDescriptor( + StringRef KernelName, + const amdhsa::kernel_descriptor_t &KernelDescriptor) { + auto &Streamer = getStreamer(); + auto &Context = Streamer.getContext(); + auto &ObjectFileInfo = *Context.getObjectFileInfo(); + auto &ReadOnlySection = *ObjectFileInfo.getReadOnlySection(); + + Streamer.PushSection(); + Streamer.SwitchSection(&ReadOnlySection); + + // CP microcode requires the kernel descriptor to be allocated on 64 byte + // alignment. + Streamer.EmitValueToAlignment(64, 0, 1, 0); + if (ReadOnlySection.getAlignment() < 64) + ReadOnlySection.setAlignment(64); + + MCSymbolELF *KernelDescriptorSymbol = cast<MCSymbolELF>( + Context.getOrCreateSymbol(Twine(KernelName) + Twine(".kd"))); + KernelDescriptorSymbol->setBinding(ELF::STB_GLOBAL); + KernelDescriptorSymbol->setType(ELF::STT_OBJECT); + KernelDescriptorSymbol->setSize( + MCConstantExpr::create(sizeof(KernelDescriptor), Context)); + + MCSymbolELF *KernelCodeSymbol = cast<MCSymbolELF>( + Context.getOrCreateSymbol(Twine(KernelName))); + KernelCodeSymbol->setBinding(ELF::STB_LOCAL); + + Streamer.EmitLabel(KernelDescriptorSymbol); + Streamer.EmitBytes(StringRef( + (const char*)&(KernelDescriptor), + offsetof(amdhsa::kernel_descriptor_t, kernel_code_entry_byte_offset))); + // FIXME: Remove the use of VK_AMDGPU_REL64 in the expression below. The + // expression being created is: + // (start of kernel code) - (start of kernel descriptor) + // It implies R_AMDGPU_REL64, but ends up being R_AMDGPU_ABS64. + Streamer.EmitValue(MCBinaryExpr::createSub( + MCSymbolRefExpr::create( + KernelCodeSymbol, MCSymbolRefExpr::VK_AMDGPU_REL64, Context), + MCSymbolRefExpr::create( + KernelDescriptorSymbol, MCSymbolRefExpr::VK_None, Context), + Context), + sizeof(KernelDescriptor.kernel_code_entry_byte_offset)); + Streamer.EmitBytes(StringRef( + (const char*)&(KernelDescriptor) + + offsetof(amdhsa::kernel_descriptor_t, kernel_code_entry_byte_offset) + + sizeof(KernelDescriptor.kernel_code_entry_byte_offset), + sizeof(KernelDescriptor) - + offsetof(amdhsa::kernel_descriptor_t, kernel_code_entry_byte_offset) - + sizeof(KernelDescriptor.kernel_code_entry_byte_offset))); + + Streamer.PopSection(); +} diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h index 74c15e43e25..4267b553af4 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h @@ -14,6 +14,7 @@ #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/Support/AMDGPUMetadata.h" +#include "llvm/Support/AMDHSAKernelDescriptor.h" namespace llvm { #include "AMDGPUPTNote.h" @@ -62,6 +63,10 @@ public: /// \returns True on success, false on failure. virtual bool EmitPALMetadata(const AMDGPU::PALMD::Metadata &PALMetadata) = 0; + + virtual void EmitAmdhsaKernelDescriptor( + StringRef KernelName, + const amdhsa::kernel_descriptor_t &KernelDescriptor) = 0; }; class AMDGPUTargetAsmStreamer final : public AMDGPUTargetStreamer { @@ -87,6 +92,10 @@ public: /// \returns True on success, false on failure. bool EmitPALMetadata(const AMDGPU::PALMD::Metadata &PALMetadata) override; + + void EmitAmdhsaKernelDescriptor( + StringRef KernelName, + const amdhsa::kernel_descriptor_t &KernelDescriptor) override; }; class AMDGPUTargetELFStreamer final : public AMDGPUTargetStreamer { @@ -119,6 +128,10 @@ public: /// \returns True on success, false on failure. bool EmitPALMetadata(const AMDGPU::PALMD::Metadata &PALMetadata) override; + + void EmitAmdhsaKernelDescriptor( + StringRef KernelName, + const amdhsa::kernel_descriptor_t &KernelDescriptor) override; }; } diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index 3896bcc0b0e..da9b98bb155 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -248,8 +248,8 @@ void streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream) { Stream.flush(); } -bool hasCodeObjectV3(const FeatureBitset &Features) { - return Features.test(FeatureCodeObjectV3); +bool hasCodeObjectV3(const MCSubtargetInfo *STI) { + return STI->getFeatureBits().test(FeatureCodeObjectV3); } unsigned getWavefrontSize(const FeatureBitset &Features) { diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h index 54932d2bd3e..2abc18dc0ce 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -59,9 +59,9 @@ IsaVersion getIsaVersion(const FeatureBitset &Features); /// Streams isa version string for given subtarget \p STI into \p Stream. void streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream); -/// \returns True if given subtarget \p Features support code object version 3, +/// \returns True if given subtarget \p STI supports code object version 3, /// false otherwise. -bool hasCodeObjectV3(const FeatureBitset &Features); +bool hasCodeObjectV3(const MCSubtargetInfo *STI); /// \returns Wavefront size for given subtarget \p Features. unsigned getWavefrontSize(const FeatureBitset &Features); |