summary refs log tree commit diff stats
path: root/llvm/lib/Target
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp 103
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h 10
-rw-r--r-- llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp 60
-rw-r--r-- llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h 13
-rw-r--r-- llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp 4
-rw-r--r-- llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h 4
6 files changed, 184 insertions, 10 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index 0cb9cc0ff0c..8e56ec60eff 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -116,6 +116,10 @@ AMDGPUTargetStreamer* AMDGPUAsmPrinter::getTargetStreamer() const {
}
void AMDGPUAsmPrinter::EmitStartOfAsmFile(Module &M) {
+ if (IsaInfo::hasCodeObjectV3(getSTI()) &&
+ TM.getTargetTriple().getOS() == Triple::AMDHSA)
+ return;
+
if (TM.getTargetTriple().getOS() != Triple::AMDHSA &&
TM.getTargetTriple().getOS() != Triple::AMDPAL)
return;
@@ -126,10 +130,6 @@ void AMDGPUAsmPrinter::EmitStartOfAsmFile(Module &M) {
if (TM.getTargetTriple().getOS() == Triple::AMDPAL)
readPALMetadata(M);
- // Deprecated notes are not emitted for code object v3.
- if (IsaInfo::hasCodeObjectV3(getSTI()->getFeatureBits()))
- return;
-
// HSA emits NT_AMDGPU_HSA_CODE_OBJECT_VERSION for code objects v2.
if (TM.getTargetTriple().getOS() == Triple::AMDHSA)
getTargetStreamer()->EmitDirectiveHSACodeObjectVersion(2, 1);
@@ -141,6 +141,10 @@ void AMDGPUAsmPrinter::EmitStartOfAsmFile(Module &M) {
}
void AMDGPUAsmPrinter::EmitEndOfAsmFile(Module &M) {
+ // TODO: Add metadata to code object v3.
+ if (IsaInfo::hasCodeObjectV3(getSTI()) &&
+ TM.getTargetTriple().getOS() == Triple::AMDHSA)
+ return;
// Following code requires TargetStreamer to be present.
if (!getTargetStreamer())
@@ -186,8 +190,11 @@ bool AMDGPUAsmPrinter::isBlockOnlyReachableByFallthrough(
}
void AMDGPUAsmPrinter::EmitFunctionBodyStart() {
- const AMDGPUMachineFunction *MFI = MF->getInfo<AMDGPUMachineFunction>();
- if (!MFI->isEntryFunction())
+ const SIMachineFunctionInfo &MFI = *MF->getInfo<SIMachineFunctionInfo>();
+ if (!MFI.isEntryFunction())
+ return;
+ if (IsaInfo::hasCodeObjectV3(getSTI()) &&
+ TM.getTargetTriple().getOS() == Triple::AMDHSA)
return;
const AMDGPUSubtarget &STM = MF->getSubtarget<AMDGPUSubtarget>();
@@ -205,7 +212,27 @@ void AMDGPUAsmPrinter::EmitFunctionBodyStart() {
getHSADebugProps(*MF, CurrentProgramInfo));
}
+void AMDGPUAsmPrinter::EmitFunctionBodyEnd() {
+ const SIMachineFunctionInfo &MFI = *MF->getInfo<SIMachineFunctionInfo>();
+ if (!MFI.isEntryFunction())
+ return;
+ if (!IsaInfo::hasCodeObjectV3(getSTI()) ||
+ TM.getTargetTriple().getOS() != Triple::AMDHSA)
+ return;
+
+ SmallString<128> KernelName;
+ getNameWithPrefix(KernelName, &MF->getFunction());
+ getTargetStreamer()->EmitAmdhsaKernelDescriptor(
+ KernelName, getAmdhsaKernelDescriptor(*MF, CurrentProgramInfo));
+}
+
void AMDGPUAsmPrinter::EmitFunctionEntryLabel() {
+ if (IsaInfo::hasCodeObjectV3(getSTI()) &&
+ TM.getTargetTriple().getOS() == Triple::AMDHSA) {
+ AsmPrinter::EmitFunctionEntryLabel();
+ return;
+ }
+
const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
const AMDGPUSubtarget &STM = MF->getSubtarget<AMDGPUSubtarget>();
if (MFI->isEntryFunction() && STM.isAmdCodeObjectV2(MF->getFunction())) {
@@ -288,6 +315,70 @@ void AMDGPUAsmPrinter::emitCommonFunctionComments(
false);
}
+uint16_t AMDGPUAsmPrinter::getAmdhsaKernelCodeProperties(
+ const MachineFunction &MF) const {
+ const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
+ uint16_t KernelCodeProperties = 0;
+
+ if (MFI.hasPrivateSegmentBuffer()) {
+ KernelCodeProperties |=
+ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER;
+ }
+ if (MFI.hasDispatchPtr()) {
+ KernelCodeProperties |=
+ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
+ }
+ if (MFI.hasQueuePtr()) {
+ KernelCodeProperties |=
+ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR;
+ }
+ if (MFI.hasKernargSegmentPtr()) {
+ KernelCodeProperties |=
+ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR;
+ }
+ if (MFI.hasDispatchID()) {
+ KernelCodeProperties |=
+ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID;
+ }
+ if (MFI.hasFlatScratchInit()) {
+ KernelCodeProperties |=
+ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT;
+ }
+ if (MFI.hasGridWorkgroupCountX()) {
+ KernelCodeProperties |=
+ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_X;
+ }
+ if (MFI.hasGridWorkgroupCountY()) {
+ KernelCodeProperties |=
+ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Y;
+ }
+ if (MFI.hasGridWorkgroupCountZ()) {
+ KernelCodeProperties |=
+ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z;
+ }
+
+ return KernelCodeProperties;
+}
+
+amdhsa::kernel_descriptor_t AMDGPUAsmPrinter::getAmdhsaKernelDescriptor(
+ const MachineFunction &MF,
+ const SIProgramInfo &PI) const {
+ amdhsa::kernel_descriptor_t KernelDescriptor;
+ memset(&KernelDescriptor, 0x0, sizeof(KernelDescriptor));
+
+ assert(isUInt<32>(PI.ScratchSize));
+ assert(isUInt<32>(PI.ComputePGMRSrc1));
+ assert(isUInt<32>(PI.ComputePGMRSrc2));
+
+ KernelDescriptor.group_segment_fixed_size = PI.LDSSize;
+ KernelDescriptor.private_segment_fixed_size = PI.ScratchSize;
+ KernelDescriptor.compute_pgm_rsrc1 = PI.ComputePGMRSrc1;
+ KernelDescriptor.compute_pgm_rsrc2 = PI.ComputePGMRSrc2;
+ KernelDescriptor.kernel_code_properties = getAmdhsaKernelCodeProperties(MF);
+
+ return KernelDescriptor;
+}
+
bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
CurrentProgramInfo = SIProgramInfo();
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
index bc2cb4b608a..e1a95595aa6 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
@@ -20,6 +20,7 @@
#include "MCTargetDesc/AMDGPUHSAMetadataStreamer.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include <cstddef>
#include <cstdint>
#include <limits>
@@ -148,6 +149,13 @@ private:
uint64_t CodeSize,
const AMDGPUMachineFunction* MFI);
+ uint16_t getAmdhsaKernelCodeProperties(
+ const MachineFunction &MF) const;
+
+ amdhsa::kernel_descriptor_t getAmdhsaKernelDescriptor(
+ const MachineFunction &MF,
+ const SIProgramInfo &PI) const;
+
public:
explicit AMDGPUAsmPrinter(TargetMachine &TM,
std::unique_ptr<MCStreamer> Streamer);
@@ -180,6 +188,8 @@ public:
void EmitFunctionBodyStart() override;
+ void EmitFunctionBodyEnd() override;
+
void EmitFunctionEntryLabel() override;
void EmitBasicBlockStart(const MachineBasicBlock &MBB) const override;
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
index 2455a937c86..6a4b2055468 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
@@ -196,6 +196,12 @@ bool AMDGPUTargetAsmStreamer::EmitPALMetadata(
return true;
}
+void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor(
+ StringRef KernelName,
+ const amdhsa::kernel_descriptor_t &KernelDescriptor) {
+ // FIXME: not supported yet.
+}
+
//===----------------------------------------------------------------------===//
// AMDGPUTargetELFStreamer
//===----------------------------------------------------------------------===//
@@ -362,3 +368,57 @@ bool AMDGPUTargetELFStreamer::EmitPALMetadata(
);
return true;
}
+
+void AMDGPUTargetELFStreamer::EmitAmdhsaKernelDescriptor(
+ StringRef KernelName,
+ const amdhsa::kernel_descriptor_t &KernelDescriptor) {
+ auto &Streamer = getStreamer();
+ auto &Context = Streamer.getContext();
+ auto &ObjectFileInfo = *Context.getObjectFileInfo();
+ auto &ReadOnlySection = *ObjectFileInfo.getReadOnlySection();
+
+ Streamer.PushSection();
+ Streamer.SwitchSection(&ReadOnlySection);
+
+ // CP microcode requires the kernel descriptor to be allocated on 64 byte
+ // alignment.
+ Streamer.EmitValueToAlignment(64, 0, 1, 0);
+ if (ReadOnlySection.getAlignment() < 64)
+ ReadOnlySection.setAlignment(64);
+
+ MCSymbolELF *KernelDescriptorSymbol = cast<MCSymbolELF>(
+ Context.getOrCreateSymbol(Twine(KernelName) + Twine(".kd")));
+ KernelDescriptorSymbol->setBinding(ELF::STB_GLOBAL);
+ KernelDescriptorSymbol->setType(ELF::STT_OBJECT);
+ KernelDescriptorSymbol->setSize(
+ MCConstantExpr::create(sizeof(KernelDescriptor), Context));
+
+ MCSymbolELF *KernelCodeSymbol = cast<MCSymbolELF>(
+ Context.getOrCreateSymbol(Twine(KernelName)));
+ KernelCodeSymbol->setBinding(ELF::STB_LOCAL);
+
+ Streamer.EmitLabel(KernelDescriptorSymbol);
+ Streamer.EmitBytes(StringRef(
+ (const char*)&(KernelDescriptor),
+ offsetof(amdhsa::kernel_descriptor_t, kernel_code_entry_byte_offset)));
+ // FIXME: Remove the use of VK_AMDGPU_REL64 in the expression below. The
+ // expression being created is:
+ // (start of kernel code) - (start of kernel descriptor)
+ // It implies R_AMDGPU_REL64, but ends up being R_AMDGPU_ABS64.
+ Streamer.EmitValue(MCBinaryExpr::createSub(
+ MCSymbolRefExpr::create(
+ KernelCodeSymbol, MCSymbolRefExpr::VK_AMDGPU_REL64, Context),
+ MCSymbolRefExpr::create(
+ KernelDescriptorSymbol, MCSymbolRefExpr::VK_None, Context),
+ Context),
+ sizeof(KernelDescriptor.kernel_code_entry_byte_offset));
+ Streamer.EmitBytes(StringRef(
+ (const char*)&(KernelDescriptor) +
+ offsetof(amdhsa::kernel_descriptor_t, kernel_code_entry_byte_offset) +
+ sizeof(KernelDescriptor.kernel_code_entry_byte_offset),
+ sizeof(KernelDescriptor) -
+ offsetof(amdhsa::kernel_descriptor_t, kernel_code_entry_byte_offset) -
+ sizeof(KernelDescriptor.kernel_code_entry_byte_offset)));
+
+ Streamer.PopSection();
+}
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
index 74c15e43e25..4267b553af4 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
@@ -14,6 +14,7 @@
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/AMDGPUMetadata.h"
+#include "llvm/Support/AMDHSAKernelDescriptor.h"
namespace llvm {
#include "AMDGPUPTNote.h"
@@ -62,6 +63,10 @@ public:
/// \returns True on success, false on failure.
virtual bool EmitPALMetadata(const AMDGPU::PALMD::Metadata &PALMetadata) = 0;
+
+ virtual void EmitAmdhsaKernelDescriptor(
+ StringRef KernelName,
+ const amdhsa::kernel_descriptor_t &KernelDescriptor) = 0;
};
class AMDGPUTargetAsmStreamer final : public AMDGPUTargetStreamer {
@@ -87,6 +92,10 @@ public:
/// \returns True on success, false on failure.
bool EmitPALMetadata(const AMDGPU::PALMD::Metadata &PALMetadata) override;
+
+ void EmitAmdhsaKernelDescriptor(
+ StringRef KernelName,
+ const amdhsa::kernel_descriptor_t &KernelDescriptor) override;
};
class AMDGPUTargetELFStreamer final : public AMDGPUTargetStreamer {
@@ -119,6 +128,10 @@ public:
/// \returns True on success, false on failure.
bool EmitPALMetadata(const AMDGPU::PALMD::Metadata &PALMetadata) override;
+
+ void EmitAmdhsaKernelDescriptor(
+ StringRef KernelName,
+ const amdhsa::kernel_descriptor_t &KernelDescriptor) override;
};
}
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 3896bcc0b0e..da9b98bb155 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -248,8 +248,8 @@ void streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream) {
Stream.flush();
}
-bool hasCodeObjectV3(const FeatureBitset &Features) {
- return Features.test(FeatureCodeObjectV3);
+bool hasCodeObjectV3(const MCSubtargetInfo *STI) {
+ return STI->getFeatureBits().test(FeatureCodeObjectV3);
}
unsigned getWavefrontSize(const FeatureBitset &Features) {
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index 54932d2bd3e..2abc18dc0ce 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -59,9 +59,9 @@ IsaVersion getIsaVersion(const FeatureBitset &Features);
/// Streams isa version string for given subtarget \p STI into \p Stream.
void streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream);
-/// \returns True if given subtarget \p Features support code object version 3,
+/// \returns True if given subtarget \p STI supports code object version 3,
/// false otherwise.
-bool hasCodeObjectV3(const FeatureBitset &Features);
+bool hasCodeObjectV3(const MCSubtargetInfo *STI);
/// \returns Wavefront size for given subtarget \p Features.
unsigned getWavefrontSize(const FeatureBitset &Features);
OpenPOWER on IntegriCloud