diff options
author | Scott Linder <scott@scottlinder.com> | 2018-12-12 19:39:27 +0000 |
---|---|---|
committer | Scott Linder <scott@scottlinder.com> | 2018-12-12 19:39:27 +0000 |
commit | f5b36e56fb4d89eab442559a37239eeafe7690d3 (patch) | |
tree | 049bb5435cb2728f5d3996f1a79cbc958796a046 /llvm/lib/Target/AMDGPU | |
parent | 3f8f004daf62d1b1007e5c226e8da2dab96eea14 (diff) | |
download | bcm5719-llvm-f5b36e56fb4d89eab442559a37239eeafe7690d3.tar.gz bcm5719-llvm-f5b36e56fb4d89eab442559a37239eeafe7690d3.zip |
[AMDGPU] Emit MessagePack HSA Metadata for v3 code object
Continue to present HSA metadata as YAML in ASM and when output by tools
(e.g. llvm-readobj), but encode it in Messagepack in the code object.
Differential Revision: https://reviews.llvm.org/D48179
llvm-svn: 348963
Diffstat (limited to 'llvm/lib/Target/AMDGPU')
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp | 63 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h | 2 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp | 576 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h | 108 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUPTNote.h | 3 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp | 32 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/LLVMBuild.txt | 2 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp | 157 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h | 28 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/MCTargetDesc/LLVMBuild.txt | 2 |
10 files changed, 831 insertions, 142 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp index 3ff764ee4e5..2ded7cdb648 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -46,6 +46,7 @@ using namespace llvm; using namespace llvm::AMDGPU; +using namespace llvm::AMDGPU::HSAMD; // TODO: This should get the default rounding mode from the kernel. We just set // the default here, but this could change if the OpenCL rounding mode pragmas @@ -99,6 +100,10 @@ extern "C" void LLVMInitializeAMDGPUAsmPrinter() { AMDGPUAsmPrinter::AMDGPUAsmPrinter(TargetMachine &TM, std::unique_ptr<MCStreamer> Streamer) : AsmPrinter(TM, std::move(Streamer)) { + if (IsaInfo::hasCodeObjectV3(getSTI())) + HSAMetadataStream.reset(new MetadataStreamerV3()); + else + HSAMetadataStream.reset(new MetadataStreamerV2()); } StringRef AMDGPUAsmPrinter::getPassName() const { @@ -122,9 +127,6 @@ void AMDGPUAsmPrinter::EmitStartOfAsmFile(Module &M) { IsaInfo::streamIsaVersion(getSTI(), ExpectedTargetOS); getTargetStreamer()->EmitDirectiveAMDGCNTarget(ExpectedTarget); - - if (TM.getTargetTriple().getOS() == Triple::AMDHSA) - return; } if (TM.getTargetTriple().getOS() != Triple::AMDHSA && @@ -132,11 +134,14 @@ void AMDGPUAsmPrinter::EmitStartOfAsmFile(Module &M) { return; if (TM.getTargetTriple().getOS() == Triple::AMDHSA) - HSAMetadataStream.begin(M); + HSAMetadataStream->begin(M); if (TM.getTargetTriple().getOS() == Triple::AMDPAL) readPALMetadata(M); + if (IsaInfo::hasCodeObjectV3(getSTI())) + return; + // HSA emits NT_AMDGPU_HSA_CODE_OBJECT_VERSION for code objects v2. if (TM.getTargetTriple().getOS() == Triple::AMDHSA) getTargetStreamer()->EmitDirectiveHSACodeObjectVersion(2, 1); @@ -148,37 +153,38 @@ void AMDGPUAsmPrinter::EmitStartOfAsmFile(Module &M) { } void AMDGPUAsmPrinter::EmitEndOfAsmFile(Module &M) { - // TODO: Add metadata to code object v3. - if (IsaInfo::hasCodeObjectV3(getSTI()) && - TM.getTargetTriple().getOS() == Triple::AMDHSA) - return; - // Following code requires TargetStreamer to be present. if (!getTargetStreamer()) return; - // Emit ISA Version (NT_AMD_AMDGPU_ISA). - std::string ISAVersionString; - raw_string_ostream ISAVersionStream(ISAVersionString); - IsaInfo::streamIsaVersion(getSTI(), ISAVersionStream); - getTargetStreamer()->EmitISAVersion(ISAVersionStream.str()); + if (!IsaInfo::hasCodeObjectV3(getSTI())) { + // Emit ISA Version (NT_AMD_AMDGPU_ISA). + std::string ISAVersionString; + raw_string_ostream ISAVersionStream(ISAVersionString); + IsaInfo::streamIsaVersion(getSTI(), ISAVersionStream); + getTargetStreamer()->EmitISAVersion(ISAVersionStream.str()); + } // Emit HSA Metadata (NT_AMD_AMDGPU_HSA_METADATA). if (TM.getTargetTriple().getOS() == Triple::AMDHSA) { - HSAMetadataStream.end(); - getTargetStreamer()->EmitHSAMetadata(HSAMetadataStream.getHSAMetadata()); + HSAMetadataStream->end(); + bool Success = HSAMetadataStream->emitTo(*getTargetStreamer()); + (void)Success; + assert(Success && "Malformed HSA Metadata"); } - // Emit PAL Metadata (NT_AMD_AMDGPU_PAL_METADATA). - if (TM.getTargetTriple().getOS() == Triple::AMDPAL) { - // Copy the PAL metadata from the map where we collected it into a vector, - // then write it as a .note. - PALMD::Metadata PALMetadataVector; - for (auto i : PALMetadataMap) { - PALMetadataVector.push_back(i.first); - PALMetadataVector.push_back(i.second); + if (!IsaInfo::hasCodeObjectV3(getSTI())) { + // Emit PAL Metadata (NT_AMD_AMDGPU_PAL_METADATA). + if (TM.getTargetTriple().getOS() == Triple::AMDPAL) { + // Copy the PAL metadata from the map where we collected it into a vector, + // then write it as a .note. + PALMD::Metadata PALMetadataVector; + for (auto i : PALMetadataMap) { + PALMetadataVector.push_back(i.first); + PALMetadataVector.push_back(i.second); + } + getTargetStreamer()->EmitPALMetadata(PALMetadataVector); } - getTargetStreamer()->EmitPALMetadata(PALMetadataVector); } } @@ -211,11 +217,8 @@ void AMDGPUAsmPrinter::EmitFunctionBodyStart() { getTargetStreamer()->EmitAMDKernelCodeT(KernelCode); } - if (TM.getTargetTriple().getOS() != Triple::AMDHSA) - return; - - if (!STM.hasCodeObjectV3() && STM.isAmdHsaOS()) - HSAMetadataStream.emitKernel(*MF, CurrentProgramInfo); + if (STM.isAmdHsaOS()) + HSAMetadataStream->emitKernel(*MF, CurrentProgramInfo); } void AMDGPUAsmPrinter::EmitFunctionBodyEnd() { diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h index 462b5feca6a..167ac4b21e1 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h @@ -56,7 +56,7 @@ private: SIProgramInfo CurrentProgramInfo; DenseMap<const Function *, SIFunctionResourceInfo> CallGraphResourceInfo; - AMDGPU::HSAMD::MetadataStreamer HSAMetadataStream; + std::unique_ptr<AMDGPU::HSAMD::MetadataStreamer> HSAMetadataStream; std::map<uint32_t, uint32_t> PALMetadataMap; uint64_t getFunctionCodeSize(const MachineFunction &MF) const; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp index fadc833e014..c38b0e61558 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp @@ -16,6 +16,7 @@ #include "AMDGPUHSAMetadataStreamer.h" #include "AMDGPU.h" #include "AMDGPUSubtarget.h" +#include "MCTargetDesc/AMDGPUTargetStreamer.h" #include "SIMachineFunctionInfo.h" #include "SIProgramInfo.h" #include "Utils/AMDGPUBaseInfo.h" @@ -36,11 +37,14 @@ static cl::opt<bool> VerifyHSAMetadata( namespace AMDGPU { namespace HSAMD { -void MetadataStreamer::dump(StringRef HSAMetadataString) const { +//===----------------------------------------------------------------------===// +// HSAMetadataStreamerV2 +//===----------------------------------------------------------------------===// +void MetadataStreamerV2::dump(StringRef HSAMetadataString) const { errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n'; } -void MetadataStreamer::verify(StringRef HSAMetadataString) const { +void MetadataStreamerV2::verify(StringRef HSAMetadataString) const { errs() << "AMDGPU HSA Metadata Parser Test: "; HSAMD::Metadata FromHSAMetadataString; @@ -63,7 +67,8 @@ void MetadataStreamer::verify(StringRef HSAMetadataString) const { } } -AccessQualifier MetadataStreamer::getAccessQualifier(StringRef AccQual) const { +AccessQualifier +MetadataStreamerV2::getAccessQualifier(StringRef AccQual) const { if (AccQual.empty()) return AccessQualifier::Unknown; @@ -74,7 +79,8 @@ AccessQualifier MetadataStreamer::getAccessQualifier(StringRef AccQual) const { .Default(AccessQualifier::Default); } -AddressSpaceQualifier MetadataStreamer::getAddressSpaceQualifer( +AddressSpaceQualifier +MetadataStreamerV2::getAddressSpaceQualifier( unsigned AddressSpace) const { switch (AddressSpace) { case AMDGPUAS::PRIVATE_ADDRESS: @@ -94,8 +100,8 @@ AddressSpaceQualifier MetadataStreamer::getAddressSpaceQualifer( } } -ValueKind MetadataStreamer::getValueKind(Type *Ty, StringRef TypeQual, - StringRef BaseTypeName) const { +ValueKind MetadataStreamerV2::getValueKind(Type *Ty, StringRef TypeQual, + StringRef BaseTypeName) const { if (TypeQual.find("pipe") != StringRef::npos) return ValueKind::Pipe; @@ -122,7 +128,7 @@ ValueKind MetadataStreamer::getValueKind(Type *Ty, StringRef TypeQual, ValueKind::ByValue); } -ValueType MetadataStreamer::getValueType(Type *Ty, StringRef TypeName) const { +ValueType MetadataStreamerV2::getValueType(Type *Ty, StringRef TypeName) const { switch (Ty->getTypeID()) { case Type::IntegerTyID: { auto Signed = !TypeName.startswith("u"); @@ -154,7 +160,7 @@ ValueType MetadataStreamer::getValueType(Type *Ty, StringRef TypeName) const { } } -std::string MetadataStreamer::getTypeName(Type *Ty, bool Signed) const { +std::string MetadataStreamerV2::getTypeName(Type *Ty, bool Signed) const { switch (Ty->getTypeID()) { case Type::IntegerTyID: { if (!Signed) @@ -191,8 +197,8 @@ std::string MetadataStreamer::getTypeName(Type *Ty, bool Signed) const { } } -std::vector<uint32_t> MetadataStreamer::getWorkGroupDimensions( - MDNode *Node) const { +std::vector<uint32_t> +MetadataStreamerV2::getWorkGroupDimensions(MDNode *Node) const { std::vector<uint32_t> Dims; if (Node->getNumOperands() != 3) return Dims; @@ -202,9 +208,9 @@ std::vector<uint32_t> MetadataStreamer::getWorkGroupDimensions( return Dims; } -Kernel::CodeProps::Metadata MetadataStreamer::getHSACodeProps( - const MachineFunction &MF, - const SIProgramInfo &ProgramInfo) const { +Kernel::CodeProps::Metadata +MetadataStreamerV2::getHSACodeProps(const MachineFunction &MF, + const SIProgramInfo &ProgramInfo) const { const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>(); const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>(); HSAMD::Kernel::CodeProps::Metadata HSACodeProps; @@ -231,9 +237,9 @@ Kernel::CodeProps::Metadata MetadataStreamer::getHSACodeProps( return HSACodeProps; } -Kernel::DebugProps::Metadata MetadataStreamer::getHSADebugProps( - const MachineFunction &MF, - const SIProgramInfo &ProgramInfo) const { +Kernel::DebugProps::Metadata +MetadataStreamerV2::getHSADebugProps(const MachineFunction &MF, + const SIProgramInfo &ProgramInfo) const { const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>(); HSAMD::Kernel::DebugProps::Metadata HSADebugProps; @@ -253,14 +259,14 @@ Kernel::DebugProps::Metadata MetadataStreamer::getHSADebugProps( return HSADebugProps; } -void MetadataStreamer::emitVersion() { +void MetadataStreamerV2::emitVersion() { auto &Version = HSAMetadata.mVersion; Version.push_back(VersionMajor); Version.push_back(VersionMinor); } -void MetadataStreamer::emitPrintf(const Module &Mod) { +void MetadataStreamerV2::emitPrintf(const Module &Mod) { auto &Printf = HSAMetadata.mPrintf; auto Node = Mod.getNamedMetadata("llvm.printf.fmts"); @@ -272,7 +278,7 @@ void MetadataStreamer::emitPrintf(const Module &Mod) { Printf.push_back(cast<MDString>(Op->getOperand(0))->getString()); } -void MetadataStreamer::emitKernelLanguage(const Function &Func) { +void MetadataStreamerV2::emitKernelLanguage(const Function &Func) { auto &Kernel = HSAMetadata.mKernels.back(); // TODO: What about other languages? @@ -290,7 +296,7 @@ void MetadataStreamer::emitKernelLanguage(const Function &Func) { mdconst::extract<ConstantInt>(Op0->getOperand(1))->getZExtValue()); } -void MetadataStreamer::emitKernelAttrs(const Function &Func) { +void MetadataStreamerV2::emitKernelAttrs(const Function &Func) { auto &Attrs = HSAMetadata.mKernels.back().mAttrs; if (auto Node = Func.getMetadata("reqd_work_group_size")) @@ -308,14 +314,14 @@ void MetadataStreamer::emitKernelAttrs(const Function &Func) { } } -void MetadataStreamer::emitKernelArgs(const Function &Func) { +void MetadataStreamerV2::emitKernelArgs(const Function &Func) { for (auto &Arg : Func.args()) emitKernelArg(Arg); emitHiddenKernelArgs(Func); } -void MetadataStreamer::emitKernelArg(const Argument &Arg) { +void MetadataStreamerV2::emitKernelArg(const Argument &Arg) { auto Func = Arg.getParent(); auto ArgNo = Arg.getArgNo(); const MDNode *Node; @@ -368,12 +374,12 @@ void MetadataStreamer::emitKernelArg(const Argument &Arg) { PointeeAlign, Name, TypeName, BaseTypeName, AccQual, TypeQual); } -void MetadataStreamer::emitKernelArg(const DataLayout &DL, Type *Ty, - ValueKind ValueKind, - unsigned PointeeAlign, - StringRef Name, - StringRef TypeName, StringRef BaseTypeName, - StringRef AccQual, StringRef TypeQual) { +void MetadataStreamerV2::emitKernelArg(const DataLayout &DL, Type *Ty, + ValueKind ValueKind, + unsigned PointeeAlign, StringRef Name, + StringRef TypeName, + StringRef BaseTypeName, + StringRef AccQual, StringRef TypeQual) { HSAMetadata.mKernels.back().mArgs.push_back(Kernel::Arg::Metadata()); auto &Arg = HSAMetadata.mKernels.back().mArgs.back(); @@ -386,7 +392,7 @@ void MetadataStreamer::emitKernelArg(const DataLayout &DL, Type *Ty, Arg.mPointeeAlign = PointeeAlign; if (auto PtrTy = dyn_cast<PointerType>(Ty)) - Arg.mAddrSpaceQual = getAddressSpaceQualifer(PtrTy->getAddressSpace()); + Arg.mAddrSpaceQual = getAddressSpaceQualifier(PtrTy->getAddressSpace()); Arg.mAccQual = getAccessQualifier(AccQual); @@ -406,7 +412,7 @@ void MetadataStreamer::emitKernelArg(const DataLayout &DL, Type *Ty, } } -void MetadataStreamer::emitHiddenKernelArgs(const Function &Func) { +void MetadataStreamerV2::emitHiddenKernelArgs(const Function &Func) { int HiddenArgNumBytes = getIntegerAttribute(Func, "amdgpu-implicitarg-num-bytes", 0); @@ -448,12 +454,16 @@ void MetadataStreamer::emitHiddenKernelArgs(const Function &Func) { } } -void MetadataStreamer::begin(const Module &Mod) { +bool MetadataStreamerV2::emitTo(AMDGPUTargetStreamer &TargetStreamer) { + return TargetStreamer.EmitHSAMetadata(getHSAMetadata()); +} + +void MetadataStreamerV2::begin(const Module &Mod) { emitVersion(); emitPrintf(Mod); } -void MetadataStreamer::end() { +void MetadataStreamerV2::end() { std::string HSAMetadataString; if (toString(HSAMetadata, HSAMetadataString)) return; @@ -464,7 +474,8 @@ void MetadataStreamer::end() { verify(HSAMetadataString); } -void MetadataStreamer::emitKernel(const MachineFunction &MF, const SIProgramInfo &ProgramInfo) { +void MetadataStreamerV2::emitKernel(const MachineFunction &MF, + const SIProgramInfo &ProgramInfo) { auto &Func = MF.getFunction(); if (Func.getCallingConv() != CallingConv::AMDGPU_KERNEL) return; @@ -484,6 +495,505 @@ void MetadataStreamer::emitKernel(const MachineFunction &MF, const SIProgramInfo HSAMetadata.mKernels.back().mDebugProps = DebugProps; } +//===----------------------------------------------------------------------===// +// HSAMetadataStreamerV3 +//===----------------------------------------------------------------------===// + +void MetadataStreamerV3::dump(StringRef HSAMetadataString) const { + errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n'; +} + +void MetadataStreamerV3::verify(StringRef HSAMetadataString) const { + errs() << "AMDGPU HSA Metadata Parser Test: "; + + std::shared_ptr<msgpack::Node> FromHSAMetadataString = + std::make_shared<msgpack::MapNode>(); + + yaml::Input YIn(HSAMetadataString); + YIn >> FromHSAMetadataString; + if (YIn.error()) { + errs() << "FAIL\n"; + return; + } + + std::string ToHSAMetadataString; + raw_string_ostream StrOS(ToHSAMetadataString); + yaml::Output YOut(StrOS); + YOut << FromHSAMetadataString; + + errs() << (HSAMetadataString == StrOS.str() ? "PASS" : "FAIL") << '\n'; + if (HSAMetadataString != ToHSAMetadataString) { + errs() << "Original input: " << HSAMetadataString << '\n' + << "Produced output: " << StrOS.str() << '\n'; + } +} + +Optional<StringRef> +MetadataStreamerV3::getAccessQualifier(StringRef AccQual) const { + return StringSwitch<Optional<StringRef>>(AccQual) + .Case("read_only", StringRef("read_only")) + .Case("write_only", StringRef("write_only")) + .Case("read_write", StringRef("read_write")) + .Default(None); +} + +Optional<StringRef> +MetadataStreamerV3::getAddressSpaceQualifier(unsigned AddressSpace) const { + switch (AddressSpace) { + case AMDGPUAS::PRIVATE_ADDRESS: + return StringRef("private"); + case AMDGPUAS::GLOBAL_ADDRESS: + return StringRef("global"); + case AMDGPUAS::CONSTANT_ADDRESS: + return StringRef("constant"); + case AMDGPUAS::LOCAL_ADDRESS: + return StringRef("local"); + case AMDGPUAS::FLAT_ADDRESS: + return StringRef("generic"); + case AMDGPUAS::REGION_ADDRESS: + return StringRef("region"); + default: + return None; + } +} + +StringRef MetadataStreamerV3::getValueKind(Type *Ty, StringRef TypeQual, + StringRef BaseTypeName) const { + if (TypeQual.find("pipe") != StringRef::npos) + return "pipe"; + + return StringSwitch<StringRef>(BaseTypeName) + .Case("image1d_t", "image") + .Case("image1d_array_t", "image") + .Case("image1d_buffer_t", "image") + .Case("image2d_t", "image") + .Case("image2d_array_t", "image") + .Case("image2d_array_depth_t", "image") + .Case("image2d_array_msaa_t", "image") + .Case("image2d_array_msaa_depth_t", "image") + .Case("image2d_depth_t", "image") + .Case("image2d_msaa_t", "image") + .Case("image2d_msaa_depth_t", "image") + .Case("image3d_t", "image") + .Case("sampler_t", "sampler") + .Case("queue_t", "queue") + .Default(isa<PointerType>(Ty) + ? (Ty->getPointerAddressSpace() == AMDGPUAS::LOCAL_ADDRESS + ? "dynamic_shared_pointer" + : "global_buffer") + : "by_value"); +} + +StringRef MetadataStreamerV3::getValueType(Type *Ty, StringRef TypeName) const { + switch (Ty->getTypeID()) { + case Type::IntegerTyID: { + auto Signed = !TypeName.startswith("u"); + switch (Ty->getIntegerBitWidth()) { + case 8: + return Signed ? "i8" : "u8"; + case 16: + return Signed ? "i16" : "u16"; + case 32: + return Signed ? "i32" : "u32"; + case 64: + return Signed ? "i64" : "u64"; + default: + return "struct"; + } + } + case Type::HalfTyID: + return "f16"; + case Type::FloatTyID: + return "f32"; + case Type::DoubleTyID: + return "f64"; + case Type::PointerTyID: + return getValueType(Ty->getPointerElementType(), TypeName); + case Type::VectorTyID: + return getValueType(Ty->getVectorElementType(), TypeName); + default: + return "struct"; + } +} + +std::string MetadataStreamerV3::getTypeName(Type *Ty, bool Signed) const { + switch (Ty->getTypeID()) { + case Type::IntegerTyID: { + if (!Signed) + return (Twine('u') + getTypeName(Ty, true)).str(); + + auto BitWidth = Ty->getIntegerBitWidth(); + switch (BitWidth) { + case 8: + return "char"; + case 16: + return "short"; + case 32: + return "int"; + case 64: + return "long"; + default: + return (Twine('i') + Twine(BitWidth)).str(); + } + } + case Type::HalfTyID: + return "half"; + case Type::FloatTyID: + return "float"; + case Type::DoubleTyID: + return "double"; + case Type::VectorTyID: { + auto VecTy = cast<VectorType>(Ty); + auto ElTy = VecTy->getElementType(); + auto NumElements = VecTy->getVectorNumElements(); + return (Twine(getTypeName(ElTy, Signed)) + Twine(NumElements)).str(); + } + default: + return "unknown"; + } +} + +std::shared_ptr<msgpack::ArrayNode> +MetadataStreamerV3::getWorkGroupDimensions(MDNode *Node) const { + auto Dims = std::make_shared<msgpack::ArrayNode>(); + if (Node->getNumOperands() != 3) + return Dims; + + for (auto &Op : Node->operands()) + Dims->push_back(std::make_shared<msgpack::ScalarNode>( + mdconst::extract<ConstantInt>(Op)->getZExtValue())); + return Dims; +} + +void MetadataStreamerV3::emitVersion() { + auto Version = std::make_shared<msgpack::ArrayNode>(); + Version->push_back(std::make_shared<msgpack::ScalarNode>(V3::VersionMajor)); + Version->push_back(std::make_shared<msgpack::ScalarNode>(V3::VersionMinor)); + getRootMetadata("amdhsa.version") = std::move(Version); +} + +void MetadataStreamerV3::emitPrintf(const Module &Mod) { + auto Node = Mod.getNamedMetadata("llvm.printf.fmts"); + if (!Node) + return; + + auto Printf = std::make_shared<msgpack::ArrayNode>(); + for (auto Op : Node->operands()) + if (Op->getNumOperands()) + Printf->push_back(std::make_shared<msgpack::ScalarNode>( + cast<MDString>(Op->getOperand(0))->getString())); + getRootMetadata("amdhsa.printf") = std::move(Printf); +} + +void MetadataStreamerV3::emitKernelLanguage(const Function &Func, + msgpack::MapNode &Kern) { + // TODO: What about other languages? + auto Node = Func.getParent()->getNamedMetadata("opencl.ocl.version"); + if (!Node || !Node->getNumOperands()) + return; + auto Op0 = Node->getOperand(0); + if (Op0->getNumOperands() <= 1) + return; + + Kern[".language"] = std::make_shared<msgpack::ScalarNode>("OpenCL C"); + auto LanguageVersion = std::make_shared<msgpack::ArrayNode>(); + LanguageVersion->push_back(std::make_shared<msgpack::ScalarNode>( + mdconst::extract<ConstantInt>(Op0->getOperand(0))->getZExtValue())); + LanguageVersion->push_back(std::make_shared<msgpack::ScalarNode>( + mdconst::extract<ConstantInt>(Op0->getOperand(1))->getZExtValue())); + Kern[".language_version"] = std::move(LanguageVersion); +} + +void MetadataStreamerV3::emitKernelAttrs(const Function &Func, + msgpack::MapNode &Kern) { + + if (auto Node = Func.getMetadata("reqd_work_group_size")) + Kern[".reqd_workgroup_size"] = getWorkGroupDimensions(Node); + if (auto Node = Func.getMetadata("work_group_size_hint")) + Kern[".workgroup_size_hint"] = getWorkGroupDimensions(Node); + if (auto Node = Func.getMetadata("vec_type_hint")) { + Kern[".vec_type_hint"] = std::make_shared<msgpack::ScalarNode>(getTypeName( + cast<ValueAsMetadata>(Node->getOperand(0))->getType(), + mdconst::extract<ConstantInt>(Node->getOperand(1))->getZExtValue())); + } + if (Func.hasFnAttribute("runtime-handle")) { + Kern[".device_enqueue_symbol"] = std::make_shared<msgpack::ScalarNode>( + Func.getFnAttribute("runtime-handle").getValueAsString().str()); + } +} + +void MetadataStreamerV3::emitKernelArgs(const Function &Func, + msgpack::MapNode &Kern) { + unsigned Offset = 0; + auto Args = std::make_shared<msgpack::ArrayNode>(); + for (auto &Arg : Func.args()) + emitKernelArg(Arg, Offset, *Args); + + emitHiddenKernelArgs(Func, Offset, *Args); + + // TODO: What about other languages? + if (Func.getParent()->getNamedMetadata("opencl.ocl.version")) { + auto &DL = Func.getParent()->getDataLayout(); + auto Int64Ty = Type::getInt64Ty(Func.getContext()); + + emitKernelArg(DL, Int64Ty, "hidden_global_offset_x", Offset, *Args); + emitKernelArg(DL, Int64Ty, "hidden_global_offset_y", Offset, *Args); + emitKernelArg(DL, Int64Ty, "hidden_global_offset_z", Offset, *Args); + + auto Int8PtrTy = + Type::getInt8PtrTy(Func.getContext(), AMDGPUAS::GLOBAL_ADDRESS); + + // Emit "printf buffer" argument if printf is used, otherwise emit dummy + // "none" argument. + if (Func.getParent()->getNamedMetadata("llvm.printf.fmts")) + emitKernelArg(DL, Int8PtrTy, "hidden_printf_buffer", Offset, *Args); + else + emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, *Args); + + // Emit "default queue" and "completion action" arguments if enqueue kernel + // is used, otherwise emit dummy "none" arguments. + if (Func.hasFnAttribute("calls-enqueue-kernel")) { + emitKernelArg(DL, Int8PtrTy, "hidden_default_queue", Offset, *Args); + emitKernelArg(DL, Int8PtrTy, "hidden_completion_action", Offset, *Args); + } else { + emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, *Args); + emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, *Args); + } + } + + Kern[".args"] = std::move(Args); +} + +void MetadataStreamerV3::emitKernelArg(const Argument &Arg, unsigned &Offset, + msgpack::ArrayNode &Args) { + auto Func = Arg.getParent(); + auto ArgNo = Arg.getArgNo(); + const MDNode *Node; + + StringRef Name; + Node = Func->getMetadata("kernel_arg_name"); + if (Node && ArgNo < Node->getNumOperands()) + Name = cast<MDString>(Node->getOperand(ArgNo))->getString(); + else if (Arg.hasName()) + Name = Arg.getName(); + + StringRef TypeName; + Node = Func->getMetadata("kernel_arg_type"); + if (Node && ArgNo < Node->getNumOperands()) + TypeName = cast<MDString>(Node->getOperand(ArgNo))->getString(); + + StringRef BaseTypeName; + Node = Func->getMetadata("kernel_arg_base_type"); + if (Node && ArgNo < Node->getNumOperands()) + BaseTypeName = cast<MDString>(Node->getOperand(ArgNo))->getString(); + + StringRef AccQual; + if (Arg.getType()->isPointerTy() && Arg.onlyReadsMemory() && + Arg.hasNoAliasAttr()) { + AccQual = "read_only"; + } else { + Node = Func->getMetadata("kernel_arg_access_qual"); + if (Node && ArgNo < Node->getNumOperands()) + AccQual = cast<MDString>(Node->getOperand(ArgNo))->getString(); + } + + StringRef TypeQual; + Node = Func->getMetadata("kernel_arg_type_qual"); + if (Node && ArgNo < Node->getNumOperands()) + TypeQual = cast<MDString>(Node->getOperand(ArgNo))->getString(); + + Type *Ty = Arg.getType(); + const DataLayout &DL = Func->getParent()->getDataLayout(); + + unsigned PointeeAlign = 0; + if (auto PtrTy = dyn_cast<PointerType>(Ty)) { + if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) { + PointeeAlign = Arg.getParamAlignment(); + if (PointeeAlign == 0) + PointeeAlign = DL.getABITypeAlignment(PtrTy->getElementType()); + } + } + + emitKernelArg(Func->getParent()->getDataLayout(), Arg.getType(), + getValueKind(Arg.getType(), TypeQual, BaseTypeName), Offset, + Args, PointeeAlign, Name, TypeName, BaseTypeName, AccQual, + TypeQual); +} + +void MetadataStreamerV3::emitKernelArg(const DataLayout &DL, Type *Ty, + StringRef ValueKind, unsigned &Offset, + msgpack::ArrayNode &Args, + unsigned PointeeAlign, StringRef Name, + StringRef TypeName, + StringRef BaseTypeName, + StringRef AccQual, StringRef TypeQual) { + auto ArgPtr = std::make_shared<msgpack::MapNode>(); + auto &Arg = *ArgPtr; + + if (!Name.empty()) + Arg[".name"] = std::make_shared<msgpack::ScalarNode>(Name); + if (!TypeName.empty()) + Arg[".type_name"] = std::make_shared<msgpack::ScalarNode>(TypeName); + auto Size = DL.getTypeAllocSize(Ty); + auto Align = DL.getABITypeAlignment(Ty); + Arg[".size"] = std::make_shared<msgpack::ScalarNode>(Size); + Offset = alignTo(Offset, Align); + Arg[".offset"] = std::make_shared<msgpack::ScalarNode>(Offset); + Offset += Size; + Arg[".value_kind"] = std::make_shared<msgpack::ScalarNode>(ValueKind); + Arg[".value_type"] = + std::make_shared<msgpack::ScalarNode>(getValueType(Ty, BaseTypeName)); + if (PointeeAlign) + Arg[".pointee_align"] = std::make_shared<msgpack::ScalarNode>(PointeeAlign); + + if (auto PtrTy = dyn_cast<PointerType>(Ty)) + if (auto Qualifier = getAddressSpaceQualifier(PtrTy->getAddressSpace())) + Arg[".address_space"] = std::make_shared<msgpack::ScalarNode>(*Qualifier); + + if (auto AQ = getAccessQualifier(AccQual)) + Arg[".access"] = std::make_shared<msgpack::ScalarNode>(*AQ); + + // TODO: Emit Arg[".actual_access"]. + + SmallVector<StringRef, 1> SplitTypeQuals; + TypeQual.split(SplitTypeQuals, " ", -1, false); + for (StringRef Key : SplitTypeQuals) { + if (Key == "const") + Arg[".is_const"] = std::make_shared<msgpack::ScalarNode>(true); + else if (Key == "restrict") + Arg[".is_restrict"] = std::make_shared<msgpack::ScalarNode>(true); + else if (Key == "volatile") + Arg[".is_volatile"] = std::make_shared<msgpack::ScalarNode>(true); + else if (Key == "pipe") + Arg[".is_pipe"] = std::make_shared<msgpack::ScalarNode>(true); + } + + Args.push_back(std::move(ArgPtr)); +} + +void MetadataStreamerV3::emitHiddenKernelArgs(const Function &Func, + unsigned &Offset, + msgpack::ArrayNode &Args) { + int HiddenArgNumBytes = + getIntegerAttribute(Func, "amdgpu-implicitarg-num-bytes", 0); + + if (!HiddenArgNumBytes) + return; + + auto &DL = Func.getParent()->getDataLayout(); + auto Int64Ty = Type::getInt64Ty(Func.getContext()); + + if (HiddenArgNumBytes >= 8) + emitKernelArg(DL, Int64Ty, "hidden_global_offset_x", Offset, Args); + if (HiddenArgNumBytes >= 16) + emitKernelArg(DL, Int64Ty, "hidden_global_offset_y", Offset, Args); + if (HiddenArgNumBytes >= 24) + emitKernelArg(DL, Int64Ty, "hidden_global_offset_z", Offset, Args); + + auto Int8PtrTy = + Type::getInt8PtrTy(Func.getContext(), AMDGPUAS::GLOBAL_ADDRESS); + + // Emit "printf buffer" argument if printf is used, otherwise emit dummy + // "none" argument. + if (HiddenArgNumBytes >= 32) { + if (Func.getParent()->getNamedMetadata("llvm.printf.fmts")) + emitKernelArg(DL, Int8PtrTy, "hidden_printf_buffer", Offset, Args); + else + emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, Args); + } + + // Emit "default queue" and "completion action" arguments if enqueue kernel is + // used, otherwise emit dummy "none" arguments. + if (HiddenArgNumBytes >= 48) { + if (Func.hasFnAttribute("calls-enqueue-kernel")) { + emitKernelArg(DL, Int8PtrTy, "hidden_default_queue", Offset, Args); + emitKernelArg(DL, Int8PtrTy, "hidden_completion_action", Offset, Args); + } else { + emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, Args); + emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, Args); + } + } +} + +std::shared_ptr<msgpack::MapNode> +MetadataStreamerV3::getHSAKernelProps(const MachineFunction &MF, + const SIProgramInfo &ProgramInfo) const { + const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>(); + const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>(); + const Function &F = MF.getFunction(); + + auto HSAKernelProps = std::make_shared<msgpack::MapNode>(); + auto &Kern = *HSAKernelProps; + + unsigned MaxKernArgAlign; + Kern[".kernarg_segment_size"] = std::make_shared<msgpack::ScalarNode>( + STM.getKernArgSegmentSize(F, MaxKernArgAlign)); + Kern[".group_segment_fixed_size"] = + std::make_shared<msgpack::ScalarNode>(ProgramInfo.LDSSize); + Kern[".private_segment_fixed_size"] = + std::make_shared<msgpack::ScalarNode>(ProgramInfo.ScratchSize); + Kern[".kernarg_segment_align"] = + std::make_shared<msgpack::ScalarNode>(std::max(uint32_t(4), MaxKernArgAlign)); + Kern[".wavefront_size"] = + std::make_shared<msgpack::ScalarNode>(STM.getWavefrontSize()); + Kern[".sgpr_count"] = std::make_shared<msgpack::ScalarNode>(ProgramInfo.NumSGPR); + Kern[".vgpr_count"] = std::make_shared<msgpack::ScalarNode>(ProgramInfo.NumVGPR); + Kern[".max_flat_workgroup_size"] = + std::make_shared<msgpack::ScalarNode>(MFI.getMaxFlatWorkGroupSize()); + Kern[".sgpr_spill_count"] = + std::make_shared<msgpack::ScalarNode>(MFI.getNumSpilledSGPRs()); + Kern[".vgpr_spill_count"] = + std::make_shared<msgpack::ScalarNode>(MFI.getNumSpilledVGPRs()); + + return HSAKernelProps; +} + +bool MetadataStreamerV3::emitTo(AMDGPUTargetStreamer &TargetStreamer) { + return TargetStreamer.EmitHSAMetadata(getHSAMetadataRoot(), true); +} + +void MetadataStreamerV3::begin(const Module &Mod) { + emitVersion(); + emitPrintf(Mod); + getRootMetadata("amdhsa.kernels").reset(new msgpack::ArrayNode()); +} + +void MetadataStreamerV3::end() { + std::string HSAMetadataString; + raw_string_ostream StrOS(HSAMetadataString); + yaml::Output YOut(StrOS); + YOut << HSAMetadataRoot; + + if (DumpHSAMetadata) + dump(StrOS.str()); + if (VerifyHSAMetadata) + verify(StrOS.str()); +} + +void MetadataStreamerV3::emitKernel(const MachineFunction &MF, + const SIProgramInfo &ProgramInfo) { + auto &Func = MF.getFunction(); + auto KernelProps = getHSAKernelProps(MF, ProgramInfo); + + assert(Func.getCallingConv() == CallingConv::AMDGPU_KERNEL || + Func.getCallingConv() == CallingConv::SPIR_KERNEL); + + auto &KernelsNode = getRootMetadata("amdhsa.kernels"); + auto Kernels = cast<msgpack::ArrayNode>(KernelsNode.get()); + + { + auto &Kern = *KernelProps; + Kern[".name"] = std::make_shared<msgpack::ScalarNode>(Func.getName()); + Kern[".symbol"] = std::make_shared<msgpack::ScalarNode>( + (Twine(Func.getName()) + Twine(".kd")).str()); + emitKernelLanguage(Func, Kern); + emitKernelAttrs(Func, Kern); + emitKernelArgs(Func, Kern); + } + + Kernels->push_back(std::move(KernelProps)); +} + } // end namespace HSAMD } // end namespace AMDGPU } // end namespace llvm diff --git a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h index a1e08235a5e..afc09baf952 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h @@ -19,10 +19,12 @@ #include "AMDGPU.h" #include "AMDKernelCodeT.h" #include "llvm/ADT/StringRef.h" +#include "llvm/BinaryFormat/MsgPackTypes.h" #include "llvm/Support/AMDGPUMetadata.h" namespace llvm { +class AMDGPUTargetStreamer; class Argument; class DataLayout; class Function; @@ -34,7 +36,92 @@ class Type; namespace AMDGPU { namespace HSAMD { -class MetadataStreamer final { +class MetadataStreamer { +public: + virtual ~MetadataStreamer(){}; + + virtual bool emitTo(AMDGPUTargetStreamer &TargetStreamer) = 0; + + virtual void begin(const Module &Mod) = 0; + + virtual void end() = 0; + + virtual void emitKernel(const MachineFunction &MF, + const SIProgramInfo &ProgramInfo) = 0; +}; + +class MetadataStreamerV3 final : public MetadataStreamer { +private: + std::shared_ptr<msgpack::Node> HSAMetadataRoot = + std::make_shared<msgpack::MapNode>(); + + void dump(StringRef HSAMetadataString) const; + + void verify(StringRef HSAMetadataString) const; + + Optional<StringRef> getAccessQualifier(StringRef AccQual) const; + + Optional<StringRef> getAddressSpaceQualifier(unsigned AddressSpace) const; + + StringRef getValueKind(Type *Ty, StringRef TypeQual, + StringRef BaseTypeName) const; + + StringRef getValueType(Type *Ty, StringRef TypeName) const; + + std::string getTypeName(Type *Ty, bool Signed) const; + + std::shared_ptr<msgpack::ArrayNode> + getWorkGroupDimensions(MDNode *Node) const; + + std::shared_ptr<msgpack::MapNode> + getHSAKernelProps(const MachineFunction &MF, + const SIProgramInfo &ProgramInfo) const; + + void emitVersion(); + + void emitPrintf(const Module &Mod); + + void emitKernelLanguage(const Function &Func, msgpack::MapNode &Kern); + + void emitKernelAttrs(const Function &Func, msgpack::MapNode &Kern); + + void emitKernelArgs(const Function &Func, msgpack::MapNode &Kern); + + void emitKernelArg(const Argument &Arg, unsigned &Offset, + msgpack::ArrayNode &Args); + + void emitKernelArg(const DataLayout &DL, Type *Ty, StringRef ValueKind, + unsigned &Offset, msgpack::ArrayNode &Args, + unsigned PointeeAlign = 0, StringRef Name = "", + StringRef TypeName = "", StringRef BaseTypeName = "", + StringRef AccQual = "", StringRef TypeQual = ""); + + void emitHiddenKernelArgs(const Function &Func, unsigned &Offset, + msgpack::ArrayNode &Args); + + std::shared_ptr<msgpack::Node> &getRootMetadata(StringRef Key) { + return (*cast<msgpack::MapNode>(HSAMetadataRoot.get()))[Key]; + } + + std::shared_ptr<msgpack::Node> &getHSAMetadataRoot() { + return HSAMetadataRoot; + } + +public: + MetadataStreamerV3() = default; + ~MetadataStreamerV3() = default; + + bool emitTo(AMDGPUTargetStreamer &TargetStreamer) override; + + void begin(const Module &Mod) override; + + void end() override; + + void emitKernel(const MachineFunction &MF, + const SIProgramInfo &ProgramInfo) override; +}; + +class MetadataStreamerV2 final : public MetadataStreamer { private: Metadata HSAMetadata; @@ -44,7 +131,7 @@ private: AccessQualifier getAccessQualifier(StringRef AccQual) const; - AddressSpaceQualifier getAddressSpaceQualifer(unsigned AddressSpace) const; + AddressSpaceQualifier getAddressSpaceQualifier(unsigned AddressSpace) const; ValueKind getValueKind(Type *Ty, StringRef TypeQual, StringRef BaseTypeName) const; @@ -82,19 +169,22 @@ private: void emitHiddenKernelArgs(const Function &Func); -public: - MetadataStreamer() = default; - ~MetadataStreamer() = default; - const Metadata &getHSAMetadata() const { return HSAMetadata; } - void begin(const Module &Mod); +public: + MetadataStreamerV2() = default; + ~MetadataStreamerV2() = default; + + bool emitTo(AMDGPUTargetStreamer &TargetStreamer) override; + + void begin(const Module &Mod) override; - void end(); + void end() override; - void emitKernel(const MachineFunction &MF, const SIProgramInfo &ProgramInfo); + void emitKernel(const MachineFunction &MF, + const SIProgramInfo &ProgramInfo) override; }; } // end namespace HSAMD diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPTNote.h b/llvm/lib/Target/AMDGPU/AMDGPUPTNote.h index b50a2eb8e9e..2feff14d34a 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUPTNote.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUPTNote.h @@ -23,7 +23,8 @@ namespace ElfNote { const char SectionName[] = ".note"; -const char NoteName[] = "AMD"; +const char NoteNameV2[] = "AMD"; +const char NoteNameV3[] = "AMDGPU"; // TODO: Remove this file once we drop code object v2. enum NoteType{ diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index e6a718bcb30..3f9af27a2e5 100644 --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -3065,9 +3065,18 @@ bool AMDGPUAsmParser::ParseDirectiveISAVersion() { } bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() { + const char *AssemblerDirectiveBegin; + const char *AssemblerDirectiveEnd; + std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) = + AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI()) + ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin, + HSAMD::V3::AssemblerDirectiveEnd) + : std::make_tuple(HSAMD::AssemblerDirectiveBegin, + HSAMD::AssemblerDirectiveEnd); + if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) { return Error(getParser().getTok().getLoc(), - (Twine(HSAMD::AssemblerDirectiveBegin) + Twine(" directive is " + (Twine(AssemblerDirectiveBegin) + Twine(" directive is " "not available on non-amdhsa OSes")).str()); } @@ -3085,7 +3094,7 @@ bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() { if (getLexer().is(AsmToken::Identifier)) { StringRef ID = getLexer().getTok().getIdentifier(); - if (ID == AMDGPU::HSAMD::AssemblerDirectiveEnd) { + if (ID == AssemblerDirectiveEnd) { Lex(); FoundEnd = true; break; @@ -3107,8 +3116,13 @@ bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() { YamlStream.flush(); - if (!getTargetStreamer().EmitHSAMetadata(HSAMetadataString)) - return Error(getParser().getTok().getLoc(), "invalid HSA metadata"); + if (IsaInfo::hasCodeObjectV3(&getSTI())) { + if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString)) + return Error(getParser().getTok().getLoc(), "invalid HSA metadata"); + } else { + if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString)) + return Error(getParser().getTok().getLoc(), "invalid HSA metadata"); + } return false; } @@ -3145,6 +3159,10 @@ bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { if (IDVal == ".amdhsa_kernel") return ParseDirectiveAMDHSAKernel(); + + // TODO: Restructure/combine with PAL metadata directive. + if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin) + return ParseDirectiveHSAMetadata(); } else { if (IDVal == ".hsa_code_object_version") return ParseDirectiveHSACodeObjectVersion(); @@ -3160,10 +3178,10 @@ bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { if (IDVal == ".amd_amdgpu_isa") return ParseDirectiveISAVersion(); - } - if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin) - return ParseDirectiveHSAMetadata(); + if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin) + return ParseDirectiveHSAMetadata(); + } if (IDVal == PALMD::AssemblerDirective) return ParseDirectivePALMetadata(); diff --git a/llvm/lib/Target/AMDGPU/LLVMBuild.txt b/llvm/lib/Target/AMDGPU/LLVMBuild.txt index c54a13c4b4d..e591d756a54 100644 --- a/llvm/lib/Target/AMDGPU/LLVMBuild.txt +++ b/llvm/lib/Target/AMDGPU/LLVMBuild.txt @@ -30,5 +30,5 @@ has_disassembler = 1 type = Library name = AMDGPUCodeGen parent = AMDGPU -required_libraries = Analysis AsmPrinter CodeGen Core IPO MC AMDGPUAsmPrinter AMDGPUDesc AMDGPUInfo AMDGPUUtils Scalar SelectionDAG Support Target TransformUtils Vectorize GlobalISel +required_libraries = Analysis AsmPrinter CodeGen Core IPO MC AMDGPUAsmPrinter AMDGPUDesc AMDGPUInfo AMDGPUUtils Scalar SelectionDAG Support Target TransformUtils Vectorize GlobalISel BinaryFormat add_to_library_groups = AMDGPU diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp index 225bf5b7816..c17fe126546 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp @@ -17,7 +17,9 @@ #include "Utils/AMDGPUBaseInfo.h" #include "Utils/AMDKernelCodeTUtils.h" #include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/AMDGPUMetadataVerifier.h" #include "llvm/BinaryFormat/ELF.h" +#include "llvm/BinaryFormat/MsgPackTypes.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" #include "llvm/IR/Metadata.h" @@ -35,12 +37,13 @@ namespace llvm { using namespace llvm; using namespace llvm::AMDGPU; +using namespace llvm::AMDGPU::HSAMD; //===----------------------------------------------------------------------===// // AMDGPUTargetStreamer //===----------------------------------------------------------------------===// -bool AMDGPUTargetStreamer::EmitHSAMetadata(StringRef HSAMetadataString) { +bool AMDGPUTargetStreamer::EmitHSAMetadataV2(StringRef HSAMetadataString) { HSAMD::Metadata HSAMetadata; if (HSAMD::fromString(HSAMetadataString, HSAMetadata)) return false; @@ -48,6 +51,15 @@ bool AMDGPUTargetStreamer::EmitHSAMetadata(StringRef HSAMetadataString) { return EmitHSAMetadata(HSAMetadata); } +bool AMDGPUTargetStreamer::EmitHSAMetadataV3(StringRef HSAMetadataString) { + std::shared_ptr<msgpack::Node> HSAMetadataRoot; + yaml::Input YIn(HSAMetadataString); + YIn >> HSAMetadataRoot; + if (YIn.error()) + return false; + return EmitHSAMetadata(HSAMetadataRoot, false); +} + StringRef AMDGPUTargetStreamer::getArchNameFromElfMach(unsigned ElfMach) { AMDGPU::GPUKind AK; @@ -195,9 +207,26 @@ bool AMDGPUTargetAsmStreamer::EmitHSAMetadata( if (HSAMD::toString(HSAMetadata, HSAMetadataString)) return false; - OS << '\t' << HSAMD::AssemblerDirectiveBegin << '\n'; + OS << '\t' << AssemblerDirectiveBegin << '\n'; OS << HSAMetadataString << '\n'; - OS << '\t' << HSAMD::AssemblerDirectiveEnd << '\n'; + OS << '\t' << AssemblerDirectiveEnd << '\n'; + return true; +} + +bool AMDGPUTargetAsmStreamer::EmitHSAMetadata( + std::shared_ptr<msgpack::Node> &HSAMetadataRoot, bool Strict) { + V3::MetadataVerifier Verifier(Strict); + if (!Verifier.verify(*HSAMetadataRoot)) + return false; + + std::string HSAMetadataString; + raw_string_ostream StrOS(HSAMetadataString); + yaml::Output YOut(StrOS); + YOut << HSAMetadataRoot; + + OS << '\t' << V3::AssemblerDirectiveBegin << '\n'; + OS << StrOS.str() << '\n'; + OS << '\t' << V3::AssemblerDirectiveEnd << '\n'; return true; } @@ -358,13 +387,13 @@ MCELFStreamer &AMDGPUTargetELFStreamer::getStreamer() { return static_cast<MCELFStreamer &>(Streamer); } -void AMDGPUTargetELFStreamer::EmitAMDGPUNote( - const MCExpr *DescSZ, unsigned NoteType, +void AMDGPUTargetELFStreamer::EmitNote( + StringRef Name, const MCExpr *DescSZ, unsigned NoteType, function_ref<void(MCELFStreamer &)> EmitDesc) { auto &S = getStreamer(); auto &Context = S.getContext(); - auto NameSZ = sizeof(ElfNote::NoteName); + auto NameSZ = Name.size() + 1; S.PushSection(); S.SwitchSection(Context.getELFSection( @@ -372,7 +401,7 @@ void AMDGPUTargetELFStreamer::EmitAMDGPUNote( S.EmitIntValue(NameSZ, 4); // namesz S.EmitValue(DescSZ, 4); // descz S.EmitIntValue(NoteType, 4); // type - S.EmitBytes(StringRef(ElfNote::NoteName, NameSZ)); // name + S.EmitBytes(Name); // name S.EmitValueToAlignment(4, 0, 1, 0); // padding 0 EmitDesc(S); // desc S.EmitValueToAlignment(4, 0, 1, 0); // padding 0 @@ -384,14 +413,11 @@ void AMDGPUTargetELFStreamer::EmitDirectiveAMDGCNTarget(StringRef Target) {} void AMDGPUTargetELFStreamer::EmitDirectiveHSACodeObjectVersion( uint32_t Major, uint32_t Minor) { - EmitAMDGPUNote( - MCConstantExpr::create(8, getContext()), - ElfNote::NT_AMDGPU_HSA_CODE_OBJECT_VERSION, - [&](MCELFStreamer &OS){ - OS.EmitIntValue(Major, 4); - OS.EmitIntValue(Minor, 4); - } - ); + EmitNote(ElfNote::NoteNameV2, MCConstantExpr::create(8, getContext()), + ElfNote::NT_AMDGPU_HSA_CODE_OBJECT_VERSION, [&](MCELFStreamer &OS) { + OS.EmitIntValue(Major, 4); + OS.EmitIntValue(Minor, 4); + }); } void @@ -407,21 +433,18 @@ AMDGPUTargetELFStreamer::EmitDirectiveHSACodeObjectISA(uint32_t Major, sizeof(Major) + sizeof(Minor) + sizeof(Stepping) + VendorNameSize + ArchNameSize; - EmitAMDGPUNote( - MCConstantExpr::create(DescSZ, getContext()), - ElfNote::NT_AMDGPU_HSA_ISA, - [&](MCELFStreamer &OS) { - OS.EmitIntValue(VendorNameSize, 2); - OS.EmitIntValue(ArchNameSize, 2); - OS.EmitIntValue(Major, 4); - OS.EmitIntValue(Minor, 4); - OS.EmitIntValue(Stepping, 4); - OS.EmitBytes(VendorName); - OS.EmitIntValue(0, 1); // NULL terminate VendorName - OS.EmitBytes(ArchName); - OS.EmitIntValue(0, 1); // NULL terminte ArchName - } - ); + EmitNote(ElfNote::NoteNameV2, MCConstantExpr::create(DescSZ, getContext()), + ElfNote::NT_AMDGPU_HSA_ISA, [&](MCELFStreamer &OS) { + OS.EmitIntValue(VendorNameSize, 2); + OS.EmitIntValue(ArchNameSize, 2); + OS.EmitIntValue(Major, 4); + OS.EmitIntValue(Minor, 4); + OS.EmitIntValue(Stepping, 4); + OS.EmitBytes(VendorName); + OS.EmitIntValue(0, 1); // NULL terminate VendorName + OS.EmitBytes(ArchName); + OS.EmitIntValue(0, 1); // NULL terminte ArchName + }); } void @@ -450,15 +473,41 @@ bool AMDGPUTargetELFStreamer::EmitISAVersion(StringRef IsaVersionString) { MCSymbolRefExpr::create(DescEnd, Context), MCSymbolRefExpr::create(DescBegin, Context), Context); - EmitAMDGPUNote( - DescSZ, - ELF::NT_AMD_AMDGPU_ISA, - [&](MCELFStreamer &OS) { - OS.EmitLabel(DescBegin); - OS.EmitBytes(IsaVersionString); - OS.EmitLabel(DescEnd); - } - ); + EmitNote(ElfNote::NoteNameV2, DescSZ, ELF::NT_AMD_AMDGPU_ISA, + [&](MCELFStreamer &OS) { + OS.EmitLabel(DescBegin); + OS.EmitBytes(IsaVersionString); + OS.EmitLabel(DescEnd); + }); + return true; +} + +bool AMDGPUTargetELFStreamer::EmitHSAMetadata( + std::shared_ptr<msgpack::Node> &HSAMetadataRoot, bool Strict) { + V3::MetadataVerifier Verifier(Strict); + if (!Verifier.verify(*HSAMetadataRoot)) + return false; + + std::string HSAMetadataString; + raw_string_ostream StrOS(HSAMetadataString); + msgpack::Writer MPWriter(StrOS); + HSAMetadataRoot->write(MPWriter); + + // Create two labels to mark the beginning and end of the desc field + // and a MCExpr to calculate the size of the desc field. + auto &Context = getContext(); + auto *DescBegin = Context.createTempSymbol(); + auto *DescEnd = Context.createTempSymbol(); + auto *DescSZ = MCBinaryExpr::createSub( + MCSymbolRefExpr::create(DescEnd, Context), + MCSymbolRefExpr::create(DescBegin, Context), Context); + + EmitNote(ElfNote::NoteNameV3, DescSZ, ELF::NT_AMDGPU_METADATA, + [&](MCELFStreamer &OS) { + OS.EmitLabel(DescBegin); + OS.EmitBytes(StrOS.str()); + OS.EmitLabel(DescEnd); + }); return true; } @@ -477,28 +526,24 @@ bool AMDGPUTargetELFStreamer::EmitHSAMetadata( MCSymbolRefExpr::create(DescEnd, Context), MCSymbolRefExpr::create(DescBegin, Context), Context); - EmitAMDGPUNote( - DescSZ, - ELF::NT_AMD_AMDGPU_HSA_METADATA, - [&](MCELFStreamer &OS) { - OS.EmitLabel(DescBegin); - OS.EmitBytes(HSAMetadataString); - OS.EmitLabel(DescEnd); - } - ); + EmitNote(ElfNote::NoteNameV2, DescSZ, ELF::NT_AMD_AMDGPU_HSA_METADATA, + [&](MCELFStreamer &OS) { + OS.EmitLabel(DescBegin); + OS.EmitBytes(HSAMetadataString); + OS.EmitLabel(DescEnd); + }); return true; } bool AMDGPUTargetELFStreamer::EmitPALMetadata( const PALMD::Metadata &PALMetadata) { - EmitAMDGPUNote( - MCConstantExpr::create(PALMetadata.size() * sizeof(uint32_t), getContext()), - ELF::NT_AMD_AMDGPU_PAL_METADATA, - [&](MCELFStreamer &OS){ - for (auto I : PALMetadata) - OS.EmitIntValue(I, sizeof(uint32_t)); - } - ); + EmitNote(ElfNote::NoteNameV2, + MCConstantExpr::create(PALMetadata.size() * sizeof(uint32_t), + getContext()), + ELF::NT_AMD_AMDGPU_PAL_METADATA, [&](MCELFStreamer &OS) { + for (auto I : PALMetadata) + OS.EmitIntValue(I, sizeof(uint32_t)); + }); return true; } diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h index 2b885592f32..9a807c804f9 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h @@ -11,6 +11,7 @@ #define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUTARGETSTREAMER_H #include "AMDKernelCodeT.h" +#include "llvm/BinaryFormat/MsgPackTypes.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/Support/AMDGPUMetadata.h" @@ -52,7 +53,20 @@ public: virtual bool EmitISAVersion(StringRef IsaVersionString) = 0; /// \returns True on success, false on failure. - virtual bool EmitHSAMetadata(StringRef HSAMetadataString); + virtual bool EmitHSAMetadataV2(StringRef HSAMetadataString); + + /// \returns True on success, false on failure. + virtual bool EmitHSAMetadataV3(StringRef HSAMetadataString); + + /// Emit HSA Metadata + /// + /// When \p Strict is true, known metadata elements must already be + /// well-typed. When \p Strict is false, known types are inferred and + /// the \p HSAMetadata structure is updated with the correct types. + /// + /// \returns True on success, false on failure. + virtual bool EmitHSAMetadata(std::shared_ptr<msgpack::Node> &HSAMetadata, + bool Strict) = 0; /// \returns True on success, false on failure. virtual bool EmitHSAMetadata(const AMDGPU::HSAMD::Metadata &HSAMetadata) = 0; @@ -92,6 +106,10 @@ public: bool EmitISAVersion(StringRef IsaVersionString) override; /// \returns True on success, false on failure. + bool EmitHSAMetadata(std::shared_ptr<msgpack::Node> &HSAMetadata, + bool Strict) override; + + /// \returns True on success, false on failure. bool EmitHSAMetadata(const AMDGPU::HSAMD::Metadata &HSAMetadata) override; /// \returns True on success, false on failure. @@ -107,8 +125,8 @@ public: class AMDGPUTargetELFStreamer final : public AMDGPUTargetStreamer { MCStreamer &Streamer; - void EmitAMDGPUNote(const MCExpr *DescSize, unsigned NoteType, - function_ref<void(MCELFStreamer &)> EmitDesc); + void EmitNote(StringRef Name, const MCExpr *DescSize, unsigned NoteType, + function_ref<void(MCELFStreamer &)> EmitDesc); public: AMDGPUTargetELFStreamer(MCStreamer &S, const MCSubtargetInfo &STI); @@ -132,6 +150,10 @@ public: bool EmitISAVersion(StringRef IsaVersionString) override; /// \returns True on success, false on failure. + bool EmitHSAMetadata(std::shared_ptr<msgpack::Node> &HSAMetadata, + bool Strict) override; + + /// \returns True on success, false on failure. bool EmitHSAMetadata(const AMDGPU::HSAMD::Metadata &HSAMetadata) override; /// \returns True on success, false on failure. diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/LLVMBuild.txt b/llvm/lib/Target/AMDGPU/MCTargetDesc/LLVMBuild.txt index 773ee7c0a4b..bc910a470d7 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/LLVMBuild.txt +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/LLVMBuild.txt @@ -19,5 +19,5 @@ type = Library name = AMDGPUDesc parent = AMDGPU -required_libraries = Core MC AMDGPUAsmPrinter AMDGPUInfo AMDGPUUtils Support +required_libraries = Core MC AMDGPUAsmPrinter AMDGPUInfo AMDGPUUtils Support BinaryFormat add_to_library_groups = AMDGPU |