diff options
author | Yaxun Liu <Yaxun.Liu@amd.com> | 2016-12-14 17:16:52 +0000 |
---|---|---|
committer | Yaxun Liu <Yaxun.Liu@amd.com> | 2016-12-14 17:16:52 +0000 |
commit | 07d659bc76fdd5752cc60cec7c56689164ea3078 (patch) | |
tree | bb5d7e680de2252bde3639314bc2fe2135af7be8 /llvm/lib/Target | |
parent | ebd8110aa1c49d3873be91eb28da5cb9d3cd9beb (diff) | |
download | bcm5719-llvm-07d659bc76fdd5752cc60cec7c56689164ea3078.tar.gz bcm5719-llvm-07d659bc76fdd5752cc60cec7c56689164ea3078.zip |
AMDGPU: Emit runtime metadata version 2 as YAML
Differential Revision: https://reviews.llvm.org/D25046
llvm-svn: 289674
Diffstat (limited to 'llvm/lib/Target')
7 files changed, 550 insertions, 403 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp index 7b5ebc57436..5402298db78 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -119,7 +119,7 @@ void AMDGPUAsmPrinter::EmitStartOfAsmFile(Module &M) { "AMD", "AMDGPU"); // Emit runtime metadata. - TS->emitRuntimeMetadataAsNoteElement(M); + TS->emitRuntimeMetadata(M); } bool AMDGPUAsmPrinter::isBlockOnlyReachableByFallthrough( @@ -824,4 +824,3 @@ bool AMDGPUAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, *TM.getSubtargetImpl(*MF->getFunction())->getRegisterInfo()); return false; } - diff --git a/llvm/lib/Target/AMDGPU/AMDGPURuntimeMetadata.h b/llvm/lib/Target/AMDGPU/AMDGPURuntimeMetadata.h index f39db6cc943..563db5f3300 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURuntimeMetadata.h +++ b/llvm/lib/Target/AMDGPU/AMDGPURuntimeMetadata.h @@ -14,17 +14,12 @@ /// Runtime requests certain information (metadata) about kernels to be able /// to execute the kernels and answer the queries about the kernels. /// The metadata is represented as a note element in the .note ELF section of a -/// binary (code object). The desc field of the note element consists of -/// key-value pairs. Each key is an 8 bit unsigned integer. Each value can be -/// an integer, a string, or a stream of key-value pairs. There are 3 levels of -/// key-value pair streams. At the beginning of the ELF section is the top level -/// key-value pair stream. A kernel-level key-value pair stream starts after -/// encountering KeyKernelBegin and ends immediately before encountering -/// KeyKernelEnd. A kernel-argument-level key-value pair stream starts -/// after encountering KeyArgBegin and ends immediately before encountering -/// KeyArgEnd. A kernel-level key-value pair stream can only appear in a top -/// level key-value pair stream. 
A kernel-argument-level key-value pair stream -/// can only appear in a kernel-level key-value pair stream. +/// binary (code object). The desc field of the note element is a YAML string +/// consisting of key-value pairs. Each key is a string. Each value can be +/// an integer, a string, or a YAML sequence. There are 3 levels of YAML maps. +/// At the beginning of the YAML string is the module-level YAML map. A +/// kernel-level YAML map is in the amd.Kernels sequence. A +/// kernel-argument-level map is in the amd.Args sequence. /// /// The format should be kept backward compatible. New enum values and bit /// fields should be appended at the end. It is suggested to bump up the @@ -37,64 +32,46 @@ #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPURUNTIMEMETADATA_H #define LLVM_LIB_TARGET_AMDGPU_AMDGPURUNTIMEMETADATA_H +#include <cstdint> +#include <vector> +#include <string> + namespace AMDGPU { namespace RuntimeMD { // Version and revision of runtime metadata - const unsigned char MDVersion = 1; + const unsigned char MDVersion = 2; const unsigned char MDRevision = 0; - // Enumeration values of keys in runtime metadata. - enum Key { - KeyNull = 0, // Place holder. 
Ignored when encountered - KeyMDVersion = 1, // Runtime metadata version - KeyLanguage = 2, // Language - KeyLanguageVersion = 3, // Language version - KeyKernelBegin = 4, // Beginning of kernel-level stream - KeyKernelEnd = 5, // End of kernel-level stream - KeyKernelName = 6, // Kernel name - KeyArgBegin = 7, // Beginning of kernel-arg-level stream - KeyArgEnd = 8, // End of kernel-arg-level stream - KeyArgSize = 9, // Kernel arg size - KeyArgAlign = 10, // Kernel arg alignment - KeyArgTypeName = 11, // Kernel type name - KeyArgName = 12, // Kernel name - KeyArgKind = 13, // Kernel argument kind - KeyArgValueType = 14, // Kernel argument value type - KeyArgAddrQual = 15, // Kernel argument address qualifier - KeyArgAccQual = 16, // Kernel argument access qualifier - KeyArgIsConst = 17, // Kernel argument is const qualified - KeyArgIsRestrict = 18, // Kernel argument is restrict qualified - KeyArgIsVolatile = 19, // Kernel argument is volatile qualified - KeyArgIsPipe = 20, // Kernel argument is pipe qualified - KeyReqdWorkGroupSize = 21, // Required work group size - KeyWorkGroupSizeHint = 22, // Work group size hint - KeyVecTypeHint = 23, // Vector type hint - KeyKernelIndex = 24, // Kernel index for device enqueue - KeyMinWavesPerSIMD = 25, // Minimum number of waves per SIMD - KeyMaxWavesPerSIMD = 26, // Maximum number of waves per SIMD - KeyFlatWorkGroupSizeLimits = 27, // Flat work group size limits - KeyMaxWorkGroupSize = 28, // Maximum work group size - KeyNoPartialWorkGroups = 29, // No partial work groups - KeyPrintfInfo = 30, // Prinf function call information - KeyArgActualAcc = 31, // The actual kernel argument access qualifier - KeyArgPointeeAlign = 32, // Alignment of pointee type - }; - - enum Language : uint8_t { - OpenCL_C = 0, - HCC = 1, - OpenMP = 2, - OpenCL_CPP = 3, -}; - - enum LanguageVersion : uint16_t { - V100 = 100, - V110 = 110, - V120 = 120, - V200 = 200, - V210 = 210, + // Name of keys for runtime metadata. 
+ namespace KeyName { + const char MDVersion[] = "amd.MDVersion"; // Runtime metadata version + const char Language[] = "amd.Language"; // Language + const char LanguageVersion[] = "amd.LanguageVersion"; // Language version + const char Kernels[] = "amd.Kernels"; // Kernels + const char KernelName[] = "amd.KernelName"; // Kernel name + const char Args[] = "amd.Args"; // Kernel arguments + const char ArgSize[] = "amd.ArgSize"; // Kernel arg size + const char ArgAlign[] = "amd.ArgAlign"; // Kernel arg alignment + const char ArgTypeName[] = "amd.ArgTypeName"; // Kernel arg type name + const char ArgName[] = "amd.ArgName"; // Kernel arg name + const char ArgKind[] = "amd.ArgKind"; // Kernel argument kind + const char ArgValueType[] = "amd.ArgValueType"; // Kernel argument value type + const char ArgAddrQual[] = "amd.ArgAddrQual"; // Kernel argument address qualifier + const char ArgAccQual[] = "amd.ArgAccQual"; // Kernel argument access qualifier + const char ArgIsConst[] = "amd.ArgIsConst"; // Kernel argument is const qualified + const char ArgIsRestrict[] = "amd.ArgIsRestrict"; // Kernel argument is restrict qualified + const char ArgIsVolatile[] = "amd.ArgIsVolatile"; // Kernel argument is volatile qualified + const char ArgIsPipe[] = "amd.ArgIsPipe"; // Kernel argument is pipe qualified + const char ReqdWorkGroupSize[] = "amd.ReqdWorkGroupSize"; // Required work group size + const char WorkGroupSizeHint[] = "amd.WorkGroupSizeHint"; // Work group size hint + const char VecTypeHint[] = "amd.VecTypeHint"; // Vector type hint + const char KernelIndex[] = "amd.KernelIndex"; // Kernel index for device enqueue + const char NoPartialWorkGroups[] = "amd.NoPartialWorkGroups"; // No partial work groups + const char PrintfInfo[] = "amd.PrintfInfo"; // Printf function call information + const char ArgActualAcc[] = "amd.ArgActualAcc"; // The actual kernel argument access qualifier + const char ArgPointeeAlign[] = "amd.ArgPointeeAlign"; // Alignment of pointee type }; namespace KernelArg 
{ @@ -130,8 +107,9 @@ namespace RuntimeMD { F64 = 11, }; + // Avoid using 'None' since it conflicts with a macro in an X11 header file. enum AccessQualifer : uint8_t { - None = 0, + AccNone = 0, ReadOnly = 1, WriteOnly = 2, ReadWrite = 3, @@ -146,6 +124,69 @@ namespace RuntimeMD { Region = 5, }; } // namespace KernelArg + + // Invalid values are used to indicate that an optional key should not be emitted. + const uint8_t INVALID_ADDR_QUAL = 0xff; + const uint8_t INVALID_ACC_QUAL = 0xff; + const uint32_t INVALID_KERNEL_INDEX = ~0U; + + namespace KernelArg { + // In-memory representation of kernel argument information. + struct Metadata { + uint32_t Size; + uint32_t Align; + uint32_t PointeeAlign; + uint8_t Kind; + uint16_t ValueType; + std::string TypeName; + std::string Name; + uint8_t AddrQual; + uint8_t AccQual; + uint8_t IsVolatile; + uint8_t IsConst; + uint8_t IsRestrict; + uint8_t IsPipe; + Metadata() : Size(0), Align(0), PointeeAlign(0), Kind(0), ValueType(0), + AddrQual(INVALID_ADDR_QUAL), AccQual(INVALID_ACC_QUAL), IsVolatile(0), + IsConst(0), IsRestrict(0), IsPipe(0) {} + }; + } + + namespace Kernel { + // In-memory representation of kernel information. + struct Metadata { + std::string Name; + std::string Language; + std::vector<uint8_t> LanguageVersion; + std::vector<uint32_t> ReqdWorkGroupSize; + std::vector<uint32_t> WorkGroupSizeHint; + std::string VecTypeHint; + uint32_t KernelIndex; + uint8_t NoPartialWorkGroups; + std::vector<KernelArg::Metadata> Args; + Metadata() : KernelIndex(INVALID_KERNEL_INDEX), NoPartialWorkGroups(0) {} + }; + } + + namespace Program { + // In-memory representation of program information. + struct Metadata { + std::vector<uint8_t> MDVersionSeq; + std::vector<std::string> PrintfInfo; + std::vector<Kernel::Metadata> Kernels; + + explicit Metadata(){} + + // Construct from a YAML string. + explicit Metadata(const std::string &YAML); + + // Convert to YAML string. + std::string toYAML(); + + // Convert from YAML string. 
+ static Metadata fromYAML(const std::string &S); + }; + } } // namespace RuntimeMD } // namespace AMDGPU diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMD.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMD.cpp new file mode 100644 index 00000000000..95387ad1627 --- /dev/null +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMD.cpp @@ -0,0 +1,408 @@ +//===-- AMDGPURuntimeMD.cpp - Generates runtime metadata ------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file +/// +/// Generates AMDGPU runtime metadata for YAML mapping. +// +//===----------------------------------------------------------------------===// +// + +#include "AMDGPU.h" +#include "AMDGPURuntimeMetadata.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/YAMLTraits.h" +#include <vector> +#include "AMDGPURuntimeMD.h" + +using namespace llvm; +using namespace ::AMDGPU::RuntimeMD; + +static cl::opt<bool> +DumpRuntimeMD("amdgpu-dump-rtmd", + cl::desc("Dump AMDGPU runtime metadata")); + +static cl::opt<bool> +CheckRuntimeMDParser("amdgpu-check-rtmd-parser", cl::Hidden, + cl::desc("Check AMDGPU runtime metadata YAML parser")); + +LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(uint8_t) +LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(uint32_t) +LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(std::string) +LLVM_YAML_IS_SEQUENCE_VECTOR(Kernel::Metadata) +LLVM_YAML_IS_SEQUENCE_VECTOR(KernelArg::Metadata) + +namespace llvm { +namespace yaml { + +template <> struct MappingTraits<KernelArg::Metadata> { + static void mapping(IO &YamlIO, KernelArg::Metadata &A) { + 
YamlIO.mapRequired(KeyName::ArgSize, A.Size); + YamlIO.mapRequired(KeyName::ArgAlign, A.Align); + YamlIO.mapOptional(KeyName::ArgPointeeAlign, A.PointeeAlign, 0U); + YamlIO.mapRequired(KeyName::ArgKind, A.Kind); + YamlIO.mapRequired(KeyName::ArgValueType, A.ValueType); + YamlIO.mapOptional(KeyName::ArgTypeName, A.TypeName, std::string()); + YamlIO.mapOptional(KeyName::ArgName, A.Name, std::string()); + YamlIO.mapOptional(KeyName::ArgAddrQual, A.AddrQual, INVALID_ADDR_QUAL); + YamlIO.mapOptional(KeyName::ArgAccQual, A.AccQual, INVALID_ACC_QUAL); + YamlIO.mapOptional(KeyName::ArgIsVolatile, A.IsVolatile, uint8_t(0)); + YamlIO.mapOptional(KeyName::ArgIsConst, A.IsConst, uint8_t(0)); + YamlIO.mapOptional(KeyName::ArgIsRestrict, A.IsRestrict, uint8_t(0)); + YamlIO.mapOptional(KeyName::ArgIsPipe, A.IsPipe, uint8_t(0)); + } + static const bool flow = true; +}; + +template <> struct MappingTraits<Kernel::Metadata> { + static void mapping(IO &YamlIO, Kernel::Metadata &K) { + YamlIO.mapRequired(KeyName::KernelName, K.Name); + YamlIO.mapOptional(KeyName::Language, K.Language, std::string()); + YamlIO.mapOptional(KeyName::LanguageVersion, K.LanguageVersion); + YamlIO.mapOptional(KeyName::ReqdWorkGroupSize, K.ReqdWorkGroupSize); + YamlIO.mapOptional(KeyName::WorkGroupSizeHint, K.WorkGroupSizeHint); + YamlIO.mapOptional(KeyName::VecTypeHint, K.VecTypeHint, std::string()); + YamlIO.mapOptional(KeyName::KernelIndex, K.KernelIndex, + INVALID_KERNEL_INDEX); + YamlIO.mapOptional(KeyName::NoPartialWorkGroups, K.NoPartialWorkGroups, + uint8_t(0)); + YamlIO.mapRequired(KeyName::Args, K.Args); + } + static const bool flow = true; +}; + +template <> struct MappingTraits<Program::Metadata> { + static void mapping(IO &YamlIO, Program::Metadata &Prog) { + YamlIO.mapRequired(KeyName::MDVersion, Prog.MDVersionSeq); + YamlIO.mapOptional(KeyName::PrintfInfo, Prog.PrintfInfo); + YamlIO.mapOptional(KeyName::Kernels, Prog.Kernels); + } + static const bool flow = true; +}; + +} // end namespace yaml 
+} // end namespace llvm + +// Get a vector of three integer values from MDNode \p Node; +static std::vector<uint32_t> getThreeInt32(MDNode *Node) { + assert(Node->getNumOperands() == 3); + std::vector<uint32_t> V; + for (const MDOperand &Op : Node->operands()) { + const ConstantInt *CI = mdconst::extract<ConstantInt>(Op); + V.push_back(CI->getZExtValue()); + } + return V; +} + +static std::string getOCLTypeName(Type *Ty, bool Signed) { + switch (Ty->getTypeID()) { + case Type::HalfTyID: + return "half"; + case Type::FloatTyID: + return "float"; + case Type::DoubleTyID: + return "double"; + case Type::IntegerTyID: { + if (!Signed) + return (Twine('u') + getOCLTypeName(Ty, true)).str(); + unsigned BW = Ty->getIntegerBitWidth(); + switch (BW) { + case 8: + return "char"; + case 16: + return "short"; + case 32: + return "int"; + case 64: + return "long"; + default: + return (Twine('i') + Twine(BW)).str(); + } + } + case Type::VectorTyID: { + VectorType *VecTy = cast<VectorType>(Ty); + Type *EleTy = VecTy->getElementType(); + unsigned Size = VecTy->getVectorNumElements(); + return (Twine(getOCLTypeName(EleTy, Signed)) + Twine(Size)).str(); + } + default: + return "unknown"; + } +} + +static KernelArg::ValueType getRuntimeMDValueType( + Type *Ty, StringRef TypeName) { + switch (Ty->getTypeID()) { + case Type::HalfTyID: + return KernelArg::F16; + case Type::FloatTyID: + return KernelArg::F32; + case Type::DoubleTyID: + return KernelArg::F64; + case Type::IntegerTyID: { + bool Signed = !TypeName.startswith("u"); + switch (Ty->getIntegerBitWidth()) { + case 8: + return Signed ? KernelArg::I8 : KernelArg::U8; + case 16: + return Signed ? KernelArg::I16 : KernelArg::U16; + case 32: + return Signed ? KernelArg::I32 : KernelArg::U32; + case 64: + return Signed ? KernelArg::I64 : KernelArg::U64; + default: + // Runtime does not recognize other integer types. Report as struct type. 
+ return KernelArg::Struct; + } + } + case Type::VectorTyID: + return getRuntimeMDValueType(Ty->getVectorElementType(), TypeName); + case Type::PointerTyID: + return getRuntimeMDValueType(Ty->getPointerElementType(), TypeName); + default: + return KernelArg::Struct; + } +} + +static KernelArg::AddressSpaceQualifer getRuntimeAddrSpace( + AMDGPUAS::AddressSpaces A) { + switch (A) { + case AMDGPUAS::GLOBAL_ADDRESS: + return KernelArg::Global; + case AMDGPUAS::CONSTANT_ADDRESS: + return KernelArg::Constant; + case AMDGPUAS::LOCAL_ADDRESS: + return KernelArg::Local; + case AMDGPUAS::FLAT_ADDRESS: + return KernelArg::Generic; + case AMDGPUAS::REGION_ADDRESS: + return KernelArg::Region; + default: + return KernelArg::Private; + } +} + +static KernelArg::Metadata getRuntimeMDForKernelArg(const DataLayout &DL, + Type *T, KernelArg::Kind Kind, StringRef BaseTypeName = "", + StringRef TypeName = "", StringRef ArgName = "", StringRef TypeQual = "", + StringRef AccQual = "") { + + KernelArg::Metadata Arg; + + // Set ArgSize and ArgAlign. + Arg.Size = DL.getTypeAllocSize(T); + Arg.Align = DL.getABITypeAlignment(T); + if (auto PT = dyn_cast<PointerType>(T)) { + auto ET = PT->getElementType(); + if (PT->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && ET->isSized()) + Arg.PointeeAlign = DL.getABITypeAlignment(ET); + } + + // Set ArgTypeName. + Arg.TypeName = TypeName; + + // Set ArgName. + Arg.Name = ArgName; + + // Set ArgIsVolatile, ArgIsRestrict, ArgIsConst and ArgIsPipe. + SmallVector<StringRef, 1> SplitQ; + TypeQual.split(SplitQ, " ", -1, false /* Drop empty entry */); + + for (StringRef KeyName : SplitQ) { + auto *P = StringSwitch<uint8_t *>(KeyName) + .Case("volatile", &Arg.IsVolatile) + .Case("restrict", &Arg.IsRestrict) + .Case("const", &Arg.IsConst) + .Case("pipe", &Arg.IsPipe) + .Default(nullptr); + if (P) + *P = 1; + } + + // Set ArgKind. + Arg.Kind = Kind; + + // Set ArgValueType. + Arg.ValueType = getRuntimeMDValueType(T, BaseTypeName); + + // Set ArgAccQual. 
+ if (!AccQual.empty()) { + Arg.AccQual = StringSwitch<KernelArg::AccessQualifer>(AccQual) + .Case("read_only", KernelArg::ReadOnly) + .Case("write_only", KernelArg::WriteOnly) + .Case("read_write", KernelArg::ReadWrite) + .Default(KernelArg::AccNone); + } + + // Set ArgAddrQual. + if (auto *PT = dyn_cast<PointerType>(T)) { + Arg.AddrQual = getRuntimeAddrSpace(static_cast<AMDGPUAS::AddressSpaces>( + PT->getAddressSpace())); + } + + return Arg; +} + +static Kernel::Metadata getRuntimeMDForKernel(const Function &F) { + Kernel::Metadata Kernel; + Kernel.Name = F.getName(); + auto &M = *F.getParent(); + + // Set Language and LanguageVersion. + if (auto MD = M.getNamedMetadata("opencl.ocl.version")) { + if (MD->getNumOperands() != 0) { + auto Node = MD->getOperand(0); + if (Node->getNumOperands() > 1) { + Kernel.Language = "OpenCL C"; + uint16_t Major = mdconst::extract<ConstantInt>(Node->getOperand(0)) + ->getZExtValue(); + uint16_t Minor = mdconst::extract<ConstantInt>(Node->getOperand(1)) + ->getZExtValue(); + Kernel.LanguageVersion.push_back(Major); + Kernel.LanguageVersion.push_back(Minor); + } + } + } + + const DataLayout &DL = F.getParent()->getDataLayout(); + for (auto &Arg : F.args()) { + unsigned I = Arg.getArgNo(); + Type *T = Arg.getType(); + auto TypeName = dyn_cast<MDString>(F.getMetadata( + "kernel_arg_type")->getOperand(I))->getString(); + auto BaseTypeName = cast<MDString>(F.getMetadata( + "kernel_arg_base_type")->getOperand(I))->getString(); + StringRef ArgName; + if (auto ArgNameMD = F.getMetadata("kernel_arg_name")) + ArgName = cast<MDString>(ArgNameMD->getOperand(I))->getString(); + auto TypeQual = cast<MDString>(F.getMetadata( + "kernel_arg_type_qual")->getOperand(I))->getString(); + auto AccQual = cast<MDString>(F.getMetadata( + "kernel_arg_access_qual")->getOperand(I))->getString(); + KernelArg::Kind Kind; + if (TypeQual.find("pipe") != StringRef::npos) + Kind = KernelArg::Pipe; + else Kind = StringSwitch<KernelArg::Kind>(BaseTypeName) + 
.Case("sampler_t", KernelArg::Sampler) + .Case("queue_t", KernelArg::Queue) + .Cases("image1d_t", "image1d_array_t", "image1d_buffer_t", + "image2d_t" , "image2d_array_t", KernelArg::Image) + .Cases("image2d_depth_t", "image2d_array_depth_t", + "image2d_msaa_t", "image2d_array_msaa_t", + "image2d_msaa_depth_t", KernelArg::Image) + .Cases("image2d_array_msaa_depth_t", "image3d_t", + KernelArg::Image) + .Default(isa<PointerType>(T) ? + (T->getPointerAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ? + KernelArg::DynamicSharedPointer : + KernelArg::GlobalBuffer) : + KernelArg::ByValue); + Kernel.Args.emplace_back(getRuntimeMDForKernelArg(DL, T, Kind, + BaseTypeName, TypeName, ArgName, TypeQual, AccQual)); + } + + // Emit hidden kernel arguments for OpenCL kernels. + if (F.getParent()->getNamedMetadata("opencl.ocl.version")) { + auto Int64T = Type::getInt64Ty(F.getContext()); + Kernel.Args.emplace_back(getRuntimeMDForKernelArg(DL, Int64T, + KernelArg::HiddenGlobalOffsetX)); + Kernel.Args.emplace_back(getRuntimeMDForKernelArg(DL, Int64T, + KernelArg::HiddenGlobalOffsetY)); + Kernel.Args.emplace_back(getRuntimeMDForKernelArg(DL, Int64T, + KernelArg::HiddenGlobalOffsetZ)); + if (F.getParent()->getNamedMetadata("llvm.printf.fmts")) { + auto Int8PtrT = Type::getInt8PtrTy(F.getContext(), + KernelArg::Global); + Kernel.Args.emplace_back(getRuntimeMDForKernelArg(DL, Int8PtrT, + KernelArg::HiddenPrintfBuffer)); + } + } + + // Set ReqdWorkGroupSize, WorkGroupSizeHint, and VecTypeHint. 
+ if (auto RWGS = F.getMetadata("reqd_work_group_size")) + Kernel.ReqdWorkGroupSize = getThreeInt32(RWGS); + + if (auto WGSH = F.getMetadata("work_group_size_hint")) + Kernel.WorkGroupSizeHint = getThreeInt32(WGSH); + + if (auto VTH = F.getMetadata("vec_type_hint")) + Kernel.VecTypeHint = getOCLTypeName(cast<ValueAsMetadata>( + VTH->getOperand(0))->getType(), mdconst::extract<ConstantInt>( + VTH->getOperand(1))->getZExtValue()); + + return Kernel; +} + +Program::Metadata::Metadata(const std::string &YAML) { + yaml::Input Input(YAML); + Input >> *this; +} + +std::string Program::Metadata::toYAML(void) { + std::string Text; + raw_string_ostream Stream(Text); + yaml::Output Output(Stream, nullptr, INT_MAX /* do not wrap line */); + Output << *this; + return Stream.str(); +} + +Program::Metadata Program::Metadata::fromYAML(const std::string &S) { + return Program::Metadata(S); +} + +// Check if the YAML string can be parsed. +static void checkRuntimeMDYAMLString(const std::string &YAML) { + auto P = Program::Metadata::fromYAML(YAML); + auto S = P.toYAML(); + llvm::errs() << "AMDGPU runtime metadata parser test " + << (YAML == S ? "passes" : "fails") << ".\n"; + if (YAML != S) { + llvm::errs() << "First output: " << YAML << '\n' + << "Second output: " << S << '\n'; + } +} + +std::string llvm::getRuntimeMDYAMLString(Module &M) { + Program::Metadata Prog; + Prog.MDVersionSeq.push_back(MDVersion); + Prog.MDVersionSeq.push_back(MDRevision); + + // Set PrintfInfo. + if (auto MD = M.getNamedMetadata("llvm.printf.fmts")) { + for (unsigned I = 0; I < MD->getNumOperands(); ++I) { + auto Node = MD->getOperand(I); + if (Node->getNumOperands() > 0) + Prog.PrintfInfo.push_back(cast<MDString>(Node->getOperand(0)) + ->getString()); + } + } + + // Set Kernels. 
+ for (auto &F: M.functions()) { + if (!F.getMetadata("kernel_arg_type")) + continue; + Prog.Kernels.emplace_back(getRuntimeMDForKernel(F)); + } + + auto YAML = Prog.toYAML(); + + if (DumpRuntimeMD) + llvm::errs() << "AMDGPU runtime metadata:\n" << YAML << '\n'; + + if (CheckRuntimeMDParser) + checkRuntimeMDYAMLString(YAML); + + return YAML; +} diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMD.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMD.h new file mode 100644 index 00000000000..a92fdd4bebc --- /dev/null +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMD.h @@ -0,0 +1,26 @@ +//===- AMDGPURuntimeMD.h - Generate runtime metadata ---------------*- C++ -*-// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares functions for generating runtime metadata. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPURUNTIMEMD_H +#define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPURUNTIMEMD_H + +#include <string> + +namespace llvm { +class Module; + +// Get runtime metadata as YAML string. 
+std::string getRuntimeMDYAMLString(Module &M); + +} +#endif diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp index 1ebf92707cc..7eef5ed6f66 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp @@ -27,6 +27,7 @@ #include "llvm/MC/MCSectionELF.h" #include "llvm/Support/ELF.h" #include "llvm/Support/FormattedStream.h" +#include "AMDGPURuntimeMD.h" namespace llvm { #include "AMDGPUPTNote.h" @@ -197,305 +198,7 @@ void AMDGPUTargetELFStreamer::EmitAMDGPUHsaProgramScopeGlobal( Symbol->setBinding(ELF::STB_GLOBAL); } -void AMDGPUTargetStreamer::emitRuntimeMDIntValue(RuntimeMD::Key K, uint64_t V, - unsigned Size) { - auto &S = getStreamer(); - S.EmitIntValue(K, 1); - S.EmitIntValue(V, Size); -} - -void AMDGPUTargetStreamer::emitRuntimeMDStringValue(RuntimeMD::Key K, - StringRef R) { - auto &S = getStreamer(); - S.EmitIntValue(K, 1); - S.EmitIntValue(R.size(), 4); - S.EmitBytes(R); -} - -void AMDGPUTargetStreamer::emitRuntimeMDThreeIntValues(RuntimeMD::Key K, - MDNode *Node, - unsigned Size) { - assert(Node->getNumOperands() == 3); - - auto &S = getStreamer(); - S.EmitIntValue(K, 1); - for (const MDOperand &Op : Node->operands()) { - const ConstantInt *CI = mdconst::extract<ConstantInt>(Op); - S.EmitIntValue(CI->getZExtValue(), Size); - } -} - -void AMDGPUTargetStreamer::emitStartOfRuntimeMetadata(const Module &M) { - emitRuntimeMDIntValue(RuntimeMD::KeyMDVersion, - RuntimeMD::MDVersion << 8 | RuntimeMD::MDRevision, 2); - if (auto MD = M.getNamedMetadata("opencl.ocl.version")) { - if (MD->getNumOperands() != 0) { - auto Node = MD->getOperand(0); - if (Node->getNumOperands() > 1) { - emitRuntimeMDIntValue(RuntimeMD::KeyLanguage, - RuntimeMD::OpenCL_C, 1); - uint16_t Major = mdconst::extract<ConstantInt>(Node->getOperand(0)) - ->getZExtValue(); - uint16_t Minor = 
mdconst::extract<ConstantInt>(Node->getOperand(1)) - ->getZExtValue(); - emitRuntimeMDIntValue(RuntimeMD::KeyLanguageVersion, - Major * 100 + Minor * 10, 2); - } - } - } - - if (auto MD = M.getNamedMetadata("llvm.printf.fmts")) { - for (unsigned I = 0; I < MD->getNumOperands(); ++I) { - auto Node = MD->getOperand(I); - if (Node->getNumOperands() > 0) - emitRuntimeMDStringValue(RuntimeMD::KeyPrintfInfo, - cast<MDString>(Node->getOperand(0))->getString()); - } - } -} - -static std::string getOCLTypeName(Type *Ty, bool Signed) { - switch (Ty->getTypeID()) { - case Type::HalfTyID: - return "half"; - case Type::FloatTyID: - return "float"; - case Type::DoubleTyID: - return "double"; - case Type::IntegerTyID: { - if (!Signed) - return (Twine('u') + getOCLTypeName(Ty, true)).str(); - unsigned BW = Ty->getIntegerBitWidth(); - switch (BW) { - case 8: - return "char"; - case 16: - return "short"; - case 32: - return "int"; - case 64: - return "long"; - default: - return (Twine('i') + Twine(BW)).str(); - } - } - case Type::VectorTyID: { - VectorType *VecTy = cast<VectorType>(Ty); - Type *EleTy = VecTy->getElementType(); - unsigned Size = VecTy->getVectorNumElements(); - return (Twine(getOCLTypeName(EleTy, Signed)) + Twine(Size)).str(); - } - default: - return "unknown"; - } -} - -static RuntimeMD::KernelArg::ValueType getRuntimeMDValueType( - Type *Ty, StringRef TypeName) { - switch (Ty->getTypeID()) { - case Type::HalfTyID: - return RuntimeMD::KernelArg::F16; - case Type::FloatTyID: - return RuntimeMD::KernelArg::F32; - case Type::DoubleTyID: - return RuntimeMD::KernelArg::F64; - case Type::IntegerTyID: { - bool Signed = !TypeName.startswith("u"); - switch (Ty->getIntegerBitWidth()) { - case 8: - return Signed ? RuntimeMD::KernelArg::I8 : RuntimeMD::KernelArg::U8; - case 16: - return Signed ? RuntimeMD::KernelArg::I16 : RuntimeMD::KernelArg::U16; - case 32: - return Signed ? RuntimeMD::KernelArg::I32 : RuntimeMD::KernelArg::U32; - case 64: - return Signed ? 
RuntimeMD::KernelArg::I64 : RuntimeMD::KernelArg::U64; - default: - // Runtime does not recognize other integer types. Report as struct type. - return RuntimeMD::KernelArg::Struct; - } - } - case Type::VectorTyID: - return getRuntimeMDValueType(Ty->getVectorElementType(), TypeName); - case Type::PointerTyID: - return getRuntimeMDValueType(Ty->getPointerElementType(), TypeName); - default: - return RuntimeMD::KernelArg::Struct; - } -} - -static RuntimeMD::KernelArg::AddressSpaceQualifer getRuntimeAddrSpace( - AMDGPUAS::AddressSpaces A) { - switch (A) { - case AMDGPUAS::GLOBAL_ADDRESS: - return RuntimeMD::KernelArg::Global; - case AMDGPUAS::CONSTANT_ADDRESS: - return RuntimeMD::KernelArg::Constant; - case AMDGPUAS::LOCAL_ADDRESS: - return RuntimeMD::KernelArg::Local; - case AMDGPUAS::FLAT_ADDRESS: - return RuntimeMD::KernelArg::Generic; - case AMDGPUAS::REGION_ADDRESS: - return RuntimeMD::KernelArg::Region; - default: - return RuntimeMD::KernelArg::Private; - } -} - -void AMDGPUTargetStreamer::emitRuntimeMetadataForKernelArg(const DataLayout &DL, - Type *T, RuntimeMD::KernelArg::Kind Kind, - StringRef BaseTypeName, StringRef TypeName, - StringRef ArgName, StringRef TypeQual, StringRef AccQual) { - auto &S = getStreamer(); - - // Emit KeyArgBegin. - S.EmitIntValue(RuntimeMD::KeyArgBegin, 1); - - // Emit KeyArgSize and KeyArgAlign. - emitRuntimeMDIntValue(RuntimeMD::KeyArgSize, - DL.getTypeAllocSize(T), 4); - emitRuntimeMDIntValue(RuntimeMD::KeyArgAlign, - DL.getABITypeAlignment(T), 4); - if (auto PT = dyn_cast<PointerType>(T)) { - auto ET = PT->getElementType(); - if (PT->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && ET->isSized()) - emitRuntimeMDIntValue(RuntimeMD::KeyArgPointeeAlign, - DL.getABITypeAlignment(ET), 4); - } - - // Emit KeyArgTypeName. - if (!TypeName.empty()) - emitRuntimeMDStringValue(RuntimeMD::KeyArgTypeName, TypeName); - - // Emit KeyArgName. 
- if (!ArgName.empty()) - emitRuntimeMDStringValue(RuntimeMD::KeyArgName, ArgName); - - // Emit KeyArgIsVolatile, KeyArgIsRestrict, KeyArgIsConst and KeyArgIsPipe. - SmallVector<StringRef, 1> SplitQ; - TypeQual.split(SplitQ, " ", -1, false /* Drop empty entry */); - - for (StringRef KeyName : SplitQ) { - auto Key = StringSwitch<RuntimeMD::Key>(KeyName) - .Case("volatile", RuntimeMD::KeyArgIsVolatile) - .Case("restrict", RuntimeMD::KeyArgIsRestrict) - .Case("const", RuntimeMD::KeyArgIsConst) - .Case("pipe", RuntimeMD::KeyArgIsPipe) - .Default(RuntimeMD::KeyNull); - S.EmitIntValue(Key, 1); - } - - // Emit KeyArgKind. - emitRuntimeMDIntValue(RuntimeMD::KeyArgKind, Kind, 1); - - // Emit KeyArgValueType. - emitRuntimeMDIntValue(RuntimeMD::KeyArgValueType, - getRuntimeMDValueType(T, BaseTypeName), 2); - - // Emit KeyArgAccQual. - if (!AccQual.empty()) { - auto AQ = StringSwitch<RuntimeMD::KernelArg::AccessQualifer>(AccQual) - .Case("read_only", RuntimeMD::KernelArg::ReadOnly) - .Case("write_only", RuntimeMD::KernelArg::WriteOnly) - .Case("read_write", RuntimeMD::KernelArg::ReadWrite) - .Default(RuntimeMD::KernelArg::None); - emitRuntimeMDIntValue(RuntimeMD::KeyArgAccQual, AQ, 1); - } - - // Emit KeyArgAddrQual. 
- if (auto *PT = dyn_cast<PointerType>(T)) - emitRuntimeMDIntValue(RuntimeMD::KeyArgAddrQual, - getRuntimeAddrSpace(static_cast<AMDGPUAS::AddressSpaces>( - PT->getAddressSpace())), 1); - - // Emit KeyArgEnd - S.EmitIntValue(RuntimeMD::KeyArgEnd, 1); -} - -void AMDGPUTargetStreamer::emitRuntimeMetadata(const Function &F) { - if (!F.getMetadata("kernel_arg_type")) - return; - auto &S = getStreamer(); - S.EmitIntValue(RuntimeMD::KeyKernelBegin, 1); - emitRuntimeMDStringValue(RuntimeMD::KeyKernelName, F.getName()); - - const DataLayout &DL = F.getParent()->getDataLayout(); - for (auto &Arg : F.args()) { - unsigned I = Arg.getArgNo(); - Type *T = Arg.getType(); - auto TypeName = dyn_cast<MDString>(F.getMetadata( - "kernel_arg_type")->getOperand(I))->getString(); - auto BaseTypeName = cast<MDString>(F.getMetadata( - "kernel_arg_base_type")->getOperand(I))->getString(); - StringRef ArgName; - if (auto ArgNameMD = F.getMetadata("kernel_arg_name")) - ArgName = cast<MDString>(ArgNameMD->getOperand(I))->getString(); - auto TypeQual = cast<MDString>(F.getMetadata( - "kernel_arg_type_qual")->getOperand(I))->getString(); - auto AccQual = cast<MDString>(F.getMetadata( - "kernel_arg_access_qual")->getOperand(I))->getString(); - RuntimeMD::KernelArg::Kind Kind; - if (TypeQual.find("pipe") != StringRef::npos) - Kind = RuntimeMD::KernelArg::Pipe; - else Kind = StringSwitch<RuntimeMD::KernelArg::Kind>(BaseTypeName) - .Case("sampler_t", RuntimeMD::KernelArg::Sampler) - .Case("queue_t", RuntimeMD::KernelArg::Queue) - .Cases("image1d_t", "image1d_array_t", "image1d_buffer_t", - "image2d_t" , "image2d_array_t", RuntimeMD::KernelArg::Image) - .Cases("image2d_depth_t", "image2d_array_depth_t", - "image2d_msaa_t", "image2d_array_msaa_t", - "image2d_msaa_depth_t", RuntimeMD::KernelArg::Image) - .Cases("image2d_array_msaa_depth_t", "image3d_t", - RuntimeMD::KernelArg::Image) - .Default(isa<PointerType>(T) ? - (T->getPointerAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ? 
- RuntimeMD::KernelArg::DynamicSharedPointer : - RuntimeMD::KernelArg::GlobalBuffer) : - RuntimeMD::KernelArg::ByValue); - emitRuntimeMetadataForKernelArg(DL, T, - Kind, BaseTypeName, TypeName, ArgName, TypeQual, AccQual); - } - - // Emit hidden kernel arguments for OpenCL kernels. - if (F.getParent()->getNamedMetadata("opencl.ocl.version")) { - auto Int64T = Type::getInt64Ty(F.getContext()); - emitRuntimeMetadataForKernelArg(DL, Int64T, - RuntimeMD::KernelArg::HiddenGlobalOffsetX); - emitRuntimeMetadataForKernelArg(DL, Int64T, - RuntimeMD::KernelArg::HiddenGlobalOffsetY); - emitRuntimeMetadataForKernelArg(DL, Int64T, - RuntimeMD::KernelArg::HiddenGlobalOffsetZ); - if (F.getParent()->getNamedMetadata("llvm.printf.fmts")) { - auto Int8PtrT = Type::getInt8PtrTy(F.getContext(), - RuntimeMD::KernelArg::Global); - emitRuntimeMetadataForKernelArg(DL, Int8PtrT, - RuntimeMD::KernelArg::HiddenPrintfBuffer); - } - } - - // Emit KeyReqdWorkGroupSize, KeyWorkGroupSizeHint, and KeyVecTypeHint. - if (auto RWGS = F.getMetadata("reqd_work_group_size")) { - emitRuntimeMDThreeIntValues(RuntimeMD::KeyReqdWorkGroupSize, - RWGS, 4); - } - - if (auto WGSH = F.getMetadata("work_group_size_hint")) { - emitRuntimeMDThreeIntValues(RuntimeMD::KeyWorkGroupSizeHint, - WGSH, 4); - } - - if (auto VTH = F.getMetadata("vec_type_hint")) { - auto TypeName = getOCLTypeName(cast<ValueAsMetadata>( - VTH->getOperand(0))->getType(), mdconst::extract<ConstantInt>( - VTH->getOperand(1))->getZExtValue()); - emitRuntimeMDStringValue(RuntimeMD::KeyVecTypeHint, TypeName); - } - - // Emit KeyKernelEnd - S.EmitIntValue(RuntimeMD::KeyKernelEnd, 1); -} - -void AMDGPUTargetStreamer::emitRuntimeMetadataAsNoteElement(Module &M) { +void AMDGPUTargetELFStreamer::emitRuntimeMetadata(Module &M) { auto &S = getStreamer(); auto &Context = S.getContext(); @@ -520,17 +223,10 @@ void AMDGPUTargetStreamer::emitRuntimeMetadataAsNoteElement(Module &M) { S.EmitValue(DescSZ, 4); // descz 
S.EmitIntValue(PT_NOTE::NT_AMDGPU_HSA_RUNTIME_METADATA, 4); // type S.EmitBytes(StringRef(PT_NOTE::NoteName, NameSZ)); // name - S.EmitValueToAlignment(4); // padding 0 + S.EmitValueToAlignment(4, 0, 1, 0); // padding 0 S.EmitLabel(DescBegin); - emitRuntimeMetadata(M); // desc + S.EmitBytes(getRuntimeMDYAMLString(M)); // desc S.EmitLabel(DescEnd); - S.EmitValueToAlignment(4); // padding 0 + S.EmitValueToAlignment(4, 0, 1, 0); // padding 0 S.PopSection(); } - -void AMDGPUTargetStreamer::emitRuntimeMetadata(Module &M) { - emitStartOfRuntimeMetadata(M); - for (auto &F : M.functions()) - emitRuntimeMetadata(F); -} - diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h index 1dd54dd8286..1f3a6f865ec 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h @@ -43,35 +43,7 @@ public: virtual void EmitAMDGPUHsaProgramScopeGlobal(StringRef GlobalName) = 0; - /// Emit runtime metadata as a note element. - void emitRuntimeMetadataAsNoteElement(Module &M); - -private: - void emitRuntimeMetadata(Module &M); - void emitStartOfRuntimeMetadata(const Module &M); - - /// Emit runtime metadata for a kernel function. - void emitRuntimeMetadata(const Function &F); - - // Emit runtime metadata for a kernel argument. - void emitRuntimeMetadataForKernelArg(const DataLayout &DL, - Type *T, AMDGPU::RuntimeMD::KernelArg::Kind Kind, - StringRef BaseTypeName = "", StringRef TypeName = "", - StringRef ArgName = "", StringRef TypeQual = "", - StringRef AccQual = ""); - - /// Emit a key and an integer value for runtime metadata. - void emitRuntimeMDIntValue(AMDGPU::RuntimeMD::Key K, - uint64_t V, unsigned Size); - - /// Emit a key and a string value for runtime metadata. - void emitRuntimeMDStringValue(AMDGPU::RuntimeMD::Key K, - StringRef S); - - /// Emit a key and three integer values for runtime metadata. 
- /// The three integer values are obtained from MDNode \p Node; - void emitRuntimeMDThreeIntValues(AMDGPU::RuntimeMD::Key K, MDNode *Node, - unsigned Size); + virtual void emitRuntimeMetadata(Module &M) = 0; }; class AMDGPUTargetAsmStreamer : public AMDGPUTargetStreamer { @@ -92,6 +64,8 @@ public: void EmitAMDGPUHsaModuleScopeGlobal(StringRef GlobalName) override; void EmitAMDGPUHsaProgramScopeGlobal(StringRef GlobalName) override; + + void emitRuntimeMetadata(Module &M) override {} }; class AMDGPUTargetELFStreamer : public AMDGPUTargetStreamer { @@ -116,6 +90,8 @@ public: void EmitAMDGPUHsaModuleScopeGlobal(StringRef GlobalName) override; void EmitAMDGPUHsaProgramScopeGlobal(StringRef GlobalName) override; + + void emitRuntimeMetadata(Module &M) override; }; } diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt b/llvm/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt index c823ee7e008..8a6d00ce69e 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt @@ -6,6 +6,7 @@ add_llvm_library(LLVMAMDGPUDesc AMDGPUMCCodeEmitter.cpp AMDGPUMCTargetDesc.cpp AMDGPUMCAsmInfo.cpp + AMDGPURuntimeMD.cpp AMDGPUTargetStreamer.cpp R600MCCodeEmitter.cpp SIMCCodeEmitter.cpp |