diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU')
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp | 229 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h | 4 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPURuntimeMetadata.h | 138 |
3 files changed, 371 insertions, 0 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp index e4fbd956d65..cfe6346fb6b 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -39,7 +39,9 @@ #include "llvm/Support/MathExtras.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Target/TargetLoweringObjectFile.h" +#include "AMDGPURuntimeMetadata.h" +using namespace ::AMDGPU; using namespace llvm; // TODO: This should get the default rounding mode from the kernel. We just set @@ -111,6 +113,7 @@ void AMDGPUAsmPrinter::EmitStartOfAsmFile(Module &M) { AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(STI->getFeatureBits()); TS->EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor, ISA.Stepping, "AMD", "AMDGPU"); + emitStartOfRuntimeMetadata(M); } void AMDGPUAsmPrinter::EmitFunctionBodyStart() { @@ -244,6 +247,8 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) { } } + emitRuntimeMetadata(*MF.getFunction()); + return false; } @@ -740,3 +745,227 @@ bool AMDGPUAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, *TM.getSubtargetImpl(*MF->getFunction())->getRegisterInfo()); return false; } + +// Emit a key and an integer value for runtime metadata. +static void emitRuntimeMDIntValue(std::unique_ptr<MCStreamer> &Streamer, + RuntimeMD::Key K, uint64_t V, + unsigned Size) { + Streamer->EmitIntValue(K, 1); + Streamer->EmitIntValue(V, Size); +} + +// Emit a key and a string value for runtime metadata. +static void emitRuntimeMDStringValue(std::unique_ptr<MCStreamer> &Streamer, + RuntimeMD::Key K, StringRef S) { + Streamer->EmitIntValue(K, 1); + Streamer->EmitIntValue(S.size(), 4); + Streamer->EmitBytes(S); +} + +// Emit a key and three integer values for runtime metadata. +// The three integer values are obtained from MDNode \p Node; +static void emitRuntimeMDThreeIntValues(std::unique_ptr<MCStreamer> &Streamer, + RuntimeMD::Key K, MDNode *Node, + unsigned Size) { + Streamer->EmitIntValue(K, 1); + Streamer->EmitIntValue(mdconst::extract<ConstantInt>( + Node->getOperand(0))->getZExtValue(), Size); + Streamer->EmitIntValue(mdconst::extract<ConstantInt>( + Node->getOperand(1))->getZExtValue(), Size); + Streamer->EmitIntValue(mdconst::extract<ConstantInt>( + Node->getOperand(2))->getZExtValue(), Size); +} + +void AMDGPUAsmPrinter::emitStartOfRuntimeMetadata(const Module &M) { + OutStreamer->SwitchSection(getObjFileLowering().getContext() + .getELFSection(RuntimeMD::SectionName, ELF::SHT_PROGBITS, 0)); + + emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyMDVersion, + RuntimeMD::MDVersion << 8 | RuntimeMD::MDRevision, 2); + if (auto MD = M.getNamedMetadata("opencl.ocl.version")) { + emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyLanguage, + RuntimeMD::OpenCL_C, 1); + auto Node = MD->getOperand(0); + unsigned short Major = mdconst::extract<ConstantInt>(Node->getOperand(0)) + ->getZExtValue(); + unsigned short Minor = mdconst::extract<ConstantInt>(Node->getOperand(1)) + ->getZExtValue(); + emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyLanguageVersion, + Major * 100 + Minor * 10, 2); + } +} + +static std::string getOCLTypeName(Type *Ty, bool isSigned) { + if (VectorType* VecTy = dyn_cast<VectorType>(Ty)) { + Type* EleTy = VecTy->getElementType(); + unsigned Size = VecTy->getVectorNumElements(); + return (Twine(getOCLTypeName(EleTy, isSigned)) + Twine(Size)).str(); + } + switch (Ty->getTypeID()) { + case Type::HalfTyID: return "half"; + case Type::FloatTyID: return "float"; + case Type::DoubleTyID: return "double"; + case Type::IntegerTyID: { + if (!isSigned) + return (Twine('u') + Twine(getOCLTypeName(Ty, true))).str(); + auto IntTy = cast<IntegerType>(Ty); + auto BW = IntTy->getIntegerBitWidth(); + switch (BW) { + case 8: + return "char"; + case 16: + return "short"; + case 32: + return "int"; + case 64: + return "long"; + default: + return (Twine('i') + Twine(BW)).str(); + } + } + default: + llvm_unreachable("invalid type"); + } +} + +static RuntimeMD::KernelArg::ValueType getRuntimeMDValueType( + Type *Ty, StringRef TypeName) { + if (auto VT = dyn_cast<VectorType>(Ty)) + return getRuntimeMDValueType(VT->getElementType(), TypeName); + else if (auto PT = dyn_cast<PointerType>(Ty)) + return getRuntimeMDValueType(PT->getElementType(), TypeName); + else if (Ty->isHalfTy()) + return RuntimeMD::KernelArg::F16; + else if (Ty->isFloatTy()) + return RuntimeMD::KernelArg::F32; + else if (Ty->isDoubleTy()) + return RuntimeMD::KernelArg::F64; + else if (IntegerType* intTy = dyn_cast<IntegerType>(Ty)) { + bool Signed = !TypeName.startswith("u"); + switch (intTy->getIntegerBitWidth()) { + case 8: + return Signed ? RuntimeMD::KernelArg::I8 : RuntimeMD::KernelArg::U8; + case 16: + return Signed ? RuntimeMD::KernelArg::I16 : RuntimeMD::KernelArg::U16; + case 32: + return Signed ? RuntimeMD::KernelArg::I32 : RuntimeMD::KernelArg::U32; + case 64: + return Signed ? RuntimeMD::KernelArg::I64 : RuntimeMD::KernelArg::U64; + default: + // Runtime does not recognize other integer types. Report as + // struct type. + return RuntimeMD::KernelArg::Struct; + } + } else + return RuntimeMD::KernelArg::Struct; +} + +void AMDGPUAsmPrinter::emitRuntimeMetadata(const Function &F) { + if (!F.getMetadata("kernel_arg_type")) + return; + + MCContext &Context = getObjFileLowering().getContext(); + OutStreamer->SwitchSection( + Context.getELFSection(RuntimeMD::SectionName, ELF::SHT_PROGBITS, 0)); + OutStreamer->EmitIntValue(RuntimeMD::KeyKernelBegin, 1); + emitRuntimeMDStringValue(OutStreamer, RuntimeMD::KeyKernelName, F.getName()); + + for (auto &Arg:F.args()) { + // Emit KeyArgBegin. + unsigned I = Arg.getArgNo(); + OutStreamer->EmitIntValue(RuntimeMD::KeyArgBegin, 1); + + // Emit KeyArgSize and KeyArgAlign. + auto T = Arg.getType(); + auto DL = F.getParent()->getDataLayout(); + emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyArgSize, + DL.getTypeAllocSize(T), 4); + emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyArgAlign, + DL.getABITypeAlignment(T), 4); + + // Emit KeyArgTypeName. + auto TypeName = dyn_cast<MDString>(F.getMetadata( + "kernel_arg_type")->getOperand(I))->getString(); + emitRuntimeMDStringValue(OutStreamer, RuntimeMD::KeyArgTypeName, TypeName); + + // Emit KeyArgName. + if (auto ArgNameMD = F.getMetadata("kernel_arg_name")) { + auto ArgName = cast<MDString>(ArgNameMD->getOperand( + I))->getString(); + emitRuntimeMDStringValue(OutStreamer, RuntimeMD::KeyArgName, ArgName); + } + + // Emit KeyArgIsVolatile, KeyArgIsRestrict, KeyArgIsConst and KeyArgIsPipe. + auto TypeQual = cast<MDString>(F.getMetadata( + "kernel_arg_type_qual")->getOperand(I))->getString(); + SmallVector<StringRef, 1> SplitQ; + TypeQual.split(SplitQ, " ", -1, false/* drop empty entry*/); + for (auto &I:SplitQ) { + auto Key = StringSwitch<RuntimeMD::Key>(I) + .Case("volatile", RuntimeMD::KeyArgIsVolatile) + .Case("restrict", RuntimeMD::KeyArgIsRestrict) + .Case("const", RuntimeMD::KeyArgIsConst) + .Case("pipe", RuntimeMD::KeyArgIsPipe) + .Default(RuntimeMD::KeyNull); + OutStreamer->EmitIntValue(Key, 1); + } + + // Emit KeyArgTypeKind. + auto BaseTypeName = cast<MDString>( + F.getMetadata("kernel_arg_base_type")->getOperand(I))->getString(); + auto TypeKind = StringSwitch<RuntimeMD::KernelArg::TypeKind>(BaseTypeName) + .Case("sampler_t", RuntimeMD::KernelArg::Sampler) + .Case("queue_t", RuntimeMD::KernelArg::Queue) + .Cases("image1d_t", "image1d_array_t", "image1d_buffer_t", + "image2d_t" , "image2d_array_t", RuntimeMD::KernelArg::Image) + .Cases("image2d_depth_t", "image2d_array_depth_t", + "image2d_msaa_t", "image2d_array_msaa_t", + "image2d_msaa_depth_t", RuntimeMD::KernelArg::Image) + .Cases("image2d_array_msaa_depth_t", "image3d_t", + RuntimeMD::KernelArg::Image) + .Default(isa<PointerType>(T) ? RuntimeMD::KernelArg::Pointer : + RuntimeMD::KernelArg::Value); + emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyArgTypeKind, TypeKind, 1); + + // Emit KeyArgValueType. + emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyArgValueType, + getRuntimeMDValueType(T, BaseTypeName), 2); + + // Emit KeyArgAccQual. + auto AccQual = cast<MDString>(F.getMetadata( + "kernel_arg_access_qual")->getOperand(I))->getString(); + auto AQ = StringSwitch<RuntimeMD::KernelArg::AccessQualifer>(AccQual) + .Case("read_only", RuntimeMD::KernelArg::ReadOnly) + .Case("write_only", RuntimeMD::KernelArg::WriteOnly) + .Case("read_write", RuntimeMD::KernelArg::ReadWrite) + .Default(RuntimeMD::KernelArg::None); + emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyArgAccQual, + AQ, 1); + + // Emit KeyArgAddrQual. + if (isa<PointerType>(T)) + emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyArgAddrQual, + T->getPointerAddressSpace(), 1); + + // Emit KeyArgEnd + OutStreamer->EmitIntValue(RuntimeMD::KeyArgEnd, 1); + } + + // Emit KeyReqdWorkGroupSize, KeyWorkGroupSizeHint, and KeyVecTypeHint. + if (auto RWGS = F.getMetadata("reqd_work_group_size")) + emitRuntimeMDThreeIntValues(OutStreamer, RuntimeMD::KeyReqdWorkGroupSize, + RWGS, 4); + if (auto WGSH = F.getMetadata("work_group_size_hint")) + emitRuntimeMDThreeIntValues(OutStreamer, RuntimeMD::KeyWorkGroupSizeHint, + WGSH, 4); + if (auto VTH = F.getMetadata("vec_type_hint")) { + auto TypeName = getOCLTypeName(cast<ValueAsMetadata>( + VTH->getOperand(0))->getType(), mdconst::extract<ConstantInt>( + VTH->getOperand(1))->getZExtValue()); + emitRuntimeMDStringValue(OutStreamer, RuntimeMD::KeyVecTypeHint, + TypeName); + } + + // Emit KeyKernelEnd + OutStreamer->EmitIntValue(RuntimeMD::KeyKernelEnd, 1); +} diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h index 2d44ffefd80..7b04c539520 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h @@ -127,6 +127,10 @@ public: unsigned AsmVariant, const char *ExtraCode, raw_ostream &O) override; + void emitStartOfRuntimeMetadata(const Module &M); + + void emitRuntimeMetadata(const Function &F); + protected: std::vector<std::string> DisasmLines, HexLines; size_t DisasmLineMaxLen; diff --git a/llvm/lib/Target/AMDGPU/AMDGPURuntimeMetadata.h b/llvm/lib/Target/AMDGPU/AMDGPURuntimeMetadata.h new file mode 100644 index 00000000000..40f63943450 --- /dev/null +++ b/llvm/lib/Target/AMDGPU/AMDGPURuntimeMetadata.h @@ -0,0 +1,138 @@ +//===-- AMDGPURuntimeMetadata.h - AMDGPU Runtime Metadata -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file +/// +/// Enums and structure types used by runtime metadata. +/// +/// Runtime requests certain information (metadata) about kernels to be able +/// to execute the kernels and answer the queries about the kernels. +/// The metadata is represented as a byte stream in an ELF section of a +/// binary (code object). The byte stream consists of key-value pairs. +/// Each key is an 8 bit unsigned integer. Each value can be an integer, +/// a string, or a stream of key-value pairs. There are 3 levels of key-value +/// pair streams. At the beginning of the ELF section is the top level +/// key-value pair stream. A kernel-level key-value pair stream starts after +/// encountering KeyKernelBegin and ends immediately before encountering +/// KeyKernelEnd. A kernel-argument-level key-value pair stream starts +/// after encountering KeyArgBegin and ends immediately before encountering +/// KeyArgEnd. A kernel-level key-value pair stream can only appear in a top +/// level key-value pair stream. A kernel-argument-level key-value pair stream +/// can only appear in a kernel-level key-value pair stream. +/// +/// The format should be kept backward compatible. New enum values and bit +/// fields should be appended at the end. It is suggested to bump up the +/// revision number whenever the format changes and document the change +/// in the revision in this header. +/// +// +//===----------------------------------------------------------------------===// +// +#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPURUNTIMEMETADATA_H +#define LLVM_LIB_TARGET_AMDGPU_AMDGPURUNTIMEMETADATA_H + +#include <stdint.h> + +namespace AMDGPU { + +namespace RuntimeMD { + + // Version and revision of runtime metadata + const unsigned char MDVersion = 1; + const unsigned char MDRevision = 0; + + // ELF section name containing runtime metadata + const char SectionName[] = ".AMDGPU.runtime_metadata"; + + // Enumeration values of keys in runtime metadata. + enum Key { + KeyNull = 0, // Place holder. Ignored when encountered + KeyMDVersion = 1, // Runtime metadata version + KeyLanguage = 2, // Language + KeyLanguageVersion = 3, // Language version + KeyKernelBegin = 4, // Beginning of kernel-level stream + KeyKernelEnd = 5, // End of kernel-level stream + KeyKernelName = 6, // Kernel name + KeyArgBegin = 7, // Beginning of kernel-arg-level stream + KeyArgEnd = 8, // End of kernel-arg-level stream + KeyArgSize = 9, // Kernel arg size + KeyArgAlign = 10, // Kernel arg alignment + KeyArgTypeName = 11, // Kernel type name + KeyArgName = 12, // Kernel name + KeyArgTypeKind = 13, // Kernel argument type kind + KeyArgValueType = 14, // Kernel argument value type + KeyArgAddrQual = 15, // Kernel argument address qualifier + KeyArgAccQual = 16, // Kernel argument access qualifier + KeyArgIsConst = 17, // Kernel argument is const qualified + KeyArgIsRestrict = 18, // Kernel argument is restrict qualified + KeyArgIsVolatile = 19, // Kernel argument is volatile qualified + KeyArgIsPipe = 20, // Kernel argument is pipe qualified + KeyReqdWorkGroupSize = 21, // Required work group size + KeyWorkGroupSizeHint = 22, // Work group size hint + KeyVecTypeHint = 23, // Vector type hint + KeyKernelIndex = 24, // Kernel index for device enqueue + KeySGPRs = 25, // Number of SGPRs + KeyVGPRs = 26, // Number of VGPRs + KeyMinWavesPerSIMD = 27, // Minimum number of waves per SIMD + KeyMaxWavesPerSIMD = 28, // Maximum number of waves per SIMD + KeyFlatWorkGroupSizeLimits = 29, // Flat work group size limits + KeyMaxWorkGroupSize = 30, // Maximum work group size + KeyNoPartialWorkGroups = 31, // No partial work groups + }; + + enum Language : uint8_t { + OpenCL_C = 0, + HCC = 1, + OpenMP = 2, + OpenCL_CPP = 3, +}; + + enum LanguageVersion : uint16_t { + V100 = 100, + V110 = 110, + V120 = 120, + V200 = 200, + V210 = 210, + }; + + namespace KernelArg { + enum TypeKind : uint8_t { + Value = 0, + Pointer = 1, + Image = 2, + Sampler = 3, + Queue = 4, + }; + + enum ValueType : uint16_t { + Struct = 0, + I8 = 1, + U8 = 2, + I16 = 3, + U16 = 4, + F16 = 5, + I32 = 6, + U32 = 7, + F32 = 8, + I64 = 9, + U64 = 10, + F64 = 11, + }; + + enum AccessQualifer : uint8_t { + None = 0, + ReadOnly = 1, + WriteOnly = 2, + ReadWrite = 3, + }; + } // namespace KernelArg +} // namespace RuntimeMD +} // namespace AMDGPU + +#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPURUNTIMEMETADATA_H |