diff options
| author | Yaxun Liu <Yaxun.Liu@amd.com> | 2016-09-07 17:44:00 +0000 |
|---|---|---|
| committer | Yaxun Liu <Yaxun.Liu@amd.com> | 2016-09-07 17:44:00 +0000 |
| commit | 638914009a8c787582b7a0e0b2234325377e311c (patch) | |
| tree | 8689f408a6995693035aa8fa14c5de1074f8f647 /llvm/lib/Target/AMDGPU | |
| parent | 2fe1cc482beb4a38df7b8ce221b05bf14128fde2 (diff) | |
| download | bcm5719-llvm-638914009a8c787582b7a0e0b2234325377e311c.tar.gz bcm5719-llvm-638914009a8c787582b7a0e0b2234325377e311c.zip | |
AMDGPU: Add hidden kernel arguments to runtime metadata
OpenCL kernels have hidden kernel arguments for global offset and printf buffer. For consistency, these hidden arguments should be included in the runtime metadata. Also updated kernel argument kind metadata.
Differential Revision: https://reviews.llvm.org/D23424
llvm-svn: 280829
Diffstat (limited to 'llvm/lib/Target/AMDGPU')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp | 201 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPURuntimeMetadata.h | 39 |
2 files changed, 157 insertions, 83 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp index 050a22d0cc5..1319f7ed2bd 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -824,6 +824,15 @@ void AMDGPUAsmPrinter::emitStartOfRuntimeMetadata(const Module &M) { } } } + + if (auto MD = M.getNamedMetadata("llvm.printf.fmts")) { + for (unsigned I = 0; I < MD->getNumOperands(); ++I) { + auto Node = MD->getOperand(I); + if (Node->getNumOperands() > 0) + emitRuntimeMDStringValue(*OutStreamer, RuntimeMD::KeyPrintfInfo, + cast<MDString>(Node->getOperand(0))->getString()); + } + } } static std::string getOCLTypeName(Type *Ty, bool Signed) { @@ -896,6 +905,93 @@ static RuntimeMD::KernelArg::ValueType getRuntimeMDValueType( } } +static RuntimeMD::KernelArg::AddressSpaceQualifer getRuntimeAddrSpace( + AMDGPUAS::AddressSpaces A) { + switch (A) { + case AMDGPUAS::GLOBAL_ADDRESS: + return RuntimeMD::KernelArg::Global; + case AMDGPUAS::CONSTANT_ADDRESS: + return RuntimeMD::KernelArg::Constant; + case AMDGPUAS::LOCAL_ADDRESS: + return RuntimeMD::KernelArg::Local; + case AMDGPUAS::FLAT_ADDRESS: + return RuntimeMD::KernelArg::Generic; + case AMDGPUAS::REGION_ADDRESS: + return RuntimeMD::KernelArg::Region; + default: + return RuntimeMD::KernelArg::Private; + } +} + +static void emitRuntimeMetadataForKernelArg(const DataLayout &DL, + MCStreamer &OutStreamer, Type *T, + RuntimeMD::KernelArg::Kind Kind, + StringRef BaseTypeName = "", StringRef TypeName = "", + StringRef ArgName = "", StringRef TypeQual = "", StringRef AccQual = "") { + // Emit KeyArgBegin. + OutStreamer.EmitIntValue(RuntimeMD::KeyArgBegin, 1); + + // Emit KeyArgSize and KeyArgAlign. 
+ emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyArgSize, + DL.getTypeAllocSize(T), 4); + emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyArgAlign, + DL.getABITypeAlignment(T), 4); + if (auto PT = dyn_cast<PointerType>(T)) { + auto ET = PT->getElementType(); + if (PT->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && ET->isSized()) + emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyArgPointeeAlign, + DL.getABITypeAlignment(ET), 4); + } + + // Emit KeyArgTypeName. + if (!TypeName.empty()) + emitRuntimeMDStringValue(OutStreamer, RuntimeMD::KeyArgTypeName, TypeName); + + // Emit KeyArgName. + if (!ArgName.empty()) + emitRuntimeMDStringValue(OutStreamer, RuntimeMD::KeyArgName, ArgName); + + // Emit KeyArgIsVolatile, KeyArgIsRestrict, KeyArgIsConst and KeyArgIsPipe. + SmallVector<StringRef, 1> SplitQ; + TypeQual.split(SplitQ, " ", -1, false /* Drop empty entry */); + + for (StringRef KeyName : SplitQ) { + auto Key = StringSwitch<RuntimeMD::Key>(KeyName) + .Case("volatile", RuntimeMD::KeyArgIsVolatile) + .Case("restrict", RuntimeMD::KeyArgIsRestrict) + .Case("const", RuntimeMD::KeyArgIsConst) + .Case("pipe", RuntimeMD::KeyArgIsPipe) + .Default(RuntimeMD::KeyNull); + OutStreamer.EmitIntValue(Key, 1); + } + + // Emit KeyArgKind. + emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyArgKind, Kind, 1); + + // Emit KeyArgValueType. + emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyArgValueType, + getRuntimeMDValueType(T, BaseTypeName), 2); + + // Emit KeyArgAccQual. + if (!AccQual.empty()) { + auto AQ = StringSwitch<RuntimeMD::KernelArg::AccessQualifer>(AccQual) + .Case("read_only", RuntimeMD::KernelArg::ReadOnly) + .Case("write_only", RuntimeMD::KernelArg::WriteOnly) + .Case("read_write", RuntimeMD::KernelArg::ReadWrite) + .Default(RuntimeMD::KernelArg::None); + emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyArgAccQual, AQ, 1); + } + + // Emit KeyArgAddrQual. 
+ if (auto *PT = dyn_cast<PointerType>(T)) + emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyArgAddrQual, + getRuntimeAddrSpace(static_cast<AMDGPUAS::AddressSpaces>( + PT->getAddressSpace())), 1); + + // Emit KeyArgEnd + OutStreamer.EmitIntValue(RuntimeMD::KeyArgEnd, 1); +} + void AMDGPUAsmPrinter::emitRuntimeMetadata(const Function &F) { if (!F.getMetadata("kernel_arg_type")) return; @@ -906,56 +1002,25 @@ void AMDGPUAsmPrinter::emitRuntimeMetadata(const Function &F) { OutStreamer->EmitIntValue(RuntimeMD::KeyKernelBegin, 1); emitRuntimeMDStringValue(*OutStreamer, RuntimeMD::KeyKernelName, F.getName()); + const DataLayout &DL = F.getParent()->getDataLayout(); for (auto &Arg : F.args()) { - // Emit KeyArgBegin. unsigned I = Arg.getArgNo(); - OutStreamer->EmitIntValue(RuntimeMD::KeyArgBegin, 1); - - // Emit KeyArgSize, KeyArgAlign and KeyArgPointeeAlign. Type *T = Arg.getType(); - const DataLayout &DL = F.getParent()->getDataLayout(); - emitRuntimeMDIntValue(*OutStreamer, RuntimeMD::KeyArgSize, - DL.getTypeAllocSize(T), 4); - emitRuntimeMDIntValue(*OutStreamer, RuntimeMD::KeyArgAlign, - DL.getABITypeAlignment(T), 4); - if (auto PT = dyn_cast<PointerType>(T)) { - auto ET = PT->getElementType(); - if (ET->isSized()) - emitRuntimeMDIntValue(*OutStreamer, RuntimeMD::KeyArgPointeeAlign, - DL.getABITypeAlignment(ET), 4); - } - - // Emit KeyArgTypeName. auto TypeName = dyn_cast<MDString>(F.getMetadata( - "kernel_arg_type")->getOperand(I))->getString(); - emitRuntimeMDStringValue(*OutStreamer, RuntimeMD::KeyArgTypeName, TypeName); - - // Emit KeyArgName. - if (auto ArgNameMD = F.getMetadata("kernel_arg_name")) { - auto ArgName = cast<MDString>(ArgNameMD->getOperand(I))->getString(); - emitRuntimeMDStringValue(*OutStreamer, RuntimeMD::KeyArgName, ArgName); - } - - // Emit KeyArgIsVolatile, KeyArgIsRestrict, KeyArgIsConst and KeyArgIsPipe. 
+ "kernel_arg_type")->getOperand(I))->getString(); + auto BaseTypeName = cast<MDString>(F.getMetadata( + "kernel_arg_base_type")->getOperand(I))->getString(); + StringRef ArgName; + if (auto ArgNameMD = F.getMetadata("kernel_arg_name")) + ArgName = cast<MDString>(ArgNameMD->getOperand(I))->getString(); auto TypeQual = cast<MDString>(F.getMetadata( - "kernel_arg_type_qual")->getOperand(I))->getString(); - SmallVector<StringRef, 1> SplitQ; - TypeQual.split(SplitQ, " ", -1, false /* Drop empty entry */); - - for (StringRef KeyName : SplitQ) { - auto Key = StringSwitch<RuntimeMD::Key>(KeyName) - .Case("volatile", RuntimeMD::KeyArgIsVolatile) - .Case("restrict", RuntimeMD::KeyArgIsRestrict) - .Case("const", RuntimeMD::KeyArgIsConst) - .Case("pipe", RuntimeMD::KeyArgIsPipe) - .Default(RuntimeMD::KeyNull); - OutStreamer->EmitIntValue(Key, 1); - } - - // Emit KeyArgTypeKind. - auto BaseTypeName = cast<MDString>( - F.getMetadata("kernel_arg_base_type")->getOperand(I))->getString(); - auto TypeKind = StringSwitch<RuntimeMD::KernelArg::TypeKind>(BaseTypeName) + "kernel_arg_type_qual")->getOperand(I))->getString(); + auto AccQual = cast<MDString>(F.getMetadata( + "kernel_arg_access_qual")->getOperand(I))->getString(); + RuntimeMD::KernelArg::Kind Kind; + if (TypeQual.find("pipe") != StringRef::npos) + Kind = RuntimeMD::KernelArg::Pipe; + else Kind = StringSwitch<RuntimeMD::KernelArg::Kind>(BaseTypeName) .Case("sampler_t", RuntimeMD::KernelArg::Sampler) .Case("queue_t", RuntimeMD::KernelArg::Queue) .Cases("image1d_t", "image1d_array_t", "image1d_buffer_t", @@ -965,32 +1030,30 @@ void AMDGPUAsmPrinter::emitRuntimeMetadata(const Function &F) { "image2d_msaa_depth_t", RuntimeMD::KernelArg::Image) .Cases("image2d_array_msaa_depth_t", "image3d_t", RuntimeMD::KernelArg::Image) - .Default(isa<PointerType>(T) ? RuntimeMD::KernelArg::Pointer : - RuntimeMD::KernelArg::Value); - emitRuntimeMDIntValue(*OutStreamer, RuntimeMD::KeyArgTypeKind, TypeKind, 1); - - // Emit KeyArgValueType. 
- emitRuntimeMDIntValue(*OutStreamer, RuntimeMD::KeyArgValueType, - getRuntimeMDValueType(T, BaseTypeName), 2); - - // Emit KeyArgAccQual. - auto AccQual = cast<MDString>(F.getMetadata( - "kernel_arg_access_qual")->getOperand(I))->getString(); - auto AQ = StringSwitch<RuntimeMD::KernelArg::AccessQualifer>(AccQual) - .Case("read_only", RuntimeMD::KernelArg::ReadOnly) - .Case("write_only", RuntimeMD::KernelArg::WriteOnly) - .Case("read_write", RuntimeMD::KernelArg::ReadWrite) - .Default(RuntimeMD::KernelArg::None); - emitRuntimeMDIntValue(*OutStreamer, RuntimeMD::KeyArgAccQual, AQ, 1); + .Default(isa<PointerType>(T) ? + (T->getPointerAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ? + RuntimeMD::KernelArg::DynamicSharedPointer : + RuntimeMD::KernelArg::GlobalBuffer) : + RuntimeMD::KernelArg::ByValue); + emitRuntimeMetadataForKernelArg(DL, *OutStreamer, T, + Kind, BaseTypeName, TypeName, ArgName, TypeQual, AccQual); + } - // Emit KeyArgAddrQual. - if (auto *PT = dyn_cast<PointerType>(T)) { - emitRuntimeMDIntValue(*OutStreamer, RuntimeMD::KeyArgAddrQual, - PT->getAddressSpace(), 1); + // Emit hidden kernel arguments for OpenCL kernels. + if (F.getParent()->getNamedMetadata("opencl.ocl.version")) { + auto Int64T = Type::getInt64Ty(F.getContext()); + emitRuntimeMetadataForKernelArg(DL, *OutStreamer, Int64T, + RuntimeMD::KernelArg::HiddenGlobalOffsetX); + emitRuntimeMetadataForKernelArg(DL, *OutStreamer, Int64T, + RuntimeMD::KernelArg::HiddenGlobalOffsetY); + emitRuntimeMetadataForKernelArg(DL, *OutStreamer, Int64T, + RuntimeMD::KernelArg::HiddenGlobalOffsetZ); + if (auto MD = F.getParent()->getNamedMetadata("llvm.printf.fmts")) { + auto Int8PtrT = Type::getInt8PtrTy(F.getContext(), + RuntimeMD::KernelArg::Global); + emitRuntimeMetadataForKernelArg(DL, *OutStreamer, Int8PtrT, + RuntimeMD::KernelArg::HiddenPrintfBuffer); } - - // Emit KeyArgEnd - OutStreamer->EmitIntValue(RuntimeMD::KeyArgEnd, 1); } // Emit KeyReqdWorkGroupSize, KeyWorkGroupSizeHint, and KeyVecTypeHint. 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURuntimeMetadata.h b/llvm/lib/Target/AMDGPU/AMDGPURuntimeMetadata.h index 9050b442b3b..60a86427418 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURuntimeMetadata.h +++ b/llvm/lib/Target/AMDGPU/AMDGPURuntimeMetadata.h @@ -65,7 +65,7 @@ namespace RuntimeMD { KeyArgAlign = 10, // Kernel arg alignment KeyArgTypeName = 11, // Kernel type name KeyArgName = 12, // Kernel name - KeyArgTypeKind = 13, // Kernel argument type kind + KeyArgKind = 13, // Kernel argument kind KeyArgValueType = 14, // Kernel argument value type KeyArgAddrQual = 15, // Kernel argument address qualifier KeyArgAccQual = 16, // Kernel argument access qualifier @@ -77,13 +77,13 @@ namespace RuntimeMD { KeyWorkGroupSizeHint = 22, // Work group size hint KeyVecTypeHint = 23, // Vector type hint KeyKernelIndex = 24, // Kernel index for device enqueue - KeySGPRs = 25, // Number of SGPRs - KeyVGPRs = 26, // Number of VGPRs - KeyMinWavesPerSIMD = 27, // Minimum number of waves per SIMD - KeyMaxWavesPerSIMD = 28, // Maximum number of waves per SIMD - KeyFlatWorkGroupSizeLimits = 29, // Flat work group size limits - KeyMaxWorkGroupSize = 30, // Maximum work group size - KeyNoPartialWorkGroups = 31, // No partial work groups + KeyMinWavesPerSIMD = 25, // Minimum number of waves per SIMD + KeyMaxWavesPerSIMD = 26, // Maximum number of waves per SIMD + KeyFlatWorkGroupSizeLimits = 27, // Flat work group size limits + KeyMaxWorkGroupSize = 28, // Maximum work group size + KeyNoPartialWorkGroups = 29, // No partial work groups + KeyPrintfInfo = 30, // Prinf function call information + KeyArgActualAcc = 31, // The actual kernel argument access qualifier KeyArgPointeeAlign = 32, // Alignment of pointee type }; @@ -103,12 +103,21 @@ namespace RuntimeMD { }; namespace KernelArg { - enum TypeKind : uint8_t { - Value = 0, - Pointer = 1, - Image = 2, - Sampler = 3, - Queue = 4, + enum Kind : uint8_t { + ByValue = 0, + GlobalBuffer = 1, + DynamicSharedPointer = 2, + Sampler = 3, + Image 
= 4, + Pipe = 5, + Queue = 6, + HiddenGlobalOffsetX = 7, + HiddenGlobalOffsetY = 8, + HiddenGlobalOffsetZ = 9, + HiddenNone = 10, + HiddenPrintfBuffer = 11, + HiddenDefaultQueue = 12, + HiddenCompletionAction = 13, }; enum ValueType : uint16_t { @@ -138,6 +147,8 @@ namespace RuntimeMD { Global = 1, Constant = 2, Local = 3, + Generic = 4, + Region = 5, }; } // namespace KernelArg } // namespace RuntimeMD |

