diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp')
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp | 103 |
1 files changed, 97 insertions, 6 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp index 0cb9cc0ff0c..8e56ec60eff 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -116,6 +116,10 @@ AMDGPUTargetStreamer* AMDGPUAsmPrinter::getTargetStreamer() const { } void AMDGPUAsmPrinter::EmitStartOfAsmFile(Module &M) { + if (IsaInfo::hasCodeObjectV3(getSTI()) && + TM.getTargetTriple().getOS() == Triple::AMDHSA) + return; + if (TM.getTargetTriple().getOS() != Triple::AMDHSA && TM.getTargetTriple().getOS() != Triple::AMDPAL) return; @@ -126,10 +130,6 @@ void AMDGPUAsmPrinter::EmitStartOfAsmFile(Module &M) { if (TM.getTargetTriple().getOS() == Triple::AMDPAL) readPALMetadata(M); - // Deprecated notes are not emitted for code object v3. - if (IsaInfo::hasCodeObjectV3(getSTI()->getFeatureBits())) - return; - // HSA emits NT_AMDGPU_HSA_CODE_OBJECT_VERSION for code objects v2. if (TM.getTargetTriple().getOS() == Triple::AMDHSA) getTargetStreamer()->EmitDirectiveHSACodeObjectVersion(2, 1); @@ -141,6 +141,10 @@ void AMDGPUAsmPrinter::EmitStartOfAsmFile(Module &M) { } void AMDGPUAsmPrinter::EmitEndOfAsmFile(Module &M) { + // TODO: Add metadata to code object v3. + if (IsaInfo::hasCodeObjectV3(getSTI()) && + TM.getTargetTriple().getOS() == Triple::AMDHSA) + return; // Following code requires TargetStreamer to be present. if (!getTargetStreamer()) @@ -186,8 +190,11 @@ bool AMDGPUAsmPrinter::isBlockOnlyReachableByFallthrough( } void AMDGPUAsmPrinter::EmitFunctionBodyStart() { - const AMDGPUMachineFunction *MFI = MF->getInfo<AMDGPUMachineFunction>(); - if (!MFI->isEntryFunction()) + const SIMachineFunctionInfo &MFI = *MF->getInfo<SIMachineFunctionInfo>(); + if (!MFI.isEntryFunction()) + return; + if (IsaInfo::hasCodeObjectV3(getSTI()) && + TM.getTargetTriple().getOS() == Triple::AMDHSA) return; const AMDGPUSubtarget &STM = MF->getSubtarget<AMDGPUSubtarget>(); @@ -205,7 +212,27 @@ void AMDGPUAsmPrinter::EmitFunctionBodyStart() { getHSADebugProps(*MF, CurrentProgramInfo)); } +void AMDGPUAsmPrinter::EmitFunctionBodyEnd() { + const SIMachineFunctionInfo &MFI = *MF->getInfo<SIMachineFunctionInfo>(); + if (!MFI.isEntryFunction()) + return; + if (!IsaInfo::hasCodeObjectV3(getSTI()) || + TM.getTargetTriple().getOS() != Triple::AMDHSA) + return; + + SmallString<128> KernelName; + getNameWithPrefix(KernelName, &MF->getFunction()); + getTargetStreamer()->EmitAmdhsaKernelDescriptor( + KernelName, getAmdhsaKernelDescriptor(*MF, CurrentProgramInfo)); +} + void AMDGPUAsmPrinter::EmitFunctionEntryLabel() { + if (IsaInfo::hasCodeObjectV3(getSTI()) && + TM.getTargetTriple().getOS() == Triple::AMDHSA) { + AsmPrinter::EmitFunctionEntryLabel(); + return; + } + const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>(); const AMDGPUSubtarget &STM = MF->getSubtarget<AMDGPUSubtarget>(); if (MFI->isEntryFunction() && STM.isAmdCodeObjectV2(MF->getFunction())) { @@ -288,6 +315,70 @@ void AMDGPUAsmPrinter::emitCommonFunctionComments( false); } +uint16_t AMDGPUAsmPrinter::getAmdhsaKernelCodeProperties( + const MachineFunction &MF) const { + const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>(); + uint16_t KernelCodeProperties = 0; + + if (MFI.hasPrivateSegmentBuffer()) { + KernelCodeProperties |= + amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER; + } + if (MFI.hasDispatchPtr()) { + KernelCodeProperties |= + amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR; + } + if (MFI.hasQueuePtr()) { + KernelCodeProperties |= + amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR; + } + if (MFI.hasKernargSegmentPtr()) { + KernelCodeProperties |= + amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR; + } + if (MFI.hasDispatchID()) { + KernelCodeProperties |= + amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID; + } + if (MFI.hasFlatScratchInit()) { + KernelCodeProperties |= + amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT; + } + if (MFI.hasGridWorkgroupCountX()) { + KernelCodeProperties |= + amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_X; + } + if (MFI.hasGridWorkgroupCountY()) { + KernelCodeProperties |= + amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Y; + } + if (MFI.hasGridWorkgroupCountZ()) { + KernelCodeProperties |= + amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z; + } + + return KernelCodeProperties; +} + +amdhsa::kernel_descriptor_t AMDGPUAsmPrinter::getAmdhsaKernelDescriptor( + const MachineFunction &MF, + const SIProgramInfo &PI) const { + amdhsa::kernel_descriptor_t KernelDescriptor; + memset(&KernelDescriptor, 0x0, sizeof(KernelDescriptor)); + + assert(isUInt<32>(PI.ScratchSize)); + assert(isUInt<32>(PI.ComputePGMRSrc1)); + assert(isUInt<32>(PI.ComputePGMRSrc2)); + + KernelDescriptor.group_segment_fixed_size = PI.LDSSize; + KernelDescriptor.private_segment_fixed_size = PI.ScratchSize; + KernelDescriptor.compute_pgm_rsrc1 = PI.ComputePGMRSrc1; + KernelDescriptor.compute_pgm_rsrc2 = PI.ComputePGMRSrc2; + KernelDescriptor.kernel_code_properties = getAmdhsaKernelCodeProperties(MF); + + return KernelDescriptor; +} + bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) { CurrentProgramInfo = SIProgramInfo(); |