diff options
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp | 3 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp | 9 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h | 14 |
3 files changed, 25 insertions, 1 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp index f2b9fc8477d..7f0d5141f36 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -730,7 +730,8 @@ void AMDGPUAsmPrinter::EmitAmdKernelCodeT(const MachineFunction &MF, header.code_properties |= AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED; // FIXME: Should use getKernArgSize - header.kernarg_segment_byte_size = MFI->getABIArgOffset(); + header.kernarg_segment_byte_size = + STM.getKernArgSegmentSize(MFI->getABIArgOffset()); header.wavefront_sgpr_count = KernelInfo.NumSGPR; header.workitem_vgpr_count = KernelInfo.NumVGPR; header.workitem_private_segment_byte_size = KernelInfo.ScratchSize; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp index 3c4b5e72761..586d0478a5d 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -297,6 +297,15 @@ bool SISubtarget::isVGPRSpillingEnabled(const Function& F) const { return EnableVGPRSpilling || !AMDGPU::isShader(F.getCallingConv()); } +unsigned SISubtarget::getKernArgSegmentSize(unsigned ExplicitArgBytes) const { + unsigned ImplicitBytes = getImplicitArgNumBytes(); + if (ImplicitBytes == 0) + return ExplicitArgBytes; + + unsigned Alignment = getAlignmentForImplicitArgPtr(); + return alignTo(ExplicitArgBytes, Alignment) + ImplicitBytes; +} + unsigned SISubtarget::getOccupancyWithNumSGPRs(unsigned SGPRs) const { if (getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) { if (SGPRs <= 80) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h index 1e52e1c8063..c278cc55a02 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -142,6 +142,10 @@ public: return TargetTriple.getOS() == Triple::Mesa3D; } + bool isOpenCLEnv() const { + return TargetTriple.getEnvironment() == Triple::OpenCL; + } + Generation getGeneration() const { return Gen; } @@ -288,6 +292,14 @@ public: return isAmdHsaOS() ? 8 : 4; } + unsigned getImplicitArgNumBytes() const { + if (isMesa3DOS()) + return 16; + if (isAmdHsaOS() && isOpenCLEnv()) + return 32; + return 0; + } + unsigned getStackAlignment() const { // Scratch is allocated in 256 dword per wave blocks. return 4 * 256 / getWavefrontSize(); @@ -521,6 +533,8 @@ public: return SGPRInitBug; } + unsigned getKernArgSegmentSize(unsigned ExplictArgBytes) const; + /// Return the maximum number of waves per SIMD for kernels using \p SGPRs SGPRs unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const; |