diff options
author | Tom Stellard <thomas.stellard@amd.com> | 2016-09-23 01:33:26 +0000 |
---|---|---|
committer | Tom Stellard <thomas.stellard@amd.com> | 2016-09-23 01:33:26 +0000 |
commit | e88bbc34c6c13b1ac1a76183afa04b327fc1abc0 (patch) | |
tree | 9e5f0b123f2175b363a8fea9e3ea8231184b5b21 /llvm/lib/Target | |
parent | 87a598e19f506885caf08f8d8763a753ebf758d2 (diff) | |
download | bcm5719-llvm-e88bbc34c6c13b1ac1a76183afa04b327fc1abc0.tar.gz bcm5719-llvm-e88bbc34c6c13b1ac1a76183afa04b327fc1abc0.zip |
AMDGPU/SI: Include implicit arguments in kernarg_segment_byte_size
Reviewers: arsenm
Subscribers: arsenm, kzhuravl, wdng, nhaehnle, yaxunl, llvm-commits, tony-tye
Differential Revision: https://reviews.llvm.org/D24835
llvm-svn: 282223
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp | 3 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp | 9 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h | 14 |
3 files changed, 25 insertions, 1 deletions
```diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index f2b9fc8477d..7f0d5141f36 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -730,7 +730,8 @@ void AMDGPUAsmPrinter::EmitAmdKernelCodeT(const MachineFunction &MF,
     header.code_properties |= AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED;
 
   // FIXME: Should use getKernArgSize
-  header.kernarg_segment_byte_size = MFI->getABIArgOffset();
+  header.kernarg_segment_byte_size =
+      STM.getKernArgSegmentSize(MFI->getABIArgOffset());
   header.wavefront_sgpr_count = KernelInfo.NumSGPR;
   header.workitem_vgpr_count = KernelInfo.NumVGPR;
   header.workitem_private_segment_byte_size = KernelInfo.ScratchSize;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index 3c4b5e72761..586d0478a5d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -297,6 +297,15 @@ bool SISubtarget::isVGPRSpillingEnabled(const Function& F) const {
   return EnableVGPRSpilling || !AMDGPU::isShader(F.getCallingConv());
 }
 
+unsigned SISubtarget::getKernArgSegmentSize(unsigned ExplicitArgBytes) const {
+  unsigned ImplicitBytes = getImplicitArgNumBytes();
+  if (ImplicitBytes == 0)
+    return ExplicitArgBytes;
+
+  unsigned Alignment = getAlignmentForImplicitArgPtr();
+  return alignTo(ExplicitArgBytes, Alignment) + ImplicitBytes;
+}
+
 unsigned SISubtarget::getOccupancyWithNumSGPRs(unsigned SGPRs) const {
   if (getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) {
     if (SGPRs <= 80)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
index 1e52e1c8063..c278cc55a02 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -142,6 +142,10 @@ public:
     return TargetTriple.getOS() == Triple::Mesa3D;
   }
 
+  bool isOpenCLEnv() const {
+    return TargetTriple.getEnvironment() == Triple::OpenCL;
+  }
+
   Generation getGeneration() const {
     return Gen;
   }
@@ -288,6 +292,14 @@ public:
     return isAmdHsaOS() ? 8 : 4;
   }
 
+  unsigned getImplicitArgNumBytes() const {
+    if (isMesa3DOS())
+      return 16;
+    if (isAmdHsaOS() && isOpenCLEnv())
+      return 32;
+    return 0;
+  }
+
   unsigned getStackAlignment() const {
     // Scratch is allocated in 256 dword per wave blocks.
     return 4 * 256 / getWavefrontSize();
@@ -521,6 +533,8 @@ public:
     return SGPRInitBug;
   }
 
+  unsigned getKernArgSegmentSize(unsigned ExplictArgBytes) const;
+
   /// Return the maximum number of waves per SIMD for kernels using \p SGPRs SGPRs
   unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const;
 
```