diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp')
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp | 12 |
1 file changed, 8 insertions, 4 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index a52b1137203..97fc6493b95 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -414,12 +414,16 @@ bool SISubtarget::isVGPRSpillingEnabled(const Function& F) const {
 
 unsigned SISubtarget::getKernArgSegmentSize(const Function &F,
                                             unsigned ExplicitArgBytes) const {
+  uint64_t TotalSize = ExplicitArgBytes;
   unsigned ImplicitBytes = getImplicitArgNumBytes(F);
-  if (ImplicitBytes == 0)
-    return ExplicitArgBytes;
 
-  unsigned Alignment = getAlignmentForImplicitArgPtr();
-  return alignTo(ExplicitArgBytes, Alignment) + ImplicitBytes;
+  if (ImplicitBytes != 0) {
+    unsigned Alignment = getAlignmentForImplicitArgPtr();
+    TotalSize = alignTo(ExplicitArgBytes, Alignment) + ImplicitBytes;
+  }
+
+  // Being able to dereference past the end is useful for emitting scalar loads.
+  return alignTo(TotalSize, 4);
 }
 
 unsigned SISubtarget::getOccupancyWithNumSGPRs(unsigned SGPRs) const {