summary | refs | log | tree | commit | diff | stats
path: root/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp')
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp 12
1 files changed, 8 insertions, 4 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index a52b1137203..97fc6493b95 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -414,12 +414,16 @@ bool SISubtarget::isVGPRSpillingEnabled(const Function& F) const {
unsigned SISubtarget::getKernArgSegmentSize(const Function &F,
unsigned ExplicitArgBytes) const {
+ uint64_t TotalSize = ExplicitArgBytes;
unsigned ImplicitBytes = getImplicitArgNumBytes(F);
- if (ImplicitBytes == 0)
- return ExplicitArgBytes;
- unsigned Alignment = getAlignmentForImplicitArgPtr();
- return alignTo(ExplicitArgBytes, Alignment) + ImplicitBytes;
+ if (ImplicitBytes != 0) {
+ unsigned Alignment = getAlignmentForImplicitArgPtr();
+ TotalSize = alignTo(ExplicitArgBytes, Alignment) + ImplicitBytes;
+ }
+
+ // Being able to dereference past the end is useful for emitting scalar loads.
+ return alignTo(TotalSize, 4);
}
unsigned SISubtarget::getOccupancyWithNumSGPRs(unsigned SGPRs) const {
OpenPOWER on IntegriCloud