diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU')
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h | 1 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp | 12 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp | 5 |
3 files changed, 13 insertions, 5 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h index 6f50fca8831..bcc0e77a545 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h @@ -20,6 +20,7 @@ class AMDGPUMachineFunction : public MachineFunctionInfo { /// local memory space. SmallDenseMap<const GlobalValue *, unsigned, 4> LocalMemoryObjects; +protected: uint64_t KernArgSize; unsigned MaxKernArgAlign; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp index a52b1137203..97fc6493b95 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -414,12 +414,16 @@ bool SISubtarget::isVGPRSpillingEnabled(const Function& F) const { unsigned SISubtarget::getKernArgSegmentSize(const Function &F, unsigned ExplicitArgBytes) const { + uint64_t TotalSize = ExplicitArgBytes; unsigned ImplicitBytes = getImplicitArgNumBytes(F); - if (ImplicitBytes == 0) - return ExplicitArgBytes; - unsigned Alignment = getAlignmentForImplicitArgPtr(); - return alignTo(ExplicitArgBytes, Alignment) + ImplicitBytes; + if (ImplicitBytes != 0) { + unsigned Alignment = getAlignmentForImplicitArgPtr(); + TotalSize = alignTo(ExplicitArgBytes, Alignment) + ImplicitBytes; + } + + // Being able to dereference past the end is useful for emitting scalar loads. + return alignTo(TotalSize, 4); } unsigned SISubtarget::getOccupancyWithNumSGPRs(unsigned SGPRs) const { diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp index 495a8534865..61b6cb33fd1 100644 --- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp @@ -71,8 +71,11 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) if (F.hasFnAttribute("amdgpu-implicitarg-ptr")) ImplicitArgPtr = true; } else { - if (F.hasFnAttribute("amdgpu-implicitarg-ptr")) + if (F.hasFnAttribute("amdgpu-implicitarg-ptr")) { KernargSegmentPtr = true; + assert(MaxKernArgAlign == 0); + MaxKernArgAlign = ST.getAlignmentForImplicitArgPtr(); + } } CallingConv::ID CC = F.getCallingConv(); |