diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp')
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp | 26 |
1 files changed, 23 insertions, 3 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp index be775a1ae6b..9ad16e0b8a6 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -451,11 +451,31 @@ bool SISubtarget::isVGPRSpillingEnabled(const Function& F) const { return EnableVGPRSpilling || !AMDGPU::isShader(F.getCallingConv()); } +uint64_t SISubtarget::getExplicitKernArgSize(const Function &F) const { + assert(F.getCallingConv() == CallingConv::AMDGPU_KERNEL); + + const DataLayout &DL = F.getParent()->getDataLayout(); + uint64_t ExplicitArgBytes = 0; + for (const Argument &Arg : F.args()) { + Type *ArgTy = Arg.getType(); + + unsigned Align = DL.getABITypeAlignment(ArgTy); + uint64_t AllocSize = DL.getTypeAllocSize(ArgTy); + ExplicitArgBytes = alignTo(ExplicitArgBytes, Align) + AllocSize; + } + + return ExplicitArgBytes; +} + unsigned SISubtarget::getKernArgSegmentSize(const Function &F, - unsigned ExplicitArgBytes) const { - uint64_t TotalSize = ExplicitArgBytes; - unsigned ImplicitBytes = getImplicitArgNumBytes(F); + int64_t ExplicitArgBytes) const { + if (ExplicitArgBytes == -1) + ExplicitArgBytes = getExplicitKernArgSize(F); + unsigned ExplicitOffset = getExplicitKernelArgOffset(F); + + uint64_t TotalSize = ExplicitOffset + ExplicitArgBytes; + unsigned ImplicitBytes = getImplicitArgNumBytes(F); if (ImplicitBytes != 0) { unsigned Alignment = getAlignmentForImplicitArgPtr(); TotalSize = alignTo(ExplicitArgBytes, Alignment) + ImplicitBytes; |