Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp')
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp | 74
1 files changed, 39 insertions, 35 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index 3efc564c855..98b49070fa9 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -209,7 +209,7 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
     FeatureDisable(false),
     InstrInfo(initializeSubtargetDependencies(TT, GPU, FS)),
-    TLInfo(TM, *this),
+    TLInfo(TM, *this),
     FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0) {
   AS = AMDGPU::getAMDGPUAS(TT);
   CallLoweringInfo.reset(new AMDGPUCallLowering(*getTargetLowering()));
@@ -406,6 +406,44 @@ bool AMDGPUSubtarget::makeLIDRangeMetadata(Instruction *I) const {
   return true;
 }
 
+uint64_t AMDGPUSubtarget::getExplicitKernArgSize(const Function &F,
+                                                 unsigned &MaxAlign) const {
+  assert(F.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
+         F.getCallingConv() == CallingConv::SPIR_KERNEL);
+
+  const DataLayout &DL = F.getParent()->getDataLayout();
+  uint64_t ExplicitArgBytes = 0;
+  MaxAlign = 1;
+
+  for (const Argument &Arg : F.args()) {
+    Type *ArgTy = Arg.getType();
+
+    unsigned Align = DL.getABITypeAlignment(ArgTy);
+    uint64_t AllocSize = DL.getTypeAllocSize(ArgTy);
+    ExplicitArgBytes = alignTo(ExplicitArgBytes, Align) + AllocSize;
+    MaxAlign = std::max(MaxAlign, Align);
+  }
+
+  return ExplicitArgBytes;
+}
+
+unsigned AMDGPUSubtarget::getKernArgSegmentSize(const Function &F,
+                                                unsigned &MaxAlign) const {
+  uint64_t ExplicitArgBytes = getExplicitKernArgSize(F, MaxAlign);
+
+  unsigned ExplicitOffset = getExplicitKernelArgOffset(F);
+
+  uint64_t TotalSize = ExplicitOffset + ExplicitArgBytes;
+  unsigned ImplicitBytes = getImplicitArgNumBytes(F);
+  if (ImplicitBytes != 0) {
+    unsigned Alignment = getAlignmentForImplicitArgPtr();
+    TotalSize = alignTo(ExplicitArgBytes, Alignment) + ImplicitBytes;
+  }
+
+  // Being able to dereference past the end is useful for emitting scalar loads.
+  return alignTo(TotalSize, 4);
+}
+
 R600Subtarget::R600Subtarget(const Triple &TT, StringRef GPU, StringRef FS,
                              const TargetMachine &TM) :
   R600GenSubtargetInfo(TT, GPU, FS),
@@ -446,40 +484,6 @@ bool GCNSubtarget::isVGPRSpillingEnabled(const Function& F) const {
   return EnableVGPRSpilling || !AMDGPU::isShader(F.getCallingConv());
 }
 
-uint64_t GCNSubtarget::getExplicitKernArgSize(const Function &F) const {
-  assert(F.getCallingConv() == CallingConv::AMDGPU_KERNEL);
-
-  const DataLayout &DL = F.getParent()->getDataLayout();
-  uint64_t ExplicitArgBytes = 0;
-  for (const Argument &Arg : F.args()) {
-    Type *ArgTy = Arg.getType();
-
-    unsigned Align = DL.getABITypeAlignment(ArgTy);
-    uint64_t AllocSize = DL.getTypeAllocSize(ArgTy);
-    ExplicitArgBytes = alignTo(ExplicitArgBytes, Align) + AllocSize;
-  }
-
-  return ExplicitArgBytes;
-}
-
-unsigned GCNSubtarget::getKernArgSegmentSize(const Function &F,
-                                             int64_t ExplicitArgBytes) const {
-  if (ExplicitArgBytes == -1)
-    ExplicitArgBytes = getExplicitKernArgSize(F);
-
-  unsigned ExplicitOffset = getExplicitKernelArgOffset(F);
-
-  uint64_t TotalSize = ExplicitOffset + ExplicitArgBytes;
-  unsigned ImplicitBytes = getImplicitArgNumBytes(F);
-  if (ImplicitBytes != 0) {
-    unsigned Alignment = getAlignmentForImplicitArgPtr();
-    TotalSize = alignTo(ExplicitArgBytes, Alignment) + ImplicitBytes;
-  }
-
-  // Being able to dereference past the end is useful for emitting scalar loads.
-  return alignTo(TotalSize, 4);
-}
-
 unsigned GCNSubtarget::getOccupancyWithNumSGPRs(unsigned SGPRs) const {
   if (getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
     if (SGPRs <= 80)
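To make the arithmetic in the moved helpers easier to follow, below is a minimal standalone C++ sketch, not LLVM code, that mirrors the logic of the new getExplicitKernArgSize and getKernArgSegmentSize. The ArgInfo pairs, the explicit offset, and the implicit-argument sizes used in main are hypothetical stand-ins for what DataLayout and the subtarget queries would return on a real target.

// Standalone sketch of the kernarg-size arithmetic from the diff above.
// The (size, align) pairs and the ExplicitOffset / ImplicitBytes values
// passed in main are made-up inputs, not values queried from a real target.
#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <vector>

static uint64_t alignTo(uint64_t Value, uint64_t Align) {
  return (Value + Align - 1) / Align * Align;
}

struct ArgInfo {
  uint64_t AllocSize; // stands in for DL.getTypeAllocSize(ArgTy)
  unsigned ABIAlign;  // stands in for DL.getABITypeAlignment(ArgTy)
};

// Mirrors getExplicitKernArgSize: lay out the explicit arguments back to
// back, padding each to its ABI alignment, and report the largest alignment.
static uint64_t explicitKernArgSize(const std::vector<ArgInfo> &Args,
                                    unsigned &MaxAlign) {
  uint64_t Bytes = 0;
  MaxAlign = 1;
  for (const ArgInfo &A : Args) {
    Bytes = alignTo(Bytes, A.ABIAlign) + A.AllocSize;
    MaxAlign = std::max(MaxAlign, A.ABIAlign);
  }
  return Bytes;
}

// Mirrors getKernArgSegmentSize, with the offsets supplied as parameters.
static unsigned kernArgSegmentSize(const std::vector<ArgInfo> &Args,
                                   unsigned ExplicitOffset,
                                   unsigned ImplicitBytes,
                                   unsigned ImplicitAlign) {
  unsigned MaxAlign;
  uint64_t ExplicitBytes = explicitKernArgSize(Args, MaxAlign);
  uint64_t TotalSize = ExplicitOffset + ExplicitBytes;
  if (ImplicitBytes != 0)
    TotalSize = alignTo(ExplicitBytes, ImplicitAlign) + ImplicitBytes;
  // Round up to 4 so scalar loads may dereference slightly past the end.
  return alignTo(TotalSize, 4);
}

int main() {
  // Hypothetical kernel with a 1-byte, a 4-byte, and an 8-byte argument,
  // each at its natural alignment.
  std::vector<ArgInfo> Args = {{1, 1}, {4, 4}, {8, 8}};
  unsigned MaxAlign;
  printf("explicit size = %llu, max align = %u\n",
         (unsigned long long)explicitKernArgSize(Args, MaxAlign), MaxAlign);
  // Explicit offset 0 and no implicit arguments are placeholder values.
  printf("segment size  = %u\n", kernArgSegmentSize(Args, 0, 0, 8));
  return 0;
}

With the three sample arguments, the explicit size comes out to 16 bytes with a maximum alignment of 8, and the segment size remains 16 after the final round-up to 4.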