summary | refs | log | tree | commit | diff | stats
path: root/llvm/lib/Target
diff options
context:
space:
mode:
author Tom Stellard <thomas.stellard@amd.com> 2016-09-23 01:33:26 +0000
committer Tom Stellard <thomas.stellard@amd.com> 2016-09-23 01:33:26 +0000
commit e88bbc34c6c13b1ac1a76183afa04b327fc1abc0 (patch)
tree 9e5f0b123f2175b363a8fea9e3ea8231184b5b21 /llvm/lib/Target
parent 87a598e19f506885caf08f8d8763a753ebf758d2 (diff)
download bcm5719-llvm-e88bbc34c6c13b1ac1a76183afa04b327fc1abc0.tar.gz
bcm5719-llvm-e88bbc34c6c13b1ac1a76183afa04b327fc1abc0.zip
AMDGPU/SI: Include implicit arguments in kernarg_segment_byte_size
Reviewers: arsenm
Subscribers: arsenm, kzhuravl, wdng, nhaehnle, yaxunl, llvm-commits, tony-tye
Differential Revision: https://reviews.llvm.org/D24835
llvm-svn: 282223
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp 3
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp 9
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h 14
3 files changed, 25 insertions, 1 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index f2b9fc8477d..7f0d5141f36 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -730,7 +730,8 @@ void AMDGPUAsmPrinter::EmitAmdKernelCodeT(const MachineFunction &MF,
header.code_properties |= AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED;
// FIXME: Should use getKernArgSize
- header.kernarg_segment_byte_size = MFI->getABIArgOffset();
+ header.kernarg_segment_byte_size =
+ STM.getKernArgSegmentSize(MFI->getABIArgOffset());
header.wavefront_sgpr_count = KernelInfo.NumSGPR;
header.workitem_vgpr_count = KernelInfo.NumVGPR;
header.workitem_private_segment_byte_size = KernelInfo.ScratchSize;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index 3c4b5e72761..586d0478a5d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -297,6 +297,15 @@ bool SISubtarget::isVGPRSpillingEnabled(const Function& F) const {
return EnableVGPRSpilling || !AMDGPU::isShader(F.getCallingConv());
}
+/// Compute the kernarg segment size for a kernel whose explicit arguments
+/// occupy \p ExplicitArgBytes bytes.
+///
+/// When getImplicitArgNumBytes() is non-zero, the explicit size is aligned to
+/// getAlignmentForImplicitArgPtr() and the implicit bytes are added on top.
+/// Otherwise \p ExplicitArgBytes is returned unchanged.
+unsigned SISubtarget::getKernArgSegmentSize(unsigned ExplicitArgBytes) const {
+  const unsigned ImplicitArgBytes = getImplicitArgNumBytes();
+  if (ImplicitArgBytes != 0) {
+    const unsigned ImplicitArgAlign = getAlignmentForImplicitArgPtr();
+    return alignTo(ExplicitArgBytes, ImplicitArgAlign) + ImplicitArgBytes;
+  }
+
+  // Nothing is appended, so no alignment padding is introduced either.
+  return ExplicitArgBytes;
+}
+
unsigned SISubtarget::getOccupancyWithNumSGPRs(unsigned SGPRs) const {
if (getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) {
if (SGPRs <= 80)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
index 1e52e1c8063..c278cc55a02 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -142,6 +142,10 @@ public:
return TargetTriple.getOS() == Triple::Mesa3D;
}
+  /// \returns true when the environment component of the target triple is
+  /// Triple::OpenCL.
+  bool isOpenCLEnv() const {
+    const bool EnvIsOpenCL = TargetTriple.getEnvironment() == Triple::OpenCL;
+    return EnvIsOpenCL;
+  }
+
/// \returns the value stored in the Gen member.
Generation getGeneration() const {
return Gen;
}
@@ -288,6 +292,14 @@ public:
return isAmdHsaOS() ? 8 : 4;
}
+  /// \returns the number of implicit argument bytes: 16 when isMesa3DOS()
+  /// holds, 32 when both isAmdHsaOS() and isOpenCLEnv() hold, and 0 in every
+  /// other case.
+  unsigned getImplicitArgNumBytes() const {
+    unsigned NumBytes = 0;
+    // The Mesa3D check takes precedence over the HSA + OpenCL check.
+    if (isMesa3DOS())
+      NumBytes = 16;
+    else if (isAmdHsaOS() && isOpenCLEnv())
+      NumBytes = 32;
+    return NumBytes;
+  }
+
unsigned getStackAlignment() const {
// Scratch is allocated in 256 dword per wave blocks.
return 4 * 256 / getWavefrontSize();
@@ -521,6 +533,8 @@ public:
return SGPRInitBug;
}
+  /// \returns the kernarg segment size in bytes for a kernel with
+  /// \p ExplicitArgBytes bytes of explicit arguments, including any implicit
+  /// argument bytes reported by getImplicitArgNumBytes().
+  unsigned getKernArgSegmentSize(unsigned ExplicitArgBytes) const;
+
/// \returns the maximum number of waves per SIMD for kernels using \p SGPRs
/// SGPRs.
unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const;
OpenPOWER on IntegriCloud