summary | refs | log | tree | commit | diff | stats
path: root/llvm/lib/Target/AMDGPU
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/AMDGPU')
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h 1
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp 12
-rw-r--r-- llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp 5
3 files changed, 13 insertions, 5 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h
index 6f50fca8831..bcc0e77a545 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h
@@ -20,6 +20,7 @@ class AMDGPUMachineFunction : public MachineFunctionInfo {
/// local memory space.
SmallDenseMap<const GlobalValue *, unsigned, 4> LocalMemoryObjects;
+protected:
uint64_t KernArgSize;
unsigned MaxKernArgAlign;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index a52b1137203..97fc6493b95 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -414,12 +414,16 @@ bool SISubtarget::isVGPRSpillingEnabled(const Function& F) const {
unsigned SISubtarget::getKernArgSegmentSize(const Function &F,
unsigned ExplicitArgBytes) const {
+ uint64_t TotalSize = ExplicitArgBytes;
unsigned ImplicitBytes = getImplicitArgNumBytes(F);
- if (ImplicitBytes == 0)
- return ExplicitArgBytes;
- unsigned Alignment = getAlignmentForImplicitArgPtr();
- return alignTo(ExplicitArgBytes, Alignment) + ImplicitBytes;
+ if (ImplicitBytes != 0) {
+ unsigned Alignment = getAlignmentForImplicitArgPtr();
+ TotalSize = alignTo(ExplicitArgBytes, Alignment) + ImplicitBytes;
+ }
+
+ // Being able to dereference past the end is useful for emitting scalar loads.
+ return alignTo(TotalSize, 4);
}
unsigned SISubtarget::getOccupancyWithNumSGPRs(unsigned SGPRs) const {
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index 495a8534865..61b6cb33fd1 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -71,8 +71,11 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
if (F.hasFnAttribute("amdgpu-implicitarg-ptr"))
ImplicitArgPtr = true;
} else {
- if (F.hasFnAttribute("amdgpu-implicitarg-ptr"))
+ if (F.hasFnAttribute("amdgpu-implicitarg-ptr")) {
KernargSegmentPtr = true;
+ assert(MaxKernArgAlign == 0);
+ MaxKernArgAlign = ST.getAlignmentForImplicitArgPtr();
+ }
}
CallingConv::ID CC = F.getCallingConv();
OpenPOWER on IntegriCloud