summary | refs | log | tree | commit | diff | stats
path: root/llvm/lib/Target/AMDGPU
diff options
context:
space:
mode:
author: Matt Arsenault <Matthew.Arsenault@amd.com> 2017-07-18 16:44:58 +0000
committer: Matt Arsenault <Matthew.Arsenault@amd.com> 2017-07-18 16:44:58 +0000
commit: 254ad3de5cd433df78d44503f48c52102e17db7d (patch)
tree: 3a68a0d2906663cfc48803fc963e42ae990125ec /llvm/lib/Target/AMDGPU
parent: 1cc47f8413b38f075bbc7a1e0e38ead00700efdf (diff)
download: bcm5719-llvm-254ad3de5cd433df78d44503f48c52102e17db7d.tar.gz
download: bcm5719-llvm-254ad3de5cd433df78d44503f48c52102e17db7d.zip
AMDGPU: Annotate necessity of flat-scratch-init
As an approximation of the existing handling to avoid regressions. Fixes using too many registers with calls on subtargets with the SGPR allocation bug. llvm-svn: 308326
Diffstat (limited to 'llvm/lib/Target/AMDGPU')
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp  19
-rw-r--r--  llvm/lib/Target/AMDGPU/SIFrameLowering.cpp  5
-rw-r--r--  llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp  15
3 files changed, 28 insertions, 11 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
index 66249276cd8..c68e5861ff2 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
@@ -201,11 +201,14 @@ static void copyFeaturesToFunction(Function &Parent, const Function &Callee,
}
bool AMDGPUAnnotateKernelFeatures::addFeatureAttributes(Function &F) {
- bool HasApertureRegs = TM->getSubtarget<AMDGPUSubtarget>(F).hasApertureRegs();
+ const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>(F);
+ bool HasFlat = ST.hasFlatAddressSpace();
+ bool HasApertureRegs = ST.hasApertureRegs();
SmallPtrSet<const Constant *, 8> ConstantExprVisited;
bool Changed = false;
bool NeedQueuePtr = false;
+ bool HaveCall = false;
bool IsFunc = !AMDGPU::isEntryFunctionCC(F.getCallingConv());
for (BasicBlock &BB : F) {
@@ -215,11 +218,15 @@ bool AMDGPUAnnotateKernelFeatures::addFeatureAttributes(Function &F) {
Function *Callee = CS.getCalledFunction();
// TODO: Do something with indirect calls.
- if (!Callee)
+ if (!Callee) {
+ if (!CS.isInlineAsm())
+ HaveCall = true;
continue;
+ }
Intrinsic::ID IID = Callee->getIntrinsicID();
if (IID == Intrinsic::not_intrinsic) {
+ HaveCall = true;
copyFeaturesToFunction(F, *Callee, NeedQueuePtr);
Changed = true;
} else {
@@ -261,6 +268,14 @@ bool AMDGPUAnnotateKernelFeatures::addFeatureAttributes(Function &F) {
Changed = true;
}
+ // TODO: We could refine this to captured pointers that could possibly be
+ // accessed by flat instructions. For now this is mostly a poor way of
+ // estimating whether there are calls before argument lowering.
+ if (HasFlat && !IsFunc && HaveCall) {
+ F.addFnAttr("amdgpu-flat-scratch");
+ Changed = true;
+ }
+
return Changed;
}
diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
index ed962ac38b8..7334781916d 100644
--- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -246,7 +246,7 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
// this point it appears we need the setup. This part of the prolog should be
// emitted after frame indices are eliminated.
- if (MF.getFrameInfo().hasStackObjects() && MFI->hasFlatScratchInit())
+ if (MFI->hasFlatScratchInit())
emitFlatScratchInit(ST, MF, MBB);
unsigned SPReg = MFI->getStackPtrOffsetReg();
@@ -254,7 +254,8 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
assert(MRI.isReserved(SPReg) && "SPReg used but not reserved");
DebugLoc DL;
- int64_t StackSize = MF.getFrameInfo().getStackSize();
+ const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
+ int64_t StackSize = FrameInfo.getStackSize();
if (StackSize == 0) {
BuildMI(MBB, MBB.begin(), DL, TII->get(AMDGPU::COPY), SPReg)
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index 01456a124fb..a7c8166ff6d 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -152,7 +152,8 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
}
}
- if (ST.isAmdCodeObjectV2(MF)) {
+ bool IsCOV2 = ST.isAmdCodeObjectV2(MF);
+ if (IsCOV2) {
if (HasStackObjects || MaySpill)
PrivateSegmentBuffer = true;
@@ -172,12 +173,12 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
if (F->hasFnAttribute("amdgpu-kernarg-segment-ptr"))
KernargSegmentPtr = true;
- // We don't need to worry about accessing spills with flat instructions.
- // TODO: On VI where we must use flat for global, we should be able to omit
- // this if it is never used for generic access.
- if (HasStackObjects && ST.hasFlatAddressSpace() && ST.isAmdHsaOS() &&
- isEntryFunction())
- FlatScratchInit = true;
+ if (ST.hasFlatAddressSpace() && isEntryFunction() && IsCOV2) {
+ // TODO: This could be refined a lot. The attribute is a poor way of
+ // detecting calls that may require it before argument lowering.
+ if (HasStackObjects || F->hasFnAttribute("amdgpu-flat-scratch"))
+ FlatScratchInit = true;
+ }
}
unsigned SIMachineFunctionInfo::addPrivateSegmentBuffer(
OpenPOWER on IntegriCloud