commit 254ad3de5cd433df78d44503f48c52102e17db7d
tree   3a68a0d2906663cfc48803fc963e42ae990125ec
parent 1cc47f8413b38f075bbc7a1e0e38ead00700efdf
Author: Matt Arsenault <Matthew.Arsenault@amd.com>
Date:   2017-07-18 16:44:58 +0000
AMDGPU: Annotate necessity of flat-scratch-init
This is an approximation of the existing handling, intended to avoid
regressions. It fixes the use of too many registers when calls are
present on subtargets with the SGPR allocation bug.
llvm-svn: 308326
4 files changed, 69 insertions, 11 deletions
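
In essence, the annotator walks every call site in a function: inline asm is ignored, intrinsics are handled separately, and any other direct or indirect call marks the function as containing a real call, which (for entry functions on subtargets with a flat address space) forces the "amdgpu-flat-scratch" attribute. Below is a minimal standalone sketch of that decision; the Call and Func structs are simplified stand-ins for LLVM's CallSite and Function, not the real API:

#include <string>
#include <vector>

// Simplified stand-ins for the LLVM types used by the pass; the real code
// walks llvm::Function / CallSite objects instead.
struct Call {
  bool IsInlineAsm;   // inline asm does not count as a real call
  bool IsIntrinsic;   // intrinsics are handled separately
  bool IsIndirect;    // no statically known callee
};

struct Func {
  bool IsEntryFunction;            // e.g. amdgpu_kernel
  std::vector<Call> Calls;
  std::vector<std::string> Attrs;  // function attributes
};

// Mirrors the HaveCall logic added to addFeatureAttributes(): an entry
// function on a subtarget with a flat address space gets the
// "amdgpu-flat-scratch" attribute if it contains any real call.
bool annotateFlatScratch(Func &F, bool HasFlatAddressSpace) {
  if (!HasFlatAddressSpace || !F.IsEntryFunction)
    return false;

  bool HaveCall = false;
  for (const Call &C : F.Calls) {
    if (C.IsInlineAsm)
      continue;              // calls to inline asm never need scratch init
    if (C.IsIndirect || !C.IsIntrinsic) {
      HaveCall = true;       // indirect or ordinary direct call
      break;
    }
  }

  if (!HaveCall)
    return false;
  F.Attrs.push_back("amdgpu-flat-scratch");
  return true;
}

Keying the decision off an IR attribute lets later machine passes see the result without re-scanning the IR, at the cost of the imprecision the in-tree TODO acknowledges.
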
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
index 66249276cd8..c68e5861ff2 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
@@ -201,11 +201,14 @@ static void copyFeaturesToFunction(Function &Parent, const Function &Callee,
 }
 
 bool AMDGPUAnnotateKernelFeatures::addFeatureAttributes(Function &F) {
-  bool HasApertureRegs = TM->getSubtarget<AMDGPUSubtarget>(F).hasApertureRegs();
+  const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>(F);
+  bool HasFlat = ST.hasFlatAddressSpace();
+  bool HasApertureRegs = ST.hasApertureRegs();
   SmallPtrSet<const Constant *, 8> ConstantExprVisited;
 
   bool Changed = false;
   bool NeedQueuePtr = false;
+  bool HaveCall = false;
   bool IsFunc = !AMDGPU::isEntryFunctionCC(F.getCallingConv());
 
   for (BasicBlock &BB : F) {
@@ -215,11 +218,15 @@ bool AMDGPUAnnotateKernelFeatures::addFeatureAttributes(Function &F) {
       Function *Callee = CS.getCalledFunction();
 
       // TODO: Do something with indirect calls.
-      if (!Callee)
+      if (!Callee) {
+        if (!CS.isInlineAsm())
+          HaveCall = true;
         continue;
+      }
 
       Intrinsic::ID IID = Callee->getIntrinsicID();
       if (IID == Intrinsic::not_intrinsic) {
+        HaveCall = true;
         copyFeaturesToFunction(F, *Callee, NeedQueuePtr);
         Changed = true;
       } else {
@@ -261,6 +268,14 @@ bool AMDGPUAnnotateKernelFeatures::addFeatureAttributes(Function &F) {
     Changed = true;
   }
 
+  // TODO: We could refine this to captured pointers that could possibly be
+  // accessed by flat instructions. For now this is mostly a poor way of
+  // estimating whether there are calls before argument lowering.
+  if (HasFlat && !IsFunc && HaveCall) {
+    F.addFnAttr("amdgpu-flat-scratch");
+    Changed = true;
+  }
+
   return Changed;
 }
 
diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
index ed962ac38b8..7334781916d 100644
--- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -246,7 +246,7 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
   // this point it appears we need the setup. This part of the prolog should be
   // emitted after frame indices are eliminated.
 
-  if (MF.getFrameInfo().hasStackObjects() && MFI->hasFlatScratchInit())
+  if (MFI->hasFlatScratchInit())
     emitFlatScratchInit(ST, MF, MBB);
 
   unsigned SPReg = MFI->getStackPtrOffsetReg();
@@ -254,7 +254,8 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
     assert(MRI.isReserved(SPReg) && "SPReg used but not reserved");
 
     DebugLoc DL;
-    int64_t StackSize = MF.getFrameInfo().getStackSize();
+    const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
+    int64_t StackSize = FrameInfo.getStackSize();
 
     if (StackSize == 0) {
       BuildMI(MBB, MBB.begin(), DL, TII->get(AMDGPU::COPY), SPReg)
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index 01456a124fb..a7c8166ff6d 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -152,7 +152,8 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
     }
   }
 
-  if (ST.isAmdCodeObjectV2(MF)) {
+  bool IsCOV2 = ST.isAmdCodeObjectV2(MF);
+  if (IsCOV2) {
     if (HasStackObjects || MaySpill)
       PrivateSegmentBuffer = true;
 
@@ -172,12 +173,12 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
   if (F->hasFnAttribute("amdgpu-kernarg-segment-ptr"))
     KernargSegmentPtr = true;
 
-  // We don't need to worry about accessing spills with flat instructions.
-  // TODO: On VI where we must use flat for global, we should be able to omit
-  // this if it is never used for generic access.
-  if (HasStackObjects && ST.hasFlatAddressSpace() && ST.isAmdHsaOS() &&
-      isEntryFunction())
-    FlatScratchInit = true;
+  if (ST.hasFlatAddressSpace() && isEntryFunction() && IsCOV2) {
+    // TODO: This could be refined a lot. The attribute is a poor way of
+    // detecting calls that may require it before argument lowering.
+    if (HasStackObjects || F->hasFnAttribute("amdgpu-flat-scratch"))
+      FlatScratchInit = true;
+  }
 }
 
 unsigned SIMachineFunctionInfo::addPrivateSegmentBuffer(
diff --git a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll
index 32bcb21279c..e9797eff712 100644
--- a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll
@@ -250,9 +250,48 @@ define void @func_indirect_use_implicitarg_ptr() #1 {
   ret void
 }
 
+; HSA: declare void @external.func() #15
+declare void @external.func() #3
+
+; HSA: define internal void @defined.func() #15 {
+define internal void @defined.func() #3 {
+  ret void
+}
+
+; HSA: define void @func_call_external() #15 {
+define void @func_call_external() #3 {
+  call void @external.func()
+  ret void
+}
+
+; HSA: define void @func_call_defined() #15 {
+define void @func_call_defined() #3 {
+  call void @defined.func()
+  ret void
+}
+
+; HSA: define void @func_call_asm() #15 {
+define void @func_call_asm() #3 {
+  call void asm sideeffect "", ""() #3
+  ret void
+}
+
+; HSA: define amdgpu_kernel void @kern_call_external() #16 {
+define amdgpu_kernel void @kern_call_external() #3 {
+  call void @external.func()
+  ret void
+}
+
+; HSA: define amdgpu_kernel void @func_kern_defined() #16 {
+define amdgpu_kernel void @func_kern_defined() #3 {
+  call void @defined.func()
+  ret void
+}
+
 attributes #0 = { nounwind readnone speculatable }
 attributes #1 = { nounwind "target-cpu"="fiji" }
 attributes #2 = { nounwind "target-cpu"="gfx900" }
+attributes #3 = { nounwind }
 
 ; HSA: attributes #0 = { nounwind readnone speculatable }
 ; HSA: attributes #1 = { nounwind "amdgpu-work-item-id-x" "target-cpu"="fiji" }
@@ -269,3 +308,5 @@ attributes #2 = { nounwind "target-cpu"="gfx900" }
 ; HSA: attributes #12 = { nounwind "target-cpu"="gfx900" }
 ; HSA: attributes #13 = { nounwind "amdgpu-queue-ptr" "target-cpu"="gfx900" }
 ; HSA: attributes #14 = { nounwind "amdgpu-kernarg-segment-ptr" "target-cpu"="fiji" }
+; HSA: attributes #15 = { nounwind }
+; HSA: attributes #16 = { nounwind "amdgpu-flat-scratch" }
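
On the consuming side, SIMachineFunctionInfo then requires flat scratch initialization either for local stack objects or when the annotation is present. A compact model of that predicate under the same simplified assumptions as the sketch above (the real check reads the attribute off llvm::Function and queries the subtarget):

#include <algorithm>
#include <string>
#include <vector>

// Hypothetical model of the post-patch FlatScratchInit decision in
// SIMachineFunctionInfo: stack objects *or* the "amdgpu-flat-scratch"
// attribute trigger the init, but only for entry functions with a flat
// address space under code object v2.
bool needsFlatScratchInit(const std::vector<std::string> &Attrs,
                          bool HasStackObjects, bool HasFlatAddressSpace,
                          bool IsEntryFunction, bool IsCodeObjectV2) {
  if (!HasFlatAddressSpace || !IsEntryFunction || !IsCodeObjectV2)
    return false;
  bool HasAttr = std::find(Attrs.begin(), Attrs.end(),
                           "amdgpu-flat-scratch") != Attrs.end();
  return HasStackObjects || HasAttr;
}

Splitting the producer (the IR annotation pass) from the consumer (the machine function info) this way is what lets entry functions with no stack objects of their own still get flat scratch set up when they make calls.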