diff options
author | Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com> | 2019-10-02 00:26:58 +0000 |
---|---|---|
committer | Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com> | 2019-10-02 00:26:58 +0000 |
commit | 075bc48a7f4c78d72530ae1d7fdde7f97b74d6a0 (patch) | |
tree | 7348d19613f79cda67688450e6db9384b1b2b57f /llvm/lib | |
parent | d838cf76caca4636ad4884c77a9a6ff0616c1d97 (diff) | |
download | bcm5719-llvm-075bc48a7f4c78d72530ae1d7fdde7f97b74d6a0.tar.gz bcm5719-llvm-075bc48a7f4c78d72530ae1d7fdde7f97b74d6a0.zip |
[AMDGPU] separate accounting for agprs
Account and report agprs separately on gfx908. Other targets
do not change the reporting.
Differential Revision: https://reviews.llvm.org/D68307
llvm-svn: 373411
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp | 53 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h | 4 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIProgramInfo.h | 2 |
3 files changed, 52 insertions, 7 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp index 694ff52da10..b83cc7f2528 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -342,6 +342,8 @@ bool AMDGPUAsmPrinter::doFinalization(Module &M) { // Print comments that apply to both callable functions and entry points. void AMDGPUAsmPrinter::emitCommonFunctionComments( uint32_t NumVGPR, + Optional<uint32_t> NumAGPR, + uint32_t TotalNumVGPR, uint32_t NumSGPR, uint64_t ScratchSize, uint64_t CodeSize, @@ -349,6 +351,11 @@ void AMDGPUAsmPrinter::emitCommonFunctionComments( OutStreamer->emitRawComment(" codeLenInByte = " + Twine(CodeSize), false); OutStreamer->emitRawComment(" NumSgprs: " + Twine(NumSGPR), false); OutStreamer->emitRawComment(" NumVgprs: " + Twine(NumVGPR), false); + if (NumAGPR) { + OutStreamer->emitRawComment(" NumAgprs: " + Twine(*NumAGPR), false); + OutStreamer->emitRawComment(" TotalNumVgprs: " + Twine(TotalNumVGPR), + false); + } OutStreamer->emitRawComment(" ScratchSize: " + Twine(ScratchSize), false); OutStreamer->emitRawComment(" MemoryBound: " + Twine(MFI->isMemoryBound()), false); @@ -474,6 +481,8 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) { SIFunctionResourceInfo &Info = CallGraphResourceInfo[&MF.getFunction()]; emitCommonFunctionComments( Info.NumVGPR, + STM.hasMAIInsts() ? Info.NumAGPR : Optional<uint32_t>(), + Info.getTotalNumVGPRs(STM), Info.getTotalNumSGPRs(MF.getSubtarget<GCNSubtarget>()), Info.PrivateSegmentSize, getFunctionCodeSize(MF), MFI); @@ -481,7 +490,11 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) { } OutStreamer->emitRawComment(" Kernel info:", false); - emitCommonFunctionComments(CurrentProgramInfo.NumVGPR, + emitCommonFunctionComments(CurrentProgramInfo.NumArchVGPR, + STM.hasMAIInsts() + ? CurrentProgramInfo.NumAccVGPR + : Optional<uint32_t>(), + CurrentProgramInfo.NumVGPR, CurrentProgramInfo.NumSGPR, CurrentProgramInfo.ScratchSize, getFunctionCodeSize(MF), MFI); @@ -592,6 +605,11 @@ int32_t AMDGPUAsmPrinter::SIFunctionResourceInfo::getTotalNumSGPRs( UsesVCC, UsesFlatScratch); } +int32_t AMDGPUAsmPrinter::SIFunctionResourceInfo::getTotalNumVGPRs( + const GCNSubtarget &ST) const { + return std::max(NumVGPR, NumAGPR); +} + AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage( const MachineFunction &MF) const { SIFunctionResourceInfo Info; @@ -638,11 +656,18 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage( HighestVGPRReg = Reg; break; } - MCPhysReg AReg = AMDGPU::AGPR0 + TRI.getHWRegIndex(Reg); - if (MRI.isPhysRegUsed(AReg)) { - HighestVGPRReg = AReg; - break; + } + + if (ST.hasMAIInsts()) { + MCPhysReg HighestAGPRReg = AMDGPU::NoRegister; + for (MCPhysReg Reg : reverse(AMDGPU::AGPR_32RegClass.getRegisters())) { + if (MRI.isPhysRegUsed(Reg)) { + HighestAGPRReg = Reg; + break; + } } + Info.NumAGPR = HighestAGPRReg == AMDGPU::NoRegister ? 0 : + TRI.getHWRegIndex(HighestAGPRReg) + 1; } MCPhysReg HighestSGPRReg = AMDGPU::NoRegister; @@ -664,6 +689,7 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage( } int32_t MaxVGPR = -1; + int32_t MaxAGPR = -1; int32_t MaxSGPR = -1; uint64_t CalleeFrameSize = 0; @@ -673,6 +699,7 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage( for (const MachineOperand &MO : MI.operands()) { unsigned Width = 0; bool IsSGPR = false; + bool IsAGPR = false; if (!MO.isReg()) continue; @@ -748,6 +775,7 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage( Width = 1; } else if (AMDGPU::AGPR_32RegClass.contains(Reg)) { IsSGPR = false; + IsAGPR = true; Width = 1; } else if (AMDGPU::SReg_64RegClass.contains(Reg)) { assert(!AMDGPU::TTMP_64RegClass.contains(Reg) && @@ -759,6 +787,7 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage( Width = 2; } else if (AMDGPU::AReg_64RegClass.contains(Reg)) { IsSGPR = false; + IsAGPR = true; Width = 2; } else if (AMDGPU::VReg_96RegClass.contains(Reg)) { IsSGPR = false; @@ -775,6 +804,7 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage( Width = 4; } else if (AMDGPU::AReg_128RegClass.contains(Reg)) { IsSGPR = false; + IsAGPR = true; Width = 4; } else if (AMDGPU::SReg_256RegClass.contains(Reg)) { assert(!AMDGPU::TTMP_256RegClass.contains(Reg) && @@ -794,6 +824,7 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage( Width = 16; } else if (AMDGPU::AReg_512RegClass.contains(Reg)) { IsSGPR = false; + IsAGPR = true; Width = 16; } else if (AMDGPU::SReg_1024RegClass.contains(Reg)) { IsSGPR = true; @@ -803,6 +834,7 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage( Width = 32; } else if (AMDGPU::AReg_1024RegClass.contains(Reg)) { IsSGPR = false; + IsAGPR = true; Width = 32; } else { llvm_unreachable("Unknown register class"); @@ -811,6 +843,8 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage( int MaxUsed = HWReg + Width - 1; if (IsSGPR) { MaxSGPR = MaxUsed > MaxSGPR ? MaxUsed : MaxSGPR; + } else if (IsAGPR) { + MaxAGPR = MaxUsed > MaxAGPR ? MaxUsed : MaxAGPR; } else { MaxVGPR = MaxUsed > MaxVGPR ? MaxUsed : MaxVGPR; } @@ -832,6 +866,7 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage( 47 - IsaInfo::getNumExtraSGPRs(&ST, true, ST.hasFlatAddressSpace()); MaxSGPR = std::max(MaxSGPR, MaxSGPRGuess); MaxVGPR = std::max(MaxVGPR, 23); + MaxAGPR = std::max(MaxAGPR, 23); CalleeFrameSize = std::max(CalleeFrameSize, UINT64_C(16384)); Info.UsesVCC = true; @@ -856,6 +891,7 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage( MaxSGPR = std::max(I->second.NumExplicitSGPR - 1, MaxSGPR); MaxVGPR = std::max(I->second.NumVGPR - 1, MaxVGPR); + MaxAGPR = std::max(I->second.NumAGPR - 1, MaxAGPR); CalleeFrameSize = std::max(I->second.PrivateSegmentSize, CalleeFrameSize); Info.UsesVCC |= I->second.UsesVCC; @@ -872,6 +908,7 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage( Info.NumExplicitSGPR = MaxSGPR + 1; Info.NumVGPR = MaxVGPR + 1; + Info.NumAGPR = MaxAGPR + 1; Info.PrivateSegmentSize += CalleeFrameSize; return Info; @@ -880,8 +917,11 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage( void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo, const MachineFunction &MF) { SIFunctionResourceInfo Info = analyzeResourceUsage(MF); + const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>(); - ProgInfo.NumVGPR = Info.NumVGPR; + ProgInfo.NumArchVGPR = Info.NumVGPR; + ProgInfo.NumAccVGPR = Info.NumAGPR; + ProgInfo.NumVGPR = Info.getTotalNumVGPRs(STM); ProgInfo.NumSGPR = Info.NumExplicitSGPR; ProgInfo.ScratchSize = Info.PrivateSegmentSize; ProgInfo.VCCUsed = Info.UsesVCC; @@ -894,7 +934,6 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo, MF.getFunction().getContext().diagnose(DiagStackSize); } - const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>(); const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); // TODO(scott.linder): The calculations related to SGPR/VGPR blocks are diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h index 959104dbc35..c50c19a4609 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h @@ -43,6 +43,7 @@ private: // Track the number of explicitly used VGPRs. Special registers reserved at // the end are tracked separately. int32_t NumVGPR = 0; + int32_t NumAGPR = 0; int32_t NumExplicitSGPR = 0; uint64_t PrivateSegmentSize = 0; bool UsesVCC = false; @@ -51,6 +52,7 @@ private: bool HasRecursion = false; int32_t getTotalNumSGPRs(const GCNSubtarget &ST) const; + int32_t getTotalNumVGPRs(const GCNSubtarget &ST) const; }; SIProgramInfo CurrentProgramInfo; @@ -77,6 +79,8 @@ private: void EmitPALMetadata(const MachineFunction &MF, const SIProgramInfo &KernelInfo); void emitCommonFunctionComments(uint32_t NumVGPR, + Optional<uint32_t> NumAGPR, + uint32_t TotalNumVGPR, uint32_t NumSGPR, uint64_t ScratchSize, uint64_t CodeSize, diff --git a/llvm/lib/Target/AMDGPU/SIProgramInfo.h b/llvm/lib/Target/AMDGPU/SIProgramInfo.h index 94ebe693feb..7c039a54b57 100644 --- a/llvm/lib/Target/AMDGPU/SIProgramInfo.h +++ b/llvm/lib/Target/AMDGPU/SIProgramInfo.h @@ -41,6 +41,8 @@ struct SIProgramInfo { uint64_t ComputePGMRSrc2 = 0; uint32_t NumVGPR = 0; + uint32_t NumArchVGPR = 0; + uint32_t NumAccVGPR = 0; uint32_t NumSGPR = 0; uint32_t LDSSize = 0; bool FlatUsed = false; |