summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
authorStanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com>2019-10-02 00:26:58 +0000
committerStanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com>2019-10-02 00:26:58 +0000
commit075bc48a7f4c78d72530ae1d7fdde7f97b74d6a0 (patch)
tree7348d19613f79cda67688450e6db9384b1b2b57f /llvm/lib
parentd838cf76caca4636ad4884c77a9a6ff0616c1d97 (diff)
downloadbcm5719-llvm-075bc48a7f4c78d72530ae1d7fdde7f97b74d6a0.tar.gz
bcm5719-llvm-075bc48a7f4c78d72530ae1d7fdde7f97b74d6a0.zip
[AMDGPU] separate accounting for agprs
Account and report agprs separately on gfx908. Other targets do not change the reporting. Differential Revision: https://reviews.llvm.org/D68307 llvm-svn: 373411
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp53
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h4
-rw-r--r--llvm/lib/Target/AMDGPU/SIProgramInfo.h2
3 files changed, 52 insertions, 7 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index 694ff52da10..b83cc7f2528 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -342,6 +342,8 @@ bool AMDGPUAsmPrinter::doFinalization(Module &M) {
// Print comments that apply to both callable functions and entry points.
void AMDGPUAsmPrinter::emitCommonFunctionComments(
uint32_t NumVGPR,
+ Optional<uint32_t> NumAGPR,
+ uint32_t TotalNumVGPR,
uint32_t NumSGPR,
uint64_t ScratchSize,
uint64_t CodeSize,
@@ -349,6 +351,11 @@ void AMDGPUAsmPrinter::emitCommonFunctionComments(
OutStreamer->emitRawComment(" codeLenInByte = " + Twine(CodeSize), false);
OutStreamer->emitRawComment(" NumSgprs: " + Twine(NumSGPR), false);
OutStreamer->emitRawComment(" NumVgprs: " + Twine(NumVGPR), false);
+ if (NumAGPR) {
+ OutStreamer->emitRawComment(" NumAgprs: " + Twine(*NumAGPR), false);
+ OutStreamer->emitRawComment(" TotalNumVgprs: " + Twine(TotalNumVGPR),
+ false);
+ }
OutStreamer->emitRawComment(" ScratchSize: " + Twine(ScratchSize), false);
OutStreamer->emitRawComment(" MemoryBound: " + Twine(MFI->isMemoryBound()),
false);
@@ -474,6 +481,8 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
SIFunctionResourceInfo &Info = CallGraphResourceInfo[&MF.getFunction()];
emitCommonFunctionComments(
Info.NumVGPR,
+ STM.hasMAIInsts() ? Info.NumAGPR : Optional<uint32_t>(),
+ Info.getTotalNumVGPRs(STM),
Info.getTotalNumSGPRs(MF.getSubtarget<GCNSubtarget>()),
Info.PrivateSegmentSize,
getFunctionCodeSize(MF), MFI);
@@ -481,7 +490,11 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
}
OutStreamer->emitRawComment(" Kernel info:", false);
- emitCommonFunctionComments(CurrentProgramInfo.NumVGPR,
+ emitCommonFunctionComments(CurrentProgramInfo.NumArchVGPR,
+ STM.hasMAIInsts()
+ ? CurrentProgramInfo.NumAccVGPR
+ : Optional<uint32_t>(),
+ CurrentProgramInfo.NumVGPR,
CurrentProgramInfo.NumSGPR,
CurrentProgramInfo.ScratchSize,
getFunctionCodeSize(MF), MFI);
@@ -592,6 +605,11 @@ int32_t AMDGPUAsmPrinter::SIFunctionResourceInfo::getTotalNumSGPRs(
UsesVCC, UsesFlatScratch);
}
+int32_t AMDGPUAsmPrinter::SIFunctionResourceInfo::getTotalNumVGPRs(
+ const GCNSubtarget &ST) const {
+ return std::max(NumVGPR, NumAGPR);
+}
+
AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
const MachineFunction &MF) const {
SIFunctionResourceInfo Info;
@@ -638,11 +656,18 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
HighestVGPRReg = Reg;
break;
}
- MCPhysReg AReg = AMDGPU::AGPR0 + TRI.getHWRegIndex(Reg);
- if (MRI.isPhysRegUsed(AReg)) {
- HighestVGPRReg = AReg;
- break;
+ }
+
+ if (ST.hasMAIInsts()) {
+ MCPhysReg HighestAGPRReg = AMDGPU::NoRegister;
+ for (MCPhysReg Reg : reverse(AMDGPU::AGPR_32RegClass.getRegisters())) {
+ if (MRI.isPhysRegUsed(Reg)) {
+ HighestAGPRReg = Reg;
+ break;
+ }
}
+ Info.NumAGPR = HighestAGPRReg == AMDGPU::NoRegister ? 0 :
+ TRI.getHWRegIndex(HighestAGPRReg) + 1;
}
MCPhysReg HighestSGPRReg = AMDGPU::NoRegister;
@@ -664,6 +689,7 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
}
int32_t MaxVGPR = -1;
+ int32_t MaxAGPR = -1;
int32_t MaxSGPR = -1;
uint64_t CalleeFrameSize = 0;
@@ -673,6 +699,7 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
for (const MachineOperand &MO : MI.operands()) {
unsigned Width = 0;
bool IsSGPR = false;
+ bool IsAGPR = false;
if (!MO.isReg())
continue;
@@ -748,6 +775,7 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
Width = 1;
} else if (AMDGPU::AGPR_32RegClass.contains(Reg)) {
IsSGPR = false;
+ IsAGPR = true;
Width = 1;
} else if (AMDGPU::SReg_64RegClass.contains(Reg)) {
assert(!AMDGPU::TTMP_64RegClass.contains(Reg) &&
@@ -759,6 +787,7 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
Width = 2;
} else if (AMDGPU::AReg_64RegClass.contains(Reg)) {
IsSGPR = false;
+ IsAGPR = true;
Width = 2;
} else if (AMDGPU::VReg_96RegClass.contains(Reg)) {
IsSGPR = false;
@@ -775,6 +804,7 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
Width = 4;
} else if (AMDGPU::AReg_128RegClass.contains(Reg)) {
IsSGPR = false;
+ IsAGPR = true;
Width = 4;
} else if (AMDGPU::SReg_256RegClass.contains(Reg)) {
assert(!AMDGPU::TTMP_256RegClass.contains(Reg) &&
@@ -794,6 +824,7 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
Width = 16;
} else if (AMDGPU::AReg_512RegClass.contains(Reg)) {
IsSGPR = false;
+ IsAGPR = true;
Width = 16;
} else if (AMDGPU::SReg_1024RegClass.contains(Reg)) {
IsSGPR = true;
@@ -803,6 +834,7 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
Width = 32;
} else if (AMDGPU::AReg_1024RegClass.contains(Reg)) {
IsSGPR = false;
+ IsAGPR = true;
Width = 32;
} else {
llvm_unreachable("Unknown register class");
@@ -811,6 +843,8 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
int MaxUsed = HWReg + Width - 1;
if (IsSGPR) {
MaxSGPR = MaxUsed > MaxSGPR ? MaxUsed : MaxSGPR;
+ } else if (IsAGPR) {
+ MaxAGPR = MaxUsed > MaxAGPR ? MaxUsed : MaxAGPR;
} else {
MaxVGPR = MaxUsed > MaxVGPR ? MaxUsed : MaxVGPR;
}
@@ -832,6 +866,7 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
47 - IsaInfo::getNumExtraSGPRs(&ST, true, ST.hasFlatAddressSpace());
MaxSGPR = std::max(MaxSGPR, MaxSGPRGuess);
MaxVGPR = std::max(MaxVGPR, 23);
+ MaxAGPR = std::max(MaxAGPR, 23);
CalleeFrameSize = std::max(CalleeFrameSize, UINT64_C(16384));
Info.UsesVCC = true;
@@ -856,6 +891,7 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
MaxSGPR = std::max(I->second.NumExplicitSGPR - 1, MaxSGPR);
MaxVGPR = std::max(I->second.NumVGPR - 1, MaxVGPR);
+ MaxAGPR = std::max(I->second.NumAGPR - 1, MaxAGPR);
CalleeFrameSize
= std::max(I->second.PrivateSegmentSize, CalleeFrameSize);
Info.UsesVCC |= I->second.UsesVCC;
@@ -872,6 +908,7 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
Info.NumExplicitSGPR = MaxSGPR + 1;
Info.NumVGPR = MaxVGPR + 1;
+ Info.NumAGPR = MaxAGPR + 1;
Info.PrivateSegmentSize += CalleeFrameSize;
return Info;
@@ -880,8 +917,11 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
const MachineFunction &MF) {
SIFunctionResourceInfo Info = analyzeResourceUsage(MF);
+ const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
- ProgInfo.NumVGPR = Info.NumVGPR;
+ ProgInfo.NumArchVGPR = Info.NumVGPR;
+ ProgInfo.NumAccVGPR = Info.NumAGPR;
+ ProgInfo.NumVGPR = Info.getTotalNumVGPRs(STM);
ProgInfo.NumSGPR = Info.NumExplicitSGPR;
ProgInfo.ScratchSize = Info.PrivateSegmentSize;
ProgInfo.VCCUsed = Info.UsesVCC;
@@ -894,7 +934,6 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
MF.getFunction().getContext().diagnose(DiagStackSize);
}
- const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
// TODO(scott.linder): The calculations related to SGPR/VGPR blocks are
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
index 959104dbc35..c50c19a4609 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
@@ -43,6 +43,7 @@ private:
// Track the number of explicitly used VGPRs. Special registers reserved at
// the end are tracked separately.
int32_t NumVGPR = 0;
+ int32_t NumAGPR = 0;
int32_t NumExplicitSGPR = 0;
uint64_t PrivateSegmentSize = 0;
bool UsesVCC = false;
@@ -51,6 +52,7 @@ private:
bool HasRecursion = false;
int32_t getTotalNumSGPRs(const GCNSubtarget &ST) const;
+ int32_t getTotalNumVGPRs(const GCNSubtarget &ST) const;
};
SIProgramInfo CurrentProgramInfo;
@@ -77,6 +79,8 @@ private:
void EmitPALMetadata(const MachineFunction &MF,
const SIProgramInfo &KernelInfo);
void emitCommonFunctionComments(uint32_t NumVGPR,
+ Optional<uint32_t> NumAGPR,
+ uint32_t TotalNumVGPR,
uint32_t NumSGPR,
uint64_t ScratchSize,
uint64_t CodeSize,
diff --git a/llvm/lib/Target/AMDGPU/SIProgramInfo.h b/llvm/lib/Target/AMDGPU/SIProgramInfo.h
index 94ebe693feb..7c039a54b57 100644
--- a/llvm/lib/Target/AMDGPU/SIProgramInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIProgramInfo.h
@@ -41,6 +41,8 @@ struct SIProgramInfo {
uint64_t ComputePGMRSrc2 = 0;
uint32_t NumVGPR = 0;
+ uint32_t NumArchVGPR = 0;
+ uint32_t NumAccVGPR = 0;
uint32_t NumSGPR = 0;
uint32_t LDSSize = 0;
bool FlatUsed = false;
OpenPOWER on IntegriCloud