summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com> 2019-10-02 00:26:58 +0000
committer Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com> 2019-10-02 00:26:58 +0000
commit 075bc48a7f4c78d72530ae1d7fdde7f97b74d6a0 (patch)
tree 7348d19613f79cda67688450e6db9384b1b2b57f
parent d838cf76caca4636ad4884c77a9a6ff0616c1d97 (diff)
download bcm5719-llvm-075bc48a7f4c78d72530ae1d7fdde7f97b74d6a0.tar.gz
bcm5719-llvm-075bc48a7f4c78d72530ae1d7fdde7f97b74d6a0.zip
[AMDGPU] separate accounting for agprs
Account and report agprs separately on gfx908. Other targets do not change the reporting.

Differential Revision: https://reviews.llvm.org/D68307

llvm-svn: 373411
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp 53
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h 4
-rw-r--r-- llvm/lib/Target/AMDGPU/SIProgramInfo.h 2
-rw-r--r-- llvm/test/CodeGen/AMDGPU/agpr-register-count.ll 139
4 files changed, 181 insertions, 17 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index 694ff52da10..b83cc7f2528 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -342,6 +342,8 @@ bool AMDGPUAsmPrinter::doFinalization(Module &M) {
// Print comments that apply to both callable functions and entry points.
void AMDGPUAsmPrinter::emitCommonFunctionComments(
uint32_t NumVGPR,
+ Optional<uint32_t> NumAGPR,
+ uint32_t TotalNumVGPR,
uint32_t NumSGPR,
uint64_t ScratchSize,
uint64_t CodeSize,
@@ -349,6 +351,11 @@ void AMDGPUAsmPrinter::emitCommonFunctionComments(
OutStreamer->emitRawComment(" codeLenInByte = " + Twine(CodeSize), false);
OutStreamer->emitRawComment(" NumSgprs: " + Twine(NumSGPR), false);
OutStreamer->emitRawComment(" NumVgprs: " + Twine(NumVGPR), false);
+ if (NumAGPR) {
+ OutStreamer->emitRawComment(" NumAgprs: " + Twine(*NumAGPR), false);
+ OutStreamer->emitRawComment(" TotalNumVgprs: " + Twine(TotalNumVGPR),
+ false);
+ }
OutStreamer->emitRawComment(" ScratchSize: " + Twine(ScratchSize), false);
OutStreamer->emitRawComment(" MemoryBound: " + Twine(MFI->isMemoryBound()),
false);
@@ -474,6 +481,8 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
SIFunctionResourceInfo &Info = CallGraphResourceInfo[&MF.getFunction()];
emitCommonFunctionComments(
Info.NumVGPR,
+ STM.hasMAIInsts() ? Info.NumAGPR : Optional<uint32_t>(),
+ Info.getTotalNumVGPRs(STM),
Info.getTotalNumSGPRs(MF.getSubtarget<GCNSubtarget>()),
Info.PrivateSegmentSize,
getFunctionCodeSize(MF), MFI);
@@ -481,7 +490,11 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
}
OutStreamer->emitRawComment(" Kernel info:", false);
- emitCommonFunctionComments(CurrentProgramInfo.NumVGPR,
+ emitCommonFunctionComments(CurrentProgramInfo.NumArchVGPR,
+ STM.hasMAIInsts()
+ ? CurrentProgramInfo.NumAccVGPR
+ : Optional<uint32_t>(),
+ CurrentProgramInfo.NumVGPR,
CurrentProgramInfo.NumSGPR,
CurrentProgramInfo.ScratchSize,
getFunctionCodeSize(MF), MFI);
@@ -592,6 +605,11 @@ int32_t AMDGPUAsmPrinter::SIFunctionResourceInfo::getTotalNumSGPRs(
UsesVCC, UsesFlatScratch);
}
+int32_t AMDGPUAsmPrinter::SIFunctionResourceInfo::getTotalNumVGPRs(
+ const GCNSubtarget &ST) const {
+ return std::max(NumVGPR, NumAGPR);
+}
+
AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
const MachineFunction &MF) const {
SIFunctionResourceInfo Info;
@@ -638,11 +656,18 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
HighestVGPRReg = Reg;
break;
}
- MCPhysReg AReg = AMDGPU::AGPR0 + TRI.getHWRegIndex(Reg);
- if (MRI.isPhysRegUsed(AReg)) {
- HighestVGPRReg = AReg;
- break;
+ }
+
+ if (ST.hasMAIInsts()) {
+ MCPhysReg HighestAGPRReg = AMDGPU::NoRegister;
+ for (MCPhysReg Reg : reverse(AMDGPU::AGPR_32RegClass.getRegisters())) {
+ if (MRI.isPhysRegUsed(Reg)) {
+ HighestAGPRReg = Reg;
+ break;
+ }
}
+ Info.NumAGPR = HighestAGPRReg == AMDGPU::NoRegister ? 0 :
+ TRI.getHWRegIndex(HighestAGPRReg) + 1;
}
MCPhysReg HighestSGPRReg = AMDGPU::NoRegister;
@@ -664,6 +689,7 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
}
int32_t MaxVGPR = -1;
+ int32_t MaxAGPR = -1;
int32_t MaxSGPR = -1;
uint64_t CalleeFrameSize = 0;
@@ -673,6 +699,7 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
for (const MachineOperand &MO : MI.operands()) {
unsigned Width = 0;
bool IsSGPR = false;
+ bool IsAGPR = false;
if (!MO.isReg())
continue;
@@ -748,6 +775,7 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
Width = 1;
} else if (AMDGPU::AGPR_32RegClass.contains(Reg)) {
IsSGPR = false;
+ IsAGPR = true;
Width = 1;
} else if (AMDGPU::SReg_64RegClass.contains(Reg)) {
assert(!AMDGPU::TTMP_64RegClass.contains(Reg) &&
@@ -759,6 +787,7 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
Width = 2;
} else if (AMDGPU::AReg_64RegClass.contains(Reg)) {
IsSGPR = false;
+ IsAGPR = true;
Width = 2;
} else if (AMDGPU::VReg_96RegClass.contains(Reg)) {
IsSGPR = false;
@@ -775,6 +804,7 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
Width = 4;
} else if (AMDGPU::AReg_128RegClass.contains(Reg)) {
IsSGPR = false;
+ IsAGPR = true;
Width = 4;
} else if (AMDGPU::SReg_256RegClass.contains(Reg)) {
assert(!AMDGPU::TTMP_256RegClass.contains(Reg) &&
@@ -794,6 +824,7 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
Width = 16;
} else if (AMDGPU::AReg_512RegClass.contains(Reg)) {
IsSGPR = false;
+ IsAGPR = true;
Width = 16;
} else if (AMDGPU::SReg_1024RegClass.contains(Reg)) {
IsSGPR = true;
@@ -803,6 +834,7 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
Width = 32;
} else if (AMDGPU::AReg_1024RegClass.contains(Reg)) {
IsSGPR = false;
+ IsAGPR = true;
Width = 32;
} else {
llvm_unreachable("Unknown register class");
@@ -811,6 +843,8 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
int MaxUsed = HWReg + Width - 1;
if (IsSGPR) {
MaxSGPR = MaxUsed > MaxSGPR ? MaxUsed : MaxSGPR;
+ } else if (IsAGPR) {
+ MaxAGPR = MaxUsed > MaxAGPR ? MaxUsed : MaxAGPR;
} else {
MaxVGPR = MaxUsed > MaxVGPR ? MaxUsed : MaxVGPR;
}
@@ -832,6 +866,7 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
47 - IsaInfo::getNumExtraSGPRs(&ST, true, ST.hasFlatAddressSpace());
MaxSGPR = std::max(MaxSGPR, MaxSGPRGuess);
MaxVGPR = std::max(MaxVGPR, 23);
+ MaxAGPR = std::max(MaxAGPR, 23);
CalleeFrameSize = std::max(CalleeFrameSize, UINT64_C(16384));
Info.UsesVCC = true;
@@ -856,6 +891,7 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
MaxSGPR = std::max(I->second.NumExplicitSGPR - 1, MaxSGPR);
MaxVGPR = std::max(I->second.NumVGPR - 1, MaxVGPR);
+ MaxAGPR = std::max(I->second.NumAGPR - 1, MaxAGPR);
CalleeFrameSize
= std::max(I->second.PrivateSegmentSize, CalleeFrameSize);
Info.UsesVCC |= I->second.UsesVCC;
@@ -872,6 +908,7 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
Info.NumExplicitSGPR = MaxSGPR + 1;
Info.NumVGPR = MaxVGPR + 1;
+ Info.NumAGPR = MaxAGPR + 1;
Info.PrivateSegmentSize += CalleeFrameSize;
return Info;
@@ -880,8 +917,11 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
const MachineFunction &MF) {
SIFunctionResourceInfo Info = analyzeResourceUsage(MF);
+ const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
- ProgInfo.NumVGPR = Info.NumVGPR;
+ ProgInfo.NumArchVGPR = Info.NumVGPR;
+ ProgInfo.NumAccVGPR = Info.NumAGPR;
+ ProgInfo.NumVGPR = Info.getTotalNumVGPRs(STM);
ProgInfo.NumSGPR = Info.NumExplicitSGPR;
ProgInfo.ScratchSize = Info.PrivateSegmentSize;
ProgInfo.VCCUsed = Info.UsesVCC;
@@ -894,7 +934,6 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
MF.getFunction().getContext().diagnose(DiagStackSize);
}
- const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
// TODO(scott.linder): The calculations related to SGPR/VGPR blocks are
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
index 959104dbc35..c50c19a4609 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
@@ -43,6 +43,7 @@ private:
// Track the number of explicitly used VGPRs. Special registers reserved at
// the end are tracked separately.
int32_t NumVGPR = 0;
+ int32_t NumAGPR = 0;
int32_t NumExplicitSGPR = 0;
uint64_t PrivateSegmentSize = 0;
bool UsesVCC = false;
@@ -51,6 +52,7 @@ private:
bool HasRecursion = false;
int32_t getTotalNumSGPRs(const GCNSubtarget &ST) const;
+ int32_t getTotalNumVGPRs(const GCNSubtarget &ST) const;
};
SIProgramInfo CurrentProgramInfo;
@@ -77,6 +79,8 @@ private:
void EmitPALMetadata(const MachineFunction &MF,
const SIProgramInfo &KernelInfo);
void emitCommonFunctionComments(uint32_t NumVGPR,
+ Optional<uint32_t> NumAGPR,
+ uint32_t TotalNumVGPR,
uint32_t NumSGPR,
uint64_t ScratchSize,
uint64_t CodeSize,
diff --git a/llvm/lib/Target/AMDGPU/SIProgramInfo.h b/llvm/lib/Target/AMDGPU/SIProgramInfo.h
index 94ebe693feb..7c039a54b57 100644
--- a/llvm/lib/Target/AMDGPU/SIProgramInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIProgramInfo.h
@@ -41,6 +41,8 @@ struct SIProgramInfo {
uint64_t ComputePGMRSrc2 = 0;
uint32_t NumVGPR = 0;
+ uint32_t NumArchVGPR = 0;
+ uint32_t NumAccVGPR = 0;
uint32_t NumSGPR = 0;
uint32_t LDSSize = 0;
bool FlatUsed = false;
diff --git a/llvm/test/CodeGen/AMDGPU/agpr-register-count.ll b/llvm/test/CodeGen/AMDGPU/agpr-register-count.ll
index c2dfbe27890..8ad231c4c91 100644
--- a/llvm/test/CodeGen/AMDGPU/agpr-register-count.ll
+++ b/llvm/test/CodeGen/AMDGPU/agpr-register-count.ll
@@ -1,15 +1,134 @@
-; RUN: llc -march=amdgcn -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s
-declare <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float, float, <32 x float>, i32, i32, i32)
+; GCN-LABEL: {{^}}kernel_32_agprs:
+; GCN: .amdhsa_next_free_vgpr 32
+; GCN: NumVgprs: 9
+; GCN: NumAgprs: 32
+; GCN: TotalNumVgprs: 32
+; GCN: VGPRBlocks: 7
+; GCN: NumVGPRsForWavesPerEU: 32
+; GCN: Occupancy: 8
+define amdgpu_kernel void @kernel_32_agprs() {
+bb:
+ call void asm sideeffect "", "~{v8}" ()
+ call void asm sideeffect "", "~{a31}" ()
+ ret void
+}
+
+; GCN-LABEL: {{^}}kernel_0_agprs:
+; GCN: .amdhsa_next_free_vgpr 1
+; GCN: NumVgprs: 1
+; GCN: NumAgprs: 0
+; GCN: TotalNumVgprs: 1
+; GCN: VGPRBlocks: 0
+; GCN: NumVGPRsForWavesPerEU: 1
+; GCN: Occupancy: 10
+define amdgpu_kernel void @kernel_0_agprs() {
+bb:
+ call void asm sideeffect "", "~{v0}" ()
+ ret void
+}
+
+; GCN-LABEL: {{^}}kernel_40_vgprs:
+; GCN: .amdhsa_next_free_vgpr 40
+; GCN: NumVgprs: 40
+; GCN: NumAgprs: 16
+; GCN: TotalNumVgprs: 40
+; GCN: VGPRBlocks: 9
+; GCN: NumVGPRsForWavesPerEU: 40
+; GCN: Occupancy: 6
+define amdgpu_kernel void @kernel_40_vgprs() {
+bb:
+ call void asm sideeffect "", "~{v39}" ()
+ call void asm sideeffect "", "~{a15}" ()
+ ret void
+}
+
+; GCN-LABEL: {{^}}func_32_agprs:
+; GCN: NumVgprs: 9
+; GCN: NumAgprs: 32
+; GCN: TotalNumVgprs: 32
+define void @func_32_agprs() #0 {
+bb:
+ call void asm sideeffect "", "~{v8}" ()
+ call void asm sideeffect "", "~{a31}" ()
+ ret void
+}
+
+; GCN-LABEL: {{^}}func_32_vgprs:
+; GCN: NumVgprs: 32
+; GCN: NumAgprs: 9
+; GCN: TotalNumVgprs: 32
+define void @func_32_vgprs() {
+bb:
+ call void asm sideeffect "", "~{v31}" ()
+ call void asm sideeffect "", "~{a8}" ()
+ ret void
+}
-; GCN-LABEL: {{^}}test_32_agprs:
-; GCN: v_mfma_f32_32x32x1f32 a[0:31], {{v[0-9]+}}, {{v[0-9]+}},
-; GCN-NOT: v28
-; GCN: NumVgprs: 32
-; GCN: VGPRBlocks: 7
-define amdgpu_kernel void @test_32_agprs(<32 x float> addrspace(1)* %arg) {
+; GCN-LABEL: {{^}}func_0_agprs:
+; GCN: NumVgprs: 1
+; GCN: NumAgprs: 0
+; GCN: TotalNumVgprs: 1
+define amdgpu_kernel void @func_0_agprs() {
bb:
- %mai.1 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.0, float 2.0, <32 x float> <float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0>, i32 0, i32 0, i32 0)
- store <32 x float> %mai.1, <32 x float> addrspace(1)* %arg
+ call void asm sideeffect "", "~{v0}" ()
ret void
}
+
+; GCN-LABEL: {{^}}kernel_max_gprs:
+; GCN: .amdhsa_next_free_vgpr 256
+; GCN: NumVgprs: 256
+; GCN: NumAgprs: 256
+; GCN: TotalNumVgprs: 256
+; GCN: VGPRBlocks: 63
+; GCN: NumVGPRsForWavesPerEU: 256
+; GCN: Occupancy: 1
+define amdgpu_kernel void @kernel_max_gprs() {
+bb:
+ call void asm sideeffect "", "~{v255}" ()
+ call void asm sideeffect "", "~{a255}" ()
+ ret void
+}
+
+; GCN-LABEL: {{^}}kernel_call_func_32_agprs:
+; GCN: .amdhsa_next_free_vgpr 32
+; GCN: NumVgprs: 9
+; GCN: NumAgprs: 32
+; GCN: TotalNumVgprs: 32
+; GCN: VGPRBlocks: 7
+; GCN: NumVGPRsForWavesPerEU: 32
+; GCN: Occupancy: 8
+define amdgpu_kernel void @kernel_call_func_32_agprs() {
+bb:
+ call void @func_32_agprs() #0
+ ret void
+}
+
+; GCN-LABEL: {{^}}func_call_func_32_agprs:
+; GCN: NumVgprs: 9
+; GCN: NumAgprs: 32
+; GCN: TotalNumVgprs: 32
+define void @func_call_func_32_agprs() {
+bb:
+ call void @func_32_agprs() #0
+ ret void
+}
+
+declare void @undef_func()
+
+; GCN-LABEL: {{^}}kernel_call_undef_func:
+; GCN: .amdhsa_next_free_vgpr 24
+; GCN: NumVgprs: 24
+; GCN: NumAgprs: 24
+; GCN: TotalNumVgprs: 24
+; GCN: VGPRBlocks: 5
+; GCN: NumVGPRsForWavesPerEU: 24
+; GCN: Occupancy: 10
+define amdgpu_kernel void @kernel_call_undef_func() {
+bb:
+ call void @undef_func()
+ ret void
+}
+
+attributes #0 = { nounwind noinline }
OpenPOWER on IntegriCloud