summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
diff options
context:
space:
mode:
author: Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com> 2019-07-11 21:30:34 +0000
committer: Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com> 2019-07-11 21:30:34 +0000
commit: 7d2019bb961f94eec476f255bb776d470ecd1b49 (patch)
tree: ea7688d412ff94d75a08dad4115cd27511dbc376 /llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
parent: 7b4a59db1e01408cbf32c5d10688aa00692bbeec (diff)
download: bcm5719-llvm-7d2019bb961f94eec476f255bb776d470ecd1b49.tar.gz
download: bcm5719-llvm-7d2019bb961f94eec476f255bb776d470ecd1b49.zip
[AMDGPU] gfx908 hazard recognizer
Differential Revision: https://reviews.llvm.org/D64593

llvm-svn: 365829
Diffstat (limited to 'llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp')
-rw-r--r-- llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp | 229
1 file changed, 228 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index 16436be984f..a23348e18f9 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -46,7 +46,8 @@ GCNHazardRecognizer::GCNHazardRecognizer(const MachineFunction &MF) :
TRI(TII.getRegisterInfo()),
ClauseUses(TRI.getNumRegUnits()),
ClauseDefs(TRI.getNumRegUnits()) {
- MaxLookAhead = 5;
+ MaxLookAhead = MF.getRegInfo().isPhysRegUsed(AMDGPU::AGPR0) ? 18 : 5;
+ TSchedModel.init(&ST);
}
void GCNHazardRecognizer::EmitInstruction(SUnit *SU) {
@@ -181,6 +182,12 @@ GCNHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
checkReadM0Hazards(MI) > 0)
return NoopHazard;
+ if (SIInstrInfo::isMAI(*MI) && checkMAIHazards(MI) > 0)
+ return NoopHazard;
+
+ if ((MI->mayLoad() || MI->mayStore()) && checkMAILdStHazards(MI) > 0)
+ return NoopHazard;
+
if (MI->isInlineAsm() && checkInlineAsmHazards(MI) > 0)
return NoopHazard;
@@ -286,6 +293,12 @@ unsigned GCNHazardRecognizer::PreEmitNoopsCommon(MachineInstr *MI) {
if (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(TII, *MI))
return std::max(WaitStates, checkReadM0Hazards(MI));
+ if (SIInstrInfo::isMAI(*MI))
+ return std::max(WaitStates, checkMAIHazards(MI));
+
+ if (MI->mayLoad() || MI->mayStore())
+ return std::max(WaitStates, checkMAILdStHazards(MI));
+
return WaitStates;
}
@@ -1179,3 +1192,217 @@ int GCNHazardRecognizer::checkFPAtomicToDenormModeHazard(MachineInstr *MI) {
return FPAtomicToDenormModeWaitStates -
::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn);
}
+
+int GCNHazardRecognizer::checkMAIHazards(MachineInstr *MI) {
+ assert(SIInstrInfo::isMAI(*MI));
+
+ int WaitStatesNeeded = 0;
+ unsigned Opc = MI->getOpcode();
+
+ auto IsVALUFn = [] (MachineInstr *MI) {
+ return SIInstrInfo::isVALU(*MI);
+ };
+
+ if (Opc != AMDGPU::V_ACCVGPR_READ_B32) { // MFMA or v_accvgpr_write
+ const int LegacyVALUWritesVGPRWaitStates = 2;
+ const int VALUWritesExecWaitStates = 4;
+ const int MaxWaitStates = 4;
+
+ int WaitStatesNeededForUse = VALUWritesExecWaitStates -
+ getWaitStatesSinceDef(AMDGPU::EXEC, IsVALUFn, MaxWaitStates);
+ WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
+
+ if (WaitStatesNeeded < MaxWaitStates) {
+ for (const MachineOperand &Use : MI->explicit_uses()) {
+ const int MaxWaitStates = 2;
+
+ if (!Use.isReg() || !TRI.isVGPR(MF.getRegInfo(), Use.getReg()))
+ continue;
+
+ int WaitStatesNeededForUse = LegacyVALUWritesVGPRWaitStates -
+ getWaitStatesSinceDef(Use.getReg(), IsVALUFn, MaxWaitStates);
+ WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
+
+ if (WaitStatesNeeded == MaxWaitStates)
+ break;
+ }
+ }
+ }
+
+ auto IsMFMAFn = [] (MachineInstr *MI) {
+ return SIInstrInfo::isMAI(*MI) &&
+ MI->getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32 &&
+ MI->getOpcode() != AMDGPU::V_ACCVGPR_READ_B32;
+ };
+
+ for (const MachineOperand &Op : MI->explicit_operands()) {
+ if (!Op.isReg() || !TRI.isAGPR(MF.getRegInfo(), Op.getReg()))
+ continue;
+
+ if (Op.isDef() && Opc != AMDGPU::V_ACCVGPR_WRITE_B32)
+ continue;
+
+ const int MFMAWritesAGPROverlappedSrcABWaitStates = 4;
+ const int MFMAWritesAGPROverlappedSrcCWaitStates = 2;
+ const int MFMA4x4WritesAGPRAccVgprReadWaitStates = 4;
+ const int MFMA16x16WritesAGPRAccVgprReadWaitStates = 10;
+ const int MFMA32x32WritesAGPRAccVgprReadWaitStates = 18;
+ const int MFMA4x4WritesAGPRAccVgprWriteWaitStates = 1;
+ const int MFMA16x16WritesAGPRAccVgprWriteWaitStates = 7;
+ const int MFMA32x32WritesAGPRAccVgprWriteWaitStates = 15;
+ const int MaxWaitStates = 18;
+ unsigned Reg = Op.getReg();
+ unsigned HazardDefLatency = 0;
+
+ auto IsOverlappedMFMAFn = [Reg, &IsMFMAFn, &HazardDefLatency, this]
+ (MachineInstr *MI) {
+ if (!IsMFMAFn(MI))
+ return false;
+ unsigned DstReg = MI->getOperand(0).getReg();
+ if (DstReg == Reg)
+ return false;
+ HazardDefLatency = std::max(HazardDefLatency,
+ TSchedModel.computeInstrLatency(MI));
+ return TRI.regsOverlap(DstReg, Reg);
+ };
+
+ int WaitStatesSinceDef = getWaitStatesSinceDef(Reg, IsOverlappedMFMAFn,
+ MaxWaitStates);
+ int NeedWaitStates = MFMAWritesAGPROverlappedSrcABWaitStates;
+ int SrcCIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
+ int OpNo = MI->getOperandNo(&Op);
+ if (OpNo == SrcCIdx) {
+ NeedWaitStates = MFMAWritesAGPROverlappedSrcCWaitStates;
+ } else if (Opc == AMDGPU::V_ACCVGPR_READ_B32) {
+ switch (HazardDefLatency) {
+ case 2: NeedWaitStates = MFMA4x4WritesAGPRAccVgprReadWaitStates;
+ break;
+ case 8: NeedWaitStates = MFMA16x16WritesAGPRAccVgprReadWaitStates;
+ break;
+ case 16: LLVM_FALLTHROUGH;
+ default: NeedWaitStates = MFMA32x32WritesAGPRAccVgprReadWaitStates;
+ break;
+ }
+ } else if (Opc == AMDGPU::V_ACCVGPR_WRITE_B32) {
+ switch (HazardDefLatency) {
+ case 2: NeedWaitStates = MFMA4x4WritesAGPRAccVgprWriteWaitStates;
+ break;
+ case 8: NeedWaitStates = MFMA16x16WritesAGPRAccVgprWriteWaitStates;
+ break;
+ case 16: LLVM_FALLTHROUGH;
+ default: NeedWaitStates = MFMA32x32WritesAGPRAccVgprWriteWaitStates;
+ break;
+ }
+ }
+
+ int WaitStatesNeededForUse = NeedWaitStates - WaitStatesSinceDef;
+ WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
+
+ if (WaitStatesNeeded == MaxWaitStates)
+ return WaitStatesNeeded; // Early exit.
+
+ auto IsAccVgprWriteFn = [Reg, this] (MachineInstr *MI) {
+ if (MI->getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32)
+ return false;
+ unsigned DstReg = MI->getOperand(0).getReg();
+ return TRI.regsOverlap(Reg, DstReg);
+ };
+
+ const int AccVGPRWriteMFMAReadSrcCWaitStates = 1;
+ const int AccVGPRWriteMFMAReadSrcABWaitStates = 3;
+ const int AccVGPRWriteAccVgprReadWaitStates = 3;
+ NeedWaitStates = AccVGPRWriteMFMAReadSrcABWaitStates;
+ if (OpNo == SrcCIdx)
+ NeedWaitStates = AccVGPRWriteMFMAReadSrcCWaitStates;
+ else if (Opc == AMDGPU::V_ACCVGPR_READ_B32)
+ NeedWaitStates = AccVGPRWriteAccVgprReadWaitStates;
+
+ WaitStatesNeededForUse = NeedWaitStates -
+ getWaitStatesSinceDef(Reg, IsAccVgprWriteFn, MaxWaitStates);
+ WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
+
+ if (WaitStatesNeeded == MaxWaitStates)
+ return WaitStatesNeeded; // Early exit.
+ }
+
+ if (Opc == AMDGPU::V_ACCVGPR_WRITE_B32) {
+ const int MFMA4x4ReadSrcCAccVgprWriteWaitStates = 0;
+ const int MFMA16x16ReadSrcCAccVgprWriteWaitStates = 5;
+ const int MFMA32x32ReadSrcCAccVgprWriteWaitStates = 13;
+ const int MaxWaitStates = 13;
+ unsigned DstReg = MI->getOperand(0).getReg();
+ unsigned HazardDefLatency = 0;
+
+ auto IsSrcCMFMAFn = [DstReg, &IsMFMAFn, &HazardDefLatency, this]
+ (MachineInstr *MI) {
+ if (!IsMFMAFn(MI))
+ return false;
+ unsigned Reg = TII.getNamedOperand(*MI, AMDGPU::OpName::src2)->getReg();
+ HazardDefLatency = std::max(HazardDefLatency,
+ TSchedModel.computeInstrLatency(MI));
+ return TRI.regsOverlap(Reg, DstReg);
+ };
+
+ int WaitStatesSince = getWaitStatesSince(IsSrcCMFMAFn, MaxWaitStates);
+ int NeedWaitStates;
+ switch (HazardDefLatency) {
+ case 2: NeedWaitStates = MFMA4x4ReadSrcCAccVgprWriteWaitStates;
+ break;
+ case 8: NeedWaitStates = MFMA16x16ReadSrcCAccVgprWriteWaitStates;
+ break;
+ case 16: LLVM_FALLTHROUGH;
+ default: NeedWaitStates = MFMA32x32ReadSrcCAccVgprWriteWaitStates;
+ break;
+ }
+
+ int WaitStatesNeededForUse = NeedWaitStates - WaitStatesSince;
+ WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
+ }
+
+ return WaitStatesNeeded;
+}
+
+// Compute wait states required before a memory (load/store) instruction whose
+// VGPR address/data operands were recently produced by v_accvgpr_read, or by a
+// v_accvgpr_read that itself closely followed a VALU write of the same
+// register. Returns 0 on subtargets without MAI instructions.
+int GCNHazardRecognizer::checkMAILdStHazards(MachineInstr *MI) {
+ if (!ST.hasMAIInsts())
+ return 0;
+
+ int WaitStatesNeeded = 0;
+
+ // Matches the AGPR->VGPR move that can feed this load/store.
+ auto IsAccVgprReadFn = [] (MachineInstr *MI) {
+ return MI->getOpcode() == AMDGPU::V_ACCVGPR_READ_B32;
+ };
+
+ for (const MachineOperand &Op : MI->explicit_uses()) {
+ if (!Op.isReg() || !TRI.isVGPR(MF.getRegInfo(), Op.getReg()))
+ continue;
+
+ unsigned Reg = Op.getReg();
+
+ const int AccVgprReadLdStWaitStates = 2;
+ const int VALUWriteAccVgprReadLdStDepVALUWaitStates = 1;
+ const int MaxWaitStates = 2;
+
+ // Hazard: v_accvgpr_read wrote Reg within the last 2 wait states.
+ int WaitStatesNeededForUse = AccVgprReadLdStWaitStates -
+ getWaitStatesSinceDef(Reg, IsAccVgprReadFn, MaxWaitStates);
+ WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
+
+ if (WaitStatesNeeded == MaxWaitStates)
+ return WaitStatesNeeded; // Early exit.
+
+ // True for a v_accvgpr_read that was itself preceded (within 2 wait
+ // states) by a non-MAI VALU def of Reg -- a nested backwards scan.
+ auto IsVALUAccVgprReadCheckFn = [Reg, this] (MachineInstr *MI) {
+ if (MI->getOpcode() != AMDGPU::V_ACCVGPR_READ_B32)
+ return false;
+ auto IsVALUFn = [] (MachineInstr *MI) {
+ return SIInstrInfo::isVALU(*MI) && !SIInstrInfo::isMAI(*MI);
+ };
+ // getWaitStatesSinceDef returns int max when no such def was found.
+ return getWaitStatesSinceDef(Reg, IsVALUFn, 2 /*MaxWaitStates*/) <
+ std::numeric_limits<int>::max();
+ };
+
+ WaitStatesNeededForUse = VALUWriteAccVgprReadLdStDepVALUWaitStates -
+ getWaitStatesSince(IsVALUAccVgprReadCheckFn, MaxWaitStates);
+ WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
+ }
+
+ return WaitStatesNeeded;
+}
OpenPOWER on IntegriCloud