diff options
author | Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com> | 2017-02-15 17:19:50 +0000 |
---|---|---|
committer | Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com> | 2017-02-15 17:19:50 +0000 |
commit | 582a5237f95a3852cead5208f28a84b4cab0efb2 (patch) | |
tree | df76fef746ef0d85b80901079d46f88b680b5486 /llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | |
parent | 0a6913bc2f6213d109c4497de8c4a302f62222a8 (diff) | |
download | bcm5719-llvm-582a5237f95a3852cead5208f28a84b4cab0efb2.tar.gz bcm5719-llvm-582a5237f95a3852cead5208f28a84b4cab0efb2.zip |
[AMDGPU] Revert failed scheduling
This patch reverts region's scheduling to the original untouched state
in case if we have have decreased occupancy.
In addition it switches to use TargetRegisterInfo occupancy callback
for pressure limits instead of gradually increasing limits which were
just passed by. We are going to stay with the best schedule so we do
not need to tolerate worsened scheduling anymore.
Differential Revision: https://reviews.llvm.org/D29971
llvm-svn: 295206
Diffstat (limited to 'llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp')
-rw-r--r-- | llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 117 |
1 files changed, 88 insertions, 29 deletions
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp index 6fd96b1a33c..91034030f6e 100644 --- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp +++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp @@ -39,15 +39,30 @@ static unsigned getMaxWaves(unsigned SGPRs, unsigned VGPRs, *MF.getFunction())); } +void GCNMaxOccupancySchedStrategy::initialize(ScheduleDAGMI *DAG) { + GenericScheduler::initialize(DAG); + + const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo*>(TRI); + + // FIXME: This is also necessary, because some passes that run after + // scheduling and before regalloc increase register pressure. + const int ErrorMargin = 3; + + SGPRExcessLimit = Context->RegClassInfo + ->getNumAllocatableRegs(&AMDGPU::SGPR_32RegClass) - ErrorMargin; + VGPRExcessLimit = Context->RegClassInfo + ->getNumAllocatableRegs(&AMDGPU::VGPR_32RegClass) - ErrorMargin; + SGPRCriticalLimit = SRI->getRegPressureSetLimit(DAG->MF, + SRI->getSGPRPressureSet()) - ErrorMargin; + VGPRCriticalLimit = SRI->getRegPressureSetLimit(DAG->MF, + SRI->getVGPRPressureSet()) - ErrorMargin; +} + void GCNMaxOccupancySchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU, bool AtTop, const RegPressureTracker &RPTracker, const SIRegisterInfo *SRI, - int SGPRPressure, - int VGPRPressure, - int SGPRExcessLimit, - int VGPRExcessLimit, - int SGPRCriticalLimit, - int VGPRCriticalLimit) { + unsigned SGPRPressure, + unsigned VGPRPressure) { Cand.SU = SU; Cand.AtTop = AtTop; @@ -67,8 +82,8 @@ void GCNMaxOccupancySchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU TempTracker.getUpwardPressure(SU->getInstr(), Pressure, MaxPressure); } - int NewSGPRPressure = Pressure[SRI->getSGPRPressureSet()]; - int NewVGPRPressure = Pressure[SRI->getVGPRPressureSet()]; + unsigned NewSGPRPressure = Pressure[SRI->getSGPRPressureSet()]; + unsigned NewVGPRPressure = Pressure[SRI->getVGPRPressureSet()]; // If two instructions increase the pressure of different register sets // by the same amount, the generic scheduler will prefer to schedule the @@ -78,7 +93,7 @@ void GCNMaxOccupancySchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU // only for VGPRs or only for SGPRs. // FIXME: Better heuristics to determine whether to prefer SGPRs or VGPRs. - const int MaxVGPRPressureInc = 16; + const unsigned MaxVGPRPressureInc = 16; bool ShouldTrackVGPRs = VGPRPressure + MaxVGPRPressureInc >= VGPRExcessLimit; bool ShouldTrackSGPRs = !ShouldTrackVGPRs && SGPRPressure >= SGPRExcessLimit; @@ -87,11 +102,6 @@ void GCNMaxOccupancySchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU // to increase the likelihood we don't go over the limits. We should improve // the analysis to look through dependencies to find the path with the least // register pressure. - // FIXME: This is also necessary, because some passes that run after - // scheduling and before regalloc increase register pressure. - const int ErrorMargin = 3; - VGPRExcessLimit -= ErrorMargin; - SGPRExcessLimit -= ErrorMargin; // We only need to update the RPDelata for instructions that increase // register pressure. Instructions that decrease or keep reg pressure @@ -112,9 +122,6 @@ void GCNMaxOccupancySchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU // register pressure is 'CRITICAL', increading SGPR and VGPR pressure both // has the same cost, so we don't need to prefer one over the other. - VGPRCriticalLimit -= ErrorMargin; - SGPRCriticalLimit -= ErrorMargin; - int SGPRDelta = NewSGPRPressure - SGPRCriticalLimit; int VGPRDelta = NewVGPRPressure - VGPRCriticalLimit; @@ -135,27 +142,16 @@ void GCNMaxOccupancySchedStrategy::pickNodeFromQueue(SchedBoundary &Zone, const CandPolicy &ZonePolicy, const RegPressureTracker &RPTracker, SchedCandidate &Cand) { - const SISubtarget &ST = DAG->MF.getSubtarget<SISubtarget>(); const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo*>(TRI); ArrayRef<unsigned> Pressure = RPTracker.getRegSetPressureAtPos(); unsigned SGPRPressure = Pressure[SRI->getSGPRPressureSet()]; unsigned VGPRPressure = Pressure[SRI->getVGPRPressureSet()]; - unsigned SGPRExcessLimit = - Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::SGPR_32RegClass); - unsigned VGPRExcessLimit = - Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::VGPR_32RegClass); - unsigned MaxWaves = getMaxWaves(SGPRPressure, VGPRPressure, DAG->MF); - unsigned SGPRCriticalLimit = ST.getMaxNumSGPRs(MaxWaves, true); - unsigned VGPRCriticalLimit = ST.getMaxNumVGPRs(MaxWaves); - ReadyQueue &Q = Zone.Available; for (SUnit *SU : Q) { SchedCandidate TryCand(ZonePolicy); initCandidate(TryCand, SU, Zone.isTop(), RPTracker, SRI, - SGPRPressure, VGPRPressure, - SGPRExcessLimit, VGPRExcessLimit, - SGPRCriticalLimit, VGPRCriticalLimit); + SGPRPressure, VGPRPressure); // Pass SchedBoundary only when comparing nodes from the same boundary. SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr; GenericScheduler::tryCandidate(Cand, TryCand, ZoneArg); @@ -311,3 +307,66 @@ SUnit *GCNMaxOccupancySchedStrategy::pickNode(bool &IsTopNode) { DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") " << *SU->getInstr()); return SU; } + +void GCNScheduleDAGMILive::schedule() { + const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo*>(TRI); + + std::vector<MachineInstr*> Unsched; + Unsched.reserve(NumRegionInstrs); + for (auto &I : *this) + Unsched.push_back(&I); + + ScheduleDAGMILive::schedule(); + + // Check the results of scheduling. + GCNMaxOccupancySchedStrategy &S = (GCNMaxOccupancySchedStrategy&)*SchedImpl; + std::vector<unsigned> UnschedPressure = getRegPressure().MaxSetPressure; + unsigned MaxSGPRs = std::max( + getTopRPTracker().getPressure().MaxSetPressure[SRI->getSGPRPressureSet()], + getBotRPTracker().getPressure().MaxSetPressure[SRI->getSGPRPressureSet()]); + unsigned MaxVGPRs = std::max( + getTopRPTracker().getPressure().MaxSetPressure[SRI->getVGPRPressureSet()], + getBotRPTracker().getPressure().MaxSetPressure[SRI->getVGPRPressureSet()]); + DEBUG(dbgs() << "Pressure after scheduling:\nSGPR = " << MaxSGPRs + << "\nVGPR = " << MaxVGPRs << '\n'); + if (MaxSGPRs <= S.SGPRCriticalLimit && + MaxVGPRs <= S.VGPRCriticalLimit) { + DEBUG(dbgs() << "Pressure in desired limits, done.\n"); + return; + } + unsigned WavesAfter = getMaxWaves(MaxSGPRs, MaxVGPRs, MF); + unsigned WavesUnsched = getMaxWaves(UnschedPressure[SRI->getSGPRPressureSet()], + UnschedPressure[SRI->getVGPRPressureSet()], MF); + DEBUG(dbgs() << "Occupancy before scheduling: " << WavesUnsched << + ", after " << WavesAfter << ".\n"); + if (WavesAfter >= WavesUnsched) + return; + + DEBUG(dbgs() << "Attempting to revert scheduling.\n"); + RegionEnd = RegionBegin; + for (MachineInstr *MI : Unsched) { + if (MI->getIterator() != RegionEnd) { + BB->remove(MI); + BB->insert(RegionEnd, MI); + if (LIS) { + LIS->handleMove(*MI, true); + RegisterOperands RegOpers; + RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false); + if (ShouldTrackLaneMasks) { + // Adjust liveness and add missing dead+read-undef flags. + SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot(); + RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI); + } else { + // Adjust for missing dead-def flags. + RegOpers.detectDeadDefs(*MI, *LIS); + } + } + } + RegionEnd = MI->getIterator(); + ++RegionEnd; + DEBUG(dbgs() << "Scheduling " << *MI); + } + RegionBegin = Unsched.front()->getIterator(); + + placeDebugValues(); +} |