diff options
author | Tom Stellard <thomas.stellard@amd.com> | 2016-08-29 19:42:52 +0000 |
---|---|---|
committer | Tom Stellard <thomas.stellard@amd.com> | 2016-08-29 19:42:52 +0000 |
commit | 0d23ebe8883af4b280897af751614c7b433e00f7 (patch) | |
tree | 451e879c6008fb9fa90460d4fe9d22640f3ea01b /llvm/lib/Target | |
parent | 9b1669ae353e02c0fdbbe1ac181f18a453e98b35 (diff) | |
download | bcm5719-llvm-0d23ebe8883af4b280897af751614c7b433e00f7.tar.gz bcm5719-llvm-0d23ebe8883af4b280897af751614c7b433e00f7.zip |
AMDGPU/SI: Implement a custom MachineSchedStrategy
Summary:
GCNSchedStrategy re-uses most of GenericScheduler, it's just uses
a different method to compute the excess and critical register
pressure limits.
It's not enabled by default, to enable it you need to pass -misched=gcn
to llc.
Shader DB stats:
32464 shaders in 17874 tests
Totals:
SGPRS: 1542846 -> 1643125 (6.50 %)
VGPRS: 1005595 -> 904653 (-10.04 %)
Spilled SGPRs: 29929 -> 27745 (-7.30 %)
Spilled VGPRs: 334 -> 352 (5.39 %)
Scratch VGPRs: 1612 -> 1624 (0.74 %) dwords per thread
Code Size: 36688188 -> 37034900 (0.95 %) bytes
LDS: 1913 -> 1913 (0.00 %) blocks
Max Waves: 254101 -> 265125 (4.34 %)
Wait states: 0 -> 0 (0.00 %)
Totals from affected shaders:
SGPRS: 1338220 -> 1438499 (7.49 %)
VGPRS: 886221 -> 785279 (-11.39 %)
Spilled SGPRs: 29869 -> 27685 (-7.31 %)
Spilled VGPRs: 334 -> 352 (5.39 %)
Scratch VGPRs: 1612 -> 1624 (0.74 %) dwords per thread
Code Size: 34315716 -> 34662428 (1.01 %) bytes
LDS: 1551 -> 1551 (0.00 %) blocks
Max Waves: 188127 -> 199151 (5.86 %)
Wait states: 0 -> 0 (0.00 %)
Reviewers: arsenm, mareko, nhaehnle, MatzeB, atrick
Subscribers: arsenm, kzhuravl, llvm-commits
Differential Revision: https://reviews.llvm.org/D23688
llvm-svn: 279995
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp | 45 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h | 6 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 16 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/CMakeLists.txt | 1 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 312 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/GCNSchedStrategy.h | 54 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp | 6 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIRegisterInfo.h | 2 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SISchedule.td | 4 |
9 files changed, 445 insertions, 1 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp index 44f59c33125..03ac8cceea3 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -214,3 +214,48 @@ void SISubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy, bool SISubtarget::isVGPRSpillingEnabled(const Function& F) const { return EnableVGPRSpilling || !AMDGPU::isShader(F.getCallingConv()); } + +unsigned SISubtarget::getOccupancyWithNumSGPRs(unsigned SGPRs) const { + if (getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) { + if (SGPRs <= 80) + return 10; + if (SGPRs <= 88) + return 9; + if (SGPRs <= 100) + return 8; + return 7; + } + if (SGPRs <= 48) + return 10; + if (SGPRs <= 56) + return 9; + if (SGPRs <= 64) + return 8; + if (SGPRs <= 72) + return 7; + if (SGPRs <= 80) + return 6; + return 5; +} + +unsigned SISubtarget::getOccupancyWithNumVGPRs(unsigned VGPRs) const { + if (VGPRs <= 24) + return 10; + if (VGPRs <= 28) + return 9; + if (VGPRs <= 32) + return 8; + if (VGPRs <= 36) + return 7; + if (VGPRs <= 40) + return 6; + if (VGPRs <= 48) + return 5; + if (VGPRs <= 64) + return 4; + if (VGPRs <= 84) + return 3; + if (VGPRs <= 128) + return 2; + return 1; +} diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h index b15d359fab5..985b1ea7a2a 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -429,6 +429,12 @@ public: bool hasSGPRInitBug() const { return SGPRInitBug; } + + /// Return the maximum number of waves per SIMD for kernels using \p SGPRs SGPRs + unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const; + + /// Return the maximum number of waves per SIMD for kernels using \p VGPRs VGPRs + unsigned getOccupancyWithNumVGPRs(unsigned VGPRs) const; }; } // End namespace llvm diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index a86603a11ff..f144ce2eb33 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -18,6 +18,7 @@ #include "AMDGPUCallLowering.h" #include "AMDGPUTargetObjectFile.h" #include "AMDGPUTargetTransformInfo.h" +#include "GCNSchedStrategy.h" #include "R600ISelLowering.h" #include "R600InstrInfo.h" #include "R600MachineScheduler.h" @@ -96,6 +97,14 @@ static ScheduleDAGInstrs *createSIMachineScheduler(MachineSchedContext *C) { return new SIScheduleDAGMI(C); } +static ScheduleDAGInstrs * +createGCNMaxOccupancyMachineScheduler(MachineSchedContext *C) { + ScheduleDAGMILive *DAG = + new ScheduleDAGMILive(C, make_unique<GCNMaxOccupancySchedStrategy>(C)); + DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI)); + return DAG; +} + static MachineSchedRegistry R600SchedRegistry("r600", "Run R600's custom scheduler", createR600MachineScheduler); @@ -104,6 +113,11 @@ static MachineSchedRegistry SISchedRegistry("si", "Run SI's custom scheduler", createSIMachineScheduler); +static MachineSchedRegistry +GCNMaxOccupancySchedRegistry("gcn-max-occupancy", + "Run GCN scheduler to maximize occupancy", + createGCNMaxOccupancyMachineScheduler); + static StringRef computeDataLayout(const Triple &TT) { if (TT.getArch() == Triple::r600) { // 32-bit pointers. @@ -467,7 +481,7 @@ ScheduleDAGInstrs *GCNPassConfig::createMachineScheduler( const SISubtarget &ST = C->MF->getSubtarget<SISubtarget>(); if (ST.enableSIScheduler()) return createSIMachineScheduler(C); - return nullptr; + return createGCNMaxOccupancyMachineScheduler(C); } bool GCNPassConfig::addPreISel() { diff --git a/llvm/lib/Target/AMDGPU/CMakeLists.txt b/llvm/lib/Target/AMDGPU/CMakeLists.txt index c1ecf09df7a..e58e5b2f92d 100644 --- a/llvm/lib/Target/AMDGPU/CMakeLists.txt +++ b/llvm/lib/Target/AMDGPU/CMakeLists.txt @@ -49,6 +49,7 @@ add_llvm_target(AMDGPUCodeGen AMDGPUPromoteAlloca.cpp AMDGPURegisterInfo.cpp GCNHazardRecognizer.cpp + GCNSchedStrategy.cpp R600ClauseMergePass.cpp R600ControlFlowFinalizer.cpp R600EmitClauseMarkers.cpp diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp new file mode 100644 index 00000000000..62e9f526904 --- /dev/null +++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp @@ -0,0 +1,312 @@ +//===-- GCNSchedStrategy.cpp - GCN Scheduler Strategy ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file +/// This contains a MachineSchedStrategy implementation for maximizing wave +/// occupancy on GCN hardware. +//===----------------------------------------------------------------------===// + +#include "GCNSchedStrategy.h" +#include "AMDGPUSubtarget.h" +#include "SIInstrInfo.h" +#include "SIMachineFunctionInfo.h" +#include "SIRegisterInfo.h" +#include "llvm/CodeGen/RegisterClassInfo.h" + +#define DEBUG_TYPE "misched" + +using namespace llvm; + +GCNMaxOccupancySchedStrategy::GCNMaxOccupancySchedStrategy( + const MachineSchedContext *C) : + GenericScheduler(C) { } + +static unsigned getMaxWaves(unsigned SGPRs, unsigned VGPRs, + const MachineFunction &MF) { + + const SISubtarget &ST = MF.getSubtarget<SISubtarget>(); + const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); + unsigned MinRegOccupancy = std::min(ST.getOccupancyWithNumSGPRs(SGPRs), + ST.getOccupancyWithNumVGPRs(VGPRs)); + return std::min(MinRegOccupancy, + ST.getOccupancyWithLocalMemSize(MFI->getLDSSize())); +} + +void GCNMaxOccupancySchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU, + bool AtTop, const RegPressureTracker &RPTracker, + const SIRegisterInfo *SRI, + int SGPRPressure, + int VGPRPressure, + int SGPRExcessLimit, + int VGPRExcessLimit, + int SGPRCriticalLimit, + int VGPRCriticalLimit) { + + Cand.SU = SU; + Cand.AtTop = AtTop; + + // getDownwardPressure() and getUpwardPressure() make temporary changes to + // the the tracker, so we need to pass those function a non-const copy. + RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker); + + std::vector<unsigned> Pressure; + std::vector<unsigned> MaxPressure; + + if (AtTop) + TempTracker.getDownwardPressure(SU->getInstr(), Pressure, MaxPressure); + else { + // FIXME: I think for bottom up scheduling, the register pressure is cached + // and can be retrieved by DAG->getPressureDif(SU). + TempTracker.getUpwardPressure(SU->getInstr(), Pressure, MaxPressure); + } + + int NewSGPRPressure = Pressure[SRI->getSGPRPressureSet()]; + int NewVGPRPressure = Pressure[SRI->getVGPRPressureSet()]; + + // If two instructions increase the pressure of different register sets + // by the same amount, the generic scheduler will prefer to schedule the + // instruction that increases the set with the least amount of registers, + // which in our case would be SGPRs. This is rarely what we want, so + // when we report excess/critical register pressure, we do it either + // only for VGPRs or only for SGPRs. + + // FIXME: Better heuristics to determine whether to prefer SGPRs or VGPRs. + const int MaxVGPRPressureInc = 16; + bool ShouldTrackVGPRs = VGPRPressure + MaxVGPRPressureInc >= VGPRExcessLimit; + bool ShouldTrackSGPRs = !ShouldTrackVGPRs && SGPRPressure >= SGPRExcessLimit; + + + // FIXME: We have to enter REG-EXCESS before we reach the actual threshold + // to increase the likelihood we don't go over the limits. We should improve + // the analysis to look through dependencies to find the path with the least + // register pressure. + // FIXME: This is also necessary, because some passes that run after + // scheduling and before regalloc increase register pressure. + const int ErrorMargin = 3; + VGPRExcessLimit -= ErrorMargin; + SGPRExcessLimit -= ErrorMargin; + + // We only need to update the RPDelata for instructions that increase + // register pressure. Instructions that decrease or keep reg pressure + // the same will be marked as RegExcess in tryCandidate() when they + // are compared with instructions that increase the register pressure. + if (ShouldTrackVGPRs && NewVGPRPressure >= VGPRExcessLimit) { + Cand.RPDelta.Excess = PressureChange(SRI->getVGPRPressureSet()); + Cand.RPDelta.Excess.setUnitInc(NewVGPRPressure - VGPRExcessLimit); + } + + if (ShouldTrackSGPRs && NewSGPRPressure >= SGPRExcessLimit) { + Cand.RPDelta.Excess = PressureChange(SRI->getSGPRPressureSet()); + Cand.RPDelta.Excess.setUnitInc(NewSGPRPressure = SGPRExcessLimit); + } + + // Register pressure is considered 'CRITICAL' if it is approaching a value + // that would reduce the wave occupancy for the execution unit. When + // register pressure is 'CRITICAL', increading SGPR and VGPR pressure both + // has the same cost, so we don't need to prefer one over the other. + + VGPRCriticalLimit -= ErrorMargin; + SGPRCriticalLimit -= ErrorMargin; + + int SGPRDelta = NewSGPRPressure - SGPRCriticalLimit; + int VGPRDelta = NewVGPRPressure - VGPRCriticalLimit; + + if (SGPRDelta >= 0 || VGPRDelta >= 0) { + if (SGPRDelta > VGPRDelta) { + Cand.RPDelta.CriticalMax = PressureChange(SRI->getSGPRPressureSet()); + Cand.RPDelta.CriticalMax.setUnitInc(SGPRDelta); + } else { + Cand.RPDelta.CriticalMax = PressureChange(SRI->getVGPRPressureSet()); + Cand.RPDelta.CriticalMax.setUnitInc(VGPRDelta); + } + } +} + +// This function is mostly cut and pasted from +// GenericScheduler::pickNodeFromQueue() +void GCNMaxOccupancySchedStrategy::pickNodeFromQueue(SchedBoundary &Zone, + const CandPolicy &ZonePolicy, + const RegPressureTracker &RPTracker, + SchedCandidate &Cand) { + const SISubtarget &ST = DAG->MF.getSubtarget<SISubtarget>(); + const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo*>(TRI); + ArrayRef<unsigned> Pressure = RPTracker.getRegSetPressureAtPos(); + unsigned SGPRPressure = Pressure[SRI->getSGPRPressureSet()]; + unsigned VGPRPressure = Pressure[SRI->getVGPRPressureSet()]; + unsigned SGPRExcessLimit = + Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::SGPR_32RegClass); + unsigned VGPRExcessLimit = + Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::VGPR_32RegClass); + unsigned MaxWaves = getMaxWaves(SGPRPressure, VGPRPressure, DAG->MF); + unsigned SGPRCriticalLimit = SRI->getNumSGPRsAllowed(ST, MaxWaves); + unsigned VGPRCriticalLimit = SRI->getNumVGPRsAllowed(MaxWaves); + + ReadyQueue &Q = Zone.Available; + for (SUnit *SU : Q) { + + SchedCandidate TryCand(ZonePolicy); + initCandidate(TryCand, SU, Zone.isTop(), RPTracker, SRI, + SGPRPressure, VGPRPressure, + SGPRExcessLimit, VGPRExcessLimit, + SGPRCriticalLimit, VGPRCriticalLimit); + // Pass SchedBoundary only when comparing nodes from the same boundary. + SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr; + GenericScheduler::tryCandidate(Cand, TryCand, ZoneArg); + if (TryCand.Reason != NoCand) { + // Initialize resource delta if needed in case future heuristics query it. + if (TryCand.ResDelta == SchedResourceDelta()) + TryCand.initResourceDelta(Zone.DAG, SchedModel); + Cand.setBest(TryCand); + } + } +} + +static int getBidirectionalReasonRank(GenericSchedulerBase::CandReason Reason) { + switch (Reason) { + default: + return Reason; + case GenericSchedulerBase::RegCritical: + case GenericSchedulerBase::RegExcess: + return -Reason; + } +} + +// This function is mostly cut and pasted from +// GenericScheduler::pickNodeBidirectional() +SUnit *GCNMaxOccupancySchedStrategy::pickNodeBidirectional(bool &IsTopNode) { + // Schedule as far as possible in the direction of no choice. This is most + // efficient, but also provides the best heuristics for CriticalPSets. + if (SUnit *SU = Bot.pickOnlyChoice()) { + IsTopNode = false; + return SU; + } + if (SUnit *SU = Top.pickOnlyChoice()) { + IsTopNode = true; + return SU; + } + // Set the bottom-up policy based on the state of the current bottom zone and + // the instructions outside the zone, including the top zone. + CandPolicy BotPolicy; + setPolicy(BotPolicy, /*IsPostRA=*/false, Bot, &Top); + // Set the top-down policy based on the state of the current top zone and + // the instructions outside the zone, including the bottom zone. + CandPolicy TopPolicy; + setPolicy(TopPolicy, /*IsPostRA=*/false, Top, &Bot); + + // See if BotCand is still valid (because we previously scheduled from Top). + DEBUG(dbgs() << "Picking from Bot:\n"); + if (!BotCand.isValid() || BotCand.SU->isScheduled || + BotCand.Policy != BotPolicy) { + BotCand.reset(CandPolicy()); + pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand); + assert(BotCand.Reason != NoCand && "failed to find the first candidate"); + } else { + DEBUG(traceCandidate(BotCand)); + } + + // Check if the top Q has a better candidate. + DEBUG(dbgs() << "Picking from Top:\n"); + if (!TopCand.isValid() || TopCand.SU->isScheduled || + TopCand.Policy != TopPolicy) { + TopCand.reset(CandPolicy()); + pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand); + assert(TopCand.Reason != NoCand && "failed to find the first candidate"); + } else { + DEBUG(traceCandidate(TopCand)); + } + + // Pick best from BotCand and TopCand. + DEBUG( + dbgs() << "Top Cand: "; + traceCandidate(BotCand); + dbgs() << "Bot Cand: "; + traceCandidate(TopCand); + ); + SchedCandidate Cand; + if (TopCand.Reason == BotCand.Reason) { + Cand = BotCand; + GenericSchedulerBase::CandReason TopReason = TopCand.Reason; + TopCand.Reason = NoCand; + GenericScheduler::tryCandidate(Cand, TopCand, nullptr); + if (TopCand.Reason != NoCand) { + Cand.setBest(TopCand); + } else { + TopCand.Reason = TopReason; + } + } else { + if (TopCand.Reason == RegExcess && TopCand.RPDelta.Excess.getUnitInc() <= 0) { + Cand = TopCand; + } else if (BotCand.Reason == RegExcess && BotCand.RPDelta.Excess.getUnitInc() <= 0) { + Cand = BotCand; + } else if (TopCand.Reason == RegCritical && TopCand.RPDelta.CriticalMax.getUnitInc() <= 0) { + Cand = TopCand; + } else if (BotCand.Reason == RegCritical && BotCand.RPDelta.CriticalMax.getUnitInc() <= 0) { + Cand = BotCand; + } else { + int TopRank = getBidirectionalReasonRank(TopCand.Reason); + int BotRank = getBidirectionalReasonRank(BotCand.Reason); + if (TopRank > BotRank) { + Cand = TopCand; + } else { + Cand = BotCand; + } + } + } + DEBUG( + dbgs() << "Picking: "; + traceCandidate(Cand); + ); + + IsTopNode = Cand.AtTop; + return Cand.SU; +} + +// This function is mostly cut and pasted from +// GenericScheduler::pickNode() +SUnit *GCNMaxOccupancySchedStrategy::pickNode(bool &IsTopNode) { + if (DAG->top() == DAG->bottom()) { + assert(Top.Available.empty() && Top.Pending.empty() && + Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage"); + return nullptr; + } + SUnit *SU; + do { + if (RegionPolicy.OnlyTopDown) { + SU = Top.pickOnlyChoice(); + if (!SU) { + CandPolicy NoPolicy; + TopCand.reset(NoPolicy); + pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand); + assert(TopCand.Reason != NoCand && "failed to find a candidate"); + SU = TopCand.SU; + } + IsTopNode = true; + } else if (RegionPolicy.OnlyBottomUp) { + SU = Bot.pickOnlyChoice(); + if (!SU) { + CandPolicy NoPolicy; + BotCand.reset(NoPolicy); + pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand); + assert(BotCand.Reason != NoCand && "failed to find a candidate"); + SU = BotCand.SU; + } + IsTopNode = false; + } else { + SU = pickNodeBidirectional(IsTopNode); + } + } while (SU->isScheduled); + + if (SU->isTopReady()) + Top.removeReady(SU); + if (SU->isBottomReady()) + Bot.removeReady(SU); + + DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") " << *SU->getInstr()); + return SU; +} diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h new file mode 100644 index 00000000000..4cfc0cea81f --- /dev/null +++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h @@ -0,0 +1,54 @@ +//===-- GCNSchedStrategy.h - GCN Scheduler Strategy -*- C++ -*-------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_AMDGPU_GCNSCHEDSTRATEGY_H +#define LLVM_LIB_TARGET_AMDGPU_GCNSCHEDSTRATEGY_H + +#include "llvm/CodeGen/MachineScheduler.h" + +namespace llvm { + +class SIRegisterInfo; + +/// This is a minimal scheduler strategy. The main difference between this +/// and the GenericScheduler is that GCNSchedStrategy uses different +/// heuristics to determine excess/critical pressure sets. Its goal is to +/// maximize kernel occupancy (i.e. maximum number of waves per simd). +class GCNMaxOccupancySchedStrategy : public GenericScheduler { + + SUnit *pickNodeBidirectional(bool &IsTopNode); + + void pickNodeFromQueue(SchedBoundary &Zone, const CandPolicy &ZonePolicy, + const RegPressureTracker &RPTracker, + SchedCandidate &Cand); + + void initCandidate(SchedCandidate &Cand, SUnit *SU, + bool AtTop, const RegPressureTracker &RPTracker, + const SIRegisterInfo *SRI, + int SGPRPressure, int VGPRPressure, + int SGPRExcessLimit, int VGPRExcessLimit, + int SGPRCriticalLimit, int VGPRCriticalLimit); + + void tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, + SchedBoundary *Zone, const SIRegisterInfo *SRI, + unsigned SGPRPressure, unsigned VGPRPressure); + +public: + GCNMaxOccupancySchedStrategy(const MachineSchedContext *C); + + SUnit *pickNode(bool &IsTopNode) override; +}; + +} // End namespace llvm + +#endif // GCNSCHEDSTRATEGY_H diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp index 80fdd854fbd..7d84f7bec8c 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -249,6 +249,12 @@ unsigned SIRegisterInfo::getRegPressureSetLimit(const MachineFunction &MF, return VGPRLimit; } +unsigned +SIRegisterInfo::getDefaultRegPressureSetLimit(const MachineFunction &MF, + unsigned Idx) const { + return AMDGPURegisterInfo::getRegPressureSetLimit(MF, Idx); +} + bool SIRegisterInfo::requiresRegisterScavenging(const MachineFunction &Fn) const { return Fn.getFrameInfo().hasStackObjects(); } diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h index b0e852e6127..6e66c52da6c 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h @@ -51,6 +51,8 @@ public: unsigned getRegPressureSetLimit(const MachineFunction &MF, unsigned Idx) const override; + unsigned getDefaultRegPressureSetLimit(const MachineFunction &MF, + unsigned Idx) const; bool requiresRegisterScavenging(const MachineFunction &Fn) const override; diff --git a/llvm/lib/Target/AMDGPU/SISchedule.td b/llvm/lib/Target/AMDGPU/SISchedule.td index 0db92fc254f..be27966fd5f 100644 --- a/llvm/lib/Target/AMDGPU/SISchedule.td +++ b/llvm/lib/Target/AMDGPU/SISchedule.td @@ -47,6 +47,10 @@ def Write64Bit : SchedWrite; class SISchedMachineModel : SchedMachineModel { let CompleteModel = 1; + // MicroOpBufferSize = 1 means that instructions will always be added + // the ready queue when they become available. This exposes them + // to the register pressure analysis. + let MicroOpBufferSize = 1; let IssueWidth = 1; let PostRAScheduler = 1; } |