summary refs log tree commit diff stats
path: root/llvm/lib/Target
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp 3
-rw-r--r-- llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp 117
-rw-r--r-- llvm/lib/Target/AMDGPU/GCNSchedStrategy.h 23
3 files changed, 106 insertions, 37 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 1847d177137..497bf6b54ad 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -136,8 +136,7 @@ static ScheduleDAGInstrs *createSIMachineScheduler(MachineSchedContext *C) {
static ScheduleDAGInstrs *
createGCNMaxOccupancyMachineScheduler(MachineSchedContext *C) {
ScheduleDAGMILive *DAG =
- new ScheduleDAGMILive(C,
- llvm::make_unique<GCNMaxOccupancySchedStrategy>(C));
+ new GCNScheduleDAGMILive(C, make_unique<GCNMaxOccupancySchedStrategy>(C));
DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
return DAG;
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index 6fd96b1a33c..91034030f6e 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -39,15 +39,30 @@ static unsigned getMaxWaves(unsigned SGPRs, unsigned VGPRs,
*MF.getFunction()));
}
+void GCNMaxOccupancySchedStrategy::initialize(ScheduleDAGMI *DAG) {
+ GenericScheduler::initialize(DAG);
+
+ const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo*>(TRI);
+
+ // FIXME: The error margin below is necessary because some passes that run
+ // after scheduling and before regalloc increase register pressure.
+ const int ErrorMargin = 3;
+
+ SGPRExcessLimit = Context->RegClassInfo
+ ->getNumAllocatableRegs(&AMDGPU::SGPR_32RegClass) - ErrorMargin;
+ VGPRExcessLimit = Context->RegClassInfo
+ ->getNumAllocatableRegs(&AMDGPU::VGPR_32RegClass) - ErrorMargin;
+ SGPRCriticalLimit = SRI->getRegPressureSetLimit(DAG->MF,
+ SRI->getSGPRPressureSet()) - ErrorMargin;
+ VGPRCriticalLimit = SRI->getRegPressureSetLimit(DAG->MF,
+ SRI->getVGPRPressureSet()) - ErrorMargin;
+}
+
void GCNMaxOccupancySchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
bool AtTop, const RegPressureTracker &RPTracker,
const SIRegisterInfo *SRI,
- int SGPRPressure,
- int VGPRPressure,
- int SGPRExcessLimit,
- int VGPRExcessLimit,
- int SGPRCriticalLimit,
- int VGPRCriticalLimit) {
+ unsigned SGPRPressure,
+ unsigned VGPRPressure) {
Cand.SU = SU;
Cand.AtTop = AtTop;
@@ -67,8 +82,8 @@ void GCNMaxOccupancySchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU
TempTracker.getUpwardPressure(SU->getInstr(), Pressure, MaxPressure);
}
- int NewSGPRPressure = Pressure[SRI->getSGPRPressureSet()];
- int NewVGPRPressure = Pressure[SRI->getVGPRPressureSet()];
+ unsigned NewSGPRPressure = Pressure[SRI->getSGPRPressureSet()];
+ unsigned NewVGPRPressure = Pressure[SRI->getVGPRPressureSet()];
// If two instructions increase the pressure of different register sets
// by the same amount, the generic scheduler will prefer to schedule the
@@ -78,7 +93,7 @@ void GCNMaxOccupancySchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU
// only for VGPRs or only for SGPRs.
// FIXME: Better heuristics to determine whether to prefer SGPRs or VGPRs.
- const int MaxVGPRPressureInc = 16;
+ const unsigned MaxVGPRPressureInc = 16;
bool ShouldTrackVGPRs = VGPRPressure + MaxVGPRPressureInc >= VGPRExcessLimit;
bool ShouldTrackSGPRs = !ShouldTrackVGPRs && SGPRPressure >= SGPRExcessLimit;
@@ -87,11 +102,6 @@ void GCNMaxOccupancySchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU
// to increase the likelihood we don't go over the limits. We should improve
// the analysis to look through dependencies to find the path with the least
// register pressure.
- // FIXME: This is also necessary, because some passes that run after
- // scheduling and before regalloc increase register pressure.
- const int ErrorMargin = 3;
- VGPRExcessLimit -= ErrorMargin;
- SGPRExcessLimit -= ErrorMargin;
// We only need to update the RPDelata for instructions that increase
// register pressure. Instructions that decrease or keep reg pressure
@@ -112,9 +122,6 @@ void GCNMaxOccupancySchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU
// register pressure is 'CRITICAL', increading SGPR and VGPR pressure both
// has the same cost, so we don't need to prefer one over the other.
- VGPRCriticalLimit -= ErrorMargin;
- SGPRCriticalLimit -= ErrorMargin;
-
int SGPRDelta = NewSGPRPressure - SGPRCriticalLimit;
int VGPRDelta = NewVGPRPressure - VGPRCriticalLimit;
@@ -135,27 +142,16 @@ void GCNMaxOccupancySchedStrategy::pickNodeFromQueue(SchedBoundary &Zone,
const CandPolicy &ZonePolicy,
const RegPressureTracker &RPTracker,
SchedCandidate &Cand) {
- const SISubtarget &ST = DAG->MF.getSubtarget<SISubtarget>();
const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo*>(TRI);
ArrayRef<unsigned> Pressure = RPTracker.getRegSetPressureAtPos();
unsigned SGPRPressure = Pressure[SRI->getSGPRPressureSet()];
unsigned VGPRPressure = Pressure[SRI->getVGPRPressureSet()];
- unsigned SGPRExcessLimit =
- Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::SGPR_32RegClass);
- unsigned VGPRExcessLimit =
- Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::VGPR_32RegClass);
- unsigned MaxWaves = getMaxWaves(SGPRPressure, VGPRPressure, DAG->MF);
- unsigned SGPRCriticalLimit = ST.getMaxNumSGPRs(MaxWaves, true);
- unsigned VGPRCriticalLimit = ST.getMaxNumVGPRs(MaxWaves);
-
ReadyQueue &Q = Zone.Available;
for (SUnit *SU : Q) {
SchedCandidate TryCand(ZonePolicy);
initCandidate(TryCand, SU, Zone.isTop(), RPTracker, SRI,
- SGPRPressure, VGPRPressure,
- SGPRExcessLimit, VGPRExcessLimit,
- SGPRCriticalLimit, VGPRCriticalLimit);
+ SGPRPressure, VGPRPressure);
// Pass SchedBoundary only when comparing nodes from the same boundary.
SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr;
GenericScheduler::tryCandidate(Cand, TryCand, ZoneArg);
@@ -311,3 +307,66 @@ SUnit *GCNMaxOccupancySchedStrategy::pickNode(bool &IsTopNode) {
DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") " << *SU->getInstr());
return SU;
}
+
+void GCNScheduleDAGMILive::schedule() {
+ const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo*>(TRI);
+
+ std::vector<MachineInstr*> Unsched;
+ Unsched.reserve(NumRegionInstrs);
+ for (auto &I : *this)
+ Unsched.push_back(&I);
+
+ ScheduleDAGMILive::schedule();
+
+ // Check the results of scheduling.
+ GCNMaxOccupancySchedStrategy &S = (GCNMaxOccupancySchedStrategy&)*SchedImpl;
+ std::vector<unsigned> UnschedPressure = getRegPressure().MaxSetPressure;
+ unsigned MaxSGPRs = std::max(
+ getTopRPTracker().getPressure().MaxSetPressure[SRI->getSGPRPressureSet()],
+ getBotRPTracker().getPressure().MaxSetPressure[SRI->getSGPRPressureSet()]);
+ unsigned MaxVGPRs = std::max(
+ getTopRPTracker().getPressure().MaxSetPressure[SRI->getVGPRPressureSet()],
+ getBotRPTracker().getPressure().MaxSetPressure[SRI->getVGPRPressureSet()]);
+ DEBUG(dbgs() << "Pressure after scheduling:\nSGPR = " << MaxSGPRs
+ << "\nVGPR = " << MaxVGPRs << '\n');
+ if (MaxSGPRs <= S.SGPRCriticalLimit &&
+ MaxVGPRs <= S.VGPRCriticalLimit) {
+ DEBUG(dbgs() << "Pressure in desired limits, done.\n");
+ return;
+ }
+ unsigned WavesAfter = getMaxWaves(MaxSGPRs, MaxVGPRs, MF);
+ unsigned WavesUnsched = getMaxWaves(UnschedPressure[SRI->getSGPRPressureSet()],
+ UnschedPressure[SRI->getVGPRPressureSet()], MF);
+ DEBUG(dbgs() << "Occupancy before scheduling: " << WavesUnsched <<
+ ", after " << WavesAfter << ".\n");
+ if (WavesAfter >= WavesUnsched)
+ return;
+
+ DEBUG(dbgs() << "Attempting to revert scheduling.\n");
+ RegionEnd = RegionBegin;
+ for (MachineInstr *MI : Unsched) {
+ if (MI->getIterator() != RegionEnd) {
+ BB->remove(MI);
+ BB->insert(RegionEnd, MI);
+ if (LIS) {
+ LIS->handleMove(*MI, true);
+ RegisterOperands RegOpers;
+ RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false);
+ if (ShouldTrackLaneMasks) {
+ // Adjust liveness and add missing dead+read-undef flags.
+ SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
+ RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
+ } else {
+ // Adjust for missing dead-def flags.
+ RegOpers.detectDeadDefs(*MI, *LIS);
+ }
+ }
+ }
+ RegionEnd = MI->getIterator();
+ ++RegionEnd;
+ DEBUG(dbgs() << "Scheduling " << *MI);
+ }
+ RegionBegin = Unsched.front()->getIterator();
+
+ placeDebugValues();
+}
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
index 4cfc0cea81f..a0068f55d2d 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
@@ -25,6 +25,7 @@ class SIRegisterInfo;
/// heuristics to determine excess/critical pressure sets. Its goal is to
/// maximize kernel occupancy (i.e. maximum number of waves per simd).
class GCNMaxOccupancySchedStrategy : public GenericScheduler {
+ friend class GCNScheduleDAGMILive;
SUnit *pickNodeBidirectional(bool &IsTopNode);
@@ -35,18 +36,28 @@ class GCNMaxOccupancySchedStrategy : public GenericScheduler {
void initCandidate(SchedCandidate &Cand, SUnit *SU,
bool AtTop, const RegPressureTracker &RPTracker,
const SIRegisterInfo *SRI,
- int SGPRPressure, int VGPRPressure,
- int SGPRExcessLimit, int VGPRExcessLimit,
- int SGPRCriticalLimit, int VGPRCriticalLimit);
+ unsigned SGPRPressure, unsigned VGPRPressure);
- void tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand,
- SchedBoundary *Zone, const SIRegisterInfo *SRI,
- unsigned SGPRPressure, unsigned VGPRPressure);
+ unsigned SGPRExcessLimit;
+ unsigned VGPRExcessLimit;
+ unsigned SGPRCriticalLimit;
+ unsigned VGPRCriticalLimit;
public:
GCNMaxOccupancySchedStrategy(const MachineSchedContext *C);
SUnit *pickNode(bool &IsTopNode) override;
+
+ void initialize(ScheduleDAGMI *DAG) override;
+};
+
+class GCNScheduleDAGMILive : public ScheduleDAGMILive {
+public:
+ GCNScheduleDAGMILive(MachineSchedContext *C,
+ std::unique_ptr<MachineSchedStrategy> S) :
+ ScheduleDAGMILive(C, std::move(S)) {}
+
+ void schedule() override;
};
} // End namespace llvm
OpenPOWER on IntegriCloud