summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp45
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h6
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp16
-rw-r--r--llvm/lib/Target/AMDGPU/CMakeLists.txt1
-rw-r--r--llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp312
-rw-r--r--llvm/lib/Target/AMDGPU/GCNSchedStrategy.h54
-rw-r--r--llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp6
-rw-r--r--llvm/lib/Target/AMDGPU/SIRegisterInfo.h2
-rw-r--r--llvm/lib/Target/AMDGPU/SISchedule.td4
9 files changed, 445 insertions, 1 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index 44f59c33125..03ac8cceea3 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -214,3 +214,48 @@ void SISubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
bool SISubtarget::isVGPRSpillingEnabled(const Function& F) const {
return EnableVGPRSpilling || !AMDGPU::isShader(F.getCallingConv());
}
+
+unsigned SISubtarget::getOccupancyWithNumSGPRs(unsigned SGPRs) const { // Maps an SGPR count to an occupancy value via fixed, inclusive thresholds.
+ if (getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) { // VOLCANIC_ISLANDS and later generations use this table.
+ if (SGPRs <= 80)
+ return 10;
+ if (SGPRs <= 88)
+ return 9;
+ if (SGPRs <= 100)
+ return 8;
+ return 7; // More than 100 SGPRs.
+ } // Earlier generations use the table below.
+ if (SGPRs <= 48)
+ return 10;
+ if (SGPRs <= 56)
+ return 9;
+ if (SGPRs <= 64)
+ return 8;
+ if (SGPRs <= 72)
+ return 7;
+ if (SGPRs <= 80)
+ return 6;
+ return 5; // More than 80 SGPRs.
+}
+
+unsigned SISubtarget::getOccupancyWithNumVGPRs(unsigned VGPRs) const { // Maps a VGPR count to an occupancy value via fixed, inclusive thresholds.
+ if (VGPRs <= 24)
+ return 10;
+ if (VGPRs <= 28)
+ return 9;
+ if (VGPRs <= 32)
+ return 8;
+ if (VGPRs <= 36)
+ return 7;
+ if (VGPRs <= 40)
+ return 6;
+ if (VGPRs <= 48)
+ return 5;
+ if (VGPRs <= 64)
+ return 4;
+ if (VGPRs <= 84)
+ return 3;
+ if (VGPRs <= 128)
+ return 2;
+ return 1; // More than 128 VGPRs.
+}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
index b15d359fab5..985b1ea7a2a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -429,6 +429,12 @@ public:
bool hasSGPRInitBug() const {
return SGPRInitBug;
}
+
+ /// Return the maximum number of waves per SIMD for kernels using \p SGPRs SGPRs
+ unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const;
+
+ /// Return the maximum number of waves per SIMD for kernels using \p VGPRs VGPRs
+ unsigned getOccupancyWithNumVGPRs(unsigned VGPRs) const;
};
} // End namespace llvm
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index a86603a11ff..f144ce2eb33 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -18,6 +18,7 @@
#include "AMDGPUCallLowering.h"
#include "AMDGPUTargetObjectFile.h"
#include "AMDGPUTargetTransformInfo.h"
+#include "GCNSchedStrategy.h"
#include "R600ISelLowering.h"
#include "R600InstrInfo.h"
#include "R600MachineScheduler.h"
@@ -96,6 +97,14 @@ static ScheduleDAGInstrs *createSIMachineScheduler(MachineSchedContext *C) {
return new SIScheduleDAGMI(C);
}
+static ScheduleDAGInstrs *
+createGCNMaxOccupancyMachineScheduler(MachineSchedContext *C) { // Factory for the GCN max-occupancy scheduler.
+ ScheduleDAGMILive *DAG =
+ new ScheduleDAGMILive(C, make_unique<GCNMaxOccupancySchedStrategy>(C)); // The DAG is driven by GCNMaxOccupancySchedStrategy.
+ DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI)); // Adds the load-clustering DAG mutation.
+ return DAG; // NOTE(review): raw pointer returned; presumably the caller takes ownership - confirm.
+}
+
static MachineSchedRegistry
R600SchedRegistry("r600", "Run R600's custom scheduler",
createR600MachineScheduler);
@@ -104,6 +113,11 @@ static MachineSchedRegistry
SISchedRegistry("si", "Run SI's custom scheduler",
createSIMachineScheduler);
+static MachineSchedRegistry
+GCNMaxOccupancySchedRegistry("gcn-max-occupancy",
+ "Run GCN scheduler to maximize occupancy",
+ createGCNMaxOccupancyMachineScheduler); // Registers the factory above under the name "gcn-max-occupancy".
+
static StringRef computeDataLayout(const Triple &TT) {
if (TT.getArch() == Triple::r600) {
// 32-bit pointers.
@@ -467,7 +481,7 @@ ScheduleDAGInstrs *GCNPassConfig::createMachineScheduler(
const SISubtarget &ST = C->MF->getSubtarget<SISubtarget>();
if (ST.enableSIScheduler())
return createSIMachineScheduler(C);
- return nullptr;
+ return createGCNMaxOccupancyMachineScheduler(C);
}
bool GCNPassConfig::addPreISel() {
diff --git a/llvm/lib/Target/AMDGPU/CMakeLists.txt b/llvm/lib/Target/AMDGPU/CMakeLists.txt
index c1ecf09df7a..e58e5b2f92d 100644
--- a/llvm/lib/Target/AMDGPU/CMakeLists.txt
+++ b/llvm/lib/Target/AMDGPU/CMakeLists.txt
@@ -49,6 +49,7 @@ add_llvm_target(AMDGPUCodeGen
AMDGPUPromoteAlloca.cpp
AMDGPURegisterInfo.cpp
GCNHazardRecognizer.cpp
+ GCNSchedStrategy.cpp
R600ClauseMergePass.cpp
R600ControlFlowFinalizer.cpp
R600EmitClauseMarkers.cpp
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
new file mode 100644
index 00000000000..62e9f526904
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -0,0 +1,312 @@
+//===-- GCNSchedStrategy.cpp - GCN Scheduler Strategy ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This contains a MachineSchedStrategy implementation for maximizing wave
+/// occupancy on GCN hardware.
+//===----------------------------------------------------------------------===//
+
+#include "GCNSchedStrategy.h"
+#include "AMDGPUSubtarget.h"
+#include "SIInstrInfo.h"
+#include "SIMachineFunctionInfo.h"
+#include "SIRegisterInfo.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
+
+#define DEBUG_TYPE "misched"
+
+using namespace llvm;
+
+GCNMaxOccupancySchedStrategy::GCNMaxOccupancySchedStrategy(
+ const MachineSchedContext *C) :
+ GenericScheduler(C) { } // Only forwards the context to the GenericScheduler base.
+
+static unsigned getMaxWaves(unsigned SGPRs, unsigned VGPRs,
+ const MachineFunction &MF) {
+ // Occupancy is bounded by SGPR use, VGPR use and LDS size; take the minimum.
+ const SISubtarget &Subtarget = MF.getSubtarget<SISubtarget>();
+ const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
+ const unsigned SGPRWaves = Subtarget.getOccupancyWithNumSGPRs(SGPRs);
+ const unsigned VGPRWaves = Subtarget.getOccupancyWithNumVGPRs(VGPRs);
+ const unsigned LDSWaves =
+ Subtarget.getOccupancyWithLocalMemSize(FuncInfo->getLDSSize());
+ return std::min(std::min(SGPRWaves, VGPRWaves), LDSWaves);
+}
+
+void GCNMaxOccupancySchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
+ bool AtTop, const RegPressureTracker &RPTracker,
+ const SIRegisterInfo *SRI,
+ int SGPRPressure,
+ int VGPRPressure,
+ int SGPRExcessLimit,
+ int VGPRExcessLimit,
+ int SGPRCriticalLimit,
+ int VGPRCriticalLimit) {
+ // Fills Cand (SU, AtTop, RPDelta.Excess, RPDelta.CriticalMax) for one SUnit.
+ Cand.SU = SU;
+ Cand.AtTop = AtTop;
+
+ // getDownwardPressure() and getUpwardPressure() make temporary changes to
+ // the tracker, so we have to call them through a non-const reference.
+ RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);
+
+ std::vector<unsigned> Pressure;
+ std::vector<unsigned> MaxPressure;
+
+ if (AtTop)
+ TempTracker.getDownwardPressure(SU->getInstr(), Pressure, MaxPressure);
+ else {
+ // FIXME: I think for bottom up scheduling, the register pressure is cached
+ // and can be retrieved by DAG->getPressureDif(SU).
+ TempTracker.getUpwardPressure(SU->getInstr(), Pressure, MaxPressure);
+ }
+
+ int NewSGPRPressure = Pressure[SRI->getSGPRPressureSet()];
+ int NewVGPRPressure = Pressure[SRI->getVGPRPressureSet()];
+
+ // If two instructions increase the pressure of different register sets
+ // by the same amount, the generic scheduler will prefer to schedule the
+ // instruction that increases the set with the least amount of registers,
+ // which in our case would be SGPRs. This is rarely what we want, so
+ // when we report excess/critical register pressure, we do it either
+ // only for VGPRs or only for SGPRs.
+
+ // FIXME: Better heuristics to determine whether to prefer SGPRs or VGPRs.
+ const int MaxVGPRPressureInc = 16;
+ bool ShouldTrackVGPRs = VGPRPressure + MaxVGPRPressureInc >= VGPRExcessLimit;
+ bool ShouldTrackSGPRs = !ShouldTrackVGPRs && SGPRPressure >= SGPRExcessLimit;
+
+
+ // FIXME: We have to enter REG-EXCESS before we reach the actual threshold
+ // to increase the likelihood we don't go over the limits. We should improve
+ // the analysis to look through dependencies to find the path with the least
+ // register pressure.
+ // FIXME: This is also necessary, because some passes that run after
+ // scheduling and before regalloc increase register pressure.
+ const int ErrorMargin = 3;
+ VGPRExcessLimit -= ErrorMargin;
+ SGPRExcessLimit -= ErrorMargin;
+
+ // We only need to update the RPDelta for instructions that increase
+ // register pressure. Instructions that decrease or keep reg pressure
+ // the same will be marked as RegExcess in tryCandidate() when they
+ // are compared with instructions that increase the register pressure.
+ if (ShouldTrackVGPRs && NewVGPRPressure >= VGPRExcessLimit) {
+ Cand.RPDelta.Excess = PressureChange(SRI->getVGPRPressureSet());
+ Cand.RPDelta.Excess.setUnitInc(NewVGPRPressure - VGPRExcessLimit);
+ }
+
+ if (ShouldTrackSGPRs && NewSGPRPressure >= SGPRExcessLimit) {
+ Cand.RPDelta.Excess = PressureChange(SRI->getSGPRPressureSet());
+ Cand.RPDelta.Excess.setUnitInc(NewSGPRPressure - SGPRExcessLimit);
+ }
+
+ // Register pressure is considered 'CRITICAL' if it is approaching a value
+ // that would reduce the wave occupancy for the execution unit. When
+ // register pressure is 'CRITICAL', increasing SGPR or VGPR pressure has
+ // the same cost, so we don't need to prefer one over the other.
+
+ VGPRCriticalLimit -= ErrorMargin;
+ SGPRCriticalLimit -= ErrorMargin;
+
+ int SGPRDelta = NewSGPRPressure - SGPRCriticalLimit;
+ int VGPRDelta = NewVGPRPressure - VGPRCriticalLimit;
+
+ if (SGPRDelta >= 0 || VGPRDelta >= 0) {
+ if (SGPRDelta > VGPRDelta) {
+ Cand.RPDelta.CriticalMax = PressureChange(SRI->getSGPRPressureSet());
+ Cand.RPDelta.CriticalMax.setUnitInc(SGPRDelta);
+ } else {
+ Cand.RPDelta.CriticalMax = PressureChange(SRI->getVGPRPressureSet());
+ Cand.RPDelta.CriticalMax.setUnitInc(VGPRDelta);
+ }
+ }
+}
+
+// This function is mostly cut and pasted from
+// GenericScheduler::pickNodeFromQueue()
+void GCNMaxOccupancySchedStrategy::pickNodeFromQueue(SchedBoundary &Zone,
+ const CandPolicy &ZonePolicy,
+ const RegPressureTracker &RPTracker,
+ SchedCandidate &Cand) {
+ const SISubtarget &ST = DAG->MF.getSubtarget<SISubtarget>();
+ const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo*>(TRI);
+ ArrayRef<unsigned> Pressure = RPTracker.getRegSetPressureAtPos(); // Per-set pressure reported by the tracker.
+ unsigned SGPRPressure = Pressure[SRI->getSGPRPressureSet()];
+ unsigned VGPRPressure = Pressure[SRI->getVGPRPressureSet()];
+ unsigned SGPRExcessLimit =
+ Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::SGPR_32RegClass); // Excess limit: allocatable SGPR_32 registers.
+ unsigned VGPRExcessLimit =
+ Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::VGPR_32RegClass); // Excess limit: allocatable VGPR_32 registers.
+ unsigned MaxWaves = getMaxWaves(SGPRPressure, VGPRPressure, DAG->MF); // Occupancy computed from the current pressure.
+ unsigned SGPRCriticalLimit = SRI->getNumSGPRsAllowed(ST, MaxWaves); // Critical limits are derived from that occupancy.
+ unsigned VGPRCriticalLimit = SRI->getNumVGPRsAllowed(MaxWaves);
+
+ ReadyQueue &Q = Zone.Available;
+ for (SUnit *SU : Q) {
+
+ SchedCandidate TryCand(ZonePolicy);
+ initCandidate(TryCand, SU, Zone.isTop(), RPTracker, SRI,
+ SGPRPressure, VGPRPressure,
+ SGPRExcessLimit, VGPRExcessLimit,
+ SGPRCriticalLimit, VGPRCriticalLimit);
+ // Pass SchedBoundary only when comparing nodes from the same boundary.
+ SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr;
+ GenericScheduler::tryCandidate(Cand, TryCand, ZoneArg); // Generic comparison; the candidate itself was set up by initCandidate() above.
+ if (TryCand.Reason != NoCand) {
+ // Initialize resource delta if needed in case future heuristics query it.
+ if (TryCand.ResDelta == SchedResourceDelta())
+ TryCand.initResourceDelta(Zone.DAG, SchedModel);
+ Cand.setBest(TryCand);
+ }
+ }
+}
+
+static int getBidirectionalReasonRank(GenericSchedulerBase::CandReason Reason) {
+ // Register-pressure reasons get a negated rank; every other reason ranks
+ // by its plain enum value.
+ const bool IsPressureReason =
+ Reason == GenericSchedulerBase::RegCritical ||
+ Reason == GenericSchedulerBase::RegExcess;
+ const int Rank = Reason;
+ return IsPressureReason ? -Rank : Rank;
+}
+
+// This function is mostly cut and pasted from
+// GenericScheduler::pickNodeBidirectional()
+SUnit *GCNMaxOccupancySchedStrategy::pickNodeBidirectional(bool &IsTopNode) {
+ // Schedule as far as possible in the direction of no choice. This is most
+ // efficient, but also provides the best heuristics for CriticalPSets.
+ if (SUnit *SU = Bot.pickOnlyChoice()) {
+ IsTopNode = false;
+ return SU;
+ }
+ if (SUnit *SU = Top.pickOnlyChoice()) {
+ IsTopNode = true;
+ return SU;
+ }
+ // Set the bottom-up policy based on the state of the current bottom zone and
+ // the instructions outside the zone, including the top zone.
+ CandPolicy BotPolicy;
+ setPolicy(BotPolicy, /*IsPostRA=*/false, Bot, &Top);
+ // Set the top-down policy based on the state of the current top zone and
+ // the instructions outside the zone, including the bottom zone.
+ CandPolicy TopPolicy;
+ setPolicy(TopPolicy, /*IsPostRA=*/false, Top, &Bot);
+
+ // See if BotCand is still valid (because we previously scheduled from Top).
+ DEBUG(dbgs() << "Picking from Bot:\n");
+ if (!BotCand.isValid() || BotCand.SU->isScheduled ||
+ BotCand.Policy != BotPolicy) {
+ BotCand.reset(CandPolicy());
+ pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand);
+ assert(BotCand.Reason != NoCand && "failed to find the first candidate");
+ } else {
+ DEBUG(traceCandidate(BotCand));
+ }
+
+ // Check if the top Q has a better candidate.
+ DEBUG(dbgs() << "Picking from Top:\n");
+ if (!TopCand.isValid() || TopCand.SU->isScheduled ||
+ TopCand.Policy != TopPolicy) {
+ TopCand.reset(CandPolicy());
+ pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand);
+ assert(TopCand.Reason != NoCand && "failed to find the first candidate");
+ } else {
+ DEBUG(traceCandidate(TopCand));
+ }
+
+ // Pick best from BotCand and TopCand.
+ DEBUG(
+ dbgs() << "Top Cand: ";
+ traceCandidate(TopCand);
+ dbgs() << "Bot Cand: ";
+ traceCandidate(BotCand);
+ );
+ SchedCandidate Cand;
+ if (TopCand.Reason == BotCand.Reason) { // Same reason: let the generic comparison break the tie.
+ Cand = BotCand;
+ GenericSchedulerBase::CandReason TopReason = TopCand.Reason;
+ TopCand.Reason = NoCand; // Cleared so a non-NoCand value afterwards means TopCand was preferred.
+ GenericScheduler::tryCandidate(Cand, TopCand, nullptr);
+ if (TopCand.Reason != NoCand) {
+ Cand.setBest(TopCand);
+ } else {
+ TopCand.Reason = TopReason;
+ }
+ } else { // Prefer a candidate with non-positive excess/critical pressure change.
+ if (TopCand.Reason == RegExcess && TopCand.RPDelta.Excess.getUnitInc() <= 0) {
+ Cand = TopCand;
+ } else if (BotCand.Reason == RegExcess && BotCand.RPDelta.Excess.getUnitInc() <= 0) {
+ Cand = BotCand;
+ } else if (TopCand.Reason == RegCritical && TopCand.RPDelta.CriticalMax.getUnitInc() <= 0) {
+ Cand = TopCand;
+ } else if (BotCand.Reason == RegCritical && BotCand.RPDelta.CriticalMax.getUnitInc() <= 0) {
+ Cand = BotCand;
+ } else {
+ int TopRank = getBidirectionalReasonRank(TopCand.Reason);
+ int BotRank = getBidirectionalReasonRank(BotCand.Reason);
+ if (TopRank > BotRank) {
+ Cand = TopCand;
+ } else {
+ Cand = BotCand;
+ }
+ }
+ }
+ DEBUG(
+ dbgs() << "Picking: ";
+ traceCandidate(Cand);
+ );
+
+ IsTopNode = Cand.AtTop;
+ return Cand.SU;
+}
+
+// This function is mostly cut and pasted from
+// GenericScheduler::pickNode()
+SUnit *GCNMaxOccupancySchedStrategy::pickNode(bool &IsTopNode) {
+ if (DAG->top() == DAG->bottom()) { // Top and bottom boundaries met: no node is returned.
+ assert(Top.Available.empty() && Top.Pending.empty() &&
+ Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
+ return nullptr;
+ }
+ SUnit *SU;
+ do {
+ if (RegionPolicy.OnlyTopDown) {
+ SU = Top.pickOnlyChoice();
+ if (!SU) { // More than one choice: scan the top queue with a default policy.
+ CandPolicy NoPolicy;
+ TopCand.reset(NoPolicy);
+ pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand);
+ assert(TopCand.Reason != NoCand && "failed to find a candidate");
+ SU = TopCand.SU;
+ }
+ IsTopNode = true;
+ } else if (RegionPolicy.OnlyBottomUp) {
+ SU = Bot.pickOnlyChoice();
+ if (!SU) { // More than one choice: scan the bottom queue with a default policy.
+ CandPolicy NoPolicy;
+ BotCand.reset(NoPolicy);
+ pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand);
+ assert(BotCand.Reason != NoCand && "failed to find a candidate");
+ SU = BotCand.SU;
+ }
+ IsTopNode = false;
+ } else {
+ SU = pickNodeBidirectional(IsTopNode);
+ }
+ } while (SU->isScheduled); // Pick again if the chosen node was already scheduled.
+
+ if (SU->isTopReady()) // Drop the node from every ready queue that still holds it.
+ Top.removeReady(SU);
+ if (SU->isBottomReady())
+ Bot.removeReady(SU);
+
+ DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") " << *SU->getInstr());
+ return SU;
+}
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
new file mode 100644
index 00000000000..4cfc0cea81f
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
@@ -0,0 +1,54 @@
+//===-- GCNSchedStrategy.h - GCN Scheduler Strategy -*- C++ -*-------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// Declares the GCN scheduling strategy that maximizes wave occupancy.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_GCNSCHEDSTRATEGY_H
+#define LLVM_LIB_TARGET_AMDGPU_GCNSCHEDSTRATEGY_H
+
+#include "llvm/CodeGen/MachineScheduler.h"
+
+namespace llvm {
+
+class SIRegisterInfo;
+
+/// This is a minimal scheduler strategy. The main difference between this
+/// and the GenericScheduler is that GCNMaxOccupancySchedStrategy uses
+/// different heuristics to determine excess/critical pressure sets. Its goal
+/// is to maximize kernel occupancy (i.e. maximum number of waves per SIMD).
+class GCNMaxOccupancySchedStrategy : public GenericScheduler {
+ /// Picks the better of the best bottom-zone and best top-zone candidates.
+ SUnit *pickNodeBidirectional(bool &IsTopNode);
+ /// Scans Zone's available queue and records the best candidate in Cand.
+ void pickNodeFromQueue(SchedBoundary &Zone, const CandPolicy &ZonePolicy,
+ const RegPressureTracker &RPTracker,
+ SchedCandidate &Cand);
+ /// Fills Cand (including its RPDelta) for SU from the given pressures/limits.
+ void initCandidate(SchedCandidate &Cand, SUnit *SU,
+ bool AtTop, const RegPressureTracker &RPTracker,
+ const SIRegisterInfo *SRI,
+ int SGPRPressure, int VGPRPressure,
+ int SGPRExcessLimit, int VGPRExcessLimit,
+ int SGPRCriticalLimit, int VGPRCriticalLimit);
+ // NOTE(review): no definition of this overload is visible in this patch - confirm it is needed.
+ void tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand,
+ SchedBoundary *Zone, const SIRegisterInfo *SRI,
+ unsigned SGPRPressure, unsigned VGPRPressure);
+
+public:
+ GCNMaxOccupancySchedStrategy(const MachineSchedContext *C);
+ /// Picks the next node to schedule; IsTopNode reports the chosen boundary.
+ SUnit *pickNode(bool &IsTopNode) override;
+};
+
+} // End namespace llvm
+
+#endif // LLVM_LIB_TARGET_AMDGPU_GCNSCHEDSTRATEGY_H
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 80fdd854fbd..7d84f7bec8c 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -249,6 +249,12 @@ unsigned SIRegisterInfo::getRegPressureSetLimit(const MachineFunction &MF,
return VGPRLimit;
}
+unsigned
+SIRegisterInfo::getDefaultRegPressureSetLimit(const MachineFunction &MF,
+ unsigned Idx) const {
+ return AMDGPURegisterInfo::getRegPressureSetLimit(MF, Idx); // Uses AMDGPURegisterInfo's limit rather than SIRegisterInfo's override.
+}
+
bool SIRegisterInfo::requiresRegisterScavenging(const MachineFunction &Fn) const {
return Fn.getFrameInfo().hasStackObjects();
}
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
index b0e852e6127..6e66c52da6c 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
@@ -51,6 +51,8 @@ public:
unsigned getRegPressureSetLimit(const MachineFunction &MF,
unsigned Idx) const override;
+ unsigned getDefaultRegPressureSetLimit(const MachineFunction &MF,
+ unsigned Idx) const;
bool requiresRegisterScavenging(const MachineFunction &Fn) const override;
diff --git a/llvm/lib/Target/AMDGPU/SISchedule.td b/llvm/lib/Target/AMDGPU/SISchedule.td
index 0db92fc254f..be27966fd5f 100644
--- a/llvm/lib/Target/AMDGPU/SISchedule.td
+++ b/llvm/lib/Target/AMDGPU/SISchedule.td
@@ -47,6 +47,10 @@ def Write64Bit : SchedWrite;
class SISchedMachineModel : SchedMachineModel {
let CompleteModel = 1;
+ // MicroOpBufferSize = 1 means that instructions will always be added
+ // to the ready queue when they become available. This exposes them
+ // to the register pressure analysis.
+ let MicroOpBufferSize = 1;
let IssueWidth = 1;
let PostRAScheduler = 1;
}
OpenPOWER on IntegriCloud