summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp 22
-rw-r--r-- llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp 11
-rw-r--r-- llvm/lib/Target/AMDGPU/GCNSchedStrategy.h 2
-rw-r--r-- llvm/lib/Target/AMDGPU/SIISelLowering.cpp 2
-rw-r--r-- llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp 10
-rw-r--r-- llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h 26
6 files changed, 60 insertions, 13 deletions
diff --git a/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp b/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
index d18596f2390..960c63ce41d 100644
--- a/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
@@ -478,13 +478,19 @@ unsigned GCNIterativeScheduler::tryMaximizeOccupancy(unsigned TargetOcc) {
}
LLVM_DEBUG(dbgs() << "New occupancy = " << NewOcc
<< ", prev occupancy = " << Occ << '\n');
+ if (NewOcc > Occ) {
+ SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+ MFI->increaseOccupancy(MF, NewOcc);
+ }
+
return std::max(NewOcc, Occ);
}
void GCNIterativeScheduler::scheduleLegacyMaxOccupancy(
bool TryMaximizeOccupancy) {
const auto &ST = MF.getSubtarget<SISubtarget>();
- auto TgtOcc = ST.getOccupancyWithLocalMemSize(MF);
+ SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+ auto TgtOcc = MFI->getMinAllowedOccupancy();
sortRegionsByPressure(TgtOcc);
auto Occ = Regions.front()->MaxPressure.getOccupancy(ST);
@@ -501,6 +507,7 @@ void GCNIterativeScheduler::scheduleLegacyMaxOccupancy(
"target occupancy = "
<< TgtOcc << '\n');
GCNMaxOccupancySchedStrategy LStrgy(Context);
+ unsigned FinalOccupancy = std::min(Occ, MFI->getOccupancy());
for (int I = 0; I < NumPasses; ++I) {
// running first pass with TargetOccupancy = 0 mimics previous scheduling
@@ -525,8 +532,10 @@ void GCNIterativeScheduler::scheduleLegacyMaxOccupancy(
assert(R->MaxPressure.getOccupancy(ST) >= TgtOcc);
}
}
+ FinalOccupancy = std::min(FinalOccupancy, RP.getOccupancy(ST));
}
}
+ MFI->limitOccupancy(FinalOccupancy);
}
///////////////////////////////////////////////////////////////////////////////
@@ -534,7 +543,8 @@ void GCNIterativeScheduler::scheduleLegacyMaxOccupancy(
void GCNIterativeScheduler::scheduleMinReg(bool force) {
const auto &ST = MF.getSubtarget<SISubtarget>();
- const auto TgtOcc = ST.getOccupancyWithLocalMemSize(MF);
+ const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+ const auto TgtOcc = MFI->getOccupancy();
sortRegionsByPressure(TgtOcc);
auto MaxPressure = Regions.front()->MaxPressure;
@@ -567,9 +577,8 @@ void GCNIterativeScheduler::scheduleMinReg(bool force) {
void GCNIterativeScheduler::scheduleILP(
bool TryMaximizeOccupancy) {
const auto &ST = MF.getSubtarget<SISubtarget>();
- const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
- auto TgtOcc = std::min(ST.getOccupancyWithLocalMemSize(MF),
- MFI->getMaxWavesPerEU());
+ SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+ auto TgtOcc = MFI->getMinAllowedOccupancy();
sortRegionsByPressure(TgtOcc);
auto Occ = Regions.front()->MaxPressure.getOccupancy(ST);
@@ -582,6 +591,7 @@ void GCNIterativeScheduler::scheduleILP(
"target occupancy = "
<< TgtOcc << '\n');
+ unsigned FinalOccupancy = std::min(Occ, MFI->getOccupancy());
for (auto R : Regions) {
BuildDAG DAG(*R, *this);
const auto ILPSchedule = makeGCNILPScheduler(DAG.getBottomRoots(), *this);
@@ -599,6 +609,8 @@ void GCNIterativeScheduler::scheduleILP(
} else {
scheduleRegion(*R, ILPSchedule, RP);
LLVM_DEBUG(printSchedResult(dbgs(), R, RP));
+ FinalOccupancy = std::min(FinalOccupancy, RP.getOccupancy(ST));
}
}
+ MFI->limitOccupancy(FinalOccupancy);
}
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index e9afe2a2b11..d2883a8d998 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -308,9 +308,7 @@ GCNScheduleDAGMILive::GCNScheduleDAGMILive(MachineSchedContext *C,
ScheduleDAGMILive(C, std::move(S)),
ST(MF.getSubtarget<SISubtarget>()),
MFI(*MF.getInfo<SIMachineFunctionInfo>()),
- StartingOccupancy(std::min(ST.getOccupancyWithLocalMemSize(MFI.getLDSSize(),
- MF.getFunction()),
- MFI.getMaxWavesPerEU())),
+ StartingOccupancy(MFI.getOccupancy()),
MinOccupancy(StartingOccupancy), Stage(0), RegionIdx(0) {
LLVM_DEBUG(dbgs() << "Starting occupancy is " << StartingOccupancy << ".\n");
@@ -374,16 +372,15 @@ void GCNScheduleDAGMILive::schedule() {
unsigned NewOccupancy = std::max(WavesAfter, WavesBefore);
// Allow memory bound functions to drop to 4 waves if not limited by an
// attribute.
- unsigned MinMemBoundWaves = std::max(MFI.getMinWavesPerEU(), 4u);
if (WavesAfter < WavesBefore && WavesAfter < MinOccupancy &&
- WavesAfter >= MinMemBoundWaves &&
- (MFI.isMemoryBound() || MFI.needsWaveLimiter())) {
+ WavesAfter >= MFI.getMinAllowedOccupancy()) {
LLVM_DEBUG(dbgs() << "Function is memory bound, allow occupancy drop up to "
- << MinMemBoundWaves << " waves\n");
+ << MFI.getMinAllowedOccupancy() << " waves\n");
NewOccupancy = WavesAfter;
}
if (NewOccupancy < MinOccupancy) {
MinOccupancy = NewOccupancy;
+ MFI.limitOccupancy(MinOccupancy);
LLVM_DEBUG(dbgs() << "Occupancy lowered for the function to "
<< MinOccupancy << ".\n");
}
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
index 060d2ca72d9..a772a53fecb 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
@@ -64,7 +64,7 @@ class GCNScheduleDAGMILive : public ScheduleDAGMILive {
const SISubtarget &ST;
- const SIMachineFunctionInfo &MFI;
+ SIMachineFunctionInfo &MFI;
// Occupancy target at the beginning of function scheduling cycle.
unsigned StartingOccupancy;
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index cc326cdce88..9145e2e56dc 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -7729,6 +7729,8 @@ void SITargetLowering::finalizeLowering(MachineFunction &MF) const {
MRI.replaceRegWith(AMDGPU::SCRATCH_WAVE_OFFSET_REG,
Info->getScratchWaveOffsetReg());
+ Info->limitOccupancy(MF);
+
TargetLoweringBase::finalizeLowering(MF);
}
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index 61b6cb33fd1..0a789dcd090 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -55,6 +55,9 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(F);
WavesPerEU = ST.getWavesPerEU(F);
+ Occupancy = getMaxWavesPerEU();
+ limitOccupancy(MF);
+
if (!isEntryFunction()) {
// Non-entry functions have no special inputs for now, other registers
// required for scratch access.
@@ -176,6 +179,13 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
S.consumeInteger(0, HighBitsOf32BitAddress);
}
+void SIMachineFunctionInfo::limitOccupancy(const MachineFunction &MF) { // Re-clamps the recorded Occupancy against two bounds derived for MF.
+ limitOccupancy(getMaxWavesPerEU()); // bound 1: the value returned by getMaxWavesPerEU()
+ const SISubtarget& ST = MF.getSubtarget<SISubtarget>();
+ limitOccupancy(ST.getOccupancyWithLocalMemSize(getLDSSize(),
+ MF.getFunction())); // bound 2: what the subtarget reports for getLDSSize() and this function
+}
+
unsigned SIMachineFunctionInfo::addPrivateSegmentBuffer(
const SIRegisterInfo &TRI) {
ArgInfo.PrivateSegmentBuffer =
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
index dcd44a5b297..6a3eae4b128 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -186,6 +186,9 @@ private:
unsigned HighBitsOf32BitAddress;
+ // Current recorded maximum possible occupancy.
+ unsigned Occupancy;
+
MCPhysReg getNextUserSGPR() const;
MCPhysReg getNextSystemSGPR() const;
@@ -641,6 +644,29 @@ public:
llvm::make_unique<AMDGPUImagePseudoSourceValue>(TII));
return PSV.first->second.get();
}
+
+ unsigned getOccupancy() const { // Accessor: returns the Occupancy member as currently recorded.
+ return Occupancy;
+ }
+
+ unsigned getMinAllowedOccupancy() const { // Floor that the schedulers in this patch compare a candidate occupancy against.
+ if (!isMemoryBound() && !needsWaveLimiter())
+ return Occupancy; // neither flag is set: the floor is the current Occupancy itself
+ return (Occupancy < 4) ? Occupancy : 4; // otherwise the floor is the smaller of Occupancy and 4
+ }
+
+ void limitOccupancy(const MachineFunction &MF); // Defined out of line in SIMachineFunctionInfo.cpp.
+
+ void limitOccupancy(unsigned Limit) { // Lowers Occupancy to Limit when it is higher; never raises it.
+ if (Occupancy > Limit)
+ Occupancy = Limit;
+ }
+
+ void increaseOccupancy(const MachineFunction &MF, unsigned Limit) { // Raises Occupancy to at least Limit, then re-clamps it for MF.
+ if (Occupancy < Limit)
+ Occupancy = Limit;
+ limitOccupancy(MF); // re-apply the MF-derived bounds so the raise cannot exceed them
+ }
};
} // end namespace llvm
OpenPOWER on IntegriCloud