diff options
| -rw-r--r-- | llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp | 22 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 11 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/GCNSchedStrategy.h | 2 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 2 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp | 10 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h | 26 |
6 files changed, 60 insertions, 13 deletions
diff --git a/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp b/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp index d18596f2390..960c63ce41d 100644 --- a/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp +++ b/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp @@ -478,13 +478,19 @@ unsigned GCNIterativeScheduler::tryMaximizeOccupancy(unsigned TargetOcc) { } LLVM_DEBUG(dbgs() << "New occupancy = " << NewOcc << ", prev occupancy = " << Occ << '\n'); + if (NewOcc > Occ) { + SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); + MFI->increaseOccupancy(MF, NewOcc); + } + return std::max(NewOcc, Occ); } void GCNIterativeScheduler::scheduleLegacyMaxOccupancy( bool TryMaximizeOccupancy) { const auto &ST = MF.getSubtarget<SISubtarget>(); - auto TgtOcc = ST.getOccupancyWithLocalMemSize(MF); + SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); + auto TgtOcc = MFI->getMinAllowedOccupancy(); sortRegionsByPressure(TgtOcc); auto Occ = Regions.front()->MaxPressure.getOccupancy(ST); @@ -501,6 +507,7 @@ void GCNIterativeScheduler::scheduleLegacyMaxOccupancy( "target occupancy = " << TgtOcc << '\n'); GCNMaxOccupancySchedStrategy LStrgy(Context); + unsigned FinalOccupancy = std::min(Occ, MFI->getOccupancy()); for (int I = 0; I < NumPasses; ++I) { // running first pass with TargetOccupancy = 0 mimics previous scheduling @@ -525,8 +532,10 @@ void GCNIterativeScheduler::scheduleLegacyMaxOccupancy( assert(R->MaxPressure.getOccupancy(ST) >= TgtOcc); } } + FinalOccupancy = std::min(FinalOccupancy, RP.getOccupancy(ST)); } } + MFI->limitOccupancy(FinalOccupancy); } /////////////////////////////////////////////////////////////////////////////// @@ -534,7 +543,8 @@ void GCNIterativeScheduler::scheduleLegacyMaxOccupancy( void GCNIterativeScheduler::scheduleMinReg(bool force) { const auto &ST = MF.getSubtarget<SISubtarget>(); - const auto TgtOcc = ST.getOccupancyWithLocalMemSize(MF); + const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); + const auto TgtOcc = MFI->getOccupancy(); sortRegionsByPressure(TgtOcc); auto MaxPressure = Regions.front()->MaxPressure; @@ -567,9 +577,8 @@ void GCNIterativeScheduler::scheduleMinReg(bool force) { void GCNIterativeScheduler::scheduleILP( bool TryMaximizeOccupancy) { const auto &ST = MF.getSubtarget<SISubtarget>(); - const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); - auto TgtOcc = std::min(ST.getOccupancyWithLocalMemSize(MF), - MFI->getMaxWavesPerEU()); + SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); + auto TgtOcc = MFI->getMinAllowedOccupancy(); sortRegionsByPressure(TgtOcc); auto Occ = Regions.front()->MaxPressure.getOccupancy(ST); @@ -582,6 +591,7 @@ void GCNIterativeScheduler::scheduleILP( "target occupancy = " << TgtOcc << '\n'); + unsigned FinalOccupancy = std::min(Occ, MFI->getOccupancy()); for (auto R : Regions) { BuildDAG DAG(*R, *this); const auto ILPSchedule = makeGCNILPScheduler(DAG.getBottomRoots(), *this); @@ -599,6 +609,8 @@ void GCNIterativeScheduler::scheduleILP( } else { scheduleRegion(*R, ILPSchedule, RP); LLVM_DEBUG(printSchedResult(dbgs(), R, RP)); + FinalOccupancy = std::min(FinalOccupancy, RP.getOccupancy(ST)); } } + MFI->limitOccupancy(FinalOccupancy); } diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp index e9afe2a2b11..d2883a8d998 100644 --- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp +++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp @@ -308,9 +308,7 @@ GCNScheduleDAGMILive::GCNScheduleDAGMILive(MachineSchedContext *C, ScheduleDAGMILive(C, std::move(S)), ST(MF.getSubtarget<SISubtarget>()), MFI(*MF.getInfo<SIMachineFunctionInfo>()), - StartingOccupancy(std::min(ST.getOccupancyWithLocalMemSize(MFI.getLDSSize(), - MF.getFunction()), - MFI.getMaxWavesPerEU())), + StartingOccupancy(MFI.getOccupancy()), MinOccupancy(StartingOccupancy), Stage(0), RegionIdx(0) { LLVM_DEBUG(dbgs() << "Starting occupancy is " << StartingOccupancy << ".\n"); @@ -374,16 +372,15 @@ void GCNScheduleDAGMILive::schedule() { unsigned NewOccupancy = std::max(WavesAfter, WavesBefore); // Allow memory bound functions to drop to 4 waves if not limited by an // attribute. - unsigned MinMemBoundWaves = std::max(MFI.getMinWavesPerEU(), 4u); if (WavesAfter < WavesBefore && WavesAfter < MinOccupancy && - WavesAfter >= MinMemBoundWaves && - (MFI.isMemoryBound() || MFI.needsWaveLimiter())) { + WavesAfter >= MFI.getMinAllowedOccupancy()) { LLVM_DEBUG(dbgs() << "Function is memory bound, allow occupancy drop up to " - << MinMemBoundWaves << " waves\n"); + << MFI.getMinAllowedOccupancy() << " waves\n"); NewOccupancy = WavesAfter; } if (NewOccupancy < MinOccupancy) { MinOccupancy = NewOccupancy; + MFI.limitOccupancy(MinOccupancy); LLVM_DEBUG(dbgs() << "Occupancy lowered for the function to " << MinOccupancy << ".\n"); } diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h index 060d2ca72d9..a772a53fecb 100644 --- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h +++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h @@ -64,7 +64,7 @@ class GCNScheduleDAGMILive : public ScheduleDAGMILive { const SISubtarget &ST; - const SIMachineFunctionInfo &MFI; + SIMachineFunctionInfo &MFI; // Occupancy target at the beginning of function scheduling cycle. unsigned StartingOccupancy; diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index cc326cdce88..9145e2e56dc 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -7729,6 +7729,8 @@ void SITargetLowering::finalizeLowering(MachineFunction &MF) const { MRI.replaceRegWith(AMDGPU::SCRATCH_WAVE_OFFSET_REG, Info->getScratchWaveOffsetReg()); + Info->limitOccupancy(MF); + TargetLoweringBase::finalizeLowering(MF); } diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp index 61b6cb33fd1..0a789dcd090 100644 --- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp @@ -55,6 +55,9 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(F); WavesPerEU = ST.getWavesPerEU(F); + Occupancy = getMaxWavesPerEU(); + limitOccupancy(MF); + if (!isEntryFunction()) { // Non-entry functions have no special inputs for now, other registers // required for scratch access. @@ -176,6 +179,13 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) S.consumeInteger(0, HighBitsOf32BitAddress); } +void SIMachineFunctionInfo::limitOccupancy(const MachineFunction &MF) { + limitOccupancy(getMaxWavesPerEU()); + const SISubtarget& ST = MF.getSubtarget<SISubtarget>(); + limitOccupancy(ST.getOccupancyWithLocalMemSize(getLDSSize(), + MF.getFunction())); +} + unsigned SIMachineFunctionInfo::addPrivateSegmentBuffer( const SIRegisterInfo &TRI) { ArgInfo.PrivateSegmentBuffer = diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h index dcd44a5b297..6a3eae4b128 100644 --- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h +++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h @@ -186,6 +186,9 @@ private: unsigned HighBitsOf32BitAddress; + // Current recorded maximum possible occupancy. + unsigned Occupancy; + MCPhysReg getNextUserSGPR() const; MCPhysReg getNextSystemSGPR() const; @@ -641,6 +644,29 @@ public: llvm::make_unique<AMDGPUImagePseudoSourceValue>(TII)); return PSV.first->second.get(); } + + unsigned getOccupancy() const { + return Occupancy; + } + + unsigned getMinAllowedOccupancy() const { + if (!isMemoryBound() && !needsWaveLimiter()) + return Occupancy; + return (Occupancy < 4) ? Occupancy : 4; + } + + void limitOccupancy(const MachineFunction &MF); + + void limitOccupancy(unsigned Limit) { + if (Occupancy > Limit) + Occupancy = Limit; + } + + void increaseOccupancy(const MachineFunction &MF, unsigned Limit) { + if (Occupancy < Limit) + Occupancy = Limit; + limitOccupancy(MF); + } }; } // end namespace llvm |

