[AMDGPU] Track occupancy in MFI

Keep track of achieved occupancy in SIMachineFunctionInfo. At the moment we have a lot of duplicated or even missing code to query and maintain occupancy info. Record it in the MFI and query it in a single call.

Interfaces:
- getOccupancy() - returns the currently recorded achieved occupancy.
- getMinAllowedOccupancy() - returns the lesser of the achieved occupancy and the lowest occupancy we are ready to tolerate. For example, if a kernel is memory bound we are ready to tolerate 4 waves.
- limitOccupancy() - records the occupancy level if we have to lower it.
- increaseOccupancy() - records the occupancy if the scheduler managed to increase it.

MFI takes care of integrating the different checks affecting occupancy, including LDS use and the waves-per-eu attribute.

Note that the scheduler starts with register pressure not yet known, so it has to record either a limit or an increase in occupancy after it is done. Later passes can just query the resulting value.

The new interface is used in the active scheduler and is NFC with respect to its work. Changes are also made to the experimental schedulers to use it and to record an occupancy after they are done. Before the change, waves-per-eu was ignored by the experimental schedulers and the tolerance window for memory bound kernels was not used.

Differential Revision: https://reviews.llvm.org/D47509

llvm-svn: 333629
author: Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com> 2018-05-31 05:36:04 +0000
committer: Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com> 2018-05-31 05:36:04 +0000
commit: d4b500cb08b26931ea38b9f9691fd5d72c724da1 (patch)
tree: eb45d5411ebc4332ee0e67a0c151c7cb88f2cc6a /llvm/lib/Target/AMDGPU
parent: d1fe5066944af4947846efdfbf2009e45b39cfb6 (diff)
download: bcm5719-llvm-d4b500cb08b26931ea38b9f9691fd5d72c724da1.tar.gz
bcm5719-llvm-d4b500cb08b26931ea38b9f9691fd5d72c724da1.zip
6 files changed, 60 insertions, 13 deletions
diff --git a/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp b/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
index d18596f2390..960c63ce41d 100644
--- a/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
@@ -478,13 +478,19 @@ unsigned GCNIterativeScheduler::tryMaximizeOccupancy(unsigned TargetOcc) {
   }
   LLVM_DEBUG(dbgs() << "New occupancy = " << NewOcc
                     << ", prev occupancy = " << Occ << '\n');
+  if (NewOcc > Occ) {
+    SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+    MFI->increaseOccupancy(MF, NewOcc);
+  }
+
   return std::max(NewOcc, Occ);
 }
 
 void GCNIterativeScheduler::scheduleLegacyMaxOccupancy(
   bool TryMaximizeOccupancy) {
   const auto &ST = MF.getSubtarget<SISubtarget>();
-  auto TgtOcc = ST.getOccupancyWithLocalMemSize(MF);
+  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+  auto TgtOcc = MFI->getMinAllowedOccupancy();
 
   sortRegionsByPressure(TgtOcc);
   auto Occ = Regions.front()->MaxPressure.getOccupancy(ST);
@@ -501,6 +507,7 @@ void GCNIterativeScheduler::scheduleLegacyMaxOccupancy(
                        "target occupancy = "
                     << TgtOcc << '\n');
   GCNMaxOccupancySchedStrategy LStrgy(Context);
+  unsigned FinalOccupancy = std::min(Occ, MFI->getOccupancy());
 
   for (int I = 0; I < NumPasses; ++I) {
     // running first pass with TargetOccupancy = 0 mimics previous scheduling
@@ -525,8 +532,10 @@ void GCNIterativeScheduler::scheduleLegacyMaxOccupancy(
           assert(R->MaxPressure.getOccupancy(ST) >= TgtOcc);
         }
       }
+      FinalOccupancy = std::min(FinalOccupancy, RP.getOccupancy(ST));
     }
   }
+  MFI->limitOccupancy(FinalOccupancy);
 }
 
 ///////////////////////////////////////////////////////////////////////////////
@@ -534,7 +543,8 @@ void GCNIterativeScheduler::scheduleLegacyMaxOccupancy(
 
 void GCNIterativeScheduler::scheduleMinReg(bool force) {
   const auto &ST = MF.getSubtarget<SISubtarget>();
-  const auto TgtOcc = ST.getOccupancyWithLocalMemSize(MF);
+  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+  const auto TgtOcc = MFI->getOccupancy();
   sortRegionsByPressure(TgtOcc);
 
   auto MaxPressure = Regions.front()->MaxPressure;
@@ -567,9 +577,8 @@ void GCNIterativeScheduler::scheduleMinReg(bool force) {
 void GCNIterativeScheduler::scheduleILP(
   bool TryMaximizeOccupancy) {
   const auto &ST = MF.getSubtarget<SISubtarget>();
-  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
-  auto TgtOcc = std::min(ST.getOccupancyWithLocalMemSize(MF),
-                         MFI->getMaxWavesPerEU());
+  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+  auto TgtOcc = MFI->getMinAllowedOccupancy();
 
   sortRegionsByPressure(TgtOcc);
   auto Occ = Regions.front()->MaxPressure.getOccupancy(ST);
@@ -582,6 +591,7 @@ void GCNIterativeScheduler::scheduleILP(
                        "target occupancy = "
                     << TgtOcc << '\n');
 
+  unsigned FinalOccupancy = std::min(Occ, MFI->getOccupancy());
   for (auto R : Regions) {
     BuildDAG DAG(*R, *this);
     const auto ILPSchedule = makeGCNILPScheduler(DAG.getBottomRoots(), *this);
@@ -599,6 +609,8 @@ void GCNIterativeScheduler::scheduleILP(
     } else {
       scheduleRegion(*R, ILPSchedule, RP);
       LLVM_DEBUG(printSchedResult(dbgs(), R, RP));
+      FinalOccupancy = std::min(FinalOccupancy, RP.getOccupancy(ST));
     }
   }
+  MFI->limitOccupancy(FinalOccupancy);
 }
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index e9afe2a2b11..d2883a8d998 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -308,9 +308,7 @@ GCNScheduleDAGMILive::GCNScheduleDAGMILive(MachineSchedContext *C,
   ScheduleDAGMILive(C, std::move(S)),
   ST(MF.getSubtarget<SISubtarget>()),
   MFI(*MF.getInfo<SIMachineFunctionInfo>()),
-  StartingOccupancy(std::min(ST.getOccupancyWithLocalMemSize(MFI.getLDSSize(),
-                                                             MF.getFunction()),
-                             MFI.getMaxWavesPerEU())),
+  StartingOccupancy(MFI.getOccupancy()),
   MinOccupancy(StartingOccupancy), Stage(0), RegionIdx(0) {
 
   LLVM_DEBUG(dbgs() << "Starting occupancy is " << StartingOccupancy << ".\n");
@@ -374,16 +372,15 @@ void GCNScheduleDAGMILive::schedule() {
   unsigned NewOccupancy = std::max(WavesAfter, WavesBefore);
   // Allow memory bound functions to drop to 4 waves if not limited by an
   // attribute.
-  unsigned MinMemBoundWaves = std::max(MFI.getMinWavesPerEU(), 4u);
   if (WavesAfter < WavesBefore && WavesAfter < MinOccupancy &&
-      WavesAfter >= MinMemBoundWaves &&
-      (MFI.isMemoryBound() || MFI.needsWaveLimiter())) {
+      WavesAfter >= MFI.getMinAllowedOccupancy()) {
     LLVM_DEBUG(dbgs() << "Function is memory bound, allow occupancy drop up to "
-                      << MinMemBoundWaves << " waves\n");
+                      << MFI.getMinAllowedOccupancy() << " waves\n");
     NewOccupancy = WavesAfter;
   }
   if (NewOccupancy < MinOccupancy) {
     MinOccupancy = NewOccupancy;
+    MFI.limitOccupancy(MinOccupancy);
     LLVM_DEBUG(dbgs() << "Occupancy lowered for the function to "
                       << MinOccupancy << ".\n");
   }
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
index 060d2ca72d9..a772a53fecb 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
@@ -64,7 +64,7 @@ class GCNScheduleDAGMILive : public ScheduleDAGMILive {
 
   const SISubtarget &ST;
 
-  const SIMachineFunctionInfo &MFI;
+  SIMachineFunctionInfo &MFI;
 
   // Occupancy target at the beginning of function scheduling cycle.
   unsigned StartingOccupancy;
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index cc326cdce88..9145e2e56dc 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -7729,6 +7729,8 @@ void SITargetLowering::finalizeLowering(MachineFunction &MF) const {
   MRI.replaceRegWith(AMDGPU::SCRATCH_WAVE_OFFSET_REG,
                      Info->getScratchWaveOffsetReg());
 
+  Info->limitOccupancy(MF);
+
   TargetLoweringBase::finalizeLowering(MF);
 }
 
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index 61b6cb33fd1..0a789dcd090 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -55,6 +55,9 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
   FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(F);
   WavesPerEU = ST.getWavesPerEU(F);
 
+  Occupancy = getMaxWavesPerEU();
+  limitOccupancy(MF);
+
   if (!isEntryFunction()) {
     // Non-entry functions have no special inputs for now, other registers
     // required for scratch access.
@@ -176,6 +179,13 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
     S.consumeInteger(0, HighBitsOf32BitAddress);
 }
 
+void SIMachineFunctionInfo::limitOccupancy(const MachineFunction &MF) {
+  limitOccupancy(getMaxWavesPerEU());
+  const SISubtarget& ST = MF.getSubtarget<SISubtarget>();
+  limitOccupancy(ST.getOccupancyWithLocalMemSize(getLDSSize(),
+                 MF.getFunction()));
+}
+
 unsigned SIMachineFunctionInfo::addPrivateSegmentBuffer(
   const SIRegisterInfo &TRI) {
   ArgInfo.PrivateSegmentBuffer =
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
index dcd44a5b297..6a3eae4b128 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -186,6 +186,9 @@ private:
 
   unsigned HighBitsOf32BitAddress;
 
+  // Current recorded maximum possible occupancy.
+  unsigned Occupancy;
+
   MCPhysReg getNextUserSGPR() const;
 
   MCPhysReg getNextSystemSGPR() const;
@@ -641,6 +644,29 @@ public:
       llvm::make_unique<AMDGPUImagePseudoSourceValue>(TII));
     return PSV.first->second.get();
   }
+
+  unsigned getOccupancy() const {
+    return Occupancy;
+  }
+
+  unsigned getMinAllowedOccupancy() const {
+    if (!isMemoryBound() && !needsWaveLimiter())
+      return Occupancy;
+    return (Occupancy < 4) ? Occupancy : 4;
+  }
+
+  void limitOccupancy(const MachineFunction &MF);
+
+  void limitOccupancy(unsigned Limit) {
+    if (Occupancy > Limit)
+      Occupancy = Limit;
+  }
+
+  void increaseOccupancy(const MachineFunction &MF, unsigned Limit) {
+    if (Occupancy < Limit)
+      Occupancy = Limit;
+    limitOccupancy(MF);
+  }
 };
 
 } // end namespace llvm
author	Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com>	2018-05-31 05:36:04 +0000
committer	Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com>	2018-05-31 05:36:04 +0000
commit	d4b500cb08b26931ea38b9f9691fd5d72c724da1 (patch)
tree	eb45d5411ebc4332ee0e67a0c151c7cb88f2cc6a /llvm/lib/Target/AMDGPU
parent	d1fe5066944af4947846efdfbf2009e45b39cfb6 (diff)
download	bcm5719-llvm-d4b500cb08b26931ea38b9f9691fd5d72c724da1.tar.gz bcm5719-llvm-d4b500cb08b26931ea38b9f9691fd5d72c724da1.zip