summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorValery Pykhtin <Valery.Pykhtin@amd.com>2019-06-18 11:43:17 +0000
committerValery Pykhtin <Valery.Pykhtin@amd.com>2019-06-18 11:43:17 +0000
commit7e854e1cdd23000acfbac316db2c8f20eeeba1a4 (patch)
treebde7d201efae7b7d50ffca5ab475fcd27a87405c
parent43854e3ccc7fb9fa2cbe37529a72f77ca512bb86 (diff)
downloadbcm5719-llvm-7e854e1cdd23000acfbac316db2c8f20eeeba1a4.tar.gz
bcm5719-llvm-7e854e1cdd23000acfbac316db2c8f20eeeba1a4.zip
[AMDGPU] Speed up live-in virtual register set computation in GCNScheduleDAGMILive.
Differential revision: https://reviews.llvm.org/D62401 llvm-svn: 363661
-rw-r--r--llvm/lib/Target/AMDGPU/GCNRegPressure.cpp7
-rw-r--r--llvm/lib/Target/AMDGPU/GCNRegPressure.h47
-rw-r--r--llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp28
-rw-r--r--llvm/lib/Target/AMDGPU/GCNSchedStrategy.h3
4 files changed, 80 insertions, 5 deletions
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
index 1c6081772d5..be01988b6bc 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
@@ -63,9 +63,10 @@ void llvm::printLivesAt(SlotIndex SI,
}
if (!Num) dbgs() << " <none>\n";
}
+#endif
-static bool isEqual(const GCNRPTracker::LiveRegSet &S1,
- const GCNRPTracker::LiveRegSet &S2) {
+bool llvm::isEqual(const GCNRPTracker::LiveRegSet &S1,
+ const GCNRPTracker::LiveRegSet &S2) {
if (S1.size() != S2.size())
return false;
@@ -76,7 +77,7 @@ static bool isEqual(const GCNRPTracker::LiveRegSet &S1,
}
return true;
}
-#endif
+
///////////////////////////////////////////////////////////////////////////////
// GCNRegPressure
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.h b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
index a061e3c7466..fe7b934f291 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.h
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
@@ -190,6 +190,50 @@ GCNRPTracker::LiveRegSet getLiveRegs(SlotIndex SI,
const LiveIntervals &LIS,
const MachineRegisterInfo &MRI);
+/// Builds a map MachineInstr -> LiveRegSet for the instructions listed in R.
+/// R - non-empty range of MachineInstr pointers (*R.begin() is dereferenced)
+/// After - true: sample at each instruction's dead slot, false: at base index
+/// Note: there is no entry in the map for instructions with empty live reg set
+/// Complexity = O(NumVirtRegs * averageLiveRangeSegmentsPerReg * lg(R))
+template <typename Range>
+DenseMap<MachineInstr*, GCNRPTracker::LiveRegSet>
+getLiveRegMap(Range &&R, bool After, LiveIntervals &LIS) {
+ std::vector<SlotIndex> Indexes;
+ Indexes.reserve(std::distance(R.begin(), R.end())); // one slot index per instruction in R
+ auto &SII = *LIS.getSlotIndexes();
+ for (MachineInstr *I : R) {
+ auto SI = SII.getInstructionIndex(*I);
+ Indexes.push_back(After ? SI.getDeadSlot() : SI.getBaseIndex());
+ }
+ std::sort(Indexes.begin(), Indexes.end()); // presumably findIndexesLiveAt needs ascending input - confirm
+
+ auto &MRI = (*R.begin())->getParent()->getParent()->getRegInfo(); // NOTE(review): undefined for an empty R
+ DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet> LiveRegMap;
+ SmallVector<SlotIndex, 32> LiveIdxs, SRLiveIdxs; // scratch buffers, cleared and reused per register
+ for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
+ auto Reg = TargetRegisterInfo::index2VirtReg(I);
+ if (!LIS.hasInterval(Reg))
+ continue; // no live interval recorded for this virtual register
+ auto &LI = LIS.getInterval(Reg);
+ LiveIdxs.clear();
+ if (!LI.findIndexesLiveAt(Indexes, std::back_inserter(LiveIdxs)))
+ continue; // presumably: Reg is live at none of the requested indexes - confirm
+ if (!LI.hasSubRanges()) {
+ for (auto SI : LiveIdxs) // no subranges: record the register's maximal lane mask
+ LiveRegMap[SII.getInstructionFromIndex(SI)][Reg] =
+ MRI.getMaxLaneMaskForVReg(Reg);
+ } else
+ for (const auto &S : LI.subranges()) {
+ // restrict the subrange search to the indexes where the main range is live
+ SRLiveIdxs.clear();
+ S.findIndexesLiveAt(LiveIdxs, std::back_inserter(SRLiveIdxs));
+ for (auto SI : SRLiveIdxs)
+ LiveRegMap[SII.getInstructionFromIndex(SI)][Reg] |= S.LaneMask; // accumulate lanes over subranges
+ }
+ }
+ return LiveRegMap;
+}
+
inline GCNRPTracker::LiveRegSet getLiveRegsAfter(const MachineInstr &MI,
const LiveIntervals &LIS) {
return getLiveRegs(LIS.getInstructionIndex(MI).getDeadSlot(), LIS,
@@ -211,6 +255,9 @@ GCNRegPressure getRegPressure(const MachineRegisterInfo &MRI,
return Res;
}
+bool isEqual(const GCNRPTracker::LiveRegSet &S1,
+ const GCNRPTracker::LiveRegSet &S2);
+
void printLivesAt(SlotIndex SI,
const LiveIntervals &LIS,
const MachineRegisterInfo &MRI);
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index 226ea7967be..4ea990ae490 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -445,8 +445,12 @@ void GCNScheduleDAGMILive::computeBlockPressure(const MachineBasicBlock *MBB) {
RPTracker.reset(*MBB->begin(), &LiveIn);
MBBLiveIns.erase(LiveInIt);
} else {
- I = Regions[CurRegion].first;
- RPTracker.reset(*I);
+ auto &Rgn = Regions[CurRegion];
+ I = Rgn.first;
+ auto *NonDbgMI = &*skipDebugInstructionsForward(Rgn.first, Rgn.second);
+ auto LRS = BBLiveInMap.lookup(NonDbgMI);
+ assert(isEqual(getLiveRegsBefore(*NonDbgMI, *LIS), LRS));
+ RPTracker.reset(*I, &LRS);
}
for ( ; ; ) {
@@ -477,6 +481,23 @@ void GCNScheduleDAGMILive::computeBlockPressure(const MachineBasicBlock *MBB) {
}
}
+DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet> // live-before sets, one key per block
+GCNScheduleDAGMILive::getBBLiveInMap() const {
+  assert(!Regions.empty()); // I->first is dereferenced unconditionally below
+  std::vector<MachineInstr *> BBStarters;
+  BBStarters.reserve(Regions.size()); // upper bound: at most one starter per region
+  auto I = Regions.rbegin(), E = Regions.rend(); // regions are walked in reverse order
+  do {
+    auto *BB = I->first->getParent(); // re-read per block; a stale BB kept every later region
+    auto *MI = &*skipDebugInstructionsForward(I->first, I->second);
+    BBStarters.push_back(MI); // key: first non-debug instr of the first region met in BB
+    do { // skip the remaining regions that belong to the same block
+      ++I;
+    } while (I != E && I->first->getParent() == BB);
+  } while (I != E);
+  return getLiveRegMap(BBStarters, false /*After*/, *LIS); // After=false: liveness before MI
+}
+
void GCNScheduleDAGMILive::finalizeSchedule() {
GCNMaxOccupancySchedStrategy &S = (GCNMaxOccupancySchedStrategy&)*SchedImpl;
LLVM_DEBUG(dbgs() << "All regions recorded, starting actual scheduling.\n");
@@ -484,6 +505,9 @@ void GCNScheduleDAGMILive::finalizeSchedule() {
LiveIns.resize(Regions.size());
Pressure.resize(Regions.size());
+ if (!Regions.empty())
+ BBLiveInMap = getBBLiveInMap();
+
do {
Stage++;
RegionIdx = 0;
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
index b425fd13b49..eaf3dee9ba5 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
@@ -90,6 +90,9 @@ class GCNScheduleDAGMILive final : public ScheduleDAGMILive {
// Temporary basic block live-in cache.
DenseMap<const MachineBasicBlock*, GCNRPTracker::LiveRegSet> MBBLiveIns;
+ DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet> BBLiveInMap;
+ DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet> getBBLiveInMap() const;
+
// Return current region pressure.
GCNRegPressure getRealRegPressure() const;
OpenPOWER on IntegriCloud