summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--polly/include/polly/ScopInfo.h61
-rw-r--r--polly/lib/Analysis/ScopInfo.cpp140
-rw-r--r--polly/test/ScopInfo/schedule-const-post-dominator-walk-2.ll42
-rw-r--r--polly/test/ScopInfo/schedule-const-post-dominator-walk.ll34
4 files changed, 233 insertions, 44 deletions
diff --git a/polly/include/polly/ScopInfo.h b/polly/include/polly/ScopInfo.h
index 28dced21903..b46e0817a4f 100644
--- a/polly/include/polly/ScopInfo.h
+++ b/polly/include/polly/ScopInfo.h
@@ -1474,17 +1474,62 @@ private:
/// the dimensionality of the underlying ScopArrayInfo object.
void updateAccessDimensionality();
- /// @brief Build Schedule for the SCoP region.
- ///
+ /// @brief Construct the schedule of this SCoP.
void buildSchedule();
- /// @brief Build Schedule for the region @p RN.
+ /// @brief A loop stack element to keep track of per-loop information during
+ /// schedule construction.
+ typedef struct LoopStackElement {
+ // The loop for which we keep information.
+ Loop *L;
+
+ // The (possibly incomplete) schedule for this loop.
+ isl_schedule *Schedule;
+
+ // The number of basic blocks in the current loop, for which a schedule has
+ // already been constructed.
+ unsigned NumBlocksProcessed;
+
+ LoopStackElement(Loop *L, __isl_give isl_schedule *S,
+ unsigned NumBlocksProcessed)
+ : L(L), Schedule(S), NumBlocksProcessed(NumBlocksProcessed){};
+ } LoopStackElementTy;
+
+ /// @brief The loop stack used for schedule construction.
+ ///
+ /// The loop stack keeps track of schedule information for a set of nested
+ /// loops as well as an (optional) 'nullptr' loop that models the outermost
+ /// schedule dimension. The loops in a loop stack always have a parent-child
+ /// relation where the loop at position n is the parent of the loop at
+ /// position n + 1.
+ typedef SmallVector<LoopStackElementTy, 4> LoopStackTy;
+
+ /// @brief Construct schedule information for a given Region and add the
+ /// derived information to @p LoopStack.
+ ///
+ /// Given a Region we derive schedule information for all RegionNodes
+ /// contained in this region ensuring that the assigned execution times
+ /// correctly model the existing control flow relations.
+ ///
+ /// @param R The region which to process.
+ /// @param LoopStack A stack of loops that are currently under
+ /// construction.
+ void buildSchedule(Region *R, LoopStackTy &LoopStack);
+
+ /// @brief Build Schedule for the region node @p RN and add the derived
+ /// information to @p LoopStack.
+ ///
+ /// In case @p RN is a BasicBlock or a non-affine Region, we construct the
+ /// schedule for this @p RN and also finalize loop schedules in case the
+ /// current @p RN completes the loop.
+ ///
+ /// In case @p RN is a Region that is not non-affine, we delegate the
+ /// buildSchedule(Region *R, ...).
///
- /// @param RN The current region traversed.
- /// @param LoopSchedules Map from loops to their schedule and progress.
- void buildSchedule(
- RegionNode *RN,
- DenseMap<Loop *, std::pair<isl_schedule *, unsigned>> &LoopSchedules);
+ /// @param RN The RegionNode to be traversed.
+ /// @param LoopStack A stack of loops that are currently under
+ /// construction.
+ void buildSchedule(RegionNode *RN, LoopStackTy &LoopStack);
/// @brief Collect all memory access relations of a given type.
///
diff --git a/polly/lib/Analysis/ScopInfo.cpp b/polly/lib/Analysis/ScopInfo.cpp
index d4ad62444ea..80d13807406 100644
--- a/polly/lib/Analysis/ScopInfo.cpp
+++ b/polly/lib/Analysis/ScopInfo.cpp
@@ -3444,61 +3444,129 @@ void Scop::addScopStmt(BasicBlock *BB, Region *R) {
}
void Scop::buildSchedule() {
- DenseMap<Loop *, std::pair<isl_schedule *, unsigned>> LoopSchedules;
Loop *L = getLoopSurroundingRegion(getRegion(), LI);
- LoopSchedules[L];
- buildSchedule(getRegion().getNode(), LoopSchedules);
- Schedule = LoopSchedules[L].first;
+ LoopStackTy LoopStack({LoopStackElementTy(L, nullptr, 0)});
+ buildSchedule(getRegion().getNode(), LoopStack);
+ assert(LoopStack.size() == 1 && LoopStack.back().L == L);
+ Schedule = LoopStack[0].Schedule;
}
-void Scop::buildSchedule(
- RegionNode *RN,
- DenseMap<Loop *, std::pair<isl_schedule *, unsigned>> &LoopSchedules) {
+/// To generate a schedule for the elements in a Region we traverse the Region
+/// in reverse-post-order and add the contained RegionNodes in traversal order
+/// to the schedule of the loop that is currently at the top of the LoopStack.
+/// For loop-free codes, this results in a correct sequential ordering.
+///
+/// Example:
+/// bb1(0)
+/// / \.
+/// bb2(1) bb3(2)
+/// \ / \.
+/// bb4(3) bb5(4)
+/// \ /
+/// bb6(5)
+///
+/// Including loops requires additional processing. Whenever a loop header is
+/// encountered, the corresponding loop is added to the @p LoopStack. Starting
+/// from an empty schedule, we first process all RegionNodes that are within
+/// this loop and complete the sequential schedule at this loop-level before
+/// processing any other nodes. To implement this
+/// loop-nodes-first-processing, the reverse post-order traversal is
+/// insufficient. Hence, we additionally check if the traversal yields
+/// sub-regions or blocks that are outside the last loop on the @p LoopStack.
+/// These region-nodes are then queued and only traversed after all nodes
+/// within the current loop have been processed.
+void Scop::buildSchedule(Region *R, LoopStackTy &LoopStack) {
+ Loop *OuterScopLoop = getLoopSurroundingRegion(getRegion(), LI);
+
+ ReversePostOrderTraversal<Region *> RTraversal(R);
+ std::deque<RegionNode *> WorkList(RTraversal.begin(), RTraversal.end());
+ std::deque<RegionNode *> DelayList;
+ bool LastRNWaiting = false;
+
+ // Iterate over the region @p R in reverse post-order but queue
+ // sub-regions/blocks iff they are not part of the last encountered but not
+ // completely traversed loop. The variable LastRNWaiting is a flag to indicate
+ // that we queued the last sub-region/block from the reverse post-order
+ // iterator. If it is set we have to explore the next sub-region/block from
+ // the iterator (if any) to guarantee progress. If it is not set we first try
+ // the next queued sub-region/blocks.
+ while (!WorkList.empty() || !DelayList.empty()) {
+ RegionNode *RN;
+
+ if ((LastRNWaiting && !WorkList.empty()) || DelayList.size() == 0) {
+ RN = WorkList.front();
+ WorkList.pop_front();
+ LastRNWaiting = false;
+ } else {
+ RN = DelayList.front();
+ DelayList.pop_front();
+ }
+
+ Loop *L = getRegionNodeLoop(RN, LI);
+ if (!getRegion().contains(L))
+ L = OuterScopLoop;
+
+ Loop *LastLoop = LoopStack.back().L;
+ if (LastLoop != L) {
+ if (!LastLoop->contains(L)) {
+ LastRNWaiting = true;
+ DelayList.push_back(RN);
+ continue;
+ }
+ LoopStack.push_back({L, nullptr, 0});
+ }
+ buildSchedule(RN, LoopStack);
+ }
+
+ return;
+}
+
+void Scop::buildSchedule(RegionNode *RN, LoopStackTy &LoopStack) {
if (RN->isSubRegion()) {
auto *LocalRegion = RN->getNodeAs<Region>();
if (!SD.isNonAffineSubRegion(LocalRegion, &getRegion())) {
- ReversePostOrderTraversal<Region *> RTraversal(LocalRegion);
- for (auto *Child : RTraversal)
- buildSchedule(Child, LoopSchedules);
+ buildSchedule(LocalRegion, LoopStack);
return;
}
}
- Loop *L = getRegionNodeLoop(RN, LI);
- if (!getRegion().contains(L))
- L = getLoopSurroundingRegion(getRegion(), LI);
-
- int LD = getRelativeLoopDepth(L);
- auto &LSchedulePair = LoopSchedules[L];
- LSchedulePair.second += getNumBlocksInRegionNode(RN);
+ auto &LoopData = LoopStack.back();
+ LoopData.NumBlocksProcessed += getNumBlocksInRegionNode(RN);
if (auto *Stmt = getStmtForRegionNode(RN)) {
auto *UDomain = isl_union_set_from_set(Stmt->getDomain());
auto *StmtSchedule = isl_schedule_from_domain(UDomain);
- LSchedulePair.first = combineInSequence(LSchedulePair.first, StmtSchedule);
+ LoopData.Schedule = combineInSequence(LoopData.Schedule, StmtSchedule);
}
- isl_schedule *LSchedule = LSchedulePair.first;
- unsigned NumVisited = LSchedulePair.second;
- while (L && NumVisited == L->getNumBlocks()) {
- auto *PL = L->getParentLoop();
-
- auto &PSchedulePair = LoopSchedules[PL];
-
- if (LSchedule) {
- auto *LDomain = isl_schedule_get_domain(LSchedule);
- auto *MUPA = mapToDimension(LDomain, LD + 1);
- LSchedule = isl_schedule_insert_partial_schedule(LSchedule, MUPA);
- PSchedulePair.first = combineInSequence(PSchedulePair.first, LSchedule);
+ // Check if we just processed the last node in this loop. If we did, finalize
+ // the loop by:
+ //
+ // - adding new schedule dimensions
+ // - folding the resulting schedule into the parent loop schedule
+ // - dropping the loop schedule from the LoopStack.
+ //
+ // Then continue to check surrounding loops, which might also have been
+ // completed by this node.
+ while (LoopData.L &&
+ LoopData.NumBlocksProcessed == LoopData.L->getNumBlocks()) {
+ auto Schedule = LoopData.Schedule;
+ auto NumBlocksProcessed = LoopData.NumBlocksProcessed;
+
+ LoopStack.pop_back();
+ auto &NextLoopData = LoopStack.back();
+
+ if (Schedule) {
+ auto *Domain = isl_schedule_get_domain(Schedule);
+ auto *MUPA = mapToDimension(Domain, LoopStack.size());
+ Schedule = isl_schedule_insert_partial_schedule(Schedule, MUPA);
+ NextLoopData.Schedule =
+ combineInSequence(NextLoopData.Schedule, Schedule);
}
- PSchedulePair.second += NumVisited;
-
- L = PL;
- LD--;
- NumVisited = PSchedulePair.second;
- LSchedule = PSchedulePair.first;
+ NextLoopData.NumBlocksProcessed += NumBlocksProcessed;
+ LoopData = NextLoopData;
}
}
diff --git a/polly/test/ScopInfo/schedule-const-post-dominator-walk-2.ll b/polly/test/ScopInfo/schedule-const-post-dominator-walk-2.ll
new file mode 100644
index 00000000000..4fe4d17cfb0
--- /dev/null
+++ b/polly/test/ScopInfo/schedule-const-post-dominator-walk-2.ll
@@ -0,0 +1,42 @@
+; RUN: opt %loadPolly -analyze -polly-scops < %s | FileCheck %s
+
+; CHECK: Stmt_loopA[i0] -> [0, 0, 0]
+; CHECK-DAG: Stmt_loopB[i0] -> [0, 0, 1]
+; CHECK-DAG: Stmt_bbB[] -> [1, 0, 0]
+; CHECK-DAG: Stmt_bbA[] -> [2, 0, 0]
+; CHECK-DAG: Stmt_bbMerge[] -> [3, 0, 0]
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @hoge(i64 %p0, i64 %p1, i64 %p2, i64 %p3, float* %A) {
+entry:
+ br label %loopA
+
+loopA:
+ %tmp4 = phi i64 [ 0, %entry ], [ 0, %loopB]
+ store float 42.0, float* %A
+ %cmp0 = icmp sle i64 %p0, 100
+ br i1 %cmp0, label %loopB, label %bbB
+
+loopB:
+ store float 42.0, float* %A
+ %cmp1 = icmp sle i64 %p1, 100
+ br i1 %cmp1, label %loopA, label %bbA
+
+bbA:
+ store float 42.0, float* %A
+ %cmpbbA = icmp sle i64 %p2, 50
+ br i1 %cmpbbA, label %bbMerge, label %exit
+
+bbB:
+ store float 42.0, float* %A
+ %cmpbbB= icmp sle i64 %p3, 200
+ br i1 %cmpbbB, label %exit, label %bbMerge
+
+bbMerge:
+ store float 42.0, float* %A
+ br label %exit
+
+exit:
+ ret void
+}
diff --git a/polly/test/ScopInfo/schedule-const-post-dominator-walk.ll b/polly/test/ScopInfo/schedule-const-post-dominator-walk.ll
new file mode 100644
index 00000000000..e44d98a1e2f
--- /dev/null
+++ b/polly/test/ScopInfo/schedule-const-post-dominator-walk.ll
@@ -0,0 +1,34 @@
+; RUN: opt %loadPolly -analyze -polly-scops < %s | FileCheck %s
+
+; CHECK: { Stmt_bb3[i0] -> [0, 0] };
+; CHECK: { Stmt_bb2[] -> [1, 0] };
+
+; Verify that we generate the correct schedule. In older versions of Polly,
+; we generated an incorrect schedule:
+;
+; { Stmt_bb3[i0] -> [1, 0]; Stmt_bb2[] -> [0, 0] }
+;
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @hoge() {
+bb:
+ br label %bb3
+
+bb1: ; preds = %bb5
+ br label %bb6
+
+bb2: ; preds = %bb3
+ %tmp = phi i64 [ %tmp4, %bb3 ]
+ br label %bb6
+
+bb3: ; preds = %bb5, %bb
+ %tmp4 = phi i64 [ 0, %bb ], [ 0, %bb5 ]
+ br i1 false, label %bb5, label %bb2
+
+bb5: ; preds = %bb3
+ br i1 false, label %bb3, label %bb1
+
+bb6: ; preds = %bb2, %bb1
+ %tmp2 = phi i64 [ %tmp, %bb2 ], [ undef, %bb1 ]
+ ret void
+}
OpenPOWER on IntegriCloud