summaryrefslogtreecommitdiffstats
path: root/polly/lib/ScheduleOptimizer.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'polly/lib/ScheduleOptimizer.cpp')
-rw-r--r--polly/lib/ScheduleOptimizer.cpp353
1 files changed, 168 insertions, 185 deletions
diff --git a/polly/lib/ScheduleOptimizer.cpp b/polly/lib/ScheduleOptimizer.cpp
index 266ec907d06..10639771765 100644
--- a/polly/lib/ScheduleOptimizer.cpp
+++ b/polly/lib/ScheduleOptimizer.cpp
@@ -39,13 +39,10 @@
using namespace llvm;
using namespace polly;
-namespace polly {
- bool DisablePollyTiling;
-}
-static cl::opt<bool, true>
-DisableTiling("polly-no-tiling",
- cl::desc("Disable tiling in the scheduler"), cl::Hidden,
- cl::location(polly::DisablePollyTiling), cl::init(false));
+namespace polly { bool DisablePollyTiling; }
+static cl::opt<bool, true> DisableTiling(
+ "polly-no-tiling", cl::desc("Disable tiling in the scheduler"), cl::Hidden,
+ cl::location(polly::DisablePollyTiling), cl::init(false));
static cl::opt<std::string>
OptimizeDeps("polly-opt-optimize-only",
@@ -54,8 +51,8 @@ OptimizeDeps("polly-opt-optimize-only",
static cl::opt<std::string>
SimplifyDeps("polly-opt-simplify-deps",
- cl::desc("Dependences should be simplified (yes/no)"),
- cl::Hidden, cl::init("yes"));
+ cl::desc("Dependences should be simplified (yes/no)"), cl::Hidden,
+ cl::init("yes"));
static cl::opt<int>
MaxConstantTerm("polly-opt-max-constant-term",
@@ -67,140 +64,133 @@ MaxCoefficient("polly-opt-max-coefficient",
cl::desc("The maximal coefficient allowed (-1 is unlimited)"),
cl::Hidden, cl::init(20));
-static cl::opt<std::string>
-FusionStrategy("polly-opt-fusion",
- cl::desc("The fusion strategy to choose (min/max)"),
- cl::Hidden, cl::init("min"));
+static cl::opt<std::string> FusionStrategy(
+ "polly-opt-fusion", cl::desc("The fusion strategy to choose (min/max)"),
+ cl::Hidden, cl::init("min"));
-static cl::opt<std::string>
-MaximizeBandDepth("polly-opt-maximize-bands",
- cl::desc("Maximize the band depth (yes/no)"),
- cl::Hidden, cl::init("yes"));
+static cl::opt<std::string> MaximizeBandDepth(
+ "polly-opt-maximize-bands", cl::desc("Maximize the band depth (yes/no)"),
+ cl::Hidden, cl::init("yes"));
namespace {
- class IslScheduleOptimizer : public ScopPass {
-
- public:
- static char ID;
- explicit IslScheduleOptimizer() : ScopPass(ID) {
- LastSchedule = NULL;
- }
-
- ~IslScheduleOptimizer() {
- isl_schedule_free(LastSchedule);
- }
-
- virtual bool runOnScop(Scop &S);
- void printScop(llvm::raw_ostream &OS) const;
- void getAnalysisUsage(AnalysisUsage &AU) const;
-
- private:
- isl_schedule *LastSchedule;
-
- static void extendScattering(Scop &S, unsigned NewDimensions);
-
- /// @brief Create a map that describes a n-dimensonal tiling.
- ///
- /// getTileMap creates a map from a n-dimensional scattering space into an
- /// 2*n-dimensional scattering space. The map describes a rectangular
- /// tiling.
- ///
- /// Example:
- /// scheduleDimensions = 2, parameterDimensions = 1, tileSize = 32
- ///
- /// tileMap := [p0] -> {[s0, s1] -> [t0, t1, s0, s1]:
- /// t0 % 32 = 0 and t0 <= s0 < t0 + 32 and
- /// t1 % 32 = 0 and t1 <= s1 < t1 + 32}
- ///
- /// Before tiling:
- ///
- /// for (i = 0; i < N; i++)
- /// for (j = 0; j < M; j++)
- /// S(i,j)
- ///
- /// After tiling:
- ///
- /// for (t_i = 0; t_i < N; i+=32)
- /// for (t_j = 0; t_j < M; j+=32)
- /// for (i = t_i; i < min(t_i + 32, N); i++) | Unknown that N % 32 = 0
- /// for (j = t_j; j < t_j + 32; j++) | Known that M % 32 = 0
- /// S(i,j)
- ///
- static isl_basic_map *getTileMap(isl_ctx *ctx, int scheduleDimensions,
- isl_space *SpaceModel, int tileSize = 32);
-
- /// @brief Get the schedule for this band.
- ///
- /// Polly applies transformations like tiling on top of the isl calculated
- /// value. This can influence the number of scheduling dimension. The
- /// number of schedule dimensions is returned in the parameter 'Dimension'.
- static isl_union_map *getScheduleForBand(isl_band *Band, int *Dimensions);
-
- /// @brief Create a map that pre-vectorizes one scheduling dimension.
- ///
- /// getPrevectorMap creates a map that maps each input dimension to the same
- /// output dimension, except for the dimension DimToVectorize.
- /// DimToVectorize is strip mined by 'VectorWidth' and the newly created
- /// point loop of DimToVectorize is moved to the innermost level.
- ///
- /// Example (DimToVectorize=0, ScheduleDimensions=2, VectorWidth=4):
- ///
- /// | Before transformation
- /// |
- /// | A[i,j] -> [i,j]
- /// |
- /// | for (i = 0; i < 128; i++)
- /// | for (j = 0; j < 128; j++)
- /// | A(i,j);
- ///
- /// Prevector map:
- /// [i,j] -> [it,j,ip] : it % 4 = 0 and it <= ip <= it + 3 and i = ip
- ///
- /// | After transformation:
- /// |
- /// | A[i,j] -> [it,j,ip] : it % 4 = 0 and it <= ip <= it + 3 and i = ip
- /// |
- /// | for (it = 0; it < 128; it+=4)
- /// | for (j = 0; j < 128; j++)
- /// | for (ip = max(0,it); ip < min(128, it + 3); ip++)
- /// | A(ip,j);
- ///
- /// The goal of this transformation is to create a trivially vectorizable
- /// loop. This means a parallel loop at the innermost level that has a
- /// constant number of iterations corresponding to the target vector width.
- ///
- /// This transformation creates a loop at the innermost level. The loop has
- /// a constant number of iterations, if the number of loop iterations at
- /// DimToVectorize can be divided by VectorWidth. The default VectorWidth is
- /// currently constant and not yet target specific. This function does not
- /// reason about parallelism.
- static isl_map *getPrevectorMap(isl_ctx *ctx, int DimToVectorize,
- int ScheduleDimensions,
- int VectorWidth = 4);
-
- /// @brief Get the scheduling map for a list of bands.
- ///
- /// Walk recursively the forest of bands to combine the schedules of the
- /// individual bands to the overall schedule. In case tiling is requested,
- /// the individual bands are tiled.
- static isl_union_map *getScheduleForBandList(isl_band_list *BandList);
-
- static isl_union_map *getScheduleMap(isl_schedule *Schedule);
-
- bool doFinalization() {
- isl_schedule_free(LastSchedule);
- LastSchedule = NULL;
- return true;
- }
- };
+class IslScheduleOptimizer : public ScopPass {
+
+public:
+ static char ID;
+ explicit IslScheduleOptimizer() : ScopPass(ID) { LastSchedule = NULL; }
+
+ ~IslScheduleOptimizer() { isl_schedule_free(LastSchedule); }
+
+ virtual bool runOnScop(Scop &S);
+ void printScop(llvm::raw_ostream &OS) const;
+ void getAnalysisUsage(AnalysisUsage &AU) const;
+
+private:
+ isl_schedule *LastSchedule;
+
+ static void extendScattering(Scop &S, unsigned NewDimensions);
+
+ /// @brief Create a map that describes a n-dimensonal tiling.
+ ///
+ /// getTileMap creates a map from a n-dimensional scattering space into an
+ /// 2*n-dimensional scattering space. The map describes a rectangular
+ /// tiling.
+ ///
+ /// Example:
+ /// scheduleDimensions = 2, parameterDimensions = 1, tileSize = 32
+ ///
+ /// tileMap := [p0] -> {[s0, s1] -> [t0, t1, s0, s1]:
+ /// t0 % 32 = 0 and t0 <= s0 < t0 + 32 and
+ /// t1 % 32 = 0 and t1 <= s1 < t1 + 32}
+ ///
+ /// Before tiling:
+ ///
+ /// for (i = 0; i < N; i++)
+ /// for (j = 0; j < M; j++)
+ /// S(i,j)
+ ///
+ /// After tiling:
+ ///
+ /// for (t_i = 0; t_i < N; i+=32)
+ /// for (t_j = 0; t_j < M; j+=32)
+ /// for (i = t_i; i < min(t_i + 32, N); i++) | Unknown that N % 32 = 0
+ /// for (j = t_j; j < t_j + 32; j++) | Known that M % 32 = 0
+ /// S(i,j)
+ ///
+ static isl_basic_map *getTileMap(isl_ctx *ctx, int scheduleDimensions,
+ isl_space *SpaceModel, int tileSize = 32);
+
+ /// @brief Get the schedule for this band.
+ ///
+ /// Polly applies transformations like tiling on top of the isl calculated
+ /// value. This can influence the number of scheduling dimension. The
+ /// number of schedule dimensions is returned in the parameter 'Dimension'.
+ static isl_union_map *getScheduleForBand(isl_band *Band, int *Dimensions);
+
+ /// @brief Create a map that pre-vectorizes one scheduling dimension.
+ ///
+ /// getPrevectorMap creates a map that maps each input dimension to the same
+ /// output dimension, except for the dimension DimToVectorize.
+ /// DimToVectorize is strip mined by 'VectorWidth' and the newly created
+ /// point loop of DimToVectorize is moved to the innermost level.
+ ///
+ /// Example (DimToVectorize=0, ScheduleDimensions=2, VectorWidth=4):
+ ///
+ /// | Before transformation
+ /// |
+ /// | A[i,j] -> [i,j]
+ /// |
+ /// | for (i = 0; i < 128; i++)
+ /// | for (j = 0; j < 128; j++)
+ /// | A(i,j);
+ ///
+ /// Prevector map:
+ /// [i,j] -> [it,j,ip] : it % 4 = 0 and it <= ip <= it + 3 and i = ip
+ ///
+ /// | After transformation:
+ /// |
+ /// | A[i,j] -> [it,j,ip] : it % 4 = 0 and it <= ip <= it + 3 and i = ip
+ /// |
+ /// | for (it = 0; it < 128; it+=4)
+ /// | for (j = 0; j < 128; j++)
+ /// | for (ip = max(0,it); ip < min(128, it + 3); ip++)
+ /// | A(ip,j);
+ ///
+ /// The goal of this transformation is to create a trivially vectorizable
+ /// loop. This means a parallel loop at the innermost level that has a
+ /// constant number of iterations corresponding to the target vector width.
+ ///
+ /// This transformation creates a loop at the innermost level. The loop has
+ /// a constant number of iterations, if the number of loop iterations at
+ /// DimToVectorize can be divided by VectorWidth. The default VectorWidth is
+ /// currently constant and not yet target specific. This function does not
+ /// reason about parallelism.
+ static isl_map *getPrevectorMap(isl_ctx *ctx, int DimToVectorize,
+ int ScheduleDimensions, int VectorWidth = 4);
+
+ /// @brief Get the scheduling map for a list of bands.
+ ///
+ /// Walk recursively the forest of bands to combine the schedules of the
+ /// individual bands to the overall schedule. In case tiling is requested,
+ /// the individual bands are tiled.
+ static isl_union_map *getScheduleForBandList(isl_band_list *BandList);
+
+ static isl_union_map *getScheduleMap(isl_schedule *Schedule);
+
+ bool doFinalization() {
+ isl_schedule_free(LastSchedule);
+ LastSchedule = NULL;
+ return true;
+ }
+};
}
char IslScheduleOptimizer::ID = 0;
static int getSingleMap(__isl_take isl_map *map, void *user) {
- isl_map **singleMap = (isl_map **) user;
+ isl_map **singleMap = (isl_map **)user;
*singleMap = map;
return 0;
@@ -222,17 +212,14 @@ void IslScheduleOptimizer::extendScattering(Scop &S, unsigned NewDimensions) {
for (unsigned i = OldDimensions; i < NewDimensions; i++)
Map = isl_map_fix_si(Map, isl_dim_out, i, 0);
-
Map = isl_map_align_params(Map, S.getParamSpace());
New = isl_map_apply_range(Stmt->getScattering(), Map);
Stmt->setScattering(New);
}
}
-isl_basic_map *IslScheduleOptimizer::getTileMap(isl_ctx *ctx,
- int scheduleDimensions,
- isl_space *SpaceModel,
- int tileSize) {
+isl_basic_map *IslScheduleOptimizer::getTileMap(
+ isl_ctx *ctx, int scheduleDimensions, isl_space *SpaceModel, int tileSize) {
// We construct
//
// tileMap := [p0] -> {[s0, s1] -> [t0, t1, p0, p1, a0, a1]:
@@ -240,8 +227,8 @@ isl_basic_map *IslScheduleOptimizer::getTileMap(isl_ctx *ctx,
// s1 = a1 * 32 and s1 = p1 and t1 <= p1 < t1 + 32}
//
// and project out the auxilary dimensions a0 and a1.
- isl_space *Space = isl_space_alloc(ctx, 0, scheduleDimensions,
- scheduleDimensions * 3);
+ isl_space *Space =
+ isl_space_alloc(ctx, 0, scheduleDimensions, scheduleDimensions * 3);
isl_basic_map *tileMap = isl_basic_map_universe(isl_space_copy(Space));
isl_local_space *LocalSpace = isl_local_space_from_space(Space);
@@ -285,15 +272,14 @@ isl_basic_map *IslScheduleOptimizer::getTileMap(isl_ctx *ctx,
// The auxilary dimensions are transformed into existentially quantified ones.
// This reduces the number of visible scattering dimensions and allows Cloog
// to produces better code.
- tileMap = isl_basic_map_project_out(tileMap, isl_dim_out,
- 2 * scheduleDimensions,
- scheduleDimensions);
+ tileMap = isl_basic_map_project_out(
+ tileMap, isl_dim_out, 2 * scheduleDimensions, scheduleDimensions);
isl_local_space_free(LocalSpace);
return tileMap;
}
-isl_union_map *IslScheduleOptimizer::getScheduleForBand(isl_band *Band,
- int *Dimensions) {
+isl_union_map *
+IslScheduleOptimizer::getScheduleForBand(isl_band *Band, int *Dimensions) {
isl_union_map *PartialSchedule;
isl_ctx *ctx;
isl_space *Space;
@@ -321,10 +307,8 @@ isl_union_map *IslScheduleOptimizer::getScheduleForBand(isl_band *Band,
return isl_union_map_apply_range(PartialSchedule, TileUMap);
}
-isl_map *IslScheduleOptimizer::getPrevectorMap(isl_ctx *ctx,
- int DimToVectorize,
- int ScheduleDimensions,
- int VectorWidth) {
+isl_map *IslScheduleOptimizer::getPrevectorMap(
+ isl_ctx *ctx, int DimToVectorize, int ScheduleDimensions, int VectorWidth) {
isl_space *Space;
isl_local_space *LocalSpace, *LocalSpaceRange;
isl_set *Modulo;
@@ -335,7 +319,7 @@ isl_map *IslScheduleOptimizer::getPrevectorMap(isl_ctx *ctx,
int TileDimension; /* it */
isl_int VectorWidthMP;
- assert (0 <= DimToVectorize && DimToVectorize < ScheduleDimensions);
+ assert(0 <= DimToVectorize && DimToVectorize < ScheduleDimensions);
Space = isl_space_alloc(ctx, 0, ScheduleDimensions, ScheduleDimensions + 1);
TilingMap = isl_map_universe(isl_space_copy(Space));
@@ -385,8 +369,8 @@ isl_map *IslScheduleOptimizer::getPrevectorMap(isl_ctx *ctx,
return TilingMap;
}
-isl_union_map *IslScheduleOptimizer::getScheduleForBandList(
- isl_band_list *BandList) {
+isl_union_map *
+IslScheduleOptimizer::getScheduleForBandList(isl_band_list *BandList) {
int NumBands;
isl_union_map *Schedule;
isl_ctx *ctx;
@@ -411,24 +395,24 @@ isl_union_map *IslScheduleOptimizer::getScheduleForBandList(
Children = isl_band_get_children(Band);
SuffixSchedule = getScheduleForBandList(Children);
- PartialSchedule = isl_union_map_flat_range_product(PartialSchedule,
- SuffixSchedule);
+ PartialSchedule =
+ isl_union_map_flat_range_product(PartialSchedule, SuffixSchedule);
isl_band_list_free(Children);
} else if (PollyVectorizerChoice != VECTORIZER_NONE) {
- for (int j = 0; j < isl_band_n_member(Band); j++) {
- if (isl_band_member_is_zero_distance(Band, j)) {
+ for (int j = 0; j < isl_band_n_member(Band); j++) {
+ if (isl_band_member_is_zero_distance(Band, j)) {
isl_map *TileMap;
isl_union_map *TileUMap;
- TileMap = getPrevectorMap(ctx, ScheduleDimensions - j - 1,
+ TileMap = getPrevectorMap(ctx, ScheduleDimensions - j - 1,
ScheduleDimensions);
- TileUMap = isl_union_map_from_map(TileMap);
- TileUMap = isl_union_map_align_params(TileUMap,
- isl_space_copy(Space));
- PartialSchedule = isl_union_map_apply_range(PartialSchedule,
- TileUMap);
- break;
- }
+ TileUMap = isl_union_map_from_map(TileMap);
+ TileUMap =
+ isl_union_map_align_params(TileUMap, isl_space_copy(Space));
+ PartialSchedule =
+ isl_union_map_apply_range(PartialSchedule, TileUMap);
+ break;
+ }
}
}
@@ -455,20 +439,20 @@ bool IslScheduleOptimizer::runOnScop(Scop &S) {
LastSchedule = NULL;
// Build input data.
- int ValidityKinds = Dependences::TYPE_RAW | Dependences::TYPE_WAR
- | Dependences::TYPE_WAW;
+ int ValidityKinds =
+ Dependences::TYPE_RAW | Dependences::TYPE_WAR | Dependences::TYPE_WAW;
int ProximityKinds;
if (OptimizeDeps == "all")
- ProximityKinds = Dependences::TYPE_RAW | Dependences::TYPE_WAR
- | Dependences::TYPE_WAW;
+ ProximityKinds =
+ Dependences::TYPE_RAW | Dependences::TYPE_WAR | Dependences::TYPE_WAW;
else if (OptimizeDeps == "raw")
ProximityKinds = Dependences::TYPE_RAW;
else {
errs() << "Do not know how to optimize for '" << OptimizeDeps << "'"
- << " Falling back to optimizing all dependences.\n";
- ProximityKinds = Dependences::TYPE_RAW | Dependences::TYPE_WAR
- | Dependences::TYPE_WAW;
+ << " Falling back to optimizing all dependences.\n";
+ ProximityKinds =
+ Dependences::TYPE_RAW | Dependences::TYPE_WAR | Dependences::TYPE_WAW;
}
isl_union_set *Domain = S.getDomains();
@@ -489,8 +473,8 @@ bool IslScheduleOptimizer::runOnScop(Scop &S) {
if (SimplifyDeps == "yes") {
Validity = isl_union_map_gist_domain(Validity, isl_union_set_copy(Domain));
Validity = isl_union_map_gist_range(Validity, isl_union_set_copy(Domain));
- Proximity = isl_union_map_gist_domain(Proximity,
- isl_union_set_copy(Domain));
+ Proximity =
+ isl_union_map_gist_domain(Proximity, isl_union_set_copy(Domain));
Proximity = isl_union_map_gist_range(Proximity, isl_union_set_copy(Domain));
} else if (SimplifyDeps != "no") {
errs() << "warning: Option -polly-opt-simplify-deps should either be 'yes' "
@@ -535,7 +519,7 @@ bool IslScheduleOptimizer::runOnScop(Scop &S) {
isl_options_set_on_error(S.getIslCtx(), ISL_ON_ERROR_CONTINUE);
isl_schedule *Schedule;
- Schedule = isl_union_set_compute_schedule(Domain, Validity, Proximity);
+ Schedule = isl_union_set_compute_schedule(Domain, Validity, Proximity);
isl_options_set_on_error(S.getIslCtx(), ISL_ON_ERROR_ABORT);
// In cases the scheduler is not able to optimize the code, we just do not
@@ -543,8 +527,7 @@ bool IslScheduleOptimizer::runOnScop(Scop &S) {
if (!Schedule)
return false;
- DEBUG(dbgs() << "Schedule := "; isl_schedule_dump(Schedule);
- dbgs() << ";\n");
+ DEBUG(dbgs() << "Schedule := "; isl_schedule_dump(Schedule); dbgs() << ";\n");
isl_union_map *ScheduleMap = getScheduleMap(Schedule);
@@ -553,7 +536,7 @@ bool IslScheduleOptimizer::runOnScop(Scop &S) {
isl_set *Domain = Stmt->getDomain();
isl_union_map *StmtBand;
StmtBand = isl_union_map_intersect_domain(isl_union_map_copy(ScheduleMap),
- isl_union_set_from_set(Domain));
+ isl_union_set_from_set(Domain));
isl_map *StmtSchedule;
isl_union_map_foreach_map(StmtBand, getSingleMap, &StmtSchedule);
Stmt->setScattering(StmtSchedule);
@@ -596,13 +579,13 @@ void IslScheduleOptimizer::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<Dependences>();
}
-INITIALIZE_PASS_BEGIN(IslScheduleOptimizer, "polly-opt-isl",
- "Polly - Optimize schedule of SCoP", false, false)
-INITIALIZE_PASS_DEPENDENCY(Dependences)
-INITIALIZE_PASS_DEPENDENCY(ScopInfo)
-INITIALIZE_PASS_END(IslScheduleOptimizer, "polly-opt-isl",
- "Polly - Optimize schedule of SCoP", false, false)
-
-Pass* polly::createIslScheduleOptimizerPass() {
+Pass *polly::createIslScheduleOptimizerPass() {
return new IslScheduleOptimizer();
}
+
+INITIALIZE_PASS_BEGIN(IslScheduleOptimizer, "polly-opt-isl",
+ "Polly - Optimize schedule of SCoP", false, false);
+INITIALIZE_PASS_DEPENDENCY(Dependences);
+INITIALIZE_PASS_DEPENDENCY(ScopInfo);
+INITIALIZE_PASS_END(IslScheduleOptimizer, "polly-opt-isl",
+ "Polly - Optimize schedule of SCoP", false, false)
OpenPOWER on IntegriCloud