summaryrefslogtreecommitdiffstats
path: root/polly/lib/Transform
diff options
context:
space:
mode:
Diffstat (limited to 'polly/lib/Transform')
-rw-r--r--polly/lib/Transform/ScheduleOptimizer.cpp123
1 files changed, 41 insertions, 82 deletions
diff --git a/polly/lib/Transform/ScheduleOptimizer.cpp b/polly/lib/Transform/ScheduleOptimizer.cpp
index 8c35898de56..26b9668d972 100644
--- a/polly/lib/Transform/ScheduleOptimizer.cpp
+++ b/polly/lib/Transform/ScheduleOptimizer.cpp
@@ -118,16 +118,14 @@ private:
/// @return True, if we believe @p NewSchedule is an improvement for @p S.
bool isProfitableSchedule(Scop &S, __isl_keep isl_union_map *NewSchedule);
- /// @brief Create a map that pre-vectorizes one scheduling dimension.
+ /// @brief Pre-vectorizes one scheduling dimension of a schedule band.
///
- /// getPrevectorMap creates a map that maps each input dimension to the same
- /// output dimension, except for the dimension DimToVectorize.
- /// DimToVectorize is strip mined by 'VectorWidth' and the newly created
- /// point loop of DimToVectorize is moved to the innermost level.
+ /// prevectSchedBand splits out the dimension DimToVectorize, tiles it and
+ /// sinks the resulting point loop.
///
- /// Example (DimToVectorize=0, ScheduleDimensions=2, VectorWidth=4):
+ /// Example (DimToVectorize=0, VectorWidth=4):
///
- /// | Before transformation
+ /// | Before transformation:
/// |
/// | A[i,j] -> [i,j]
/// |
@@ -135,17 +133,12 @@ private:
/// | for (j = 0; j < 128; j++)
/// | A(i,j);
///
- /// Prevector map:
- /// [i,j] -> [it,j,ip] : it % 4 = 0 and it <= ip <= it + 3 and i = ip
- ///
/// | After transformation:
/// |
- /// | A[i,j] -> [it,j,ip] : it % 4 = 0 and it <= ip <= it + 3 and i = ip
- /// |
- /// | for (it = 0; it < 128; it+=4)
+ /// | for (it = 0; it < 32; it+=1)
/// | for (j = 0; j < 128; j++)
- /// | for (ip = max(0,it); ip < min(128, it + 3); ip++)
- /// | A(ip,j);
+ /// | for (ip = 0; ip <= 3; ip++)
+ /// | A(4 * it + ip,j);
///
/// The goal of this transformation is to create a trivially vectorizable
/// loop. This means a parallel loop at the innermost level that has a
@@ -156,9 +149,9 @@ private:
/// DimToVectorize can be divided by VectorWidth. The default VectorWidth is
/// currently constant and not yet target specific. This function does not
/// reason about parallelism.
- static __isl_give isl_map *getPrevectorMap(isl_ctx *ctx, int DimToVectorize,
- int ScheduleDimensions,
- int VectorWidth = 4);
+ static __isl_give isl_schedule_node *
+ prevectSchedBand(__isl_take isl_schedule_node *Node, unsigned DimToVectorize,
+ int VectorWidth = 4);
/// @brief Apply additional optimizations on the bands in the schedule tree.
///
@@ -170,7 +163,7 @@ private:
/// - if the band has more than one loop dimension
///
/// - Prevectorize the schedule of the band (or the point loop in case of
- /// tiling)
+ /// tiling).
/// - if vectorization is enabled
///
/// @param Node The schedule node to (possibly) optimize.
@@ -204,58 +197,33 @@ private:
char IslScheduleOptimizer::ID = 0;
-__isl_give isl_map *
-IslScheduleOptimizer::getPrevectorMap(isl_ctx *ctx, int DimToVectorize,
- int ScheduleDimensions, int VectorWidth) {
- isl_space *Space;
- isl_local_space *LocalSpace, *LocalSpaceRange;
- isl_set *Modulo;
- isl_map *TilingMap;
- isl_constraint *c;
- isl_aff *Aff;
- int PointDimension; /* ip */
- int TileDimension; /* it */
- isl_val *VectorWidthMP;
-
- assert(0 <= DimToVectorize && DimToVectorize < ScheduleDimensions);
-
- Space = isl_space_alloc(ctx, 0, ScheduleDimensions, ScheduleDimensions + 1);
- TilingMap = isl_map_universe(isl_space_copy(Space));
- LocalSpace = isl_local_space_from_space(Space);
- PointDimension = ScheduleDimensions;
- TileDimension = DimToVectorize;
-
- // Create an identity map for everything except DimToVectorize and map
- // DimToVectorize to the point loop at the innermost dimension.
- for (int i = 0; i < ScheduleDimensions; i++)
- if (i == DimToVectorize)
- TilingMap =
- isl_map_equate(TilingMap, isl_dim_in, i, isl_dim_out, PointDimension);
- else
- TilingMap = isl_map_equate(TilingMap, isl_dim_in, i, isl_dim_out, i);
-
- // it % 'VectorWidth' = 0
- LocalSpaceRange = isl_local_space_range(isl_local_space_copy(LocalSpace));
- Aff = isl_aff_zero_on_domain(LocalSpaceRange);
- Aff = isl_aff_set_constant_si(Aff, VectorWidth);
- Aff = isl_aff_set_coefficient_si(Aff, isl_dim_in, TileDimension, 1);
- VectorWidthMP = isl_val_int_from_si(ctx, VectorWidth);
- Aff = isl_aff_mod_val(Aff, VectorWidthMP);
- Modulo = isl_pw_aff_zero_set(isl_pw_aff_from_aff(Aff));
- TilingMap = isl_map_intersect_range(TilingMap, Modulo);
-
- // it <= ip
- TilingMap = isl_map_order_le(TilingMap, isl_dim_out, TileDimension,
- isl_dim_out, PointDimension);
-
- // ip <= it + ('VectorWidth' - 1)
- c = isl_inequality_alloc(LocalSpace);
- isl_constraint_set_coefficient_si(c, isl_dim_out, TileDimension, 1);
- isl_constraint_set_coefficient_si(c, isl_dim_out, PointDimension, -1);
- isl_constraint_set_constant_si(c, VectorWidth - 1);
- TilingMap = isl_map_add_constraint(TilingMap, c);
-
- return TilingMap;
+__isl_give isl_schedule_node *
+IslScheduleOptimizer::prevectSchedBand(__isl_take isl_schedule_node *Node,
+ unsigned DimToVectorize,
+ int VectorWidth) {
+ assert(isl_schedule_node_get_type(Node) == isl_schedule_node_band);
+
+ auto Space = isl_schedule_node_band_get_space(Node);
+ auto ScheduleDimensions = isl_space_dim(Space, isl_dim_set);
+ isl_space_free(Space);
+ assert(DimToVectorize < ScheduleDimensions);
+
+ if (DimToVectorize > 0) {
+ Node = isl_schedule_node_band_split(Node, DimToVectorize);
+ Node = isl_schedule_node_child(Node, 0);
+ }
+ if (DimToVectorize < ScheduleDimensions - 1)
+ Node = isl_schedule_node_band_split(Node, 1);
+ Space = isl_schedule_node_band_get_space(Node);
+ auto Sizes = isl_multi_val_zero(Space);
+ auto Ctx = isl_schedule_node_get_ctx(Node);
+ Sizes =
+ isl_multi_val_set_val(Sizes, 0, isl_val_int_from_si(Ctx, VectorWidth));
+ Node = isl_schedule_node_band_tile(Node, Sizes);
+ Node = isl_schedule_node_child(Node, 0);
+ Node = isl_schedule_node_band_sink(Node);
+ Node = isl_schedule_node_child(Node, 0);
+ return Node;
}
isl_schedule_node *IslScheduleOptimizer::optimizeBand(isl_schedule_node *Node,
@@ -307,21 +275,12 @@ isl_schedule_node *IslScheduleOptimizer::optimizeBand(isl_schedule_node *Node,
if (PollyVectorizerChoice == VECTORIZER_NONE)
return Res;
- auto Schedule = isl_schedule_node_band_get_partial_schedule(Res);
-
- for (int i = Dims - 1; i >= 0; i--) {
+ for (int i = Dims - 1; i >= 0; i--)
if (isl_schedule_node_band_member_get_coincident(Res, i)) {
- auto TileMap = IslScheduleOptimizer::getPrevectorMap(Ctx, i, Dims);
- auto TileUMap = isl_union_map_from_map(TileMap);
- auto Schedule2 = isl_union_map_apply_range(
- isl_union_map_from_multi_union_pw_aff(Schedule), TileUMap);
- Schedule = isl_multi_union_pw_aff_from_union_map(Schedule2);
+ Res = IslScheduleOptimizer::prevectSchedBand(Res, i);
break;
}
- }
- Res = isl_schedule_node_delete(Res);
- Res = isl_schedule_node_insert_partial_schedule(Res, Schedule);
return Res;
}
OpenPOWER on IntegriCloud