diff options
Diffstat (limited to 'polly/lib')
-rw-r--r-- | polly/lib/External/isl/GIT_HEAD_ID | 2 | ||||
-rw-r--r-- | polly/lib/External/isl/doc/manual.pdf | bin | 495859 -> 496038 bytes | |||
-rw-r--r-- | polly/lib/External/isl/doc/user.pod | 39 | ||||
-rw-r--r-- | polly/lib/External/isl/include/isl/schedule.h | 6 | ||||
-rw-r--r-- | polly/lib/External/isl/isl_map.c | 4 | ||||
-rw-r--r-- | polly/lib/External/isl/isl_map_private.h | 3 | ||||
-rw-r--r-- | polly/lib/External/isl/isl_map_simplify.c | 3 | ||||
-rw-r--r-- | polly/lib/External/isl/isl_mat.c | 43 | ||||
-rw-r--r-- | polly/lib/External/isl/isl_options.c | 16 | ||||
-rw-r--r-- | polly/lib/External/isl/isl_options_private.h | 2 | ||||
-rw-r--r-- | polly/lib/External/isl/isl_scheduler.c | 1797 | ||||
-rw-r--r-- | polly/lib/External/isl/isl_tarjan.c | 33 | ||||
-rw-r--r-- | polly/lib/External/isl/isl_tarjan.h | 4 | ||||
-rw-r--r-- | polly/lib/External/isl/isl_test.c | 38 | ||||
-rw-r--r-- | polly/lib/External/isl/ltmain.sh | 4 |
15 files changed, 1875 insertions, 119 deletions
diff --git a/polly/lib/External/isl/GIT_HEAD_ID b/polly/lib/External/isl/GIT_HEAD_ID index faede1aa4d7..a33d8b7db60 100644 --- a/polly/lib/External/isl/GIT_HEAD_ID +++ b/polly/lib/External/isl/GIT_HEAD_ID @@ -1 +1 @@ -isl-0.16.1 +isl-0.16.1-20-gee54b48 diff --git a/polly/lib/External/isl/doc/manual.pdf b/polly/lib/External/isl/doc/manual.pdf Binary files differindex 5ef653503bb..cbeba086f7a 100644 --- a/polly/lib/External/isl/doc/manual.pdf +++ b/polly/lib/External/isl/doc/manual.pdf diff --git a/polly/lib/External/isl/doc/user.pod b/polly/lib/External/isl/doc/user.pod index a78ee9d7d01..d006f325d7a 100644 --- a/polly/lib/External/isl/doc/user.pod +++ b/polly/lib/External/isl/doc/user.pod @@ -8911,6 +8911,10 @@ L</"Schedule Trees">. isl_stat isl_options_set_schedule_serialize_sccs( isl_ctx *ctx, int val); int isl_options_get_schedule_serialize_sccs(isl_ctx *ctx); + isl_stat isl_options_set_schedule_whole_component( + isl_ctx *ctx, int val); + int isl_options_get_schedule_whole_component( + isl_ctx *ctx); isl_stat isl_options_set_schedule_maximize_band_depth( isl_ctx *ctx, int val); int isl_options_get_schedule_maximize_band_depth( @@ -8960,16 +8964,35 @@ appear in the same band node if these statements belong to the same strongly connected component at the point where the band node is constructed. +=item * schedule_whole_component + +If this option is set, then entire (weakly) connected +components in the dependence graph are scheduled together +as a whole. +Otherwise, each strongly connected component within +such a weakly connected component is first scheduled separately +and then combined with other strongly connected components. +This option has no effect if C<schedule_serialize_sccs> is set. + =item * schedule_maximize_band_depth -If this option is set, we do not split bands at the point -where we detect splitting is necessary. Instead, we -backtrack and split bands as early as possible. This -reduces the number of splits and maximizes the width of -the bands. Wider bands give more possibilities for tiling. -Note that if the C<schedule_serialize_sccs> options is set, -then bands will be split as early as possible, even if there is no need. -The C<schedule_maximize_band_depth> option therefore has no effect in this case. +If this option is set, then the scheduler tries to maximize +the width of the bands. Wider bands give more possibilities for tiling. +In particular, if the C<schedule_whole_component> option is set, +then bands are split if this might result in wider bands. +Otherwise, the effect of this option is to only allow +strongly connected components to be combined if this does +not reduce the width of the bands. +Note that if the C<schedule_serialize_sccs> options is set, then +the C<schedule_maximize_band_depth> option therefore has no effect. + +=item * schedule_maximize_coincidence + +This option is only effective if the C<schedule_whole_component> +option is turned off. +If the C<schedule_maximize_coincidence> option is set, then (clusters of) +strongly connected components are only combined with each other +if this does not reduce the number of coincident band members. =item * schedule_outer_coincidence diff --git a/polly/lib/External/isl/include/isl/schedule.h b/polly/lib/External/isl/include/isl/schedule.h index 793f253c5b0..b2b5ee1c0b9 100644 --- a/polly/lib/External/isl/include/isl/schedule.h +++ b/polly/lib/External/isl/include/isl/schedule.h @@ -26,6 +26,9 @@ int isl_options_get_schedule_max_constant_term(isl_ctx *ctx); isl_stat isl_options_set_schedule_maximize_band_depth(isl_ctx *ctx, int val); int isl_options_get_schedule_maximize_band_depth(isl_ctx *ctx); +isl_stat isl_options_set_schedule_maximize_coincidence(isl_ctx *ctx, int val); +int isl_options_get_schedule_maximize_coincidence(isl_ctx *ctx); + isl_stat isl_options_set_schedule_outer_coincidence(isl_ctx *ctx, int val); int isl_options_get_schedule_outer_coincidence(isl_ctx *ctx); @@ -38,6 +41,9 @@ int isl_options_get_schedule_separate_components(isl_ctx *ctx); isl_stat isl_options_set_schedule_serialize_sccs(isl_ctx *ctx, int val); int isl_options_get_schedule_serialize_sccs(isl_ctx *ctx); +isl_stat isl_options_set_schedule_whole_component(isl_ctx *ctx, int val); +int isl_options_get_schedule_whole_component(isl_ctx *ctx); + __isl_give isl_schedule_constraints *isl_schedule_constraints_copy( __isl_keep isl_schedule_constraints *sc); __isl_give isl_schedule_constraints *isl_schedule_constraints_on_domain( diff --git a/polly/lib/External/isl/isl_map.c b/polly/lib/External/isl/isl_map.c index f92dd616697..0532bb1eeba 100644 --- a/polly/lib/External/isl/isl_map.c +++ b/polly/lib/External/isl/isl_map.c @@ -11090,7 +11090,8 @@ __isl_give isl_map *isl_map_flatten_range(__isl_take isl_map *map) } /* Reorder the dimensions of "bmap" according to the given dim_map - * and set the dimension specification to "dim". + * and set the dimension specification to "dim" and + * perform Gaussian elimination on the result. */ __isl_give isl_basic_map *isl_basic_map_realign(__isl_take isl_basic_map *bmap, __isl_take isl_space *dim, __isl_take struct isl_dim_map *dim_map) @@ -11111,6 +11112,7 @@ __isl_give isl_basic_map *isl_basic_map_realign(__isl_take isl_basic_map *bmap, res = isl_basic_map_add_constraints_dim_map(res, bmap, dim_map); if (res) res->flags = flags; + res = isl_basic_map_gauss(res, NULL); res = isl_basic_map_finalize(res); return res; error: diff --git a/polly/lib/External/isl/isl_map_private.h b/polly/lib/External/isl/isl_map_private.h index df44fb02beb..d9be851a5d1 100644 --- a/polly/lib/External/isl/isl_map_private.h +++ b/polly/lib/External/isl/isl_map_private.h @@ -373,6 +373,9 @@ struct isl_basic_set *isl_basic_set_preimage(struct isl_basic_set *bset, struct isl_mat *mat); struct isl_set *isl_set_preimage(struct isl_set *set, struct isl_mat *mat); +__isl_give isl_basic_map *isl_basic_map_transform_dims( + __isl_take isl_basic_map *bmap, enum isl_dim_type type, unsigned first, + __isl_take isl_mat *trans); __isl_give isl_basic_set *isl_basic_set_transform_dims( __isl_take isl_basic_set *bset, enum isl_dim_type type, unsigned first, __isl_take isl_mat *trans); diff --git a/polly/lib/External/isl/isl_map_simplify.c b/polly/lib/External/isl/isl_map_simplify.c index 62260c11a63..e44b0323331 100644 --- a/polly/lib/External/isl/isl_map_simplify.c +++ b/polly/lib/External/isl/isl_map_simplify.c @@ -3884,13 +3884,14 @@ static struct isl_basic_map *coalesce_divs(struct isl_basic_map *bmap, continue; if (isl_int_is_zero(bmap->ineq[i][1 + dim + div2])) continue; - if (isl_int_is_zero(bmap->ineq[i][1 + dim + div1])) + if (isl_int_is_zero(bmap->ineq[i][1 + dim + div1])) { if (isl_int_is_pos(bmap->ineq[i][1 + dim + div2])) isl_seq_combine(bmap->ineq[i], m, bmap->ineq[i], ctx->one, bmap->ineq[l], total); else isl_seq_combine(bmap->ineq[i], m, bmap->ineq[i], ctx->one, bmap->ineq[u], total); + } isl_int_set(bmap->ineq[i][1 + dim + div2], bmap->ineq[i][1 + dim + div1]); isl_int_set_si(bmap->ineq[i][1 + dim + div1], 0); diff --git a/polly/lib/External/isl/isl_mat.c b/polly/lib/External/isl/isl_mat.c index 0cb67723a47..483eaf078e9 100644 --- a/polly/lib/External/isl/isl_mat.c +++ b/polly/lib/External/isl/isl_mat.c @@ -1235,53 +1235,66 @@ static int transform(isl_ctx *ctx, isl_int **q, unsigned n, return 0; } -/* Replace the variables x of type "type" starting at "first" in "bset" +/* Replace the variables x of type "type" starting at "first" in "bmap" * by x' with x = M x' with M the matrix trans. * That is, replace the corresponding coefficients c by c M. * * The transformation matrix should be a square matrix. */ -__isl_give isl_basic_set *isl_basic_set_transform_dims( - __isl_take isl_basic_set *bset, enum isl_dim_type type, unsigned first, +__isl_give isl_basic_map *isl_basic_map_transform_dims( + __isl_take isl_basic_map *bmap, enum isl_dim_type type, unsigned first, __isl_take isl_mat *trans) { isl_ctx *ctx; unsigned pos; - bset = isl_basic_set_cow(bset); - if (!bset || !trans) + bmap = isl_basic_map_cow(bmap); + if (!bmap || !trans) goto error; - ctx = isl_basic_set_get_ctx(bset); + ctx = isl_basic_map_get_ctx(bmap); if (trans->n_row != trans->n_col) isl_die(trans->ctx, isl_error_invalid, "expecting square transformation matrix", goto error); - if (first + trans->n_row > isl_basic_set_dim(bset, type)) + if (first + trans->n_row > isl_basic_map_dim(bmap, type)) isl_die(trans->ctx, isl_error_invalid, "oversized transformation matrix", goto error); - pos = isl_basic_set_offset(bset, type) + first; + pos = isl_basic_map_offset(bmap, type) + first; - if (transform(ctx, bset->eq, bset->n_eq, pos, isl_mat_copy(trans)) < 0) + if (transform(ctx, bmap->eq, bmap->n_eq, pos, isl_mat_copy(trans)) < 0) goto error; - if (transform(ctx, bset->ineq, bset->n_ineq, pos, + if (transform(ctx, bmap->ineq, bmap->n_ineq, pos, isl_mat_copy(trans)) < 0) goto error; - if (transform(ctx, bset->div, bset->n_div, 1 + pos, + if (transform(ctx, bmap->div, bmap->n_div, 1 + pos, isl_mat_copy(trans)) < 0) goto error; - ISL_F_CLR(bset, ISL_BASIC_SET_NORMALIZED); - ISL_F_CLR(bset, ISL_BASIC_SET_NORMALIZED_DIVS); + ISL_F_CLR(bmap, ISL_BASIC_MAP_NORMALIZED); + ISL_F_CLR(bmap, ISL_BASIC_MAP_NORMALIZED_DIVS); isl_mat_free(trans); - return bset; + return bmap; error: isl_mat_free(trans); - isl_basic_set_free(bset); + isl_basic_map_free(bmap); return NULL; } +/* Replace the variables x of type "type" starting at "first" in "bset" + * by x' with x = M x' with M the matrix trans. + * That is, replace the corresponding coefficients c by c M. + * + * The transformation matrix should be a square matrix. + */ +__isl_give isl_basic_set *isl_basic_set_transform_dims( + __isl_take isl_basic_set *bset, enum isl_dim_type type, unsigned first, + __isl_take isl_mat *trans) +{ + return isl_basic_map_transform_dims(bset, type, first, trans); +} + void isl_mat_print_internal(__isl_keep isl_mat *mat, FILE *out, int indent) { int i, j; diff --git a/polly/lib/External/isl/isl_options.c b/polly/lib/External/isl/isl_options.c index cdb578ae5ff..bbf5a5989a8 100644 --- a/polly/lib/External/isl/isl_options.c +++ b/polly/lib/External/isl/isl_options.c @@ -151,12 +151,18 @@ ISL_ARG_BOOL(struct isl_options, schedule_outer_coincidence, 0, ISL_ARG_BOOL(struct isl_options, schedule_maximize_band_depth, 0, "schedule-maximize-band-depth", 0, "maximize the number of scheduling dimensions in a band") +ISL_ARG_BOOL(struct isl_options, schedule_maximize_coincidence, 0, + "schedule-maximize-coincidence", 0, + "maximize the number of coincident dimensions in a band") ISL_ARG_BOOL(struct isl_options, schedule_split_scaled, 0, "schedule-split-scaled", 1, "split non-tilable bands with scaled schedules") ISL_ARG_BOOL(struct isl_options, schedule_separate_components, 0, "schedule-separate-components", 1, "separate components in dependence graph") +ISL_ARG_BOOL(struct isl_options, schedule_whole_component, 0, + "schedule-whole-component", 1, + "try and compute schedule for entire component first") ISL_ARG_CHOICE(struct isl_options, schedule_algorithm, 0, "schedule-algorithm", isl_schedule_algorithm_choice, ISL_SCHEDULE_ALGORITHM_ISL, "scheduling algorithm to use") @@ -242,6 +248,11 @@ ISL_CTX_GET_BOOL_DEF(isl_options, struct isl_options, isl_options_args, schedule_maximize_band_depth) ISL_CTX_SET_BOOL_DEF(isl_options, struct isl_options, isl_options_args, + schedule_maximize_coincidence) +ISL_CTX_GET_BOOL_DEF(isl_options, struct isl_options, isl_options_args, + schedule_maximize_coincidence) + +ISL_CTX_SET_BOOL_DEF(isl_options, struct isl_options, isl_options_args, schedule_split_scaled) ISL_CTX_GET_BOOL_DEF(isl_options, struct isl_options, isl_options_args, schedule_split_scaled) @@ -252,6 +263,11 @@ ISL_CTX_GET_BOOL_DEF(isl_options, struct isl_options, isl_options_args, schedule_separate_components) ISL_CTX_SET_BOOL_DEF(isl_options, struct isl_options, isl_options_args, + schedule_whole_component) +ISL_CTX_GET_BOOL_DEF(isl_options, struct isl_options, isl_options_args, + schedule_whole_component) + +ISL_CTX_SET_BOOL_DEF(isl_options, struct isl_options, isl_options_args, schedule_outer_coincidence) ISL_CTX_GET_BOOL_DEF(isl_options, struct isl_options, isl_options_args, schedule_outer_coincidence) diff --git a/polly/lib/External/isl/isl_options_private.h b/polly/lib/External/isl/isl_options_private.h index 0907d219f23..f3119516306 100644 --- a/polly/lib/External/isl/isl_options_private.h +++ b/polly/lib/External/isl/isl_options_private.h @@ -40,8 +40,10 @@ struct isl_options { int schedule_parametric; int schedule_outer_coincidence; int schedule_maximize_band_depth; + int schedule_maximize_coincidence; int schedule_split_scaled; int schedule_separate_components; + int schedule_whole_component; unsigned schedule_algorithm; int schedule_serialize_sccs; diff --git a/polly/lib/External/isl/isl_scheduler.c b/polly/lib/External/isl/isl_scheduler.c index a214e1c3452..22e88c65f98 100644 --- a/polly/lib/External/isl/isl_scheduler.c +++ b/polly/lib/External/isl/isl_scheduler.c @@ -406,6 +406,7 @@ static __isl_give int isl_schedule_constraints_n_map( * coefficients; the first rank columns span the linear part of * the schedule rows * cinv is the inverse of cmap. + * ctrans is the transpose of cmap. * start is the first variable in the LP problem in the sequences that * represents the schedule coefficients of this node * nvar is the dimension of the domain @@ -419,6 +420,9 @@ static __isl_give int isl_schedule_constraints_n_map( * * scc is the index of SCC (or WCC) this node belongs to * + * "cluster" is only used inside extract_clusters and identifies + * the cluster of SCCs that the node belongs to. + * * coincident contains a boolean for each of the rows of the schedule, * indicating whether the corresponding scheduling dimension satisfies * the coincidence constraints in the sense that the corresponding @@ -435,11 +439,13 @@ struct isl_sched_node { int rank; isl_mat *cmap; isl_mat *cinv; + isl_mat *ctrans; int start; int nvar; int nparam; int scc; + int cluster; int *coincident; }; @@ -495,6 +501,16 @@ static int node_scc_at_least(struct isl_sched_node *node, int scc) * For validity edges, start and end mark the sequence of inequality * constraints in the LP problem that encode the validity constraint * corresponding to this edge. + * + * During clustering, an edge may be marked "no_merge" if it should + * not be used to merge clusters. + * The weight is also only used during clustering and it is + * an indication of how many schedule dimensions on either side + * of the schedule constraints can be aligned. + * If the weight is negative, then this means that this edge was postponed + * by has_bounded_distances or any_no_merge. The original weight can + * be retrieved by adding 1 + graph->max_weight, with "graph" + * the graph containing this edge. */ struct isl_sched_edge { isl_map *map; @@ -508,6 +524,9 @@ struct isl_sched_edge { int start; int end; + + int no_merge; + int weight; }; /* Is "edge" marked as being of type "type"? @@ -642,6 +661,9 @@ static int is_conditional_validity(struct isl_sched_edge *edge) * * scc represents the number of components * weak is set if the components are weakly connected + * + * max_weight is used during clustering and represents the maximal + * weight of the relevant proximity edges. */ struct isl_sched_graph { isl_map_to_basic_set *intra_hmap; @@ -675,6 +697,8 @@ struct isl_sched_graph { int scc; int weak; + + int max_weight; }; /* Initialize node_table based on the list of nodes. @@ -956,6 +980,7 @@ static void graph_free(isl_ctx *ctx, struct isl_sched_graph *graph) isl_map_free(graph->node[i].sched_map); isl_mat_free(graph->node[i].cmap); isl_mat_free(graph->node[i].cinv); + isl_mat_free(graph->node[i].ctrans); if (graph->root) free(graph->node[i].coincident); } @@ -1465,22 +1490,18 @@ static isl_bool node_follows_strong(int i, int j, void *user) } /* Use Tarjan's algorithm for computing the strongly connected components - * in the dependence graph (only validity edges). - * If weak is set, we consider the graph to be undirected and - * we effectively compute the (weakly) connected components. - * Additionally, we also consider other edges when weak is set. + * in the dependence graph only considering those edges defined by "follows". */ -static int detect_ccs(isl_ctx *ctx, struct isl_sched_graph *graph, int weak) +static int detect_ccs(isl_ctx *ctx, struct isl_sched_graph *graph, + isl_bool (*follows)(int i, int j, void *user)) { int i, n; struct isl_tarjan_graph *g = NULL; - g = isl_tarjan_graph_init(ctx, graph->n, - weak ? &node_follows_weak : &node_follows_strong, graph); + g = isl_tarjan_graph_init(ctx, graph->n, follows, graph); if (!g) return -1; - graph->weak = weak; graph->scc = 0; i = 0; n = graph->n; @@ -1501,18 +1522,22 @@ static int detect_ccs(isl_ctx *ctx, struct isl_sched_graph *graph, int weak) /* Apply Tarjan's algorithm to detect the strongly connected components * in the dependence graph. + * Only consider the (conditional) validity dependences and clear "weak". */ static int detect_sccs(isl_ctx *ctx, struct isl_sched_graph *graph) { - return detect_ccs(ctx, graph, 0); + graph->weak = 0; + return detect_ccs(ctx, graph, &node_follows_strong); } /* Apply Tarjan's algorithm to detect the (weakly) connected components * in the dependence graph. + * Consider all dependences and set "weak". */ static int detect_wccs(isl_ctx *ctx, struct isl_sched_graph *graph) { - return detect_ccs(ctx, graph, 1); + graph->weak = 1; + return detect_ccs(ctx, graph, &node_follows_weak); } static int cmp_scc(const void *a, const void *b, void *data) @@ -2083,12 +2108,14 @@ static int node_update_cmap(struct isl_sched_node *node) H = isl_mat_left_hermite(H, 0, &U, &Q); isl_mat_free(node->cmap); isl_mat_free(node->cinv); + isl_mat_free(node->ctrans); + node->ctrans = isl_mat_copy(Q); node->cmap = isl_mat_transpose(Q); node->cinv = isl_mat_transpose(U); node->rank = isl_mat_initial_non_zero_cols(H); isl_mat_free(H); - if (!node->cmap || !node->cinv || node->rank < 0) + if (!node->cmap || !node->cinv || !node->ctrans || node->rank < 0) return -1; return 0; } @@ -2577,6 +2604,8 @@ static __isl_give isl_multi_aff *node_extract_partial_schedule_multi_aff( isl_multi_aff *ma; int nrow; + if (!node) + return NULL; nrow = isl_mat_rows(node->sched); if (node->compressed) space = isl_multi_aff_get_domain_space(node->decompress); @@ -3092,6 +3121,44 @@ static int compute_maxvar(struct isl_sched_graph *graph) return 0; } +/* Extract the subgraph of "graph" that consists of the node satisfying + * "node_pred" and the edges satisfying "edge_pred" and store + * the result in "sub". + */ +static int extract_sub_graph(isl_ctx *ctx, struct isl_sched_graph *graph, + int (*node_pred)(struct isl_sched_node *node, int data), + int (*edge_pred)(struct isl_sched_edge *edge, int data), + int data, struct isl_sched_graph *sub) +{ + int i, n = 0, n_edge = 0; + int t; + + for (i = 0; i < graph->n; ++i) + if (node_pred(&graph->node[i], data)) + ++n; + for (i = 0; i < graph->n_edge; ++i) + if (edge_pred(&graph->edge[i], data)) + ++n_edge; + if (graph_alloc(ctx, sub, n, n_edge) < 0) + return -1; + if (copy_nodes(sub, graph, node_pred, data) < 0) + return -1; + if (graph_init_table(ctx, sub) < 0) + return -1; + for (t = 0; t <= isl_edge_last; ++t) + sub->max_edge[t] = graph->max_edge[t]; + if (graph_init_edge_tables(ctx, sub) < 0) + return -1; + if (copy_edges(ctx, sub, graph, edge_pred, data) < 0) + return -1; + sub->n_row = graph->n_row; + sub->max_row = graph->max_row; + sub->n_total_row = graph->n_total_row; + sub->band_start = graph->band_start; + + return 0; +} + static __isl_give isl_schedule_node *compute_schedule(isl_schedule_node *node, struct isl_sched_graph *graph); static __isl_give isl_schedule_node *compute_schedule_wcc( @@ -3116,31 +3183,10 @@ static __isl_give isl_schedule_node *compute_sub_schedule( int data, int wcc) { struct isl_sched_graph split = { 0 }; - int i, n = 0, n_edge = 0; - int t; - for (i = 0; i < graph->n; ++i) - if (node_pred(&graph->node[i], data)) - ++n; - for (i = 0; i < graph->n_edge; ++i) - if (edge_pred(&graph->edge[i], data)) - ++n_edge; - if (graph_alloc(ctx, &split, n, n_edge) < 0) + if (extract_sub_graph(ctx, graph, node_pred, edge_pred, data, + &split) < 0) goto error; - if (copy_nodes(&split, graph, node_pred, data) < 0) - goto error; - if (graph_init_table(ctx, &split) < 0) - goto error; - for (t = 0; t <= isl_edge_last; ++t) - split.max_edge[t] = graph->max_edge[t]; - if (graph_init_edge_tables(ctx, &split) < 0) - goto error; - if (copy_edges(ctx, &split, graph, edge_pred, data) < 0) - goto error; - split.n_row = graph->n_row; - split.max_row = graph->max_row; - split.n_total_row = graph->n_total_row; - split.band_start = graph->band_start; if (wcc) node = compute_schedule_wcc(node, &split); @@ -3909,6 +3955,9 @@ static __isl_give isl_schedule_node *carry_dependences( /* Topologically sort statements mapped to the same schedule iteration * and add insert a sequence node in front of "node" * corresponding to this order. + * If "initialized" is set, then it may be assumed that compute_maxvar + * has been called on the current band. Otherwise, call + * compute_maxvar if and before carry_dependences gets called. * * If it turns out to be impossible to sort the statements apart, * because different dependences impose different orderings @@ -3916,7 +3965,8 @@ static __isl_give isl_schedule_node *carry_dependences( * it carries at least one more dependence. */ static __isl_give isl_schedule_node *sort_statements( - __isl_take isl_schedule_node *node, struct isl_sched_graph *graph) + __isl_take isl_schedule_node *node, struct isl_sched_graph *graph, + int initialized) { isl_ctx *ctx; isl_union_set_list *filters; @@ -3943,8 +3993,11 @@ static __isl_give isl_schedule_node *sort_statements( return isl_schedule_node_free(node); next_band(graph); - if (graph->scc < graph->n) + if (graph->scc < graph->n) { + if (!initialized && compute_maxvar(graph) < 0) + return isl_schedule_node_free(node); return carry_dependences(node, graph); + } filters = extract_sccs(ctx, graph); node = isl_schedule_node_insert_sequence(node, filters); @@ -4196,14 +4249,17 @@ error: return -1; } -/* Compute a schedule for a connected dependence graph and return - * the updated schedule node. +/* Examine the current band (the rows between graph->band_start and + * graph->n_total_row), deciding whether to drop it or add it to "node" + * and then continue with the computation of the next band, if any. + * If "initialized" is set, then it may be assumed that compute_maxvar + * has been called on the current band. Otherwise, call + * compute_maxvar if and before carry_dependences gets called. * - * We try to find a sequence of as many schedule rows as possible that result - * in non-negative dependence distances (independent of the previous rows - * in the sequence, i.e., such that the sequence is tilable), with as - * many of the initial rows as possible satisfying the coincidence constraints. - * If we can't find any more rows we either + * The caller keeps looking for a new row as long as + * graph->n_row < graph->maxvar. If the latest attempt to find + * such a row failed (i.e., we still have graph->n_row < graph->maxvar), + * then we either * - split between SCCs and start over (assuming we found an interesting * pair of SCCs between which to split) * - continue with the next band (assuming the current band has at least @@ -4213,13 +4269,57 @@ error: * In each case, we first insert a band node in the schedule tree * if any rows have been computed. * - * If Feautrier's algorithm is selected, we first recursively try to satisfy - * as many validity dependences as possible. When all validity dependences - * are satisfied we extend the schedule to a full-dimensional schedule. - * - * If we manage to complete the schedule, we insert a band node + * If the caller managed to complete the schedule, we insert a band node * (if any schedule rows were computed) and we finish off by topologically * sorting the statements based on the remaining dependences. + */ +static __isl_give isl_schedule_node *compute_schedule_finish_band( + __isl_take isl_schedule_node *node, struct isl_sched_graph *graph, + int initialized) +{ + int insert; + + if (!node) + return NULL; + + if (graph->n_row < graph->maxvar) { + isl_ctx *ctx; + int empty = graph->n_total_row == graph->band_start; + + ctx = isl_schedule_node_get_ctx(node); + if (!ctx->opt->schedule_maximize_band_depth && !empty) + return compute_next_band(node, graph, 1); + if (graph->src_scc >= 0) + return compute_split_schedule(node, graph); + if (!empty) + return compute_next_band(node, graph, 1); + if (!initialized && compute_maxvar(graph) < 0) + return isl_schedule_node_free(node); + return carry_dependences(node, graph); + } + + insert = graph->n_total_row > graph->band_start; + if (insert) { + node = insert_current_band(node, graph, 1); + node = isl_schedule_node_child(node, 0); + } + node = sort_statements(node, graph, initialized); + if (insert) + node = isl_schedule_node_parent(node); + + return node; +} + +/* Construct a band of schedule rows for a connected dependence graph. + * The caller is responsible for determining the strongly connected + * components and calling compute_maxvar first. + * + * We try to find a sequence of as many schedule rows as possible that result + * in non-negative dependence distances (independent of the previous rows + * in the sequence, i.e., such that the sequence is tilable), with as + * many of the initial rows as possible satisfying the coincidence constraints. + * The computation stops if we can't find any more rows or if we have found + * all the rows we wanted to find. * * If ctx->opt->schedule_outer_coincidence is set, then we force the * outermost dimension to satisfy the coincidence constraints. If this @@ -4242,30 +4342,16 @@ error: * Since there are only a finite number of dependences, * there will only be a finite number of iterations. */ -static __isl_give isl_schedule_node *compute_schedule_wcc( - __isl_take isl_schedule_node *node, struct isl_sched_graph *graph) +static isl_stat compute_schedule_wcc_band(isl_ctx *ctx, + struct isl_sched_graph *graph) { int has_coincidence; int use_coincidence; int force_coincidence = 0; int check_conditional; - int insert; - isl_ctx *ctx; - - if (!node) - return NULL; - ctx = isl_schedule_node_get_ctx(node); - if (detect_sccs(ctx, graph) < 0) - return isl_schedule_node_free(node); if (sort_sccs(graph) < 0) - return isl_schedule_node_free(node); - - if (compute_maxvar(graph) < 0) - return isl_schedule_node_free(node); - - if (need_feautrier_step(ctx, graph)) - return compute_schedule_wcc_feautrier(node, graph); + return isl_stat_error; clear_local_edges(graph); check_conditional = need_condition_check(graph); @@ -4284,10 +4370,10 @@ static __isl_give isl_schedule_node *compute_schedule_wcc( graph->dst_scc = -1; if (setup_lp(ctx, graph, use_coincidence) < 0) - return isl_schedule_node_free(node); + return isl_stat_error; sol = solve_lp(graph); if (!sol) - return isl_schedule_node_free(node); + return isl_stat_error; if (sol->size == 0) { int empty = graph->n_total_row == graph->band_start; @@ -4296,40 +4382,1585 @@ static __isl_give isl_schedule_node *compute_schedule_wcc( use_coincidence = 0; continue; } - if (!ctx->opt->schedule_maximize_band_depth && !empty) - return compute_next_band(node, graph, 1); - if (graph->src_scc >= 0) - return compute_split_schedule(node, graph); - if (!empty) - return compute_next_band(node, graph, 1); - return carry_dependences(node, graph); + return isl_stat_ok; } coincident = !has_coincidence || use_coincidence; if (update_schedule(graph, sol, 1, coincident) < 0) - return isl_schedule_node_free(node); + return isl_stat_error; if (!check_conditional) continue; violated = has_violated_conditional_constraint(ctx, graph); if (violated < 0) - return isl_schedule_node_free(node); + return isl_stat_error; if (!violated) continue; if (reset_band(graph) < 0) - return isl_schedule_node_free(node); + return isl_stat_error; use_coincidence = has_coincidence; } - insert = graph->n_total_row > graph->band_start; - if (insert) { - node = insert_current_band(node, graph, 1); - node = isl_schedule_node_child(node, 0); + return isl_stat_ok; +} + +/* Compute a schedule for a connected dependence graph by considering + * the graph as a whole and return the updated schedule node. + * + * The actual schedule rows of the current band are computed by + * compute_schedule_wcc_band. compute_schedule_finish_band takes + * care of integrating the band into "node" and continuing + * the computation. + */ +static __isl_give isl_schedule_node *compute_schedule_wcc_whole( + __isl_take isl_schedule_node *node, struct isl_sched_graph *graph) +{ + isl_ctx *ctx; + + if (!node) + return NULL; + + ctx = isl_schedule_node_get_ctx(node); + if (compute_schedule_wcc_band(ctx, graph) < 0) + return isl_schedule_node_free(node); + + return compute_schedule_finish_band(node, graph, 1); +} + +/* Clustering information used by compute_schedule_wcc_clustering. + * + * "n" is the number of SCCs in the original dependence graph + * "scc" is an array of "n" elements, each representing an SCC + * of the original dependence graph. All entries in the same cluster + * have the same number of schedule rows. + * "scc_cluster" maps each SCC index to the cluster to which it belongs, + * where each cluster is represented by the index of the first SCC + * in the cluster. Initially, each SCC belongs to a cluster containing + * only that SCC. + * + * "scc_in_merge" is used by merge_clusters_along_edge to keep + * track of which SCCs need to be merged. + * + * "cluster" contains the merged clusters of SCCs after the clustering + * has completed. + * + * "scc_node" is a temporary data structure used inside copy_partial. + * For each SCC, it keeps track of the number of nodes in the SCC + * that have already been copied. + */ +struct isl_clustering { + int n; + struct isl_sched_graph *scc; + struct isl_sched_graph *cluster; + int *scc_cluster; + int *scc_node; + int *scc_in_merge; +}; + +/* Initialize the clustering data structure "c" from "graph". + * + * In particular, allocate memory, extract the SCCs from "graph" + * into c->scc, initialize scc_cluster and construct + * a band of schedule rows for each SCC. + * Within each SCC, there is only one SCC by definition. + * Each SCC initially belongs to a cluster containing only that SCC. + */ +static isl_stat clustering_init(isl_ctx *ctx, struct isl_clustering *c, + struct isl_sched_graph *graph) +{ + int i; + + c->n = graph->scc; + c->scc = isl_calloc_array(ctx, struct isl_sched_graph, c->n); + c->cluster = isl_calloc_array(ctx, struct isl_sched_graph, c->n); + c->scc_cluster = isl_calloc_array(ctx, int, c->n); + c->scc_node = isl_calloc_array(ctx, int, c->n); + c->scc_in_merge = isl_calloc_array(ctx, int, c->n); + if (!c->scc || !c->cluster || + !c->scc_cluster || !c->scc_node || !c->scc_in_merge) + return isl_stat_error; + + for (i = 0; i < c->n; ++i) { + if (extract_sub_graph(ctx, graph, &node_scc_exactly, + &edge_scc_exactly, i, &c->scc[i]) < 0) + return isl_stat_error; + c->scc[i].scc = 1; + if (compute_maxvar(&c->scc[i]) < 0) + return isl_stat_error; + if (compute_schedule_wcc_band(ctx, &c->scc[i]) < 0) + return isl_stat_error; + c->scc_cluster[i] = i; } - node = sort_statements(node, graph); - if (insert) + + return isl_stat_ok; +} + +/* Free all memory allocated for "c". + */ +static void clustering_free(isl_ctx *ctx, struct isl_clustering *c) +{ + int i; + + if (c->scc) + for (i = 0; i < c->n; ++i) + graph_free(ctx, &c->scc[i]); + free(c->scc); + if (c->cluster) + for (i = 0; i < c->n; ++i) + graph_free(ctx, &c->cluster[i]); + free(c->cluster); + free(c->scc_cluster); + free(c->scc_node); + free(c->scc_in_merge); +} + +/* Should we refrain from merging the cluster in "graph" with + * any other cluster? + * In particular, is its current schedule band empty and incomplete. + */ +static int bad_cluster(struct isl_sched_graph *graph) +{ + return graph->n_row < graph->maxvar && + graph->n_total_row == graph->band_start; +} + +/* Return the index of an edge in "graph" that can be used to merge + * two clusters in "c". + * Return graph->n_edge if no such edge can be found. + * Return -1 on error. + * + * In particular, return a proximity edge between two clusters + * that is not marked "no_merge" and such that neither of the + * two clusters has an incomplete, empty band. + * + * If there are multiple such edges, then try and find the most + * appropriate edge to use for merging. In particular, pick the edge + * with the greatest weight. If there are multiple of those, + * then pick one with the shortest distance between + * the two cluster representatives. + */ +static int find_proximity(struct isl_sched_graph *graph, + struct isl_clustering *c) +{ + int i, best = graph->n_edge, best_dist, best_weight; + + for (i = 0; i < graph->n_edge; ++i) { + struct isl_sched_edge *edge = &graph->edge[i]; + int dist, weight; + + if (!is_proximity(edge)) + continue; + if (edge->no_merge) + continue; + if (bad_cluster(&c->scc[edge->src->scc]) || + bad_cluster(&c->scc[edge->dst->scc])) + continue; + dist = c->scc_cluster[edge->dst->scc] - + c->scc_cluster[edge->src->scc]; + if (dist == 0) + continue; + weight = edge->weight; + if (best < graph->n_edge) { + if (best_weight > weight) + continue; + if (best_weight == weight && best_dist <= dist) + continue; + } + best = i; + best_dist = dist; + best_weight = weight; + } + + return best; +} + +/* Internal data structure used in mark_merge_sccs. + * + * "graph" is the dependence graph in which a strongly connected + * component is constructed. + * "scc_cluster" maps each SCC index to the cluster to which it belongs. + * "src" and "dst" are the indices of the nodes that are being merged. + */ +struct isl_mark_merge_sccs_data { + struct isl_sched_graph *graph; + int *scc_cluster; + int src; + int dst; +}; + +/* Check whether the cluster containing node "i" depends on the cluster + * containing node "j". If "i" and "j" belong to the same cluster, + * then they are taken to depend on each other to ensure that + * the resulting strongly connected component consists of complete + * clusters. Furthermore, if "i" and "j" are the two nodes that + * are being merged, then they are taken to depend on each other as well. + * Otherwise, check if there is a (conditional) validity dependence + * from node[j] to node[i], forcing node[i] to follow node[j]. + */ +static isl_bool cluster_follows(int i, int j, void *user) +{ + struct isl_mark_merge_sccs_data *data = user; + struct isl_sched_graph *graph = data->graph; + int *scc_cluster = data->scc_cluster; + + if (data->src == i && data->dst == j) + return isl_bool_true; + if (data->src == j && data->dst == i) + return isl_bool_true; + if (scc_cluster[graph->node[i].scc] == scc_cluster[graph->node[j].scc]) + return isl_bool_true; + + return graph_has_validity_edge(graph, &graph->node[j], &graph->node[i]); +} + +/* Mark all SCCs that belong to either of the two clusters in "c" + * connected by the edge in "graph" with index "edge", or to any + * of the intermediate clusters. + * The marking is recorded in c->scc_in_merge. + * + * The given edge has been selected for merging two clusters, + * meaning that there is at least a proximity edge between the two nodes. + * However, there may also be (indirect) validity dependences + * between the two nodes. When merging the two clusters, all clusters + * containing one or more of the intermediate nodes along the + * indirect validity dependences need to be merged in as well. + * + * First collect all such nodes by computing the strongly connected + * component (SCC) containing the two nodes connected by the edge, where + * the two nodes are considered to depend on each other to make + * sure they end up in the same SCC. Similarly, each node is considered + * to depend on every other node in the same cluster to ensure + * that the SCC consists of complete clusters. + * + * Then the original SCCs that contain any of these nodes are marked + * in c->scc_in_merge. + */ +static isl_stat mark_merge_sccs(isl_ctx *ctx, struct isl_sched_graph *graph, + int edge, struct isl_clustering *c) +{ + struct isl_mark_merge_sccs_data data; + struct isl_tarjan_graph *g; + int i; + + for (i = 0; i < c->n; ++i) + c->scc_in_merge[i] = 0; + + data.graph = graph; + data.scc_cluster = c->scc_cluster; + data.src = graph->edge[edge].src - graph->node; + data.dst = graph->edge[edge].dst - graph->node; + + g = isl_tarjan_graph_component(ctx, graph->n, data.dst, + &cluster_follows, &data); + if (!g) + goto error; + + i = g->op; + if (i < 3) + isl_die(ctx, isl_error_internal, + "expecting at least two nodes in component", + goto error); + if (g->order[--i] != -1) + isl_die(ctx, isl_error_internal, + "expecting end of component marker", goto error); + + for (--i; i >= 0 && g->order[i] != -1; --i) { + int scc = graph->node[g->order[i]].scc; + c->scc_in_merge[scc] = 1; + } + + isl_tarjan_graph_free(g); + return isl_stat_ok; +error: + isl_tarjan_graph_free(g); + return isl_stat_error; +} + +/* Construct the identifier "cluster_i". + */ +static __isl_give isl_id *cluster_id(isl_ctx *ctx, int i) +{ + char name[40]; + + snprintf(name, sizeof(name), "cluster_%d", i); + return isl_id_alloc(ctx, name, NULL); +} + +/* Construct the space of the cluster with index "i" containing + * the strongly connected component "scc". + * + * In particular, construct a space called cluster_i with dimension equal + * to the number of schedule rows in the current band of "scc". + */ +static __isl_give isl_space *cluster_space(struct isl_sched_graph *scc, int i) +{ + int nvar; + isl_space *space; + isl_id *id; + + nvar = scc->n_total_row - scc->band_start; + space = isl_space_copy(scc->node[0].space); + space = isl_space_params(space); + space = isl_space_set_from_params(space); + space = isl_space_add_dims(space, isl_dim_set, nvar); + id = cluster_id(isl_space_get_ctx(space), i); + space = isl_space_set_tuple_id(space, isl_dim_set, id); + + return space; +} + +/* Collect the domain of the graph for merging clusters. + * + * In particular, for each cluster with first SCC "i", construct + * a set in the space called cluster_i with dimension equal + * to the number of schedule rows in the current band of the cluster. + */ +static __isl_give isl_union_set *collect_domain(isl_ctx *ctx, + struct isl_sched_graph *graph, struct isl_clustering *c) +{ + int i; + isl_space *space; + isl_union_set *domain; + + space = isl_space_params_alloc(ctx, 0); + domain = isl_union_set_empty(space); + + for (i = 0; i < graph->scc; ++i) { + isl_space *space; + + if (!c->scc_in_merge[i]) + continue; + if (c->scc_cluster[i] != i) + continue; + space = cluster_space(&c->scc[i], i); + domain = isl_union_set_add_set(domain, isl_set_universe(space)); + } + + return domain; +} + +/* Construct a map from the original instances to the corresponding + * cluster instance in the current bands of the clusters in "c". + */ +static __isl_give isl_union_map *collect_cluster_map(isl_ctx *ctx, + struct isl_sched_graph *graph, struct isl_clustering *c) +{ + int i, j; + isl_space *space; + isl_union_map *cluster_map; + + space = isl_space_params_alloc(ctx, 0); + cluster_map = isl_union_map_empty(space); + for (i = 0; i < graph->scc; ++i) { + int start, n; + isl_id *id; + + if (!c->scc_in_merge[i]) + continue; + + id = cluster_id(ctx, c->scc_cluster[i]); + start = c->scc[i].band_start; + n = c->scc[i].n_total_row - start; + for (j = 0; j < c->scc[i].n; ++j) { + isl_multi_aff *ma; + isl_map *map; + struct isl_sched_node *node = &c->scc[i].node[j]; + + ma = node_extract_partial_schedule_multi_aff(node, + start, n); + ma = isl_multi_aff_set_tuple_id(ma, isl_dim_out, + isl_id_copy(id)); + map = isl_map_from_multi_aff(ma); + cluster_map = isl_union_map_add_map(cluster_map, map); + } + isl_id_free(id); + } + + return cluster_map; +} + +/* Add "umap" to the schedule constraints "sc" of all types of "edge" + * that are not isl_edge_condition or isl_edge_conditional_validity. + */ +static __isl_give isl_schedule_constraints *add_non_conditional_constraints( + struct isl_sched_edge *edge, __isl_keep isl_union_map *umap, + __isl_take isl_schedule_constraints *sc) +{ + enum isl_edge_type t; + + if (!sc) + return NULL; + + for (t = isl_edge_first; t <= isl_edge_last; ++t) { + if (t == isl_edge_condition || + t == isl_edge_conditional_validity) + continue; + if (!is_type(edge, t)) + continue; + sc->constraint[t] = isl_union_map_union(sc->constraint[t], + isl_union_map_copy(umap)); + if (!sc->constraint[t]) + return isl_schedule_constraints_free(sc); + } + + return sc; +} + +/* Add schedule constraints of types isl_edge_condition and + * isl_edge_conditional_validity to "sc" by applying "umap" to + * the domains of the wrapped relations in domain and range + * of the corresponding tagged constraints of "edge". + */ +static __isl_give isl_schedule_constraints *add_conditional_constraints( + struct isl_sched_edge *edge, __isl_keep isl_union_map *umap, + __isl_take isl_schedule_constraints *sc) +{ + enum isl_edge_type t; + isl_union_map *tagged; + + for (t = isl_edge_condition; t <= isl_edge_conditional_validity; ++t) { + if (!is_type(edge, t)) + continue; + if (t == isl_edge_condition) + tagged = isl_union_map_copy(edge->tagged_condition); + else + tagged = isl_union_map_copy(edge->tagged_validity); + tagged = isl_union_map_zip(tagged); + tagged = isl_union_map_apply_domain(tagged, + isl_union_map_copy(umap)); + tagged = isl_union_map_zip(tagged); + sc->constraint[t] = isl_union_map_union(sc->constraint[t], + tagged); + if (!sc->constraint[t]) + return isl_schedule_constraints_free(sc); + } + + return sc; +} + +/* Given a mapping "cluster_map" from the original instances to + * the cluster instances, add schedule constraints on the clusters + * to "sc" corresponding to the original constraints represented by "edge". + * + * For non-tagged dependence constraints, the cluster constraints + * are obtained by applying "cluster_map" to the edge->map. + * + * For tagged dependence constraints, "cluster_map" needs to be applied + * to the domains of the wrapped relations in domain and range + * of the tagged dependence constraints. Pick out the mappings + * from these domains from "cluster_map" and construct their product. + * This mapping can then be applied to the pair of domains. + */ +static __isl_give isl_schedule_constraints *collect_edge_constraints( + struct isl_sched_edge *edge, __isl_keep isl_union_map *cluster_map, + __isl_take isl_schedule_constraints *sc) +{ + isl_union_map *umap; + isl_space *space; + isl_union_set *uset; + isl_union_map *umap1, *umap2; + + if (!sc) + return NULL; + + umap = isl_union_map_from_map(isl_map_copy(edge->map)); + umap = isl_union_map_apply_domain(umap, + isl_union_map_copy(cluster_map)); + umap = isl_union_map_apply_range(umap, + isl_union_map_copy(cluster_map)); + sc = add_non_conditional_constraints(edge, umap, sc); + isl_union_map_free(umap); + + if (!sc || (!is_condition(edge) && !is_conditional_validity(edge))) + return sc; + + space = isl_space_domain(isl_map_get_space(edge->map)); + uset = isl_union_set_from_set(isl_set_universe(space)); + umap1 = isl_union_map_copy(cluster_map); + umap1 = isl_union_map_intersect_domain(umap1, uset); + space = isl_space_range(isl_map_get_space(edge->map)); + uset = isl_union_set_from_set(isl_set_universe(space)); + umap2 = isl_union_map_copy(cluster_map); + umap2 = isl_union_map_intersect_domain(umap2, uset); + umap = isl_union_map_product(umap1, umap2); + + sc = add_conditional_constraints(edge, umap, sc); + + isl_union_map_free(umap); + return sc; +} + +/* Given a mapping "cluster_map" from the original instances to + * the cluster instances, add schedule constraints on the clusters + * to "sc" corresponding to all edges in "graph" between nodes that + * belong to SCCs that are marked for merging in "scc_in_merge". + */ +static __isl_give isl_schedule_constraints *collect_constraints( + struct isl_sched_graph *graph, int *scc_in_merge, + __isl_keep isl_union_map *cluster_map, + __isl_take isl_schedule_constraints *sc) +{ + int i; + + for (i = 0; i < graph->n_edge; ++i) { + struct isl_sched_edge *edge = &graph->edge[i]; + + if (!scc_in_merge[edge->src->scc]) + continue; + if (!scc_in_merge[edge->dst->scc]) + continue; + sc = collect_edge_constraints(edge, cluster_map, sc); + } + + return sc; +} + +/* Construct a dependence graph for scheduling clusters with respect + * to each other and store the result in "merge_graph". + * In particular, the nodes of the graph correspond to the schedule + * dimensions of the current bands of those clusters that have been + * marked for merging in "c". + * + * First construct an isl_schedule_constraints object for this domain + * by transforming the edges in "graph" to the domain. + * Then initialize a dependence graph for scheduling from these + * constraints. + */ +static isl_stat init_merge_graph(isl_ctx *ctx, struct isl_sched_graph *graph, + struct isl_clustering *c, struct isl_sched_graph *merge_graph) +{ + isl_union_set *domain; + isl_union_map *cluster_map; + isl_schedule_constraints *sc; + isl_stat r; + + domain = collect_domain(ctx, graph, c); + sc = isl_schedule_constraints_on_domain(domain); + if (!sc) + return isl_stat_error; + cluster_map = collect_cluster_map(ctx, graph, c); + sc = collect_constraints(graph, c->scc_in_merge, cluster_map, sc); + isl_union_map_free(cluster_map); + + r = graph_init(merge_graph, sc); + + isl_schedule_constraints_free(sc); + + return r; +} + +/* Compute the maximal number of remaining schedule rows that still need + * to be computed for the nodes that belong to clusters with the maximal + * dimension for the current band (i.e., the band that is to be merged). + * Only clusters that are about to be merged are considered. + * "maxvar" is the maximal dimension for the current band. + * "c" contains information about the clusters. + * + * Return the maximal number of remaining schedule rows or -1 on error. + */ +static int compute_maxvar_max_slack(int maxvar, struct isl_clustering *c) +{ + int i, j; + int max_slack; + + max_slack = 0; + for (i = 0; i < c->n; ++i) { + int nvar; + struct isl_sched_graph *scc; + + if (!c->scc_in_merge[i]) + continue; + scc = &c->scc[i]; + nvar = scc->n_total_row - scc->band_start; + if (nvar != maxvar) + continue; + for (j = 0; j < scc->n; ++j) { + struct isl_sched_node *node = &scc->node[j]; + int slack; + + if (node_update_cmap(node) < 0) + return -1; + slack = node->nvar - node->rank; + if (slack > max_slack) + max_slack = slack; + } + } + + return max_slack; +} + +/* If there are any clusters where the dimension of the current band + * (i.e., the band that is to be merged) is smaller than "maxvar" and + * if there are any nodes in such a cluster where the number + * of remaining schedule rows that still need to be computed + * is greater than "max_slack", then return the smallest current band + * dimension of all these clusters. Otherwise return the original value + * of "maxvar". Return -1 in case of any error. + * Only clusters that are about to be merged are considered. + * "c" contains information about the clusters. + */ +static int limit_maxvar_to_slack(int maxvar, int max_slack, + struct isl_clustering *c) +{ + int i, j; + + for (i = 0; i < c->n; ++i) { + int nvar; + struct isl_sched_graph *scc; + + if (!c->scc_in_merge[i]) + continue; + scc = &c->scc[i]; + nvar = scc->n_total_row - scc->band_start; + if (nvar >= maxvar) + continue; + for (j = 0; j < scc->n; ++j) { + struct isl_sched_node *node = &scc->node[j]; + int slack; + + if (node_update_cmap(node) < 0) + return -1; + slack = node->nvar - node->rank; + if (slack > max_slack) { + maxvar = nvar; + break; + } + } + } + + return maxvar; +} + +/* Adjust merge_graph->maxvar based on the number of remaining schedule rows + * that still need to be computed. In particular, if there is a node + * in a cluster where the dimension of the current band is smaller + * than merge_graph->maxvar, but the number of remaining schedule rows + * is greater than that of any node in a cluster with the maximal + * dimension for the current band (i.e., merge_graph->maxvar), + * then adjust merge_graph->maxvar to the (smallest) current band dimension + * of those clusters. Without this adjustment, the total number of + * schedule dimensions would be increased, resulting in a skewed view + * of the number of coincident dimensions. + * "c" contains information about the clusters. + * + * If the maximize_band_depth option is set and merge_graph->maxvar is reduced, + * then there is no point in attempting any merge since it will be rejected + * anyway. Set merge_graph->maxvar to zero in such cases. + */ +static isl_stat adjust_maxvar_to_slack(isl_ctx *ctx, + struct isl_sched_graph *merge_graph, struct isl_clustering *c) +{ + int max_slack, maxvar; + + max_slack = compute_maxvar_max_slack(merge_graph->maxvar, c); + if (max_slack < 0) + return isl_stat_error; + maxvar = limit_maxvar_to_slack(merge_graph->maxvar, max_slack, c); + if (maxvar < 0) + return isl_stat_error; + + if (maxvar < merge_graph->maxvar) { + if (isl_options_get_schedule_maximize_band_depth(ctx)) + merge_graph->maxvar = 0; + else + merge_graph->maxvar = maxvar; + } + + return isl_stat_ok; +} + +/* Return the number of coincident dimensions in the current band of "graph", + * where the nodes of "graph" are assumed to be scheduled by a single band. + */ +static int get_n_coincident(struct isl_sched_graph *graph) +{ + int i; + + for (i = graph->band_start; i < graph->n_total_row; ++i) + if (!graph->node[0].coincident[i]) + break; + + return i - graph->band_start; +} + +/* Should the clusters be merged based on the cluster schedule + * in the current (and only) band of "merge_graph", given that + * coincidence should be maximized? + * + * If the number of coincident schedule dimensions in the merged band + * would be less than the maximal number of coincident schedule dimensions + * in any of the merged clusters, then the clusters should not be merged. + */ +static isl_bool ok_to_merge_coincident(struct isl_clustering *c, + struct isl_sched_graph *merge_graph) +{ + int i; + int n_coincident; + int max_coincident; + + max_coincident = 0; + for (i = 0; i < c->n; ++i) { + if (!c->scc_in_merge[i]) + continue; + n_coincident = get_n_coincident(&c->scc[i]); + if (n_coincident > max_coincident) + max_coincident = n_coincident; + } + + n_coincident = get_n_coincident(merge_graph); + + return n_coincident >= max_coincident; +} + +/* Return the transformation on "node" expressed by the current (and only) + * band of "merge_graph" applied to the clusters in "c". + * + * First find the representation of "node" in its SCC in "c" and + * extract the transformation expressed by the current band. + * Then extract the transformation applied by "merge_graph" + * to the cluster to which this SCC belongs. + * Combine the two to obtain the complete transformation on the node. + * + * Note that the range of the first transformation is an anonymous space, + * while the domain of the second is named "cluster_X". The range + * of the former therefore needs to be adjusted before the two + * can be combined. + */ +static __isl_give isl_map *extract_node_transformation(isl_ctx *ctx, + struct isl_sched_node *node, struct isl_clustering *c, + struct isl_sched_graph *merge_graph) +{ + struct isl_sched_node *scc_node, *cluster_node; + int start, n; + isl_id *id; + isl_space *space; + isl_multi_aff *ma, *ma2; + + scc_node = graph_find_node(ctx, &c->scc[node->scc], node->space); + start = c->scc[node->scc].band_start; + n = c->scc[node->scc].n_total_row - start; + ma = node_extract_partial_schedule_multi_aff(scc_node, start, n); + space = cluster_space(&c->scc[node->scc], c->scc_cluster[node->scc]); + cluster_node = graph_find_node(ctx, merge_graph, space); + if (space && !cluster_node) + isl_die(ctx, isl_error_internal, "unable to find cluster", + space = isl_space_free(space)); + id = isl_space_get_tuple_id(space, isl_dim_set); + ma = isl_multi_aff_set_tuple_id(ma, isl_dim_out, id); + isl_space_free(space); + n = merge_graph->n_total_row; + ma2 = node_extract_partial_schedule_multi_aff(cluster_node, 0, n); + ma = isl_multi_aff_pullback_multi_aff(ma2, ma); + + return isl_map_from_multi_aff(ma); +} + +/* Give a set of distances "set", are they bounded by a small constant + * in direction "pos"? + * In practice, check if they are bounded by 2 by checking that there + * are no elements with a value greater than or equal to 3 or + * smaller than or equal to -3. + */ +static isl_bool distance_is_bounded(__isl_keep isl_set *set, int pos) +{ + isl_bool bounded; + isl_set *test; + + if (!set) + return isl_bool_error; + + test = isl_set_copy(set); + test = isl_set_lower_bound_si(test, isl_dim_set, pos, 3); + bounded = isl_set_is_empty(test); + isl_set_free(test); + + if (bounded < 0 || !bounded) + return bounded; + + test = isl_set_copy(set); + test = isl_set_upper_bound_si(test, isl_dim_set, pos, -3); + bounded = isl_set_is_empty(test); + isl_set_free(test); + + return bounded; +} + +/* Does the set "set" have a fixed (but possible parametric) value + * at dimension "pos"? + */ +static isl_bool has_single_value(__isl_keep isl_set *set, int pos) +{ + int n; + isl_bool single; + + if (!set) + return isl_bool_error; + set = isl_set_copy(set); + n = isl_set_dim(set, isl_dim_set); + set = isl_set_project_out(set, isl_dim_set, pos + 1, n - (pos + 1)); + set = isl_set_project_out(set, isl_dim_set, 0, pos); + single = isl_set_is_singleton(set); + isl_set_free(set); + + return single; +} + +/* Does "map" have a fixed (but possible parametric) value + * at dimension "pos" of either its domain or its range? + */ +static isl_bool has_singular_src_or_dst(__isl_keep isl_map *map, int pos) +{ + isl_set *set; + isl_bool single; + + set = isl_map_domain(isl_map_copy(map)); + single = has_single_value(set, pos); + isl_set_free(set); + + if (single < 0 || single) + return single; + + set = isl_map_range(isl_map_copy(map)); + single = has_single_value(set, pos); + isl_set_free(set); + + return single; +} + +/* Does the edge "edge" from "graph" have bounded dependence distances + * in the merged graph "merge_graph" of a selection of clusters in "c"? + * + * Extract the complete transformations of the source and destination + * nodes of the edge, apply them to the edge constraints and + * compute the differences. Finally, check if these differences are bounded + * in each direction. + * + * If the dimension of the band is greater than the number of + * dimensions that can be expected to be optimized by the edge + * (based on its weight), then also allow the differences to be unbounded + * in the remaining dimensions, but only if either the source or + * the destination has a fixed value in that direction. + * This allows a statement that produces values that are used by + * several instance of another statement to be merged with that + * other statement. + * However, merging such clusters will introduce an inherently + * large proximity distance inside the merged cluster, meaning + * that proximity distances will no longer be optimized in + * subsequent merges. These merges are therefore only allowed + * after all other possible merges have been tried. + * The first time such a merge is encountered, the weight of the edge + * is replaced by a negative weight. The second time (i.e., after + * all merges over edges with a non-negative weight have been tried), + * the merge is allowed. + */ +static isl_bool has_bounded_distances(isl_ctx *ctx, struct isl_sched_edge *edge, + struct isl_sched_graph *graph, struct isl_clustering *c, + struct isl_sched_graph *merge_graph) +{ + int i, n, n_slack; + isl_bool bounded; + isl_map *map, *t; + isl_set *dist; + + map = isl_map_copy(edge->map); + t = extract_node_transformation(ctx, edge->src, c, merge_graph); + map = isl_map_apply_domain(map, t); + t = extract_node_transformation(ctx, edge->dst, c, merge_graph); + map = isl_map_apply_range(map, t); + dist = isl_map_deltas(isl_map_copy(map)); + + bounded = isl_bool_true; + n = isl_set_dim(dist, isl_dim_set); + n_slack = n - edge->weight; + if (edge->weight < 0) + n_slack -= graph->max_weight + 1; + for (i = 0; i < n; ++i) { + isl_bool bounded_i, singular_i; + + bounded_i = distance_is_bounded(dist, i); + if (bounded_i < 0) + goto error; + if (bounded_i) + continue; + if (edge->weight >= 0) + bounded = isl_bool_false; + n_slack--; + if (n_slack < 0) + break; + singular_i = has_singular_src_or_dst(map, i); + if (singular_i < 0) + goto error; + if (singular_i) + continue; + bounded = isl_bool_false; + break; + } + if (!bounded && i >= n && edge->weight >= 0) + edge->weight -= graph->max_weight + 1; + isl_map_free(map); + isl_set_free(dist); + + return bounded; +error: + isl_map_free(map); + isl_set_free(dist); + return isl_bool_error; +} + +/* Should the clusters be merged based on the cluster schedule + * in the current (and only) band of "merge_graph"? + * "graph" is the original dependence graph, while "c" records + * which SCCs are involved in the latest merge. + * + * In particular, is there at least one proximity constraint + * that is optimized by the merge? + * + * A proximity constraint is considered to be optimized + * if the dependence distances are small. + */ +static isl_bool ok_to_merge_proximity(isl_ctx *ctx, + struct isl_sched_graph *graph, struct isl_clustering *c, + struct isl_sched_graph *merge_graph) +{ + int i; + + for (i = 0; i < graph->n_edge; ++i) { + struct isl_sched_edge *edge = &graph->edge[i]; + isl_bool bounded; + + if (!is_proximity(edge)) + continue; + if (!c->scc_in_merge[edge->src->scc]) + continue; + if (!c->scc_in_merge[edge->dst->scc]) + continue; + if (c->scc_cluster[edge->dst->scc] == + c->scc_cluster[edge->src->scc]) + continue; + bounded = has_bounded_distances(ctx, edge, graph, c, + merge_graph); + if (bounded < 0 || bounded) + return bounded; + } + + return isl_bool_false; +} + +/* Should the clusters be merged based on the cluster schedule + * in the current (and only) band of "merge_graph"? + * "graph" is the original dependence graph, while "c" records + * which SCCs are involved in the latest merge. + * + * If the current band is empty, then the clusters should not be merged. + * + * If the band depth should be maximized and the merge schedule + * is incomplete (meaning that the dimension of some of the schedule + * bands in the original schedule will be reduced), then the clusters + * should not be merged. + * + * If the schedule_maximize_coincidence option is set, then check that + * the number of coincident schedule dimensions is not reduced. + * + * Finally, only allow the merge if at least one proximity + * constraint is optimized. + */ +static isl_bool ok_to_merge(isl_ctx *ctx, struct isl_sched_graph *graph, + struct isl_clustering *c, struct isl_sched_graph *merge_graph) +{ + if (merge_graph->n_total_row == merge_graph->band_start) + return isl_bool_false; + + if (isl_options_get_schedule_maximize_band_depth(ctx) && + merge_graph->n_total_row < merge_graph->maxvar) + return isl_bool_false; + + if (isl_options_get_schedule_maximize_coincidence(ctx)) { + isl_bool ok; + + ok = ok_to_merge_coincident(c, merge_graph); + if (ok < 0 || !ok) + return ok; + } + + return ok_to_merge_proximity(ctx, graph, c, merge_graph); +} + +/* Apply the schedule in "t_node" to the "n" rows starting at "first" + * of the schedule in "node" and return the result. + * + * That is, essentially compute + * + * T * N(first:first+n-1) + * + * taking into account the constant term and the parameter coefficients + * in "t_node". + */ +static __isl_give isl_mat *node_transformation(isl_ctx *ctx, + struct isl_sched_node *t_node, struct isl_sched_node *node, + int first, int n) +{ + int i, j; + isl_mat *t; + int n_row, n_col, n_param, n_var; + + n_param = node->nparam; + n_var = node->nvar; + n_row = isl_mat_rows(t_node->sched); + n_col = isl_mat_cols(node->sched); + t = isl_mat_alloc(ctx, n_row, n_col); + if (!t) + return NULL; + for (i = 0; i < n_row; ++i) { + isl_seq_cpy(t->row[i], t_node->sched->row[i], 1 + n_param); + isl_seq_clr(t->row[i] + 1 + n_param, n_var); + for (j = 0; j < n; ++j) + isl_seq_addmul(t->row[i], + t_node->sched->row[i][1 + n_param + j], + node->sched->row[first + j], + 1 + n_param + n_var); + } + return t; +} + +/* Apply the cluster schedule in "t_node" to the current band + * schedule of the nodes in "graph". + * + * In particular, replace the rows starting at band_start + * by the result of applying the cluster schedule in "t_node" + * to the original rows. + * + * The coincidence of the schedule is determined by the coincidence + * of the cluster schedule. + */ +static isl_stat transform(isl_ctx *ctx, struct isl_sched_graph *graph, + struct isl_sched_node *t_node) +{ + int i, j; + int n_new; + int start, n; + + start = graph->band_start; + n = graph->n_total_row - start; + + n_new = isl_mat_rows(t_node->sched); + for (i = 0; i < graph->n; ++i) { + struct isl_sched_node *node = &graph->node[i]; + isl_mat *t; + + t = node_transformation(ctx, t_node, node, start, n); + node->sched = isl_mat_drop_rows(node->sched, start, n); + node->sched = isl_mat_concat(node->sched, t); + node->sched_map = isl_map_free(node->sched_map); + if (!node->sched) + return isl_stat_error; + for (j = 0; j < n_new; ++j) + node->coincident[start + j] = t_node->coincident[j]; + } + graph->n_total_row -= n; + graph->n_row -= n; + graph->n_total_row += n_new; + graph->n_row += n_new; + + + return isl_stat_ok; +} + +/* Merge the clusters marked for merging in "c" into a single + * cluster using the cluster schedule in the current band of "merge_graph". + * The representative SCC for the new cluster is the SCC with + * the smallest index. + * + * The current band schedule of each SCC in the new cluster is obtained + * by applying the schedule of the corresponding original cluster + * to the original band schedule. + * All SCCs in the new cluster have the same number of schedule rows. + */ +static isl_stat merge(isl_ctx *ctx, struct isl_clustering *c, + struct isl_sched_graph *merge_graph) +{ + int i; + int cluster = -1; + isl_space *space; + + for (i = 0; i < c->n; ++i) { + struct isl_sched_node *node; + + if (!c->scc_in_merge[i]) + continue; + if (cluster < 0) + cluster = i; + space = cluster_space(&c->scc[i], c->scc_cluster[i]); + if (!space) + return isl_stat_error; + node = graph_find_node(ctx, merge_graph, space); + isl_space_free(space); + if (!node) + isl_die(ctx, isl_error_internal, + "unable to find cluster", + return isl_stat_error); + if (transform(ctx, &c->scc[i], node) < 0) + return isl_stat_error; + c->scc_cluster[i] = cluster; + } + + return isl_stat_ok; +} + +/* Try and merge the clusters of SCCs marked in c->scc_in_merge + * by scheduling the current cluster bands with respect to each other. + * + * Construct a dependence graph with a space for each cluster and + * with the coordinates of each space corresponding to the schedule + * dimensions of the current band of that cluster. + * Construct a cluster schedule in this cluster dependence graph and + * apply it to the current cluster bands if it is applicable + * according to ok_to_merge. + * + * If the number of remaining schedule dimensions in a cluster + * with a non-maximal current schedule dimension is greater than + * the number of remaining schedule dimensions in clusters + * with a maximal current schedule dimension, then restrict + * the number of rows to be computed in the cluster schedule + * to the minimal such non-maximal current schedule dimension. + * Do this by adjusting merge_graph.maxvar. + * + * Return isl_bool_true if the clusters have effectively been merged + * into a single cluster. + * + * Note that since the standard scheduling algorithm minimizes the maximal + * distance over proximity constraints, the proximity constraints between + * the merged clusters may not be optimized any further than what is + * sufficient to bring the distances within the limits of the internal + * proximity constraints inside the individual clusters. + * It may therefore make sense to perform an additional translation step + * to bring the clusters closer to each other, while maintaining + * the linear part of the merging schedule found using the standard + * scheduling algorithm. + */ +static isl_bool try_merge(isl_ctx *ctx, struct isl_sched_graph *graph, + struct isl_clustering *c) +{ + struct isl_sched_graph merge_graph = { 0 }; + isl_bool merged; + + if (init_merge_graph(ctx, graph, c, &merge_graph) < 0) + goto error; + + if (compute_maxvar(&merge_graph) < 0) + goto error; + if (adjust_maxvar_to_slack(ctx, &merge_graph,c) < 0) + goto error; + if (compute_schedule_wcc_band(ctx, &merge_graph) < 0) + goto error; + merged = ok_to_merge(ctx, graph, c, &merge_graph); + if (merged && merge(ctx, c, &merge_graph) < 0) + goto error; + + graph_free(ctx, &merge_graph); + return merged; +error: + graph_free(ctx, &merge_graph); + return isl_bool_error; +} + +/* Is there any edge marked "no_merge" between two SCCs that are + * about to be merged (i.e., that are set in "scc_in_merge")? + * "merge_edge" is the proximity edge along which the clusters of SCCs + * are going to be merged. + * + * If there is any edge between two SCCs with a negative weight, + * while the weight of "merge_edge" is non-negative, then this + * means that the edge was postponed. "merge_edge" should then + * also be postponed since merging along the edge with negative weight should + * be postponed until all edges with non-negative weight have been tried. + * Replace the weight of "merge_edge" by a negative weight as well and + * tell the caller not to attempt a merge. + */ +static int any_no_merge(struct isl_sched_graph *graph, int *scc_in_merge, + struct isl_sched_edge *merge_edge) +{ + int i; + + for (i = 0; i < graph->n_edge; ++i) { + struct isl_sched_edge *edge = &graph->edge[i]; + + if (!scc_in_merge[edge->src->scc]) + continue; + if (!scc_in_merge[edge->dst->scc]) + continue; + if (edge->no_merge) + return 1; + if (merge_edge->weight >= 0 && edge->weight < 0) { + merge_edge->weight -= graph->max_weight + 1; + return 1; + } + } + + return 0; +} + +/* Merge the two clusters in "c" connected by the edge in "graph" + * with index "edge" into a single cluster. + * If it turns out to be impossible to merge these two clusters, + * then mark the edge as "no_merge" such that it will not be + * considered again. + * + * First mark all SCCs that need to be merged. This includes the SCCs + * in the two clusters, but it may also include the SCCs + * of intermediate clusters. + * If there is already a no_merge edge between any pair of such SCCs, + * then simply mark the current edge as no_merge as well. + * Likewise, if any of those edges was postponed by has_bounded_distances, + * then postpone the current edge as well. + * Otherwise, try and merge the clusters and mark "edge" as "no_merge" + * if the clusters did not end up getting merged, unless the non-merge + * is due to the fact that the edge was postponed. This postponement + * can be recognized by a change in weight (from non-negative to negative). + */ +static isl_stat merge_clusters_along_edge(isl_ctx *ctx, + struct isl_sched_graph *graph, int edge, struct isl_clustering *c) +{ + isl_bool merged; + int edge_weight = graph->edge[edge].weight; + + if (mark_merge_sccs(ctx, graph, edge, c) < 0) + return isl_stat_error; + + if (any_no_merge(graph, c->scc_in_merge, &graph->edge[edge])) + merged = isl_bool_false; + else + merged = try_merge(ctx, graph, c); + if (merged < 0) + return isl_stat_error; + if (!merged && edge_weight == graph->edge[edge].weight) + graph->edge[edge].no_merge = 1; + + return isl_stat_ok; +} + +/* Does "node" belong to the cluster identified by "cluster"? + */ +static int node_cluster_exactly(struct isl_sched_node *node, int cluster) +{ + return node->cluster == cluster; +} + +/* Does "edge" connect two nodes belonging to the cluster + * identified by "cluster"? + */ +static int edge_cluster_exactly(struct isl_sched_edge *edge, int cluster) +{ + return edge->src->cluster == cluster && edge->dst->cluster == cluster; +} + +/* Swap the schedule of "node1" and "node2". + * Both nodes have been derived from the same node in a common parent graph. + * Since the "coincident" field is shared with that node + * in the parent graph, there is no need to also swap this field. + */ +static void swap_sched(struct isl_sched_node *node1, + struct isl_sched_node *node2) +{ + isl_mat *sched; + isl_map *sched_map; + + sched = node1->sched; + node1->sched = node2->sched; + node2->sched = sched; + + sched_map = node1->sched_map; + node1->sched_map = node2->sched_map; + node2->sched_map = sched_map; +} + +/* Copy the current band schedule from the SCCs that form the cluster + * with index "pos" to the actual cluster at position "pos". + * By construction, the index of the first SCC that belongs to the cluster + * is also "pos". + * + * The order of the nodes inside both the SCCs and the cluster + * is assumed to be same as the order in the original "graph". + * + * Since the SCC graphs will no longer be used after this function, + * the schedules are actually swapped rather than copied. + */ +static isl_stat copy_partial(struct isl_sched_graph *graph, + struct isl_clustering *c, int pos) +{ + int i, j; + + c->cluster[pos].n_total_row = c->scc[pos].n_total_row; + c->cluster[pos].n_row = c->scc[pos].n_row; + c->cluster[pos].maxvar = c->scc[pos].maxvar; + j = 0; + for (i = 0; i < graph->n; ++i) { + int k; + int s; + + if (graph->node[i].cluster != pos) + continue; + s = graph->node[i].scc; + k = c->scc_node[s]++; + swap_sched(&c->cluster[pos].node[j], &c->scc[s].node[k]); + if (c->scc[s].maxvar > c->cluster[pos].maxvar) + c->cluster[pos].maxvar = c->scc[s].maxvar; + ++j; + } + + return isl_stat_ok; +} + +/* Is there a (conditional) validity dependence from node[j] to node[i], + * forcing node[i] to follow node[j] or do the nodes belong to the same + * cluster? + */ +static isl_bool node_follows_strong_or_same_cluster(int i, int j, void *user) +{ + struct isl_sched_graph *graph = user; + + if (graph->node[i].cluster == graph->node[j].cluster) + return isl_bool_true; + return graph_has_validity_edge(graph, &graph->node[j], &graph->node[i]); +} + +/* Extract the merged clusters of SCCs in "graph", sort them, and + * store them in c->clusters. Update c->scc_cluster accordingly. + * + * First keep track of the cluster containing the SCC to which a node + * belongs in the node itself. + * Then extract the clusters into c->clusters, copying the current + * band schedule from the SCCs that belong to the cluster. + * Do this only once per cluster. + * + * Finally, topologically sort the clusters and update c->scc_cluster + * to match the new scc numbering. While the SCCs were originally + * sorted already, some SCCs that depend on some other SCCs may + * have been merged with SCCs that appear before these other SCCs. + * A reordering may therefore be required. + */ +static isl_stat extract_clusters(isl_ctx *ctx, struct isl_sched_graph *graph, + struct isl_clustering *c) +{ + int i; + + for (i = 0; i < graph->n; ++i) + graph->node[i].cluster = c->scc_cluster[graph->node[i].scc]; + + for (i = 0; i < graph->scc; ++i) { + if (c->scc_cluster[i] != i) + continue; + if (extract_sub_graph(ctx, graph, &node_cluster_exactly, + &edge_cluster_exactly, i, &c->cluster[i]) < 0) + return isl_stat_error; + c->cluster[i].src_scc = -1; + c->cluster[i].dst_scc = -1; + if (copy_partial(graph, c, i) < 0) + return isl_stat_error; + } + + if (detect_ccs(ctx, graph, &node_follows_strong_or_same_cluster) < 0) + return isl_stat_error; + for (i = 0; i < graph->n; ++i) + c->scc_cluster[graph->node[i].scc] = graph->node[i].cluster; + + return isl_stat_ok; +} + +/* Compute weights on the proximity edges of "graph" that can + * be used by find_proximity to find the most appropriate + * proximity edge to use to merge two clusters in "c". + * The weights are also used by has_bounded_distances to determine + * whether the merge should be allowed. + * Store the maximum of the computed weights in graph->max_weight. + * + * The computed weight is a measure for the number of remaining schedule + * dimensions that can still be completely aligned. + * In particular, compute the number of equalities between + * input dimensions and output dimensions in the proximity constraints. + * The directions that are already handled by outer schedule bands + * are projected out prior to determining this number. + * + * Edges that will never be considered by find_proximity are ignored. + */ +static isl_stat compute_weights(struct isl_sched_graph *graph, + struct isl_clustering *c) +{ + int i; + + graph->max_weight = 0; + + for (i = 0; i < graph->n_edge; ++i) { + struct isl_sched_edge *edge = &graph->edge[i]; + struct isl_sched_node *src = edge->src; + struct isl_sched_node *dst = edge->dst; + isl_basic_map *hull; + int n_in, n_out; + + if (!is_proximity(edge)) + continue; + if (bad_cluster(&c->scc[edge->src->scc]) || + bad_cluster(&c->scc[edge->dst->scc])) + continue; + if (c->scc_cluster[edge->dst->scc] == + c->scc_cluster[edge->src->scc]) + continue; + + hull = isl_map_affine_hull(isl_map_copy(edge->map)); + hull = isl_basic_map_transform_dims(hull, isl_dim_in, 0, + isl_mat_copy(src->ctrans)); + hull = isl_basic_map_transform_dims(hull, isl_dim_out, 0, + isl_mat_copy(dst->ctrans)); + hull = isl_basic_map_project_out(hull, + isl_dim_in, 0, src->rank); + hull = isl_basic_map_project_out(hull, + isl_dim_out, 0, dst->rank); + hull = isl_basic_map_remove_divs(hull); + n_in = isl_basic_map_dim(hull, isl_dim_in); + n_out = isl_basic_map_dim(hull, isl_dim_out); + hull = isl_basic_map_drop_constraints_not_involving_dims(hull, + isl_dim_in, 0, n_in); + hull = isl_basic_map_drop_constraints_not_involving_dims(hull, + isl_dim_out, 0, n_out); + if (!hull) + return isl_stat_error; + edge->weight = hull->n_eq; + isl_basic_map_free(hull); + + if (edge->weight > graph->max_weight) + graph->max_weight = edge->weight; + } + + return isl_stat_ok; +} + +/* Call compute_schedule_finish_band on each of the clusters in "c" + * in their topological order. This order is determined by the scc + * fields of the nodes in "graph". + * Combine the results in a sequence expressing the topological order. + * + * If there is only one cluster left, then there is no need to introduce + * a sequence node. Also, in this case, the cluster necessarily contains + * the SCC at position 0 in the original graph and is therefore also + * stored in the first cluster of "c". + */ +static __isl_give isl_schedule_node *finish_bands_clustering( + __isl_take isl_schedule_node *node, struct isl_sched_graph *graph, + struct isl_clustering *c) +{ + int i; + isl_ctx *ctx; + isl_union_set_list *filters; + + if (graph->scc == 1) + return compute_schedule_finish_band(node, &c->cluster[0], 0); + + ctx = isl_schedule_node_get_ctx(node); + + filters = extract_sccs(ctx, graph); + node = isl_schedule_node_insert_sequence(node, filters); + + for (i = 0; i < graph->scc; ++i) { + int j = c->scc_cluster[i]; + node = isl_schedule_node_child(node, i); + node = isl_schedule_node_child(node, 0); + node = compute_schedule_finish_band(node, &c->cluster[j], 0); + node = isl_schedule_node_parent(node); node = isl_schedule_node_parent(node); + } + + return node; +} + +/* Compute a schedule for a connected dependence graph by first considering + * each strongly connected component (SCC) in the graph separately and then + * incrementally combining them into clusters. + * Return the updated schedule node. + * + * Initially, each cluster consists of a single SCC, each with its + * own band schedule. The algorithm then tries to merge pairs + * of clusters along a proximity edge until no more suitable + * proximity edges can be found. During this merging, the schedule + * is maintained in the individual SCCs. + * After the merging is completed, the full resulting clusters + * are extracted and in finish_bands_clustering, + * compute_schedule_finish_band is called on each of them to integrate + * the band into "node" and to continue the computation. + * + * compute_weights initializes the weights that are used by find_proximity. + */ +static __isl_give isl_schedule_node *compute_schedule_wcc_clustering( + __isl_take isl_schedule_node *node, struct isl_sched_graph *graph) +{ + isl_ctx *ctx; + struct isl_clustering c; + int i; + + ctx = isl_schedule_node_get_ctx(node); + + if (clustering_init(ctx, &c, graph) < 0) + goto error; + + if (compute_weights(graph, &c) < 0) + goto error; + + for (;;) { + i = find_proximity(graph, &c); + if (i < 0) + goto error; + if (i >= graph->n_edge) + break; + if (merge_clusters_along_edge(ctx, graph, i, &c) < 0) + goto error; + } + if (extract_clusters(ctx, graph, &c) < 0) + goto error; + + node = finish_bands_clustering(node, graph, &c); + + clustering_free(ctx, &c); return node; +error: + clustering_free(ctx, &c); + return isl_schedule_node_free(node); +} + +/* Compute a schedule for a connected dependence graph and return + * the updated schedule node. + * + * If Feautrier's algorithm is selected, we first recursively try to satisfy + * as many validity dependences as possible. When all validity dependences + * are satisfied we extend the schedule to a full-dimensional schedule. + * + * Call compute_schedule_wcc_whole or compute_schedule_wcc_clustering + * depending on whether the user has selected the option to try and + * compute a schedule for the entire (weakly connected) component first. + * If there is only a single strongly connected component (SCC), then + * there is no point in trying to combine SCCs + * in compute_schedule_wcc_clustering, so compute_schedule_wcc_whole + * is called instead. + */ +static __isl_give isl_schedule_node *compute_schedule_wcc( + __isl_take isl_schedule_node *node, struct isl_sched_graph *graph) +{ + isl_ctx *ctx; + + if (!node) + return NULL; + + ctx = isl_schedule_node_get_ctx(node); + if (detect_sccs(ctx, graph) < 0) + return isl_schedule_node_free(node); + + if (compute_maxvar(graph) < 0) + return isl_schedule_node_free(node); + + if (need_feautrier_step(ctx, graph)) + return compute_schedule_wcc_feautrier(node, graph); + + if (graph->scc <= 1 || isl_options_get_schedule_whole_component(ctx)) + return compute_schedule_wcc_whole(node, graph); + else + return compute_schedule_wcc_clustering(node, graph); } /* Compute a schedule for each group of nodes identified by node->scc diff --git a/polly/lib/External/isl/isl_tarjan.c b/polly/lib/External/isl/isl_tarjan.c index 414a2e09cc2..a958c016e0b 100644 --- a/polly/lib/External/isl/isl_tarjan.c +++ b/polly/lib/External/isl/isl_tarjan.c @@ -14,14 +14,15 @@ #include <isl/ctx.h> #include <isl_tarjan.h> -void isl_tarjan_graph_free(struct isl_tarjan_graph *g) +struct isl_tarjan_graph *isl_tarjan_graph_free(struct isl_tarjan_graph *g) { if (!g) - return; + return NULL; free(g->node); free(g->stack); free(g->order); free(g); + return NULL; } static struct isl_tarjan_graph *isl_tarjan_graph_alloc(isl_ctx *ctx, int len) @@ -128,11 +129,31 @@ struct isl_tarjan_graph *isl_tarjan_graph_init(isl_ctx *ctx, int len, if (g->node[i].index >= 0) continue; if (isl_tarjan_components(g, i, follows, user) < 0) - goto error; + return isl_tarjan_graph_free(g); } return g; -error: - isl_tarjan_graph_free(g); - return NULL; +} + +/* Decompose the graph with "len" nodes and edges defined by "follows" + * into the strongly connected component (SCC) that contains "node" + * as well as all SCCs that are followed by this SCC. + * follows(i, j, user) should return 1 if "i" follows "j" and 0 otherwise. + * It should return -1 on error. + * + * The SCC containing "node" will appear as the last component + * in g->order. + */ +struct isl_tarjan_graph *isl_tarjan_graph_component(isl_ctx *ctx, int len, + int node, isl_bool (*follows)(int i, int j, void *user), void *user) +{ + struct isl_tarjan_graph *g; + + g = isl_tarjan_graph_alloc(ctx, len); + if (!g) + return NULL; + if (isl_tarjan_components(g, node, follows, user) < 0) + return isl_tarjan_graph_free(g); + + return g; } diff --git a/polly/lib/External/isl/isl_tarjan.h b/polly/lib/External/isl/isl_tarjan.h index 1f8f02e1fb4..af3452cc867 100644 --- a/polly/lib/External/isl/isl_tarjan.h +++ b/polly/lib/External/isl/isl_tarjan.h @@ -35,6 +35,8 @@ struct isl_tarjan_graph { struct isl_tarjan_graph *isl_tarjan_graph_init(isl_ctx *ctx, int len, isl_bool (*follows)(int i, int j, void *user), void *user); -void isl_tarjan_graph_free(struct isl_tarjan_graph *g); +struct isl_tarjan_graph *isl_tarjan_graph_component(isl_ctx *ctx, int len, + int node, isl_bool (*follows)(int i, int j, void *user), void *user); +struct isl_tarjan_graph *isl_tarjan_graph_free(struct isl_tarjan_graph *g); #endif diff --git a/polly/lib/External/isl/isl_test.c b/polly/lib/External/isl/isl_test.c index a7d6c9ba42f..e791fa27ac0 100644 --- a/polly/lib/External/isl/isl_test.c +++ b/polly/lib/External/isl/isl_test.c @@ -2896,6 +2896,7 @@ struct { "4e0 >= 58 + i0 - i1 and i0 >= 2 and i0 <= 511 and " "4e0 >= -61 + i0 + i1)) or " "(i1 <= 66 - i0 and i0 >= 2 and i1 >= 59 + i0) }", 1 }, + { "[a, b] -> { : a = 0 and b = -1 }", "[b, a] -> { : b >= -10 }", 1 }, }; static int test_subset(isl_ctx *ctx) @@ -3621,6 +3622,7 @@ static int test_bounded_coefficients_schedule(isl_ctx *ctx) int test_schedule(isl_ctx *ctx) { const char *D, *W, *R, *V, *P, *S; + int max_coincidence; /* Handle resulting schedule with zero bands. */ if (test_one_schedule(ctx, "{[]}", "{}", "{}", "{[] -> []}", 0, 0) < 0) @@ -3710,8 +3712,11 @@ int test_schedule(isl_ctx *ctx) "S4[i] -> a[i,N] }"; S = "{ S1[i] -> [0,i,0]; S2[i] -> [1,i,0]; S3[i,j] -> [2,i,j]; " "S4[i] -> [4,i,0] }"; + max_coincidence = isl_options_get_schedule_maximize_coincidence(ctx); + isl_options_set_schedule_maximize_coincidence(ctx, 0); if (test_one_schedule(ctx, D, W, R, S, 2, 0) < 0) return -1; + isl_options_set_schedule_maximize_coincidence(ctx, max_coincidence); D = "[N] -> { S_0[i, j] : i >= 1 and i <= N and j >= 1 and j <= N }"; W = "[N] -> { S_0[i, j] -> s[0] : i >= 1 and i <= N and j >= 1 and " @@ -3922,6 +3927,36 @@ int test_schedule(isl_ctx *ctx) return 0; } +/* Perform scheduling tests using the whole component scheduler. + */ +static int test_schedule_whole(isl_ctx *ctx) +{ + int whole; + int r; + + whole = isl_options_get_schedule_whole_component(ctx); + isl_options_set_schedule_whole_component(ctx, 1); + r = test_schedule(ctx); + isl_options_set_schedule_whole_component(ctx, whole); + + return r; +} + +/* Perform scheduling tests using the incremental scheduler. + */ +static int test_schedule_incremental(isl_ctx *ctx) +{ + int whole; + int r; + + whole = isl_options_get_schedule_whole_component(ctx); + isl_options_set_schedule_whole_component(ctx, 0); + r = test_schedule(ctx); + isl_options_set_schedule_whole_component(ctx, whole); + + return r; +} + int test_plain_injective(isl_ctx *ctx, const char *str, int injective) { isl_union_map *umap; @@ -6248,7 +6283,8 @@ struct { { "dim_max", &test_dim_max }, { "affine", &test_aff }, { "injective", &test_injective }, - { "schedule", &test_schedule }, + { "schedule (whole component)", &test_schedule_whole }, + { "schedule (incremental)", &test_schedule_incremental }, { "schedule tree grouping", &test_schedule_tree_group }, { "tile", &test_tile }, { "union_pw", &test_union_pw }, diff --git a/polly/lib/External/isl/ltmain.sh b/polly/lib/External/isl/ltmain.sh index bffda54187a..c29db3631ea 100644 --- a/polly/lib/External/isl/ltmain.sh +++ b/polly/lib/External/isl/ltmain.sh @@ -70,7 +70,7 @@ # compiler: $LTCC # compiler flags: $LTCFLAGS # linker: $LD (gnu? $with_gnu_ld) -# $progname: (GNU libtool) 2.4.2 Debian-2.4.2-1.11 +# $progname: (GNU libtool) 2.4.2 Debian-2.4.2-1.10ubuntu1 # automake: $automake_version # autoconf: $autoconf_version # @@ -80,7 +80,7 @@ PROGRAM=libtool PACKAGE=libtool -VERSION="2.4.2 Debian-2.4.2-1.11" +VERSION="2.4.2 Debian-2.4.2-1.10ubuntu1" TIMESTAMP="" package_revision=1.3337 |