summaryrefslogtreecommitdiffstats
path: root/polly/lib/External/ppcg/cpu.c
diff options
context:
space:
mode:
Diffstat (limited to 'polly/lib/External/ppcg/cpu.c')
-rw-r--r--polly/lib/External/ppcg/cpu.c552
1 files changed, 552 insertions, 0 deletions
diff --git a/polly/lib/External/ppcg/cpu.c b/polly/lib/External/ppcg/cpu.c
new file mode 100644
index 00000000000..c9ce02837bc
--- /dev/null
+++ b/polly/lib/External/ppcg/cpu.c
@@ -0,0 +1,552 @@
+/*
+ * Copyright 2012 INRIA Paris-Rocquencourt
+ *
+ * Use of this software is governed by the MIT license
+ *
+ * Written by Tobias Grosser, INRIA Paris-Rocquencourt,
+ * Domaine de Voluceau, Rocquenqourt, B.P. 105,
+ * 78153 Le Chesnay Cedex France
+ */
+
+#include <limits.h>
+#include <stdio.h>
+#include <string.h>
+
+#include <isl/aff.h>
+#include <isl/ctx.h>
+#include <isl/map.h>
+#include <isl/ast_build.h>
+#include <pet.h>
+
+#include "ppcg.h"
+#include "ppcg_options.h"
+#include "cpu.h"
+#include "print.h"
+
+/* Representation of a statement inside a generated AST.
+ *
+ * "stmt" refers to the original statement.
+ * "ref2expr" maps the reference identifier of each access in
+ * the statement to an AST expression that should be printed
+ * at the place of the access.
+ */
+struct ppcg_stmt {
+ struct pet_stmt *stmt;
+
+ isl_id_to_ast_expr *ref2expr;
+};
+
+static void ppcg_stmt_free(void *user)
+{
+ struct ppcg_stmt *stmt = user;
+ int i;
+
+ if (!stmt)
+ return;
+
+ isl_id_to_ast_expr_free(stmt->ref2expr);
+
+ free(stmt);
+}
+
+/* Derive the output file name from the input file name.
+ * 'input' is the entire path of the input file. The output
+ * is the file name plus the additional extension.
+ *
+ * We will basically replace everything after the last point
+ * with '.ppcg.c'. This means file.c becomes file.ppcg.c
+ */
+static FILE *get_output_file(const char *input, const char *output)
+{
+ char name[PATH_MAX];
+ const char *ext;
+ const char ppcg_marker[] = ".ppcg";
+ int len;
+ FILE *file;
+
+ len = ppcg_extract_base_name(name, input);
+
+ strcpy(name + len, ppcg_marker);
+ ext = strrchr(input, '.');
+ strcpy(name + len + sizeof(ppcg_marker) - 1, ext ? ext : ".c");
+
+ if (!output)
+ output = name;
+
+ file = fopen(output, "w");
+ if (!file) {
+ fprintf(stderr, "Unable to open '%s' for writing\n", output);
+ return NULL;
+ }
+
+ return file;
+}
+
+/* Data used to annotate for nodes in the ast.
+ */
+struct ast_node_userinfo {
+ /* The for node is an openmp parallel for node. */
+ int is_openmp;
+};
+
+/* Information used while building the ast.
+ */
+struct ast_build_userinfo {
+ /* The current ppcg scop. */
+ struct ppcg_scop *scop;
+
+ /* Are we currently in a parallel for loop? */
+ int in_parallel_for;
+};
+
+/* Check if the current scheduling dimension is parallel.
+ *
+ * We check for parallelism by verifying that the loop does not carry any
+ * dependences.
+ * If the live_range_reordering option is set, then this currently
+ * includes the order dependences. In principle, non-zero order dependences
+ * could be allowed, but this would require privatization and/or expansion.
+ *
+ * Parallelism test: if the distance is zero in all outer dimensions, then it
+ * has to be zero in the current dimension as well.
+ * Implementation: first, translate dependences into time space, then force
+ * outer dimensions to be equal. If the distance is zero in the current
+ * dimension, then the loop is parallel.
+ * The distance is zero in the current dimension if it is a subset of a map
+ * with equal values for the current dimension.
+ */
+static int ast_schedule_dim_is_parallel(__isl_keep isl_ast_build *build,
+ struct ppcg_scop *scop)
+{
+ isl_union_map *schedule_node, *schedule, *deps;
+ isl_map *schedule_deps, *test;
+ isl_space *schedule_space;
+ unsigned i, dimension, is_parallel;
+
+ schedule = isl_ast_build_get_schedule(build);
+ schedule_space = isl_ast_build_get_schedule_space(build);
+
+ dimension = isl_space_dim(schedule_space, isl_dim_out) - 1;
+
+ deps = isl_union_map_copy(scop->dep_flow);
+ deps = isl_union_map_union(deps, isl_union_map_copy(scop->dep_false));
+ if (scop->options->live_range_reordering) {
+ isl_union_map *order = isl_union_map_copy(scop->dep_order);
+ deps = isl_union_map_union(deps, order);
+ }
+ deps = isl_union_map_apply_range(deps, isl_union_map_copy(schedule));
+ deps = isl_union_map_apply_domain(deps, schedule);
+
+ if (isl_union_map_is_empty(deps)) {
+ isl_union_map_free(deps);
+ isl_space_free(schedule_space);
+ return 1;
+ }
+
+ schedule_deps = isl_map_from_union_map(deps);
+
+ for (i = 0; i < dimension; i++)
+ schedule_deps = isl_map_equate(schedule_deps, isl_dim_out, i,
+ isl_dim_in, i);
+
+ test = isl_map_universe(isl_map_get_space(schedule_deps));
+ test = isl_map_equate(test, isl_dim_out, dimension, isl_dim_in,
+ dimension);
+ is_parallel = isl_map_is_subset(schedule_deps, test);
+
+ isl_space_free(schedule_space);
+ isl_map_free(test);
+ isl_map_free(schedule_deps);
+
+ return is_parallel;
+}
+
+/* Mark a for node openmp parallel, if it is the outermost parallel for node.
+ */
+static void mark_openmp_parallel(__isl_keep isl_ast_build *build,
+ struct ast_build_userinfo *build_info,
+ struct ast_node_userinfo *node_info)
+{
+ if (build_info->in_parallel_for)
+ return;
+
+ if (ast_schedule_dim_is_parallel(build, build_info->scop)) {
+ build_info->in_parallel_for = 1;
+ node_info->is_openmp = 1;
+ }
+}
+
+/* Allocate an ast_node_info structure and initialize it with default values.
+ */
+static struct ast_node_userinfo *allocate_ast_node_userinfo()
+{
+ struct ast_node_userinfo *node_info;
+ node_info = (struct ast_node_userinfo *)
+ malloc(sizeof(struct ast_node_userinfo));
+ node_info->is_openmp = 0;
+ return node_info;
+}
+
+/* Free an ast_node_info structure.
+ */
+static void free_ast_node_userinfo(void *ptr)
+{
+ struct ast_node_userinfo *info;
+ info = (struct ast_node_userinfo *) ptr;
+ free(info);
+}
+
+/* This method is executed before the construction of a for node. It creates
+ * an isl_id that is used to annotate the subsequently generated ast for nodes.
+ *
+ * In this function we also run the following analyses:
+ *
+ * - Detection of openmp parallel loops
+ */
+static __isl_give isl_id *ast_build_before_for(
+ __isl_keep isl_ast_build *build, void *user)
+{
+ isl_id *id;
+ struct ast_build_userinfo *build_info;
+ struct ast_node_userinfo *node_info;
+
+ build_info = (struct ast_build_userinfo *) user;
+ node_info = allocate_ast_node_userinfo();
+ id = isl_id_alloc(isl_ast_build_get_ctx(build), "", node_info);
+ id = isl_id_set_free_user(id, free_ast_node_userinfo);
+
+ mark_openmp_parallel(build, build_info, node_info);
+
+ return id;
+}
+
+/* This method is executed after the construction of a for node.
+ *
+ * It performs the following actions:
+ *
+ * - Reset the 'in_parallel_for' flag, as soon as we leave a for node,
+ * that is marked as openmp parallel.
+ *
+ */
+static __isl_give isl_ast_node *ast_build_after_for(__isl_take isl_ast_node *node,
+ __isl_keep isl_ast_build *build, void *user) {
+ isl_id *id;
+ struct ast_build_userinfo *build_info;
+ struct ast_node_userinfo *info;
+
+ id = isl_ast_node_get_annotation(node);
+ info = isl_id_get_user(id);
+
+ if (info && info->is_openmp) {
+ build_info = (struct ast_build_userinfo *) user;
+ build_info->in_parallel_for = 0;
+ }
+
+ isl_id_free(id);
+
+ return node;
+}
+
+/* Find the element in scop->stmts that has the given "id".
+ */
+static struct pet_stmt *find_stmt(struct ppcg_scop *scop, __isl_keep isl_id *id)
+{
+ int i;
+
+ for (i = 0; i < scop->pet->n_stmt; ++i) {
+ struct pet_stmt *stmt = scop->pet->stmts[i];
+ isl_id *id_i;
+
+ id_i = isl_set_get_tuple_id(stmt->domain);
+ isl_id_free(id_i);
+
+ if (id_i == id)
+ return stmt;
+ }
+
+ isl_die(isl_id_get_ctx(id), isl_error_internal,
+ "statement not found", return NULL);
+}
+
+/* Print a user statement in the generated AST.
+ * The ppcg_stmt has been attached to the node in at_each_domain.
+ */
+static __isl_give isl_printer *print_user(__isl_take isl_printer *p,
+ __isl_take isl_ast_print_options *print_options,
+ __isl_keep isl_ast_node *node, void *user)
+{
+ struct ppcg_stmt *stmt;
+ isl_id *id;
+
+ id = isl_ast_node_get_annotation(node);
+ stmt = isl_id_get_user(id);
+ isl_id_free(id);
+
+ p = pet_stmt_print_body(stmt->stmt, p, stmt->ref2expr);
+
+ isl_ast_print_options_free(print_options);
+
+ return p;
+}
+
+
+/* Print a for loop node as an openmp parallel loop.
+ *
+ * To print an openmp parallel loop we print a normal for loop, but add
+ * "#pragma openmp parallel for" in front.
+ *
+ * Variables that are declared within the body of this for loop are
+ * automatically openmp 'private'. Iterators declared outside of the
+ * for loop are automatically openmp 'shared'. As ppcg declares all iterators
+ * at the position where they are assigned, there is no need to explicitly mark
+ * variables. Their automatically assigned type is already correct.
+ *
+ * This function only generates valid OpenMP code, if the ast was generated
+ * with the 'atomic-bounds' option enabled.
+ *
+ */
+static __isl_give isl_printer *print_for_with_openmp(
+ __isl_keep isl_ast_node *node, __isl_take isl_printer *p,
+ __isl_take isl_ast_print_options *print_options)
+{
+ p = isl_printer_start_line(p);
+ p = isl_printer_print_str(p, "#pragma omp parallel for");
+ p = isl_printer_end_line(p);
+
+ p = isl_ast_node_for_print(node, p, print_options);
+
+ return p;
+}
+
+/* Print a for node.
+ *
+ * Depending on how the node is annotated, we either print a normal
+ * for node or an openmp parallel for node.
+ */
+static __isl_give isl_printer *print_for(__isl_take isl_printer *p,
+ __isl_take isl_ast_print_options *print_options,
+ __isl_keep isl_ast_node *node, void *user)
+{
+ struct ppcg_print_info *print_info;
+ isl_id *id;
+ int openmp;
+
+ openmp = 0;
+ id = isl_ast_node_get_annotation(node);
+
+ if (id) {
+ struct ast_node_userinfo *info;
+
+ info = (struct ast_node_userinfo *) isl_id_get_user(id);
+ if (info && info->is_openmp)
+ openmp = 1;
+ }
+
+ if (openmp)
+ p = print_for_with_openmp(node, p, print_options);
+ else
+ p = isl_ast_node_for_print(node, p, print_options);
+
+ isl_id_free(id);
+
+ return p;
+}
+
+/* Index transformation callback for pet_stmt_build_ast_exprs.
+ *
+ * "index" expresses the array indices in terms of statement iterators
+ * "iterator_map" expresses the statement iterators in terms of
+ * AST loop iterators.
+ *
+ * The result expresses the array indices in terms of
+ * AST loop iterators.
+ */
+static __isl_give isl_multi_pw_aff *pullback_index(
+ __isl_take isl_multi_pw_aff *index, __isl_keep isl_id *id, void *user)
+{
+ isl_pw_multi_aff *iterator_map = user;
+
+ iterator_map = isl_pw_multi_aff_copy(iterator_map);
+ return isl_multi_pw_aff_pullback_pw_multi_aff(index, iterator_map);
+}
+
+/* Transform the accesses in the statement associated to the domain
+ * called by "node" to refer to the AST loop iterators, construct
+ * corresponding AST expressions using "build",
+ * collect them in a ppcg_stmt and annotate the node with the ppcg_stmt.
+ */
+static __isl_give isl_ast_node *at_each_domain(__isl_take isl_ast_node *node,
+ __isl_keep isl_ast_build *build, void *user)
+{
+ struct ppcg_scop *scop = user;
+ isl_ast_expr *expr, *arg;
+ isl_ctx *ctx;
+ isl_id *id;
+ isl_map *map;
+ isl_pw_multi_aff *iterator_map;
+ struct ppcg_stmt *stmt;
+
+ ctx = isl_ast_node_get_ctx(node);
+ stmt = isl_calloc_type(ctx, struct ppcg_stmt);
+ if (!stmt)
+ goto error;
+
+ expr = isl_ast_node_user_get_expr(node);
+ arg = isl_ast_expr_get_op_arg(expr, 0);
+ isl_ast_expr_free(expr);
+ id = isl_ast_expr_get_id(arg);
+ isl_ast_expr_free(arg);
+ stmt->stmt = find_stmt(scop, id);
+ isl_id_free(id);
+ if (!stmt->stmt)
+ goto error;
+
+ map = isl_map_from_union_map(isl_ast_build_get_schedule(build));
+ map = isl_map_reverse(map);
+ iterator_map = isl_pw_multi_aff_from_map(map);
+ stmt->ref2expr = pet_stmt_build_ast_exprs(stmt->stmt, build,
+ &pullback_index, iterator_map, NULL, NULL);
+ isl_pw_multi_aff_free(iterator_map);
+
+ id = isl_id_alloc(isl_ast_node_get_ctx(node), NULL, stmt);
+ id = isl_id_set_free_user(id, &ppcg_stmt_free);
+ return isl_ast_node_set_annotation(node, id);
+error:
+ ppcg_stmt_free(stmt);
+ return isl_ast_node_free(node);
+}
+
+/* Set *depth to the number of scheduling dimensions
+ * for the schedule of the first domain.
+ * We assume here that this number is the same for all domains.
+ */
+static isl_stat set_depth(__isl_take isl_map *map, void *user)
+{
+ unsigned *depth = user;
+
+ *depth = isl_map_dim(map, isl_dim_out);
+
+ isl_map_free(map);
+ return isl_stat_error;
+}
+
+/* Code generate the scop 'scop' and print the corresponding C code to 'p'.
+ */
+static __isl_give isl_printer *print_scop(struct ppcg_scop *scop,
+ __isl_take isl_printer *p, struct ppcg_options *options)
+{
+ isl_ctx *ctx = isl_printer_get_ctx(p);
+ isl_set *context;
+ isl_union_set *domain_set;
+ isl_union_map *schedule_map;
+ isl_ast_build *build;
+ isl_ast_print_options *print_options;
+ isl_ast_node *tree;
+ isl_id_list *iterators;
+ struct ast_build_userinfo build_info;
+ int depth;
+
+ context = isl_set_copy(scop->context);
+ domain_set = isl_union_set_copy(scop->domain);
+ schedule_map = isl_schedule_get_map(scop->schedule);
+ schedule_map = isl_union_map_intersect_domain(schedule_map, domain_set);
+
+ isl_union_map_foreach_map(schedule_map, &set_depth, &depth);
+
+ build = isl_ast_build_from_context(context);
+ iterators = ppcg_scop_generate_names(scop, depth, "c");
+ build = isl_ast_build_set_iterators(build, iterators);
+ build = isl_ast_build_set_at_each_domain(build, &at_each_domain, scop);
+
+ if (options->openmp) {
+ build_info.scop = scop;
+ build_info.in_parallel_for = 0;
+
+ build = isl_ast_build_set_before_each_for(build,
+ &ast_build_before_for,
+ &build_info);
+ build = isl_ast_build_set_after_each_for(build,
+ &ast_build_after_for,
+ &build_info);
+ }
+
+ tree = isl_ast_build_node_from_schedule_map(build, schedule_map);
+ isl_ast_build_free(build);
+
+ print_options = isl_ast_print_options_alloc(ctx);
+ print_options = isl_ast_print_options_set_print_user(print_options,
+ &print_user, NULL);
+
+ print_options = isl_ast_print_options_set_print_for(print_options,
+ &print_for, NULL);
+
+ p = ppcg_print_macros(p, tree);
+ p = isl_ast_node_print(tree, p, print_options);
+
+ isl_ast_node_free(tree);
+
+ return p;
+}
+
+/* Generate CPU code for the scop "ps" and print the corresponding C code
+ * to "p", including variable declarations.
+ */
+__isl_give isl_printer *print_cpu(__isl_take isl_printer *p,
+ struct ppcg_scop *ps, struct ppcg_options *options)
+{
+ int hidden;
+
+ p = isl_printer_start_line(p);
+ p = isl_printer_print_str(p, "/* ppcg generated CPU code */");
+ p = isl_printer_end_line(p);
+
+ p = isl_printer_start_line(p);
+ p = isl_printer_end_line(p);
+
+ p = isl_ast_op_type_print_macro(isl_ast_op_fdiv_q, p);
+ p = ppcg_print_exposed_declarations(p, ps);
+ hidden = ppcg_scop_any_hidden_declarations(ps);
+ if (hidden) {
+ p = ppcg_start_block(p);
+ p = ppcg_print_hidden_declarations(p, ps);
+ }
+ if (options->debug->dump_final_schedule)
+ isl_schedule_dump(ps->schedule);
+ p = print_scop(ps, p, options);
+ if (hidden)
+ p = ppcg_end_block(p);
+
+ return p;
+}
+
+/* Wrapper around print_cpu for use as a ppcg_transform callback.
+ */
+static __isl_give isl_printer *print_cpu_wrap(__isl_take isl_printer *p,
+ struct ppcg_scop *scop, void *user)
+{
+ struct ppcg_options *options = user;
+
+ return print_cpu(p, scop, options);
+}
+
+/* Transform the code in the file called "input" by replacing
+ * all scops by corresponding CPU code and write the results to a file
+ * called "output".
+ */
+int generate_cpu(isl_ctx *ctx, struct ppcg_options *options,
+ const char *input, const char *output)
+{
+ FILE *output_file;
+ int r;
+
+ output_file = get_output_file(input, output);
+ if (!output_file)
+ return -1;
+
+ r = ppcg_transform(ctx, input, output_file, options,
+ &print_cpu_wrap, options);
+
+ fclose(output_file);
+
+ return r;
+}
OpenPOWER on IntegriCloud