summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--polly/lib/CodeGen/PPCGCodeGeneration.cpp43
-rw-r--r--polly/lib/External/ppcg/gpu.c4
-rw-r--r--polly/lib/External/ppcg/gpu.h3
-rw-r--r--polly/test/GPGPU/double-parallel-loop.ll30
4 files changed, 73 insertions, 7 deletions
diff --git a/polly/lib/CodeGen/PPCGCodeGeneration.cpp b/polly/lib/CodeGen/PPCGCodeGeneration.cpp
index cd84d3c2090..b61fffb9139 100644
--- a/polly/lib/CodeGen/PPCGCodeGeneration.cpp
+++ b/polly/lib/CodeGen/PPCGCodeGeneration.cpp
@@ -155,6 +155,38 @@ public:
return getTaggedAccesses(MemoryAccess::MUST_WRITE);
}
+ /// Collect parameter and array names as isl_ids.
+ ///
+ /// To reason about the different parameters and arrays used, PPCG requires
+ /// a list of all isl_ids in use. As PPCG traditionally performs
+ /// source-to-source compilation, each of these isl_ids is mapped to the
+ /// expression that represents it. As we do not have a corresponding
+ /// expression in Polly, we just map each id to a 'zero' expression to match
+ /// the data format that PPCG expects.
+ ///
+ /// @returns A map from the collected ids to 'zero' ast expressions.
+ __isl_give isl_id_to_ast_expr *getNames() {
+ auto *Names = isl_id_to_ast_expr_alloc(
+ S->getIslCtx(), S->getNumParams() + std::distance(S->array_begin(), S->array_end()));
+ auto *Zero = isl_ast_expr_from_val(isl_val_zero(S->getIslCtx()));
+ auto *Space = S->getParamSpace();
+
+ for (int I = 0, E = S->getNumParams(); I < E; ++I) {
+ isl_id *Id = isl_space_get_dim_id(Space, isl_dim_param, I);
+ Names = isl_id_to_ast_expr_set(Names, Id, isl_ast_expr_copy(Zero));
+ }
+
+ for (auto &Array : S->arrays()) {
+ auto Id = Array.second->getBasePtrId();
+ Names = isl_id_to_ast_expr_set(Names, Id, isl_ast_expr_copy(Zero));
+ }
+
+ isl_space_free(Space);
+ isl_ast_expr_free(Zero);
+
+ return Names;
+ }
+
/// Create a new PPCG scop from the current scop.
///
/// The PPCG scop is initialized with data from the current polly::Scop. From
@@ -194,7 +226,7 @@ public:
PPCGScop->tagged_dep_order = nullptr;
PPCGScop->schedule = S->getScheduleTree();
- PPCGScop->names = nullptr;
+ PPCGScop->names = getNames();
PPCGScop->pet = nullptr;
@@ -216,7 +248,7 @@ public:
PPCGProg->ctx = S->getIslCtx();
PPCGProg->scop = PPCGScop;
- PPCGProg->context = nullptr;
+ PPCGProg->context = isl_set_copy(PPCGScop->context);
PPCGProg->read = nullptr;
PPCGProg->may_write = nullptr;
PPCGProg->must_write = nullptr;
@@ -267,6 +299,13 @@ public:
isl_schedule *Schedule = get_schedule(PPCGGen);
+ int has_permutable = has_any_permutable_node(Schedule);
+
+ if (!has_permutable || has_permutable < 0)
+ Schedule = isl_schedule_free(Schedule);
+ else
+ Schedule = map_to_device(PPCGGen, Schedule);
+
if (DumpSchedule) {
isl_printer *P = isl_printer_to_str(S->getIslCtx());
P = isl_printer_set_yaml_style(P, ISL_YAML_STYLE_BLOCK);
diff --git a/polly/lib/External/ppcg/gpu.c b/polly/lib/External/ppcg/gpu.c
index 05f277cf3c6..3eecf45656c 100644
--- a/polly/lib/External/ppcg/gpu.c
+++ b/polly/lib/External/ppcg/gpu.c
@@ -2375,7 +2375,7 @@ static isl_bool set_permutable(__isl_keep isl_schedule_node *node, void *user)
/* Does "schedule" contain any permutable band with at least one coincident
* member?
*/
-static int has_any_permutable_node(__isl_keep isl_schedule *schedule)
+int has_any_permutable_node(__isl_keep isl_schedule *schedule)
{
int any_permutable = 0;
@@ -4938,7 +4938,7 @@ static __isl_give isl_schedule_node *add_to_from_device(
* are separated from the other children and are not mapped to
* the device.
*/
-static __isl_give isl_schedule *map_to_device(struct gpu_gen *gen,
+__isl_give isl_schedule *map_to_device(struct gpu_gen *gen,
__isl_take isl_schedule *schedule)
{
isl_schedule_node *node;
diff --git a/polly/lib/External/ppcg/gpu.h b/polly/lib/External/ppcg/gpu.h
index d06ddb28f3e..c5009c0b2c2 100644
--- a/polly/lib/External/ppcg/gpu.h
+++ b/polly/lib/External/ppcg/gpu.h
@@ -353,4 +353,7 @@ int generate_gpu(isl_ctx *ctx, const char *input, FILE *out,
struct gpu_types *types, void *user), void *user);
__isl_give isl_schedule *get_schedule(struct gpu_gen *gen);
+int has_any_permutable_node(__isl_keep isl_schedule *schedule);
+__isl_give isl_schedule *map_to_device(struct gpu_gen *gen,
+ __isl_take isl_schedule *schedule);
#endif
diff --git a/polly/test/GPGPU/double-parallel-loop.ll b/polly/test/GPGPU/double-parallel-loop.ll
index 4c1bc9551b9..7ae5010abd1 100644
--- a/polly/test/GPGPU/double-parallel-loop.ll
+++ b/polly/test/GPGPU/double-parallel-loop.ll
@@ -17,9 +17,33 @@
; SCHED: domain: "{ Stmt_bb5[i0, i1] : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 }"
; SCHED: child:
-; SCHED: schedule: "[{ Stmt_bb5[i0, i1] -> [(i0)] }, { Stmt_bb5[i0, i1] -> [(i1)] }]"
-; SCHED: permutable: 1
-; SCHED: coincident: [ 1, 1 ]
+; SCHED: context: "{ [] }"
+; SCHED: child:
+; SCHED: extension: "{ }"
+; SCHED: child:
+; SCHED: sequence:
+; SCHED: - filter: "{ }"
+; SCHED: - filter: "{ Stmt_bb5[i0, i1] }"
+; SCHED: child:
+; SCHED: guard: "{ [] }"
+; SCHED: child:
+; SCHED: mark: "kernel"
+; SCHED: child:
+; SCHED: context: "[b0, b1, t0, t1] -> { [] : 0 <= b0 <= 255 and 0 <= b1 <= 255 and 0 <= t0 <= 3 and 0 <= t1 <= 3 }"
+; SCHED: child:
+; SCHED: filter: "[b0, b1] -> { Stmt_bb5[i0, i1] : -3 - 4b0 + i0 <= 1024*floor((i0)/1024) <= -4b0 + i0 and -3 - 4b1 + i1 <= 1024*floor((i1)/1024) <= -4b1 + i1 }"
+; SCHED: child:
+; SCHED: schedule: "[{ Stmt_bb5[i0, i1] -> [(floor((i0)/1024))] }, { Stmt_bb5[i0, i1] -> [(floor((i1)/1024))] }]"
+; SCHED: permutable: 1
+; SCHED: coincident: [ 1, 1 ]
+; SCHED: child:
+; SCHED: filter: "[t0, t1] -> { Stmt_bb5[i0, i1] : 4*floor((-t0 + i0)/4) = -t0 + i0 and 4*floor((-t1 + i1)/4) = -t1 + i1 and 0 <= t0 <= 3 and 0 <= t1 <= 3 }"
+; SCHED: child:
+; SCHED: schedule: "[{ Stmt_bb5[i0, i1] -> [(0)] }, { Stmt_bb5[i0, i1] -> [(0)] }]"
+; SCHED: permutable: 1
+; SCHED: coincident: [ 1, 1 ]
+; SCHED: - filter: "{ }"
+
; void double_parallel_loop(float A[][1024]) {
; for (long i = 0; i < 1024; i++)
OpenPOWER on IntegriCloud