diff options
| -rw-r--r-- | polly/lib/CodeGen/PPCGCodeGeneration.cpp | 43 | ||||
| -rw-r--r-- | polly/lib/External/ppcg/gpu.c | 4 | ||||
| -rw-r--r-- | polly/lib/External/ppcg/gpu.h | 3 | ||||
| -rw-r--r-- | polly/test/GPGPU/double-parallel-loop.ll | 30 | 
4 files changed, 73 insertions, 7 deletions
diff --git a/polly/lib/CodeGen/PPCGCodeGeneration.cpp b/polly/lib/CodeGen/PPCGCodeGeneration.cpp index cd84d3c2090..b61fffb9139 100644 --- a/polly/lib/CodeGen/PPCGCodeGeneration.cpp +++ b/polly/lib/CodeGen/PPCGCodeGeneration.cpp @@ -155,6 +155,38 @@ public:      return getTaggedAccesses(MemoryAccess::MUST_WRITE);    } +  /// Collect parameter and array names as isl_ids. +  /// +  /// To reason about the different parameters and arrays used, ppcg requires +  /// a list of all isl_ids in use. As PPCG traditionally performs +  /// source-to-source compilation each of these isl_ids is mapped to the +  /// expression that represents it. As we do not have a corresponding +  /// expression in Polly, we just map each id to a 'zero' expression to match +  /// the data format that ppcg expects. +  /// +  /// @returns A map from collected ids to 'zero' ast expressions. +  __isl_give isl_id_to_ast_expr *getNames() { +    auto *Names = isl_id_to_ast_expr_alloc( +        S->getIslCtx(), S->getNumParams() + std::distance(S->array_begin(), S->array_end())); +    auto *Zero = isl_ast_expr_from_val(isl_val_zero(S->getIslCtx())); +    auto *Space = S->getParamSpace(); + +    for (int I = 0, E = S->getNumParams(); I < E; ++I) { +      isl_id *Id = isl_space_get_dim_id(Space, isl_dim_param, I); +      Names = isl_id_to_ast_expr_set(Names, Id, isl_ast_expr_copy(Zero)); +    } + +    for (auto &Array : S->arrays()) { +      auto Id = Array.second->getBasePtrId(); +      Names = isl_id_to_ast_expr_set(Names, Id, isl_ast_expr_copy(Zero)); +    } + +    isl_space_free(Space); +    isl_ast_expr_free(Zero); + +    return Names; +  } +    /// Create a new PPCG scop from the current scop.    ///    /// The PPCG scop is initialized with data from the current polly::Scop. 
From @@ -194,7 +226,7 @@ public:      PPCGScop->tagged_dep_order = nullptr;      PPCGScop->schedule = S->getScheduleTree(); -    PPCGScop->names = nullptr; +    PPCGScop->names = getNames();      PPCGScop->pet = nullptr; @@ -216,7 +248,7 @@ public:      PPCGProg->ctx = S->getIslCtx();      PPCGProg->scop = PPCGScop; -    PPCGProg->context = nullptr; +    PPCGProg->context = isl_set_copy(PPCGScop->context);      PPCGProg->read = nullptr;      PPCGProg->may_write = nullptr;      PPCGProg->must_write = nullptr; @@ -267,6 +299,13 @@ public:      isl_schedule *Schedule = get_schedule(PPCGGen); +    int has_permutable = has_any_permutable_node(Schedule); + +    if (!has_permutable || has_permutable < 0) +      Schedule = isl_schedule_free(Schedule); +    else +      Schedule = map_to_device(PPCGGen, Schedule); +      if (DumpSchedule) {        isl_printer *P = isl_printer_to_str(S->getIslCtx());        P = isl_printer_set_yaml_style(P, ISL_YAML_STYLE_BLOCK); diff --git a/polly/lib/External/ppcg/gpu.c b/polly/lib/External/ppcg/gpu.c index 05f277cf3c6..3eecf45656c 100644 --- a/polly/lib/External/ppcg/gpu.c +++ b/polly/lib/External/ppcg/gpu.c @@ -2375,7 +2375,7 @@ static isl_bool set_permutable(__isl_keep isl_schedule_node *node, void *user)  /* Does "schedule" contain any permutable band with at least one coincident   * member?   */ -static int has_any_permutable_node(__isl_keep isl_schedule *schedule) +int has_any_permutable_node(__isl_keep isl_schedule *schedule)  {  	int any_permutable = 0; @@ -4938,7 +4938,7 @@ static __isl_give isl_schedule_node *add_to_from_device(   * are separated from the other children and are not mapped to   * the device.   
*/ -static __isl_give isl_schedule *map_to_device(struct gpu_gen *gen, +__isl_give isl_schedule *map_to_device(struct gpu_gen *gen,  	__isl_take isl_schedule *schedule)  {  	isl_schedule_node *node; diff --git a/polly/lib/External/ppcg/gpu.h b/polly/lib/External/ppcg/gpu.h index d06ddb28f3e..c5009c0b2c2 100644 --- a/polly/lib/External/ppcg/gpu.h +++ b/polly/lib/External/ppcg/gpu.h @@ -353,4 +353,7 @@ int generate_gpu(isl_ctx *ctx, const char *input, FILE *out,  		struct gpu_types *types, void *user), void *user);  __isl_give isl_schedule *get_schedule(struct gpu_gen *gen); +int has_any_permutable_node(__isl_keep isl_schedule *schedule); +__isl_give isl_schedule *map_to_device(struct gpu_gen *gen, +                                       __isl_take isl_schedule *schedule);  #endif diff --git a/polly/test/GPGPU/double-parallel-loop.ll b/polly/test/GPGPU/double-parallel-loop.ll index 4c1bc9551b9..7ae5010abd1 100644 --- a/polly/test/GPGPU/double-parallel-loop.ll +++ b/polly/test/GPGPU/double-parallel-loop.ll @@ -17,9 +17,33 @@  ; SCHED: domain: "{ Stmt_bb5[i0, i1] : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 }"  ; SCHED: child: -; SCHED:   schedule: "[{ Stmt_bb5[i0, i1] -> [(i0)] }, { Stmt_bb5[i0, i1] -> [(i1)] }]" -; SCHED:   permutable: 1 -; SCHED:   coincident: [ 1, 1 ] +; SCHED:   context: "{ [] }" +; SCHED:   child: +; SCHED:     extension: "{  }" +; SCHED:     child: +; SCHED:       sequence: +; SCHED:       - filter: "{  }" +; SCHED:       - filter: "{ Stmt_bb5[i0, i1] }" +; SCHED:         child: +; SCHED:           guard: "{ [] }" +; SCHED:           child: +; SCHED:             mark: "kernel" +; SCHED:             child: +; SCHED:               context: "[b0, b1, t0, t1] -> { [] : 0 <= b0 <= 255 and 0 <= b1 <= 255 and 0 <= t0 <= 3 and 0 <= t1 <= 3 }" +; SCHED:               child: +; SCHED:                 filter: "[b0, b1] -> { Stmt_bb5[i0, i1] : -3 - 4b0 + i0 <= 1024*floor((i0)/1024) <= -4b0 + i0 and -3 - 4b1 + i1 <= 1024*floor((i1)/1024) <= -4b1 + i1 }" +; SCHED:   
              child: +; SCHED:                   schedule: "[{ Stmt_bb5[i0, i1] -> [(floor((i0)/1024))] }, { Stmt_bb5[i0, i1] -> [(floor((i1)/1024))] }]" +; SCHED:                   permutable: 1 +; SCHED:                   coincident: [ 1, 1 ] +; SCHED:                   child: +; SCHED:                     filter: "[t0, t1] -> { Stmt_bb5[i0, i1] : 4*floor((-t0 + i0)/4) = -t0 + i0 and 4*floor((-t1 + i1)/4) = -t1 + i1 and 0 <= t0 <= 3 and 0 <= t1 <= 3 }" +; SCHED:                     child: +; SCHED:                       schedule: "[{ Stmt_bb5[i0, i1] -> [(0)] }, { Stmt_bb5[i0, i1] -> [(0)] }]" +; SCHED:                       permutable: 1 +; SCHED:                       coincident: [ 1, 1 ] +; SCHED:       - filter: "{  }" +  ;    void double_parallel_loop(float A[][1024]) {  ;      for (long i = 0; i < 1024; i++)  | 

