-rw-r--r-- polly/lib/CodeGen/PPCGCodeGeneration.cpp | 159
-rw-r--r-- polly/lib/External/ppcg/gpu.c            |   2
-rw-r--r-- polly/lib/External/ppcg/gpu.h            |   1
-rw-r--r-- polly/lib/External/ppcg/ppcg.c           |   4
-rw-r--r-- polly/lib/External/ppcg/ppcg.h           |   2
-rw-r--r-- polly/test/GPGPU/double-parallel-loop.ll |  13
6 files changed, 164 insertions, 17 deletions
diff --git a/polly/lib/CodeGen/PPCGCodeGeneration.cpp b/polly/lib/CodeGen/PPCGCodeGeneration.cpp
index 995957b75c4..43a651aebf5 100644
--- a/polly/lib/CodeGen/PPCGCodeGeneration.cpp
+++ b/polly/lib/CodeGen/PPCGCodeGeneration.cpp
@@ -15,6 +15,7 @@
 #include "polly/CodeGen/IslNodeBuilder.h"
 #include "polly/DependenceInfo.h"
 #include "polly/LinkAllPasses.h"
+#include "polly/Options.h"
 #include "polly/ScopInfo.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/BasicAliasAnalysis.h"
@@ -22,6 +23,8 @@
 #include "llvm/Analysis/PostDominators.h"
 #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
 
+#include "isl/union_map.h"
+
 extern "C" {
 #include "gpu.h"
 #include "ppcg.h"
@@ -34,6 +37,11 @@ using namespace llvm;
 
 #define DEBUG_TYPE "polly-codegen-ppcg"
 
+static cl::opt<bool> DumpSchedule("polly-acc-dump-schedule",
+                                  cl::desc("Dump the computed GPU Schedule"),
+                                  cl::Hidden, cl::init(true), cl::ZeroOrMore,
+                                  cl::cat(PollyCategory));
+
 namespace {
 class PPCGCodeGeneration : public ScopPass {
 public:
@@ -89,10 +97,70 @@ public:
     return Options;
   }
 
+  /// Get a tagged access relation containing all accesses of type @p AccessTy.
+  ///
+  /// Instead of a normal access of the form:
+  ///
+  ///   Stmt[i,j,k] -> Array[f_0(i,j,k), f_1(i,j,k)]
+  ///
+  /// a tagged access has the form
+  ///
+  ///   [Stmt[i,j,k] -> id[]] -> Array[f_0(i,j,k), f_1(i,j,k)]
+  ///
+  /// where 'id' is an additional space that references the memory access that
+  /// triggered the access.
+  ///
+  /// @param AccessTy The type of the memory accesses to collect.
+  ///
+  /// @return The relation describing all tagged memory accesses.
+  isl_union_map *getTaggedAccesses(enum MemoryAccess::AccessType AccessTy) {
+    isl_union_map *Accesses = isl_union_map_empty(S->getParamSpace());
+
+    for (auto &Stmt : *S)
+      for (auto &Acc : Stmt)
+        if (Acc->getType() == AccessTy) {
+          isl_map *Relation = Acc->getAccessRelation();
+          Relation = isl_map_intersect_domain(Relation, Stmt.getDomain());
+
+          isl_space *Space = isl_map_get_space(Relation);
+          Space = isl_space_range(Space);
+          Space = isl_space_from_range(Space);
+          isl_map *Universe = isl_map_universe(Space);
+          Relation = isl_map_domain_product(Relation, Universe);
+          Accesses = isl_union_map_add_map(Accesses, Relation);
+        }
+
+    return Accesses;
+  }
+
+  /// Get the set of all read accesses, tagged with the access id.
+  ///
+  /// @see getTaggedAccesses
+  isl_union_map *getTaggedReads() {
+    return getTaggedAccesses(MemoryAccess::READ);
+  }
+
+  /// Get the set of all may (and must) accesses, tagged with the access id.
+  ///
+  /// @see getTaggedAccesses
+  isl_union_map *getTaggedMayWrites() {
+    return isl_union_map_union(getTaggedAccesses(MemoryAccess::MAY_WRITE),
+                               getTaggedAccesses(MemoryAccess::MUST_WRITE));
+  }
+
+  /// Get the set of all must accesses, tagged with the access id.
+  ///
+  /// @see getTaggedAccesses
+  isl_union_map *getTaggedMustWrites() {
+    return getTaggedAccesses(MemoryAccess::MUST_WRITE);
+  }
+
   /// Create a new PPCG scop from the current scop.
   ///
-  /// For now the created scop is initialized to 'zero' and does not contain
-  /// any scop-specific information.
+  /// The PPCG scop is initialized with data from the current polly::Scop. From
+  /// this initial data, the data-dependences in the PPCG scop are initialized.
+  /// We do not use Polly's dependence analysis for now, to ensure we match
+  /// the PPCG default behaviour more closely.
   ///
   /// @returns A new ppcg scop.
   ppcg_scop *createPPCGScop() {
@@ -103,18 +171,18 @@ public:
     PPCGScop->start = 0;
     PPCGScop->end = 0;
 
-    PPCGScop->context = nullptr;
-    PPCGScop->domain = nullptr;
+    PPCGScop->context = S->getContext();
+    PPCGScop->domain = S->getDomains();
     PPCGScop->call = nullptr;
-    PPCGScop->tagged_reads = nullptr;
-    PPCGScop->reads = nullptr;
+    PPCGScop->tagged_reads = getTaggedReads();
+    PPCGScop->reads = S->getReads();
     PPCGScop->live_in = nullptr;
-    PPCGScop->tagged_may_writes = nullptr;
-    PPCGScop->may_writes = nullptr;
-    PPCGScop->tagged_must_writes = nullptr;
-    PPCGScop->must_writes = nullptr;
+    PPCGScop->tagged_may_writes = getTaggedMayWrites();
+    PPCGScop->may_writes = S->getWrites();
+    PPCGScop->tagged_must_writes = getTaggedMustWrites();
+    PPCGScop->must_writes = S->getMustWrites();
     PPCGScop->live_out = nullptr;
-    PPCGScop->tagged_must_kills = nullptr;
+    PPCGScop->tagged_must_kills = isl_union_map_empty(S->getParamSpace());
     PPCGScop->tagger = nullptr;
 
     PPCGScop->independence = nullptr;
@@ -125,11 +193,14 @@ public:
     PPCGScop->dep_order = nullptr;
     PPCGScop->tagged_dep_order = nullptr;
 
-    PPCGScop->schedule = nullptr;
+    PPCGScop->schedule = S->getScheduleTree();
     PPCGScop->names = nullptr;
 
     PPCGScop->pet = nullptr;
 
+    compute_tagger(PPCGScop);
+    compute_dependences(PPCGScop);
+
     return PPCGScop;
   }
 
@@ -163,11 +234,75 @@ public:
     return PPCGProg;
   }
 
+  // Generate a GPU program using PPCG.
+  //
+  // GPU mapping consists of multiple steps:
+  //
+  //  1) Compute new schedule for the program.
+  //  2) Map schedule to GPU (TODO)
+  //  3) Generate code for new schedule (TODO)
+  //
+  // We do not use here the Polly ScheduleOptimizer, as the schedule optimizer
+  // is mostly CPU specific. Instead, we use PPCG's GPU code generation
+  // strategy directly from this pass.
+  gpu_gen *generateGPU(ppcg_scop *PPCGScop, gpu_prog *PPCGProg) {
+
+    auto PPCGGen = isl_calloc_type(S->getIslCtx(), struct gpu_gen);
+
+    PPCGGen->ctx = S->getIslCtx();
+    PPCGGen->options = PPCGScop->options;
+    PPCGGen->print = nullptr;
+    PPCGGen->print_user = nullptr;
+    PPCGGen->prog = PPCGProg;
+    PPCGGen->tree = nullptr;
+    PPCGGen->types.n = 0;
+    PPCGGen->types.name = nullptr;
+    PPCGGen->sizes = nullptr;
+    PPCGGen->used_sizes = nullptr;
+    PPCGGen->kernel_id = 0;
+
+    // Set scheduling strategy to same strategy PPCG is using.
+    isl_options_set_schedule_outer_coincidence(PPCGGen->ctx, true);
+    isl_options_set_schedule_maximize_band_depth(PPCGGen->ctx, true);
+
+    isl_schedule *Schedule = get_schedule(PPCGGen);
+
+    if (DumpSchedule) {
+      isl_printer *P = isl_printer_to_str(S->getIslCtx());
+      P = isl_printer_set_yaml_style(P, ISL_YAML_STYLE_BLOCK);
+      P = isl_printer_print_str(P, "Schedule\n");
+      P = isl_printer_print_str(P, "========\n");
+      if (Schedule)
+        P = isl_printer_print_schedule(P, Schedule);
+      else
+        P = isl_printer_print_str(P, "No schedule found\n");
+
+      printf("%s\n", isl_printer_get_str(P));
+      isl_printer_free(P);
+    }
+
+    isl_schedule_free(Schedule);
+
+    return PPCGGen;
+  }
+
+  /// Free gpu_gen structure.
+  ///
+  /// @param PPCGGen The ppcg_gen object to free.
+  void freePPCGGen(gpu_gen *PPCGGen) {
+    isl_ast_node_free(PPCGGen->tree);
+    isl_union_map_free(PPCGGen->sizes);
+    isl_union_map_free(PPCGGen->used_sizes);
+    free(PPCGGen);
+  }
+
   bool runOnScop(Scop &CurrentScop) override {
     S = &CurrentScop;
 
     auto PPCGScop = createPPCGScop();
     auto PPCGProg = createPPCGProg(PPCGScop);
+    auto PPCGGen = generateGPU(PPCGScop, PPCGProg);
 
+    freePPCGGen(PPCGGen);
     gpu_prog_free(PPCGProg);
     ppcg_scop_free(PPCGScop);
diff --git a/polly/lib/External/ppcg/gpu.c b/polly/lib/External/ppcg/gpu.c
index 3aff938ff2d..05f277cf3c6 100644
--- a/polly/lib/External/ppcg/gpu.c
+++ b/polly/lib/External/ppcg/gpu.c
@@ -4265,7 +4265,7 @@ static __isl_give isl_schedule *determine_properties_original_schedule(
  * a file, by computing one or by determining the properties
  * of the original schedule.
  */
-static __isl_give isl_schedule *get_schedule(struct gpu_gen *gen)
+__isl_give isl_schedule *get_schedule(struct gpu_gen *gen)
 {
 	isl_schedule *schedule;
 
diff --git a/polly/lib/External/ppcg/gpu.h b/polly/lib/External/ppcg/gpu.h
index 204cf6b4472..d06ddb28f3e 100644
--- a/polly/lib/External/ppcg/gpu.h
+++ b/polly/lib/External/ppcg/gpu.h
@@ -352,4 +352,5 @@ int generate_gpu(isl_ctx *ctx, const char *input, FILE *out,
 		struct gpu_prog *prog, __isl_keep isl_ast_node *tree,
 		struct gpu_types *types, void *user), void *user);
 
+__isl_give isl_schedule *get_schedule(struct gpu_gen *gen);
 #endif
diff --git a/polly/lib/External/ppcg/ppcg.c b/polly/lib/External/ppcg/ppcg.c
index 1c514a65f14..92665001af6 100644
--- a/polly/lib/External/ppcg/ppcg.c
+++ b/polly/lib/External/ppcg/ppcg.c
@@ -336,7 +336,7 @@ static __isl_give isl_union_map *project_out_tags(
  *
  *	{ [S[i,j] -> R_1[]] -> S[i,j]; [S[i,j] -> R_2[]] -> S[i,j] }
  */
-static void compute_tagger(struct ppcg_scop *ps)
+void compute_tagger(struct ppcg_scop *ps)
 {
 	isl_union_map *tagged;
 	isl_union_pw_multi_aff *tagger;
@@ -694,7 +694,7 @@ static void compute_flow_dep(struct ppcg_scop *ps)
  * set of order dependences and a set of external false dependences
  * in compute_live_range_reordering_dependences.
  */
-static void compute_dependences(struct ppcg_scop *scop)
+void compute_dependences(struct ppcg_scop *scop)
 {
 	isl_union_map *may_source;
 	isl_union_access_info *access;
diff --git a/polly/lib/External/ppcg/ppcg.h b/polly/lib/External/ppcg/ppcg.h
index c03fc3c7bd9..ef4ab204955 100644
--- a/polly/lib/External/ppcg/ppcg.h
+++ b/polly/lib/External/ppcg/ppcg.h
@@ -114,6 +114,8 @@ int ppcg_transform(isl_ctx *ctx, const char *input, FILE *out,
 	__isl_give isl_printer *(*fn)(__isl_take isl_printer *p,
 		struct ppcg_scop *scop, void *user), void *user);
 
+void compute_tagger(struct ppcg_scop *ps);
+void compute_dependences(struct ppcg_scop *scop);
 void *ppcg_scop_free(struct ppcg_scop *ps);
 
 #endif
diff --git a/polly/test/GPGPU/double-parallel-loop.ll b/polly/test/GPGPU/double-parallel-loop.ll
index 35a12193033..4c1bc9551b9 100644
--- a/polly/test/GPGPU/double-parallel-loop.ll
+++ b/polly/test/GPGPU/double-parallel-loop.ll
@@ -1,5 +1,8 @@
 ; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s
-; RUN: opt %loadPolly -polly-codegen-ppcg -S < %s
+; RUN: opt %loadPolly -polly-codegen-ppcg -polly-acc-dump-schedule \
+; RUN: -disable-output < %s | \
+; RUN: FileCheck -check-prefix=SCHED %s
+
 ; REQUIRES: pollyacc
 
 ; CHECK: Stmt_bb5
@@ -11,7 +14,13 @@
 ; CHECK:       { Stmt_bb5[i0, i1] -> MemRef_A[i0, i1] };
 ; CHECK:   MustWriteAccess := [Reduction Type: NONE] [Scalar: 0]
 ; CHECK:       { Stmt_bb5[i0, i1] -> MemRef_A[i0, i1] };
-;
+
+; SCHED: domain: "{ Stmt_bb5[i0, i1] : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 }"
+; SCHED: child:
+; SCHED:   schedule: "[{ Stmt_bb5[i0, i1] -> [(i0)] }, { Stmt_bb5[i0, i1] -> [(i1)] }]"
+; SCHED:   permutable: 1
+; SCHED:   coincident: [ 1, 1 ]
+
 ; void double_parallel_loop(float A[][1024]) {
 ;   for (long i = 0; i < 1024; i++)
 ;     for (long j = 0; j < 1024; j++)
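Usage sketch (not part of the commit itself): the SCHED lines above are matched against the output of the -polly-acc-dump-schedule option introduced by this patch. Assuming an opt binary with the Polly plugin loaded (the test's %loadPolly substitution is spelled out below as "-load LLVMPolly.so" purely for illustration), the dump for this test input can be reproduced roughly as follows:

  opt -load LLVMPolly.so -polly-codegen-ppcg -polly-acc-dump-schedule \
      -disable-output double-parallel-loop.ll

The pass prints the isl schedule tree (domain, band schedule, permutable and coincident properties) computed by PPCG's get_schedule, which is what the SCHED check prefix inspects.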