summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--polly/lib/CodeGen/PPCGCodeGeneration.cpp131
-rw-r--r--polly/lib/External/ppcg/cuda.c19
-rw-r--r--polly/lib/External/ppcg/cuda.h3
-rw-r--r--polly/lib/External/ppcg/gpu.c2
-rw-r--r--polly/lib/External/ppcg/gpu.h2
-rw-r--r--polly/test/GPGPU/double-parallel-loop.ll18
6 files changed, 161 insertions, 14 deletions
diff --git a/polly/lib/CodeGen/PPCGCodeGeneration.cpp b/polly/lib/CodeGen/PPCGCodeGeneration.cpp
index bb6ea8cd27e..24fe61b5f80 100644
--- a/polly/lib/CodeGen/PPCGCodeGeneration.cpp
+++ b/polly/lib/CodeGen/PPCGCodeGeneration.cpp
@@ -26,8 +26,11 @@
#include "isl/union_map.h"
extern "C" {
+#include "cuda.h"
#include "gpu.h"
+#include "gpu_print.h"
#include "ppcg.h"
+#include "schedule.h"
}
#include "llvm/Support/Debug.h"
@@ -41,6 +44,12 @@ static cl::opt<bool> DumpSchedule("polly-acc-dump-schedule",
cl::desc("Dump the computed GPU Schedule"),
cl::Hidden, cl::init(false), cl::ZeroOrMore,
cl::cat(PollyCategory));
+
+static cl::opt<bool>
+ DumpCode("polly-acc-dump-code",
+ cl::desc("Dump C code describing the GPU mapping"), cl::Hidden,
+ cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));
+
/// Create the ast expressions for a ScopStmt.
///
/// This function is a callback for to generate the ast expressions for each
@@ -256,6 +265,33 @@ public:
return PPCGScop;
}
+ /// Collect the list of GPU statements.
+ ///
+ /// Each statement has an id, a pointer to the underlying data structure,
+ /// as well as a list with all memory accesses.
+ ///
+ /// TODO: Initialize the list of memory accesses.
+ ///
+ /// @returns A linked-list of statements.
+ gpu_stmt *getStatements() {
+ gpu_stmt *Stmts = isl_calloc_array(S->getIslCtx(), struct gpu_stmt,
+ std::distance(S->begin(), S->end()));
+
+ int i = 0;
+ for (auto &Stmt : *S) {
+ gpu_stmt *GPUStmt = &Stmts[i];
+
+ GPUStmt->id = Stmt.getDomainId();
+
+ // We use the pet stmt pointer to keep track of the Polly statements.
+ GPUStmt->stmt = (pet_stmt *)&Stmt;
+ GPUStmt->accesses = nullptr;
+ i++;
+ }
+
+ return Stmts;
+ }
+
/// Create a default-initialized PPCG GPU program.
///
/// @returns A new gpu grogram description.
@@ -278,14 +314,90 @@ public:
PPCGProg->to_inner = nullptr;
PPCGProg->any_to_outer = nullptr;
PPCGProg->array_order = nullptr;
- PPCGProg->n_stmts = 0;
- PPCGProg->stmts = nullptr;
+ PPCGProg->n_stmts = std::distance(S->begin(), S->end());
+ PPCGProg->stmts = getStatements();
PPCGProg->n_array = 0;
PPCGProg->array = nullptr;
return PPCGProg;
}
+ struct PrintGPUUserData {
+ struct cuda_info *CudaInfo;
+ struct gpu_prog *PPCGProg;
+ std::vector<ppcg_kernel *> Kernels;
+ };
+
+ /// Print a user statement node in the host code.
+ ///
+ /// We use ppcg's printing facilities to print the actual statement and
+ /// additionally build up a list of all kernels that are encountered in the
+ /// host ast.
+ ///
+ /// @param P The printer to print to
+ /// @param Options The printing options to use
+ /// @param Node The node to print
+ /// @param User A user pointer to carry additional data. This pointer is
+ /// expected to be of type PrintGPUUserData.
+ ///
+ /// @returns A printer to which the output has been printed.
+ static __isl_give isl_printer *
+ printHostUser(__isl_take isl_printer *P,
+ __isl_take isl_ast_print_options *Options,
+ __isl_take isl_ast_node *Node, void *User) {
+ auto Data = (struct PrintGPUUserData *)User;
+ auto Id = isl_ast_node_get_annotation(Node);
+
+ if (Id) {
+ auto Kernel = (struct ppcg_kernel *)isl_id_get_user(Id);
+ isl_id_free(Id);
+ Data->Kernels.push_back(Kernel);
+ }
+
+ return print_host_user(P, Options, Node, User);
+ }
+
+ /// Print C code corresponding to the control flow in @p Kernel.
+ ///
+ /// @param Kernel The kernel to print
+ void printKernel(ppcg_kernel *Kernel) {
+ auto *P = isl_printer_to_str(S->getIslCtx());
+ P = isl_printer_set_output_format(P, ISL_FORMAT_C);
+ auto *Options = isl_ast_print_options_alloc(S->getIslCtx());
+ P = isl_ast_node_print(Kernel->tree, P, Options);
+ char *String = isl_printer_get_str(P);
+ printf("%s\n", String);
+ free(String);
+ isl_printer_free(P);
+ }
+
+ /// Print C code corresponding to the GPU code described by @p Tree.
+ ///
+ /// @param Tree An AST describing GPU code
+ /// @param PPCGProg The PPCG program from which @Tree has been constructed.
+ void printGPUTree(isl_ast_node *Tree, gpu_prog *PPCGProg) {
+ auto *P = isl_printer_to_str(S->getIslCtx());
+ P = isl_printer_set_output_format(P, ISL_FORMAT_C);
+
+ PrintGPUUserData Data;
+ Data.PPCGProg = PPCGProg;
+
+ auto *Options = isl_ast_print_options_alloc(S->getIslCtx());
+ Options =
+ isl_ast_print_options_set_print_user(Options, printHostUser, &Data);
+ P = isl_ast_node_print(Tree, P, Options);
+ char *String = isl_printer_get_str(P);
+ printf("# host\n");
+ printf("%s\n", String);
+ free(String);
+ isl_printer_free(P);
+
+ for (auto Kernel : Data.Kernels) {
+ printf("# kernel%d\n", Kernel->id);
+ printKernel(Kernel);
+ }
+ }
+
// Generate a GPU program using PPCG.
//
// GPU mapping consists of multiple steps:
@@ -322,10 +434,12 @@ public:
int has_permutable = has_any_permutable_node(Schedule);
- if (!has_permutable || has_permutable < 0)
+ if (!has_permutable || has_permutable < 0) {
Schedule = isl_schedule_free(Schedule);
- else
+ } else {
Schedule = map_to_device(PPCGGen, Schedule);
+ PPCGGen->tree = generate_code(PPCGGen, isl_schedule_copy(Schedule));
+ }
if (DumpSchedule) {
isl_printer *P = isl_printer_to_str(S->getIslCtx());
@@ -341,6 +455,15 @@ public:
isl_printer_free(P);
}
+ if (DumpCode) {
+ printf("Code\n");
+ printf("====\n");
+ if (PPCGGen->tree)
+ printGPUTree(PPCGGen->tree, PPCGProg);
+ else
+ printf("No code generated\n");
+ }
+
isl_schedule_free(Schedule);
return PPCGGen;
diff --git a/polly/lib/External/ppcg/cuda.c b/polly/lib/External/ppcg/cuda.c
index 3063f6df6a5..1b605f5089c 100644
--- a/polly/lib/External/ppcg/cuda.c
+++ b/polly/lib/External/ppcg/cuda.c
@@ -153,20 +153,20 @@ static __isl_give isl_printer *copy_array_from_device(
return p;
}
-static void print_reverse_list(FILE *out, int len, int *list)
+static isl_printer *print_reverse_list(isl_printer *p, int len, int *list)
{
int i;
if (len == 0)
- return;
+ return p;
- fprintf(out, "(");
+ p = isl_printer_print_str(p, "(");
for (i = 0; i < len; ++i) {
if (i)
- fprintf(out, ", ");
- fprintf(out, "%d", list[len - 1 - i]);
+ p = isl_printer_print_str(p, ", ");
+ p = isl_printer_print_int(p, list[len - 1 - i]);
}
- fprintf(out, ")");
+ return isl_printer_print_str(p, ")");
}
/* Print the effective grid size as a list of the sizes in each
@@ -534,7 +534,7 @@ struct print_host_user_data {
* In case of a kernel launch, print a block of statements that
* defines the grid and the block and then launches the kernel.
*/
-static __isl_give isl_printer *print_host_user(__isl_take isl_printer *p,
+__isl_give isl_printer *print_host_user(__isl_take isl_printer *p,
__isl_take isl_ast_print_options *print_options,
__isl_keep isl_ast_node *node, void *user)
{
@@ -569,8 +569,7 @@ static __isl_give isl_printer *print_host_user(__isl_take isl_printer *p,
p = isl_printer_print_str(p, "dim3 k");
p = isl_printer_print_int(p, kernel->id);
p = isl_printer_print_str(p, "_dimBlock");
- print_reverse_list(isl_printer_get_file(p),
- kernel->n_block, kernel->block_dim);
+ p = print_reverse_list(p, kernel->n_block, kernel->block_dim);
p = isl_printer_print_str(p, ";");
p = isl_printer_end_line(p);
@@ -600,7 +599,9 @@ static __isl_give isl_printer *print_host_user(__isl_take isl_printer *p,
p = isl_printer_start_line(p);
p = isl_printer_end_line(p);
+#if 0
print_kernel(data->prog, kernel, data->cuda);
+#endif
return p;
}
diff --git a/polly/lib/External/ppcg/cuda.h b/polly/lib/External/ppcg/cuda.h
index 89175fd0a8b..bd8dd3ddc2d 100644
--- a/polly/lib/External/ppcg/cuda.h
+++ b/polly/lib/External/ppcg/cuda.h
@@ -6,5 +6,8 @@
int generate_cuda(isl_ctx *ctx, struct ppcg_options *options,
const char *input);
+__isl_give isl_printer *print_host_user(__isl_take isl_printer *p,
+ __isl_take isl_ast_print_options *print_options,
+ __isl_keep isl_ast_node *node, void *user);
#endif
diff --git a/polly/lib/External/ppcg/gpu.c b/polly/lib/External/ppcg/gpu.c
index 7bda56a4648..218b918ddfe 100644
--- a/polly/lib/External/ppcg/gpu.c
+++ b/polly/lib/External/ppcg/gpu.c
@@ -2297,7 +2297,7 @@ static isl_bool update_depth(__isl_keep isl_schedule_node *node, void *user)
* The ASTs for the device code are embedded in ppcg_kernel objects
* attached to the leaf nodes that call "kernel".
*/
-static __isl_give isl_ast_node *generate_code(struct gpu_gen *gen,
+__isl_give isl_ast_node *generate_code(struct gpu_gen *gen,
__isl_take isl_schedule *schedule)
{
struct ppcg_at_domain_data data;
diff --git a/polly/lib/External/ppcg/gpu.h b/polly/lib/External/ppcg/gpu.h
index 7038901f1d7..7d617de3fab 100644
--- a/polly/lib/External/ppcg/gpu.h
+++ b/polly/lib/External/ppcg/gpu.h
@@ -367,4 +367,6 @@ __isl_give isl_schedule *get_schedule(struct gpu_gen *gen);
int has_any_permutable_node(__isl_keep isl_schedule *schedule);
__isl_give isl_schedule *map_to_device(struct gpu_gen *gen,
__isl_take isl_schedule *schedule);
+__isl_give isl_ast_node *generate_code(struct gpu_gen *gen,
+ __isl_take isl_schedule *schedule);
#endif
diff --git a/polly/test/GPGPU/double-parallel-loop.ll b/polly/test/GPGPU/double-parallel-loop.ll
index 963a411ce37..46aab52c43f 100644
--- a/polly/test/GPGPU/double-parallel-loop.ll
+++ b/polly/test/GPGPU/double-parallel-loop.ll
@@ -3,6 +3,10 @@
; RUN: -disable-output < %s | \
; RUN: FileCheck -check-prefix=SCHED %s
+; RUN: opt %loadPolly -polly-codegen-ppcg -polly-acc-dump-code \
+; RUN: -disable-output < %s | \
+; RUN: FileCheck -check-prefix=CODE %s
+
; REQUIRES: pollyacc
; CHECK: Stmt_bb5
@@ -44,6 +48,20 @@
; SCHED: coincident: [ 1, 1 ]
; SCHED: - filter: "{ }"
+; CODE: Code
+; CODE: ====
+; CODE: # host
+; CODE: {
+; CODE: dim3 k0_dimBlock(16, 32);
+; CODE: dim3 k0_dimGrid(32, 32);
+; CODE: kernel0 <<<k0_dimGrid, k0_dimBlock>>> ();
+; CODE: }
+
+; CODE: # kernel0
+; CODE: for (int c3 = 0; c3 <= 1; c3 += 1)
+; CODE: Stmt_bb5(32 * b0 + t0, 32 * b1 + t1 + 16 * c3);
+
+
; void double_parallel_loop(float A[][1024]) {
; for (long i = 0; i < 1024; i++)
; for (long j = 0; j < 1024; j++)
OpenPOWER on IntegriCloud