summaryrefslogtreecommitdiffstats
path: root/polly/lib/External/ppcg/cuda.c
diff options
context:
space:
mode:
Diffstat (limited to 'polly/lib/External/ppcg/cuda.c')
-rw-r--r--polly/lib/External/ppcg/cuda.c704
1 files changed, 704 insertions, 0 deletions
diff --git a/polly/lib/External/ppcg/cuda.c b/polly/lib/External/ppcg/cuda.c
new file mode 100644
index 00000000000..3063f6df6a5
--- /dev/null
+++ b/polly/lib/External/ppcg/cuda.c
@@ -0,0 +1,704 @@
+/*
+ * Copyright 2012 Ecole Normale Superieure
+ *
+ * Use of this software is governed by the MIT license
+ *
+ * Written by Sven Verdoolaege,
+ * Ecole Normale Superieure, 45 rue d’Ulm, 75230 Paris, France
+ */
+
+#include <isl/aff.h>
+#include <isl/ast.h>
+
+#include "cuda_common.h"
+#include "cuda.h"
+#include "gpu.h"
+#include "gpu_print.h"
+#include "print.h"
+#include "util.h"
+
+static __isl_give isl_printer *print_cuda_macros(__isl_take isl_printer *p)
+{
+ const char *macros =
+ "#define cudaCheckReturn(ret) \\\n"
+ " do { \\\n"
+ " cudaError_t cudaCheckReturn_e = (ret); \\\n"
+ " if (cudaCheckReturn_e != cudaSuccess) { \\\n"
+ " fprintf(stderr, \"CUDA error: %s\\n\", "
+ "cudaGetErrorString(cudaCheckReturn_e)); \\\n"
+ " fflush(stderr); \\\n"
+ " } \\\n"
+ " assert(cudaCheckReturn_e == cudaSuccess); \\\n"
+ " } while(0)\n"
+ "#define cudaCheckKernel() \\\n"
+ " do { \\\n"
+ " cudaCheckReturn(cudaGetLastError()); \\\n"
+ " } while(0)\n\n";
+
+ p = isl_printer_print_str(p, macros);
+ return p;
+}
+
+/* Print a declaration for the device array corresponding to "array" on "p".
+ */
+static __isl_give isl_printer *declare_device_array(__isl_take isl_printer *p,
+ struct gpu_array_info *array)
+{
+ int i;
+
+ p = isl_printer_start_line(p);
+ p = isl_printer_print_str(p, array->type);
+ p = isl_printer_print_str(p, " ");
+ if (!array->linearize && array->n_index > 1)
+ p = isl_printer_print_str(p, "(");
+ p = isl_printer_print_str(p, "*dev_");
+ p = isl_printer_print_str(p, array->name);
+ if (!array->linearize && array->n_index > 1) {
+ p = isl_printer_print_str(p, ")");
+ for (i = 1; i < array->n_index; i++) {
+ p = isl_printer_print_str(p, "[");
+ p = isl_printer_print_pw_aff(p, array->bound[i]);
+ p = isl_printer_print_str(p, "]");
+ }
+ }
+ p = isl_printer_print_str(p, ";");
+ p = isl_printer_end_line(p);
+
+ return p;
+}
+
+static __isl_give isl_printer *declare_device_arrays(__isl_take isl_printer *p,
+ struct gpu_prog *prog)
+{
+ int i;
+
+ for (i = 0; i < prog->n_array; ++i) {
+ if (!gpu_array_requires_device_allocation(&prog->array[i]))
+ continue;
+
+ p = declare_device_array(p, &prog->array[i]);
+ }
+ p = isl_printer_start_line(p);
+ p = isl_printer_end_line(p);
+ return p;
+}
+
+static __isl_give isl_printer *allocate_device_arrays(
+ __isl_take isl_printer *p, struct gpu_prog *prog)
+{
+ int i;
+
+ for (i = 0; i < prog->n_array; ++i) {
+ if (!gpu_array_requires_device_allocation(&prog->array[i]))
+ continue;
+ p = isl_printer_start_line(p);
+ p = isl_printer_print_str(p,
+ "cudaCheckReturn(cudaMalloc((void **) &dev_");
+ p = isl_printer_print_str(p, prog->array[i].name);
+ p = isl_printer_print_str(p, ", ");
+ p = gpu_array_info_print_size(p, &prog->array[i]);
+ p = isl_printer_print_str(p, "));");
+ p = isl_printer_end_line(p);
+ }
+ p = isl_printer_start_line(p);
+ p = isl_printer_end_line(p);
+ return p;
+}
+
+/* Print code to "p" for copying "array" from the host to the device
+ * in its entirety. The bounds on the extent of "array" have
+ * been precomputed in extract_array_info and are used in
+ * gpu_array_info_print_size.
+ */
+static __isl_give isl_printer *copy_array_to_device(__isl_take isl_printer *p,
+ struct gpu_array_info *array)
+{
+ p = isl_printer_start_line(p);
+ p = isl_printer_print_str(p, "cudaCheckReturn(cudaMemcpy(dev_");
+ p = isl_printer_print_str(p, array->name);
+ p = isl_printer_print_str(p, ", ");
+
+ if (gpu_array_is_scalar(array))
+ p = isl_printer_print_str(p, "&");
+ p = isl_printer_print_str(p, array->name);
+ p = isl_printer_print_str(p, ", ");
+
+ p = gpu_array_info_print_size(p, array);
+ p = isl_printer_print_str(p, ", cudaMemcpyHostToDevice));");
+ p = isl_printer_end_line(p);
+
+ return p;
+}
+
+/* Print code to "p" for copying "array" back from the device to the host
+ * in its entirety. The bounds on the extent of "array" have
+ * been precomputed in extract_array_info and are used in
+ * gpu_array_info_print_size.
+ */
+static __isl_give isl_printer *copy_array_from_device(
+ __isl_take isl_printer *p, struct gpu_array_info *array)
+{
+ p = isl_printer_start_line(p);
+ p = isl_printer_print_str(p, "cudaCheckReturn(cudaMemcpy(");
+ if (gpu_array_is_scalar(array))
+ p = isl_printer_print_str(p, "&");
+ p = isl_printer_print_str(p, array->name);
+ p = isl_printer_print_str(p, ", dev_");
+ p = isl_printer_print_str(p, array->name);
+ p = isl_printer_print_str(p, ", ");
+ p = gpu_array_info_print_size(p, array);
+ p = isl_printer_print_str(p, ", cudaMemcpyDeviceToHost));");
+ p = isl_printer_end_line(p);
+
+ return p;
+}
+
+static void print_reverse_list(FILE *out, int len, int *list)
+{
+ int i;
+
+ if (len == 0)
+ return;
+
+ fprintf(out, "(");
+ for (i = 0; i < len; ++i) {
+ if (i)
+ fprintf(out, ", ");
+ fprintf(out, "%d", list[len - 1 - i]);
+ }
+ fprintf(out, ")");
+}
+
+/* Print the effective grid size as a list of the sizes in each
+ * dimension, from innermost to outermost.
+ */
+static __isl_give isl_printer *print_grid_size(__isl_take isl_printer *p,
+ struct ppcg_kernel *kernel)
+{
+ int i;
+ int dim;
+
+ dim = isl_multi_pw_aff_dim(kernel->grid_size, isl_dim_set);
+ if (dim == 0)
+ return p;
+
+ p = isl_printer_print_str(p, "(");
+ for (i = dim - 1; i >= 0; --i) {
+ isl_pw_aff *bound;
+
+ bound = isl_multi_pw_aff_get_pw_aff(kernel->grid_size, i);
+ p = isl_printer_print_pw_aff(p, bound);
+ isl_pw_aff_free(bound);
+
+ if (i > 0)
+ p = isl_printer_print_str(p, ", ");
+ }
+
+ p = isl_printer_print_str(p, ")");
+
+ return p;
+}
+
+/* Print the grid definition.
+ */
+static __isl_give isl_printer *print_grid(__isl_take isl_printer *p,
+ struct ppcg_kernel *kernel)
+{
+ p = isl_printer_start_line(p);
+ p = isl_printer_print_str(p, "dim3 k");
+ p = isl_printer_print_int(p, kernel->id);
+ p = isl_printer_print_str(p, "_dimGrid");
+ p = print_grid_size(p, kernel);
+ p = isl_printer_print_str(p, ";");
+ p = isl_printer_end_line(p);
+
+ return p;
+}
+
+/* Print the arguments to a kernel declaration or call. If "types" is set,
+ * then print a declaration (including the types of the arguments).
+ *
+ * The arguments are printed in the following order
+ * - the arrays accessed by the kernel
+ * - the parameters
+ * - the host loop iterators
+ */
+static __isl_give isl_printer *print_kernel_arguments(__isl_take isl_printer *p,
+ struct gpu_prog *prog, struct ppcg_kernel *kernel, int types)
+{
+ int i, n;
+ int first = 1;
+ unsigned nparam;
+ isl_space *space;
+ const char *type;
+
+ for (i = 0; i < prog->n_array; ++i) {
+ int required;
+
+ required = ppcg_kernel_requires_array_argument(kernel, i);
+ if (required < 0)
+ return isl_printer_free(p);
+ if (!required)
+ continue;
+
+ if (!first)
+ p = isl_printer_print_str(p, ", ");
+
+ if (types)
+ p = gpu_array_info_print_declaration_argument(p,
+ &prog->array[i], NULL);
+ else
+ p = gpu_array_info_print_call_argument(p,
+ &prog->array[i]);
+
+ first = 0;
+ }
+
+ space = isl_union_set_get_space(kernel->arrays);
+ nparam = isl_space_dim(space, isl_dim_param);
+ for (i = 0; i < nparam; ++i) {
+ const char *name;
+
+ name = isl_space_get_dim_name(space, isl_dim_param, i);
+
+ if (!first)
+ p = isl_printer_print_str(p, ", ");
+ if (types)
+ p = isl_printer_print_str(p, "int ");
+ p = isl_printer_print_str(p, name);
+
+ first = 0;
+ }
+ isl_space_free(space);
+
+ n = isl_space_dim(kernel->space, isl_dim_set);
+ type = isl_options_get_ast_iterator_type(prog->ctx);
+ for (i = 0; i < n; ++i) {
+ const char *name;
+
+ if (!first)
+ p = isl_printer_print_str(p, ", ");
+ name = isl_space_get_dim_name(kernel->space, isl_dim_set, i);
+ if (types) {
+ p = isl_printer_print_str(p, type);
+ p = isl_printer_print_str(p, " ");
+ }
+ p = isl_printer_print_str(p, name);
+
+ first = 0;
+ }
+
+ return p;
+}
+
+/* Print the header of the given kernel.
+ */
+static __isl_give isl_printer *print_kernel_header(__isl_take isl_printer *p,
+ struct gpu_prog *prog, struct ppcg_kernel *kernel)
+{
+ p = isl_printer_start_line(p);
+ p = isl_printer_print_str(p, "__global__ void kernel");
+ p = isl_printer_print_int(p, kernel->id);
+ p = isl_printer_print_str(p, "(");
+ p = print_kernel_arguments(p, prog, kernel, 1);
+ p = isl_printer_print_str(p, ")");
+
+ return p;
+}
+
+/* Print the header of the given kernel to both gen->cuda.kernel_h
+ * and gen->cuda.kernel_c.
+ */
+static void print_kernel_headers(struct gpu_prog *prog,
+ struct ppcg_kernel *kernel, struct cuda_info *cuda)
+{
+ isl_printer *p;
+
+ p = isl_printer_to_file(prog->ctx, cuda->kernel_h);
+ p = isl_printer_set_output_format(p, ISL_FORMAT_C);
+ p = print_kernel_header(p, prog, kernel);
+ p = isl_printer_print_str(p, ";");
+ p = isl_printer_end_line(p);
+ isl_printer_free(p);
+
+ p = isl_printer_to_file(prog->ctx, cuda->kernel_c);
+ p = isl_printer_set_output_format(p, ISL_FORMAT_C);
+ p = print_kernel_header(p, prog, kernel);
+ p = isl_printer_end_line(p);
+ isl_printer_free(p);
+}
+
+static void print_indent(FILE *dst, int indent)
+{
+ fprintf(dst, "%*s", indent, "");
+}
+
+/* Print a list of iterators of type "type" with names "ids" to "out".
+ * Each iterator is assigned one of the cuda identifiers in cuda_dims.
+ * In particular, the last iterator is assigned the x identifier
+ * (the first in the list of cuda identifiers).
+ */
+static void print_iterators(FILE *out, const char *type,
+ __isl_keep isl_id_list *ids, const char *cuda_dims[])
+{
+ int i, n;
+
+ n = isl_id_list_n_id(ids);
+ if (n <= 0)
+ return;
+ print_indent(out, 4);
+ fprintf(out, "%s ", type);
+ for (i = 0; i < n; ++i) {
+ isl_id *id;
+
+ if (i)
+ fprintf(out, ", ");
+ id = isl_id_list_get_id(ids, i);
+ fprintf(out, "%s = %s", isl_id_get_name(id),
+ cuda_dims[n - 1 - i]);
+ isl_id_free(id);
+ }
+ fprintf(out, ";\n");
+}
+
+static void print_kernel_iterators(FILE *out, struct ppcg_kernel *kernel)
+{
+ isl_ctx *ctx = isl_ast_node_get_ctx(kernel->tree);
+ const char *type;
+ const char *block_dims[] = { "blockIdx.x", "blockIdx.y" };
+ const char *thread_dims[] = { "threadIdx.x", "threadIdx.y",
+ "threadIdx.z" };
+
+ type = isl_options_get_ast_iterator_type(ctx);
+
+ print_iterators(out, type, kernel->block_ids, block_dims);
+ print_iterators(out, type, kernel->thread_ids, thread_dims);
+}
+
+static __isl_give isl_printer *print_kernel_var(__isl_take isl_printer *p,
+ struct ppcg_kernel_var *var)
+{
+ int j;
+
+ p = isl_printer_start_line(p);
+ if (var->type == ppcg_access_shared)
+ p = isl_printer_print_str(p, "__shared__ ");
+ p = isl_printer_print_str(p, var->array->type);
+ p = isl_printer_print_str(p, " ");
+ p = isl_printer_print_str(p, var->name);
+ for (j = 0; j < var->array->n_index; ++j) {
+ isl_val *v;
+
+ p = isl_printer_print_str(p, "[");
+ v = isl_vec_get_element_val(var->size, j);
+ p = isl_printer_print_val(p, v);
+ isl_val_free(v);
+ p = isl_printer_print_str(p, "]");
+ }
+ p = isl_printer_print_str(p, ";");
+ p = isl_printer_end_line(p);
+
+ return p;
+}
+
+static __isl_give isl_printer *print_kernel_vars(__isl_take isl_printer *p,
+ struct ppcg_kernel *kernel)
+{
+ int i;
+
+ for (i = 0; i < kernel->n_var; ++i)
+ p = print_kernel_var(p, &kernel->var[i]);
+
+ return p;
+}
+
+/* Print a sync statement.
+ */
+static __isl_give isl_printer *print_sync(__isl_take isl_printer *p,
+ struct ppcg_kernel_stmt *stmt)
+{
+ p = isl_printer_start_line(p);
+ p = isl_printer_print_str(p, "__syncthreads();");
+ p = isl_printer_end_line(p);
+
+ return p;
+}
+
+/* This function is called for each user statement in the AST,
+ * i.e., for each kernel body statement, copy statement or sync statement.
+ */
+static __isl_give isl_printer *print_kernel_stmt(__isl_take isl_printer *p,
+ __isl_take isl_ast_print_options *print_options,
+ __isl_keep isl_ast_node *node, void *user)
+{
+ isl_id *id;
+ struct ppcg_kernel_stmt *stmt;
+
+ id = isl_ast_node_get_annotation(node);
+ stmt = isl_id_get_user(id);
+ isl_id_free(id);
+
+ isl_ast_print_options_free(print_options);
+
+ switch (stmt->type) {
+ case ppcg_kernel_copy:
+ return ppcg_kernel_print_copy(p, stmt);
+ case ppcg_kernel_sync:
+ return print_sync(p, stmt);
+ case ppcg_kernel_domain:
+ return ppcg_kernel_print_domain(p, stmt);
+ }
+
+ return p;
+}
+
+static void print_kernel(struct gpu_prog *prog, struct ppcg_kernel *kernel,
+ struct cuda_info *cuda)
+{
+ isl_ctx *ctx = isl_ast_node_get_ctx(kernel->tree);
+ isl_ast_print_options *print_options;
+ isl_printer *p;
+
+ print_kernel_headers(prog, kernel, cuda);
+ fprintf(cuda->kernel_c, "{\n");
+ print_kernel_iterators(cuda->kernel_c, kernel);
+
+ p = isl_printer_to_file(ctx, cuda->kernel_c);
+ p = isl_printer_set_output_format(p, ISL_FORMAT_C);
+ p = isl_printer_indent(p, 4);
+
+ p = print_kernel_vars(p, kernel);
+ p = isl_printer_end_line(p);
+ p = isl_ast_op_type_print_macro(isl_ast_op_fdiv_q, p);
+ p = ppcg_print_macros(p, kernel->tree);
+
+ print_options = isl_ast_print_options_alloc(ctx);
+ print_options = isl_ast_print_options_set_print_user(print_options,
+ &print_kernel_stmt, NULL);
+ p = isl_ast_node_print(kernel->tree, p, print_options);
+ isl_printer_free(p);
+
+ fprintf(cuda->kernel_c, "}\n");
+}
+
+/* Print a statement for copying an array to or from the device.
+ * The statement identifier is called "to_device_<array name>" or
+ * "from_device_<array name>" and its user pointer points
+ * to the gpu_array_info of the array that needs to be copied.
+ *
+ * Extract the array from the identifier and call
+ * copy_array_to_device or copy_array_from_device.
+ */
+static __isl_give isl_printer *print_to_from_device(__isl_take isl_printer *p,
+ __isl_keep isl_ast_node *node, struct gpu_prog *prog)
+{
+ isl_ast_expr *expr, *arg;
+ isl_id *id;
+ const char *name;
+ struct gpu_array_info *array;
+
+ expr = isl_ast_node_user_get_expr(node);
+ arg = isl_ast_expr_get_op_arg(expr, 0);
+ id = isl_ast_expr_get_id(arg);
+ name = isl_id_get_name(id);
+ array = isl_id_get_user(id);
+ isl_id_free(id);
+ isl_ast_expr_free(arg);
+ isl_ast_expr_free(expr);
+
+ if (!name)
+ array = NULL;
+ if (!array)
+ return isl_printer_free(p);
+
+ if (!prefixcmp(name, "to_device"))
+ return copy_array_to_device(p, array);
+ else
+ return copy_array_from_device(p, array);
+}
+
+struct print_host_user_data {
+ struct cuda_info *cuda;
+ struct gpu_prog *prog;
+};
+
+/* Print the user statement of the host code to "p".
+ *
+ * The host code may contain original user statements, kernel launches and
+ * statements that copy data to/from the device.
+ * The original user statements and the kernel launches have
+ * an associated annotation, while the data copy statements do not.
+ * The latter are handled by print_to_from_device.
+ * The annotation on the user statements is called "user".
+ *
+ * In case of a kernel launch, print a block of statements that
+ * defines the grid and the block and then launches the kernel.
+ */
+static __isl_give isl_printer *print_host_user(__isl_take isl_printer *p,
+ __isl_take isl_ast_print_options *print_options,
+ __isl_keep isl_ast_node *node, void *user)
+{
+ isl_id *id;
+ int is_user;
+ struct ppcg_kernel *kernel;
+ struct ppcg_kernel_stmt *stmt;
+ struct print_host_user_data *data;
+
+ isl_ast_print_options_free(print_options);
+
+ data = (struct print_host_user_data *) user;
+
+ id = isl_ast_node_get_annotation(node);
+ if (!id)
+ return print_to_from_device(p, node, data->prog);
+
+ is_user = !strcmp(isl_id_get_name(id), "user");
+ kernel = is_user ? NULL : isl_id_get_user(id);
+ stmt = is_user ? isl_id_get_user(id) : NULL;
+ isl_id_free(id);
+
+ if (is_user)
+ return ppcg_kernel_print_domain(p, stmt);
+
+ p = isl_printer_start_line(p);
+ p = isl_printer_print_str(p, "{");
+ p = isl_printer_end_line(p);
+ p = isl_printer_indent(p, 2);
+
+ p = isl_printer_start_line(p);
+ p = isl_printer_print_str(p, "dim3 k");
+ p = isl_printer_print_int(p, kernel->id);
+ p = isl_printer_print_str(p, "_dimBlock");
+ print_reverse_list(isl_printer_get_file(p),
+ kernel->n_block, kernel->block_dim);
+ p = isl_printer_print_str(p, ";");
+ p = isl_printer_end_line(p);
+
+ p = print_grid(p, kernel);
+
+ p = isl_printer_start_line(p);
+ p = isl_printer_print_str(p, "kernel");
+ p = isl_printer_print_int(p, kernel->id);
+ p = isl_printer_print_str(p, " <<<k");
+ p = isl_printer_print_int(p, kernel->id);
+ p = isl_printer_print_str(p, "_dimGrid, k");
+ p = isl_printer_print_int(p, kernel->id);
+ p = isl_printer_print_str(p, "_dimBlock>>> (");
+ p = print_kernel_arguments(p, data->prog, kernel, 0);
+ p = isl_printer_print_str(p, ");");
+ p = isl_printer_end_line(p);
+
+ p = isl_printer_start_line(p);
+ p = isl_printer_print_str(p, "cudaCheckKernel();");
+ p = isl_printer_end_line(p);
+
+ p = isl_printer_indent(p, -2);
+ p = isl_printer_start_line(p);
+ p = isl_printer_print_str(p, "}");
+ p = isl_printer_end_line(p);
+
+ p = isl_printer_start_line(p);
+ p = isl_printer_end_line(p);
+
+ print_kernel(data->prog, kernel, data->cuda);
+
+ return p;
+}
+
+static __isl_give isl_printer *print_host_code(__isl_take isl_printer *p,
+ struct gpu_prog *prog, __isl_keep isl_ast_node *tree,
+ struct cuda_info *cuda)
+{
+ isl_ast_print_options *print_options;
+ isl_ctx *ctx = isl_ast_node_get_ctx(tree);
+ struct print_host_user_data data = { cuda, prog };
+
+ print_options = isl_ast_print_options_alloc(ctx);
+ print_options = isl_ast_print_options_set_print_user(print_options,
+ &print_host_user, &data);
+
+ p = ppcg_print_macros(p, tree);
+ p = isl_ast_node_print(tree, p, print_options);
+
+ return p;
+}
+
+static __isl_give isl_printer *free_device_arrays(__isl_take isl_printer *p,
+ struct gpu_prog *prog)
+{
+ int i;
+
+ for (i = 0; i < prog->n_array; ++i) {
+ if (!gpu_array_requires_device_allocation(&prog->array[i]))
+ continue;
+ p = isl_printer_start_line(p);
+ p = isl_printer_print_str(p, "cudaCheckReturn(cudaFree(dev_");
+ p = isl_printer_print_str(p, prog->array[i].name);
+ p = isl_printer_print_str(p, "));");
+ p = isl_printer_end_line(p);
+ }
+
+ return p;
+}
+
+/* Given a gpu_prog "prog" and the corresponding transformed AST
+ * "tree", print the entire CUDA code to "p".
+ * "types" collects the types for which a definition has already
+ * been printed.
+ */
+static __isl_give isl_printer *print_cuda(__isl_take isl_printer *p,
+ struct gpu_prog *prog, __isl_keep isl_ast_node *tree,
+ struct gpu_types *types, void *user)
+{
+ struct cuda_info *cuda = user;
+ isl_printer *kernel;
+
+ kernel = isl_printer_to_file(isl_printer_get_ctx(p), cuda->kernel_c);
+ kernel = isl_printer_set_output_format(kernel, ISL_FORMAT_C);
+ kernel = gpu_print_types(kernel, types, prog);
+ isl_printer_free(kernel);
+
+ if (!kernel)
+ return isl_printer_free(p);
+
+ p = ppcg_start_block(p);
+
+ p = print_cuda_macros(p);
+
+ p = gpu_print_local_declarations(p, prog);
+ p = declare_device_arrays(p, prog);
+ p = allocate_device_arrays(p, prog);
+
+ p = print_host_code(p, prog, tree, cuda);
+
+ p = free_device_arrays(p, prog);
+
+ p = ppcg_end_block(p);
+
+ return p;
+}
+
+/* Transform the code in the file called "input" by replacing
+ * all scops by corresponding CUDA code.
+ * The names of the output files are derived from "input".
+ *
+ * We let generate_gpu do all the hard work and then let it call
+ * us back for printing the AST in print_cuda.
+ *
+ * To prepare for this printing, we first open the output files
+ * and we close them after generate_gpu has finished.
+ */
+int generate_cuda(isl_ctx *ctx, struct ppcg_options *options,
+ const char *input)
+{
+ struct cuda_info cuda;
+ int r;
+
+ cuda_open_files(&cuda, input);
+
+ r = generate_gpu(ctx, input, cuda.host_c, options, &print_cuda, &cuda);
+
+ cuda_close_files(&cuda);
+
+ return r;
+}
OpenPOWER on IntegriCloud