summaryrefslogtreecommitdiffstats
path: root/openmp/runtime/test
diff options
context:
space:
mode:
authorJonathan Peyton <jonathan.l.peyton@intel.com>2019-05-01 17:54:01 +0000
committerJonathan Peyton <jonathan.l.peyton@intel.com>2019-05-01 17:54:01 +0000
commita8426ac8c2db8f4b589c4f3e635085d5fa7a54f3 (patch)
tree61a8f25a4d7a7d831a0c3afba14b286d3cb9a1f9 /openmp/runtime/test
parent9f6861449457046cfff468613ddd14ed8a6e12fb (diff)
downloadbcm5719-llvm-a8426ac8c2db8f4b589c4f3e635085d5fa7a54f3.tar.gz
bcm5719-llvm-a8426ac8c2db8f4b589c4f3e635085d5fa7a54f3.zip
[OpenMP] Implement task modifier for reduction clause
Implemented task modifier in two versions - one without taking into account omp_orig variable (the omp_orig still can be processed by compiler without help of the library, but each reduction object will need separate initializer with global access to omp_orig), another with omp_orig variable included into interface (single initializer can be used for multiple reduction objects of the same type). Second version can be used when the omp_orig is not globally accessible, or to optimize code in case of multiple reduction objects of the same type. Patch by Andrey Churbanov Differential Revision: https://reviews.llvm.org/D60976 llvm-svn: 359710
Diffstat (limited to 'openmp/runtime/test')
-rw-r--r--openmp/runtime/test/tasking/kmp_task_modifier_simple_par_new.cpp99
-rw-r--r--openmp/runtime/test/tasking/kmp_task_modifier_simple_par_old.cpp93
-rw-r--r--openmp/runtime/test/tasking/kmp_task_modifier_simple_ws_new.cpp114
-rw-r--r--openmp/runtime/test/tasking/kmp_task_modifier_simple_ws_old.cpp108
4 files changed, 414 insertions, 0 deletions
diff --git a/openmp/runtime/test/tasking/kmp_task_modifier_simple_par_new.cpp b/openmp/runtime/test/tasking/kmp_task_modifier_simple_par_new.cpp
new file mode 100644
index 00000000000..f2dea9d7b9a
--- /dev/null
+++ b/openmp/runtime/test/tasking/kmp_task_modifier_simple_par_new.cpp
@@ -0,0 +1,99 @@
+// RUN: %libomp-cxx-compile-and-run
+
+#include <stdio.h>
+#include <omp.h>
+
+#define NT 4
+#define INIT 10
+
+/*
+The test emulates code generation needed for reduction with task modifier on
+parallel construct.
+
+Note: tasks could just use in_reduction clause, but compiler does not accept
+this because of bug: it mistakenly requires reduction item to be shared, which
+is only true for reduction on worksharing and wrong for task reductions.
+*/
+
+//------------------------------------------------
+// OpenMP runtime library routines
+#ifdef __cplusplus
+extern "C" {
+#endif
+extern void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void *item);
+// extern void* __kmpc_task_reduction_modifier_init(void *loc, int gtid, int
+// is_ws, int num, void* data);
+extern void *__kmpc_taskred_modifier_init(void *loc, int gtid, int is_ws,
+ int num, void *data);
+extern void __kmpc_task_reduction_modifier_fini(void *loc, int gtid, int is_ws);
+extern int __kmpc_global_thread_num(void *);
+#ifdef __cplusplus
+}
+#endif
+
+//------------------------------------------------
+// Compiler-generated code
+
+// Descriptor of one reduction item. main() fills it in and passes its address
+// to __kmpc_taskred_modifier_init() as the "data" argument.
+// NOTE(review): the field layout presumably has to match what the runtime
+// expects for this entry point - confirm against the runtime headers.
+typedef struct red_input {
+ void *reduce_shar; /**< shared between tasks item to reduce into */
+ void *reduce_orig; /**< original reduction item used for initialization */
+ size_t reduce_size; /**< size of data item in bytes */
+ // three compiler-generated routines (init, fini are optional):
+ void *reduce_init; /**< data initialization routine (single parameter) */
+ void *reduce_fini; /**< data finalization routine */
+ void *reduce_comb; /**< data combiner routine */
+ unsigned flags; /**< flags for additional info from compiler */
+} red_input_t;
+
+// Combiner routine for the "+" reduction on int: adds the int behind rhs to
+// the int behind lhs and stores the sum through lhs. rhs is only read.
+void i_comb(void *lhs, void *rhs) {
+  int *acc = (int *)lhs;
+  const int *addend = (const int *)rhs;
+  *acc = *acc + *addend;
+}
+
+// Emulates the code a compiler would generate for
+//   #pragma omp parallel reduction(task, + : var)
+// using __kmpc_taskred_modifier_init(), whose descriptor also carries the
+// original reduction item (reduce_orig).
+// Each thread increments its private copy of var and creates one task that
+// adds 1 through the pointer returned by __kmpc_task_reduction_get_th_data();
+// every thread except thread 0 creates a second such task. With a team of NT
+// threads the expected result is INIT + NT + NT + (NT - 1) == INIT + NT * 3 - 1.
+// Returns 0 after printing "passed", or 1 after printing the mismatch.
+int main() {
+  int var = INIT;
+  int *p_var_orig = &var;
+  omp_set_dynamic(0);
+  omp_set_num_threads(NT);
+// #pragma omp parallel reduction(task,+:var)
+#pragma omp parallel reduction(+ : var) shared(p_var_orig)
+  {
+    int gtid = __kmpc_global_thread_num(NULL);
+    void *tg; // pointer to taskgroup (optional)
+    red_input_t r_var;
+    r_var.reduce_shar = &var;
+    r_var.reduce_orig =
+        p_var_orig; // not used in this test but illustrates codegen
+    r_var.reduce_size = sizeof(var);
+    r_var.reduce_init = NULL;
+    r_var.reduce_fini = NULL;
+    r_var.reduce_comb = (void *)&i_comb;
+    // The whole descriptor is handed to the runtime, so flags must hold a
+    // defined value instead of stack garbage; 0 means no flags are set.
+    r_var.flags = 0;
+    tg = __kmpc_taskred_modifier_init(
+        NULL, // ident_t loc;
+        gtid,
+        0, // 1 - worksharing construct, 0 - parallel
+        1, // number of reduction objects
+        &r_var // related data
+    );
+    var++;
+#pragma omp task /*in_reduction(+:var)*/ shared(var)
+    {
+      // The task may run on another thread, so look up the executing thread's
+      // id and ask the runtime for the reduction data to update.
+      int gtid = __kmpc_global_thread_num(NULL);
+      int *p_var = (int *)__kmpc_task_reduction_get_th_data(gtid, tg, &var);
+      *p_var += 1;
+    }
+    if (omp_get_thread_num() > 0) {
+#pragma omp task /*in_reduction(+:var)*/ shared(var)
+      {
+        int gtid = __kmpc_global_thread_num(NULL);
+        int *p_var = (int *)__kmpc_task_reduction_get_th_data(gtid, tg, &var);
+        *p_var += 1;
+      }
+    }
+    __kmpc_task_reduction_modifier_fini(NULL, gtid, 0);
+  }
+  if (var == INIT + NT * 3 - 1) {
+    printf("passed\n");
+    return 0;
+  } else {
+    printf("failed: var = %d (!= %d)\n", var, INIT + NT * 3 - 1);
+    return 1;
+  }
+}
diff --git a/openmp/runtime/test/tasking/kmp_task_modifier_simple_par_old.cpp b/openmp/runtime/test/tasking/kmp_task_modifier_simple_par_old.cpp
new file mode 100644
index 00000000000..2526d4e9db8
--- /dev/null
+++ b/openmp/runtime/test/tasking/kmp_task_modifier_simple_par_old.cpp
@@ -0,0 +1,93 @@
+// RUN: %libomp-cxx-compile-and-run
+
+#include <stdio.h>
+#include <omp.h>
+
+#define NT 4
+#define INIT 10
+
+/*
+The test emulates code generation needed for reduction with task modifier on
+parallel construct.
+
+Note: tasks could just use in_reduction clause, but compiler does not accept
+this because of bug: it mistakenly requires reduction item to be shared, which
+is only true for reduction on worksharing and wrong for task reductions.
+*/
+
+//------------------------------------------------
+// OpenMP runtime library routines
+#ifdef __cplusplus
+extern "C" {
+#endif
+extern void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void *item);
+extern void *__kmpc_task_reduction_modifier_init(void *loc, int gtid, int is_ws,
+ int num, void *data);
+extern void __kmpc_task_reduction_modifier_fini(void *loc, int gtid, int is_ws);
+extern int __kmpc_global_thread_num(void *);
+#ifdef __cplusplus
+}
+#endif
+
+//------------------------------------------------
+// Compiler-generated code
+
+// Descriptor of one reduction item. main() fills it in and passes its address
+// to __kmpc_task_reduction_modifier_init() as the "data" argument.
+// NOTE(review): the field layout presumably has to match what the runtime
+// expects for this entry point - confirm against the runtime headers.
+typedef struct red_input {
+ void *reduce_shar; /**< shared between tasks item to reduce into */
+ size_t reduce_size; /**< size of data item in bytes */
+ // three compiler-generated routines (init, fini are optional):
+ void *reduce_init; /**< data initialization routine (single parameter) */
+ void *reduce_fini; /**< data finalization routine */
+ void *reduce_comb; /**< data combiner routine */
+ unsigned flags; /**< flags for additional info from compiler */
+} red_input_t;
+
+// Combiner routine for the "+" reduction on int: adds the int behind rhs to
+// the int behind lhs and stores the sum through lhs. rhs is only read.
+void i_comb(void *lhs, void *rhs) {
+  int *acc = (int *)lhs;
+  const int *addend = (const int *)rhs;
+  *acc = *acc + *addend;
+}
+
+// Emulates the code a compiler would generate for
+//   #pragma omp parallel reduction(task, + : var)
+// using __kmpc_task_reduction_modifier_init(), whose descriptor has no field
+// for the original reduction item.
+// Each thread increments its private copy of var and creates one task that
+// adds 1 through the pointer returned by __kmpc_task_reduction_get_th_data();
+// every thread except thread 0 creates a second such task. With a team of NT
+// threads the expected result is INIT + NT + NT + (NT - 1) == INIT + NT * 3 - 1.
+// Returns 0 after printing "passed", or 1 after printing the mismatch.
+int main() {
+  int var = INIT;
+  omp_set_dynamic(0);
+  omp_set_num_threads(NT);
+// #pragma omp parallel reduction(task,+:var)
+#pragma omp parallel reduction(+ : var)
+  {
+    int gtid = __kmpc_global_thread_num(NULL);
+    void *tg; // pointer to taskgroup (optional)
+    red_input_t r_var;
+    r_var.reduce_shar = &var;
+    r_var.reduce_size = sizeof(var);
+    r_var.reduce_init = NULL;
+    r_var.reduce_fini = NULL;
+    r_var.reduce_comb = (void *)&i_comb;
+    // The whole descriptor is handed to the runtime, so flags must hold a
+    // defined value instead of stack garbage; 0 means no flags are set.
+    r_var.flags = 0;
+    tg = __kmpc_task_reduction_modifier_init(
+        NULL, // ident_t loc;
+        gtid,
+        0, // 1 - worksharing construct, 0 - parallel
+        1, // number of reduction objects
+        &r_var // related data
+    );
+    var++;
+#pragma omp task /*in_reduction(+:var)*/ shared(var)
+    {
+      // The task may run on another thread, so look up the executing thread's
+      // id and ask the runtime for the reduction data to update.
+      int gtid = __kmpc_global_thread_num(NULL);
+      int *p_var = (int *)__kmpc_task_reduction_get_th_data(gtid, tg, &var);
+      *p_var += 1;
+    }
+    if (omp_get_thread_num() > 0) {
+#pragma omp task /*in_reduction(+:var)*/ shared(var)
+      {
+        int gtid = __kmpc_global_thread_num(NULL);
+        int *p_var = (int *)__kmpc_task_reduction_get_th_data(gtid, tg, &var);
+        *p_var += 1;
+      }
+    }
+    __kmpc_task_reduction_modifier_fini(NULL, gtid, 0);
+  }
+  if (var == INIT + NT * 3 - 1) {
+    printf("passed\n");
+    return 0;
+  } else {
+    printf("failed: var = %d (!= %d)\n", var, INIT + NT * 3 - 1);
+    return 1;
+  }
+}
diff --git a/openmp/runtime/test/tasking/kmp_task_modifier_simple_ws_new.cpp b/openmp/runtime/test/tasking/kmp_task_modifier_simple_ws_new.cpp
new file mode 100644
index 00000000000..e66cda91aee
--- /dev/null
+++ b/openmp/runtime/test/tasking/kmp_task_modifier_simple_ws_new.cpp
@@ -0,0 +1,114 @@
+// RUN: %libomp-cxx-compile-and-run
+
+#include <stdio.h>
+#include <omp.h>
+
+#define NT 4
+#define INIT 10
+
+/*
+The test emulates code generation needed for reduction with task modifier on
+a worksharing (for) construct.
+
+Note: tasks could just use in_reduction clause, but compiler does not accept
+this because of bug: it mistakenly requires reduction item to be shared, which
+is only true for reduction on worksharing and wrong for task reductions.
+*/
+
+//------------------------------------------------
+// OpenMP runtime library routines
+#ifdef __cplusplus
+extern "C" {
+#endif
+extern void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void *item);
+// extern void* __kmpc_task_reduction_modifier_init(void *loc, int gtid, int
+// flags, int num, void* data);
+extern void *__kmpc_taskred_modifier_init(void *loc, int gtid, int is_ws,
+ int num, void *data);
+extern void __kmpc_task_reduction_modifier_fini(void *loc, int gtid, int is_ws);
+extern int __kmpc_global_thread_num(void *);
+#ifdef __cplusplus
+}
+#endif
+
+//------------------------------------------------
+// Compiler-generated code
+
+// Descriptor of one reduction item. main() fills it in and passes its address
+// to __kmpc_taskred_modifier_init() as the "data" argument.
+// NOTE(review): the field layout presumably has to match what the runtime
+// expects for this entry point - confirm against the runtime headers.
+typedef struct red_input {
+ void *reduce_shar; /**< shared between tasks item to reduce into */
+ void *reduce_orig; /**< original reduction item used for initialization */
+ size_t reduce_size; /**< size of data item in bytes */
+ // three compiler-generated routines (init, fini are optional):
+ void *reduce_init; /**< data initialization routine (single parameter) */
+ void *reduce_fini; /**< data finalization routine */
+ void *reduce_comb; /**< data combiner routine */
+ unsigned flags; /**< flags for additional info from compiler */
+} red_input_t;
+
+// Combiner routine for the "+" reduction on int: adds the int behind rhs to
+// the int behind lhs and stores the sum through lhs. rhs is only read.
+void i_comb(void *lhs, void *rhs) {
+  int *acc = (int *)lhs;
+  const int *addend = (const int *)rhs;
+  *acc = *acc + *addend;
+}
+
+// Emulates the code a compiler would generate for
+//   #pragma omp for reduction(task, + : var)
+// using __kmpc_taskred_modifier_init(), whose descriptor also carries the
+// original reduction item (reduce_orig).
+// The loop has NT iterations so that each of the NT threads is meant to run
+// exactly one. Each iteration increments the private copy of var and creates
+// one task that adds 1 through the pointer returned by
+// __kmpc_task_reduction_get_th_data(); iterations run by a thread other than
+// thread 0 create a second such task. The expected result is therefore
+// INIT + NT + NT + (NT - 1) == INIT + NT * 3 - 1.
+// Returns 0 after printing "passed", or 1 after printing the mismatch.
+int main() {
+  int var = INIT;
+  int *p_var_orig = &var;
+  int i;
+  omp_set_dynamic(0);
+  omp_set_num_threads(NT);
+#pragma omp parallel private(i) shared(p_var_orig)
+// #pragma omp for reduction(task,+:var)
+#pragma omp for reduction(+ : var)
+  for (i = 0; i < NT; ++i) // single iteration per thread
+  {
+    // generated code, which actually should be placed before
+    // loop iterations distribution, but placed here just to show the idea,
+    // and to keep correctness the loop count is equal to number of threads
+    int gtid = __kmpc_global_thread_num(NULL);
+    void *tg; // pointer to taskgroup (optional)
+    red_input_t r_var;
+    r_var.reduce_shar = &var;
+    r_var.reduce_orig =
+        p_var_orig; // not used in this test but illustrates codegen
+    r_var.reduce_size = sizeof(var);
+    r_var.reduce_init = NULL;
+    r_var.reduce_fini = NULL;
+    r_var.reduce_comb = (void *)&i_comb;
+    // The whole descriptor is handed to the runtime, so flags must hold a
+    // defined value instead of stack garbage; 0 means no flags are set.
+    r_var.flags = 0;
+    tg = __kmpc_taskred_modifier_init(
+        NULL, // ident_t loc;
+        gtid,
+        1, // 1 - worksharing construct, 0 - parallel
+        1, // number of reduction objects
+        &r_var // related data
+    );
+    // end of generated code
+    var++;
+#pragma omp task /*in_reduction(+:var)*/ shared(var)
+    {
+      // emulate task reduction here because of compiler bug:
+      // it mistakenly declines to accept in_reduction because var is private
+      // outside.
+      int gtid = __kmpc_global_thread_num(NULL);
+      int *p_var = (int *)__kmpc_task_reduction_get_th_data(gtid, tg, &var);
+      *p_var += 1;
+    }
+    if (omp_get_thread_num() > 0) {
+#pragma omp task /*in_reduction(+:var)*/ shared(var)
+      {
+        int gtid = __kmpc_global_thread_num(NULL);
+        int *p_var = (int *)__kmpc_task_reduction_get_th_data(gtid, tg, &var);
+        *p_var += 1;
+      }
+    }
+    // generated code, which actually should be placed after loop completion
+    // but before barrier and before loop reduction. It is placed here just to
+    // show the idea, and to keep correctness the loop count is equal to the
+    // number of threads
+    __kmpc_task_reduction_modifier_fini(NULL, gtid, 1);
+    // end of generated code
+  }
+  if (var == INIT + NT * 3 - 1) {
+    printf("passed\n");
+    return 0;
+  } else {
+    printf("failed: var = %d (!= %d)\n", var, INIT + NT * 3 - 1);
+    return 1;
+  }
+}
diff --git a/openmp/runtime/test/tasking/kmp_task_modifier_simple_ws_old.cpp b/openmp/runtime/test/tasking/kmp_task_modifier_simple_ws_old.cpp
new file mode 100644
index 00000000000..97d5cb5d91c
--- /dev/null
+++ b/openmp/runtime/test/tasking/kmp_task_modifier_simple_ws_old.cpp
@@ -0,0 +1,108 @@
+// RUN: %libomp-cxx-compile-and-run
+
+#include <stdio.h>
+#include <omp.h>
+
+#define NT 4
+#define INIT 10
+
+/*
+The test emulates code generation needed for reduction with task modifier on
+a worksharing (for) construct.
+
+Note: tasks could just use in_reduction clause, but compiler does not accept
+this because of bug: it mistakenly requires reduction item to be shared, which
+is only true for reduction on worksharing and wrong for task reductions.
+*/
+
+//------------------------------------------------
+// OpenMP runtime library routines
+#ifdef __cplusplus
+extern "C" {
+#endif
+extern void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void *item);
+extern void *__kmpc_task_reduction_modifier_init(void *loc, int gtid, int is_ws,
+ int num, void *data);
+extern void __kmpc_task_reduction_modifier_fini(void *loc, int gtid, int is_ws);
+extern int __kmpc_global_thread_num(void *);
+#ifdef __cplusplus
+}
+#endif
+
+//------------------------------------------------
+// Compiler-generated code
+
+// Descriptor of one reduction item. main() fills it in and passes its address
+// to __kmpc_task_reduction_modifier_init() as the "data" argument.
+// NOTE(review): the field layout presumably has to match what the runtime
+// expects for this entry point - confirm against the runtime headers.
+typedef struct red_input {
+ void *reduce_shar; /**< shared between tasks item to reduce into */
+ size_t reduce_size; /**< size of data item in bytes */
+ // three compiler-generated routines (init, fini are optional):
+ void *reduce_init; /**< data initialization routine (single parameter) */
+ void *reduce_fini; /**< data finalization routine */
+ void *reduce_comb; /**< data combiner routine */
+ unsigned flags; /**< flags for additional info from compiler */
+} red_input_t;
+
+// Combiner routine for the "+" reduction on int: adds the int behind rhs to
+// the int behind lhs and stores the sum through lhs. rhs is only read.
+void i_comb(void *lhs, void *rhs) {
+  int *acc = (int *)lhs;
+  const int *addend = (const int *)rhs;
+  *acc = *acc + *addend;
+}
+
+// Emulates the code a compiler would generate for
+//   #pragma omp for reduction(task, + : var)
+// using __kmpc_task_reduction_modifier_init(), whose descriptor has no field
+// for the original reduction item.
+// The loop has NT iterations so that each of the NT threads is meant to run
+// exactly one. Each iteration increments the private copy of var and creates
+// one task that adds 1 through the pointer returned by
+// __kmpc_task_reduction_get_th_data(); iterations run by a thread other than
+// thread 0 create a second such task. The expected result is therefore
+// INIT + NT + NT + (NT - 1) == INIT + NT * 3 - 1.
+// Returns 0 after printing "passed", or 1 after printing the mismatch.
+int main() {
+  int var = INIT;
+  int i;
+  omp_set_dynamic(0);
+  omp_set_num_threads(NT);
+#pragma omp parallel private(i)
+// #pragma omp for reduction(task,+:var)
+#pragma omp for reduction(+ : var)
+  for (i = 0; i < NT; ++i) // single iteration per thread
+  {
+    // generated code, which actually should be placed before
+    // loop iterations distribution, but placed here just to show the idea,
+    // and to keep correctness the loop count is equal to number of threads
+    int gtid = __kmpc_global_thread_num(NULL);
+    void *tg; // pointer to taskgroup (optional)
+    red_input_t r_var;
+    r_var.reduce_shar = &var;
+    r_var.reduce_size = sizeof(var);
+    r_var.reduce_init = NULL;
+    r_var.reduce_fini = NULL;
+    r_var.reduce_comb = (void *)&i_comb;
+    // The whole descriptor is handed to the runtime, so flags must hold a
+    // defined value instead of stack garbage; 0 means no flags are set.
+    r_var.flags = 0;
+    tg = __kmpc_task_reduction_modifier_init(
+        NULL, // ident_t loc;
+        gtid,
+        1, // 1 - worksharing construct, 0 - parallel
+        1, // number of reduction objects
+        &r_var // related data
+    );
+    // end of generated code
+    var++;
+#pragma omp task /*in_reduction(+:var)*/ shared(var)
+    {
+      // emulate task reduction here because of compiler bug:
+      // it mistakenly declines to accept in_reduction because var is private
+      // outside.
+      int gtid = __kmpc_global_thread_num(NULL);
+      int *p_var = (int *)__kmpc_task_reduction_get_th_data(gtid, tg, &var);
+      *p_var += 1;
+    }
+    if (omp_get_thread_num() > 0) {
+#pragma omp task /*in_reduction(+:var)*/ shared(var)
+      {
+        int gtid = __kmpc_global_thread_num(NULL);
+        int *p_var = (int *)__kmpc_task_reduction_get_th_data(gtid, tg, &var);
+        *p_var += 1;
+      }
+    }
+    // generated code, which actually should be placed after loop completion
+    // but before barrier and before loop reduction. It is placed here just to
+    // show the idea, and to keep correctness the loop count is equal to the
+    // number of threads
+    __kmpc_task_reduction_modifier_fini(NULL, gtid, 1);
+    // end of generated code
+  }
+  if (var == INIT + NT * 3 - 1) {
+    printf("passed\n");
+    return 0;
+  } else {
+    printf("failed: var = %d (!= %d)\n", var, INIT + NT * 3 - 1);
+    return 1;
+  }
+}
OpenPOWER on IntegriCloud