diff options
| author | Jonathan Peyton <jonathan.l.peyton@intel.com> | 2019-05-01 17:54:01 +0000 |
|---|---|---|
| committer | Jonathan Peyton <jonathan.l.peyton@intel.com> | 2019-05-01 17:54:01 +0000 |
| commit | a8426ac8c2db8f4b589c4f3e635085d5fa7a54f3 (patch) | |
| tree | 61a8f25a4d7a7d831a0c3afba14b286d3cb9a1f9 /openmp/runtime/test | |
| parent | 9f6861449457046cfff468613ddd14ed8a6e12fb (diff) | |
| download | bcm5719-llvm-a8426ac8c2db8f4b589c4f3e635085d5fa7a54f3.tar.gz bcm5719-llvm-a8426ac8c2db8f4b589c4f3e635085d5fa7a54f3.zip | |
[OpenMP] Implement task modifier for reduction clause
Implemented the task modifier in two versions. The first does not take the
omp_orig variable into account (omp_orig can still be processed by the compiler
without help from the library, but each reduction object then needs a separate
initializer with global access to omp_orig). The second includes the omp_orig
variable in the interface, so a single initializer can be used for multiple
reduction objects of the same type. The second version can be used when
omp_orig is not globally accessible, or to optimize code when there are
multiple reduction objects of the same type.
Patch by Andrey Churbanov
Differential Revision: https://reviews.llvm.org/D60976
llvm-svn: 359710
Diffstat (limited to 'openmp/runtime/test')
4 files changed, 414 insertions, 0 deletions
diff --git a/openmp/runtime/test/tasking/kmp_task_modifier_simple_par_new.cpp b/openmp/runtime/test/tasking/kmp_task_modifier_simple_par_new.cpp new file mode 100644 index 00000000000..f2dea9d7b9a --- /dev/null +++ b/openmp/runtime/test/tasking/kmp_task_modifier_simple_par_new.cpp @@ -0,0 +1,99 @@ +// RUN: %libomp-cxx-compile-and-run + +#include <stdio.h> +#include <omp.h> + +#define NT 4 +#define INIT 10 + +/* +The test emulates code generation needed for reduction with task modifier on +parallel construct. + +Note: tasks could just use in_reduction clause, but compiler does not accept +this because of bug: it mistakenly requires reduction item to be shared, which +is only true for reduction on worksharing and wrong for task reductions. +*/ + +//------------------------------------------------ +// OpenMP runtime library routines +#ifdef __cplusplus +extern "C" { +#endif +extern void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void *item); +// extern void* __kmpc_task_reduction_modifier_init(void *loc, int gtid, int +// is_ws, int num, void* data); +extern void *__kmpc_taskred_modifier_init(void *loc, int gtid, int is_ws, + int num, void *data); +extern void __kmpc_task_reduction_modifier_fini(void *loc, int gtid, int is_ws); +extern int __kmpc_global_thread_num(void *); +#ifdef __cplusplus +} +#endif + +//------------------------------------------------ +// Compiler-generated code + +typedef struct red_input { + void *reduce_shar; /**< shared between tasks item to reduce into */ + void *reduce_orig; /**< original reduction item used for initialization */ + size_t reduce_size; /**< size of data item in bytes */ + // three compiler-generated routines (init, fini are optional): + void *reduce_init; /**< data initialization routine (single paramemter) */ + void *reduce_fini; /**< data finalization routine */ + void *reduce_comb; /**< data combiner routine */ + unsigned flags; /**< flags for additional info from compiler */ +} red_input_t; + +void 
i_comb(void *lhs, void *rhs) { *(int *)lhs += *(int *)rhs; } + +int main() { + int var = INIT; + int *p_var_orig = &var; + omp_set_dynamic(0); + omp_set_num_threads(NT); +// #pragma omp parallel reduction(task,+:var) +#pragma omp parallel reduction(+ : var) shared(p_var_orig) + { + int gtid = __kmpc_global_thread_num(NULL); + void *tg; // pointer to taskgroup (optional) + red_input_t r_var; + r_var.reduce_shar = &var; + r_var.reduce_orig = + p_var_orig; // not used in this test but illustrates codegen + r_var.reduce_size = sizeof(var); + r_var.reduce_init = NULL; + r_var.reduce_fini = NULL; + r_var.reduce_comb = (void *)&i_comb; + tg = __kmpc_taskred_modifier_init( + NULL, // ident_t loc; + gtid, + 0, // 1 - worksharing construct, 0 - parallel + 1, // number of reduction objects + &r_var // related data + ); + var++; +#pragma omp task /*in_reduction(+:var)*/ shared(var) + { + int gtid = __kmpc_global_thread_num(NULL); + int *p_var = (int *)__kmpc_task_reduction_get_th_data(gtid, tg, &var); + *p_var += 1; + } + if (omp_get_thread_num() > 0) { +#pragma omp task /*in_reduction(+:var)*/ shared(var) + { + int gtid = __kmpc_global_thread_num(NULL); + int *p_var = (int *)__kmpc_task_reduction_get_th_data(gtid, tg, &var); + *p_var += 1; + } + } + __kmpc_task_reduction_modifier_fini(NULL, gtid, 0); + } + if (var == INIT + NT * 3 - 1) { + printf("passed\n"); + return 0; + } else { + printf("failed: var = %d (!= %d)\n", var, INIT + NT * 3 - 1); + return 1; + } +} diff --git a/openmp/runtime/test/tasking/kmp_task_modifier_simple_par_old.cpp b/openmp/runtime/test/tasking/kmp_task_modifier_simple_par_old.cpp new file mode 100644 index 00000000000..2526d4e9db8 --- /dev/null +++ b/openmp/runtime/test/tasking/kmp_task_modifier_simple_par_old.cpp @@ -0,0 +1,93 @@ +// RUN: %libomp-cxx-compile-and-run + +#include <stdio.h> +#include <omp.h> + +#define NT 4 +#define INIT 10 + +/* +The test emulates code generation needed for reduction with task modifier on +parallel construct. 
+ +Note: tasks could just use in_reduction clause, but compiler does not accept +this because of bug: it mistakenly requires reduction item to be shared, which +is only true for reduction on worksharing and wrong for task reductions. +*/ + +//------------------------------------------------ +// OpenMP runtime library routines +#ifdef __cplusplus +extern "C" { +#endif +extern void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void *item); +extern void *__kmpc_task_reduction_modifier_init(void *loc, int gtid, int is_ws, + int num, void *data); +extern void __kmpc_task_reduction_modifier_fini(void *loc, int gtid, int is_ws); +extern int __kmpc_global_thread_num(void *); +#ifdef __cplusplus +} +#endif + +//------------------------------------------------ +// Compiler-generated code + +typedef struct red_input { + void *reduce_shar; /**< shared between tasks item to reduce into */ + size_t reduce_size; /**< size of data item in bytes */ + // three compiler-generated routines (init, fini are optional): + void *reduce_init; /**< data initialization routine (single paramemter) */ + void *reduce_fini; /**< data finalization routine */ + void *reduce_comb; /**< data combiner routine */ + unsigned flags; /**< flags for additional info from compiler */ +} red_input_t; + +void i_comb(void *lhs, void *rhs) { *(int *)lhs += *(int *)rhs; } + +int main() { + int var = INIT; + omp_set_dynamic(0); + omp_set_num_threads(NT); +// #pragma omp parallel reduction(task,+:var) +#pragma omp parallel reduction(+ : var) + { + int gtid = __kmpc_global_thread_num(NULL); + void *tg; // pointer to taskgroup (optional) + red_input_t r_var; + r_var.reduce_shar = &var; + r_var.reduce_size = sizeof(var); + r_var.reduce_init = NULL; + r_var.reduce_fini = NULL; + r_var.reduce_comb = (void *)&i_comb; + tg = __kmpc_task_reduction_modifier_init( + NULL, // ident_t loc; + gtid, + 0, // 1 - worksharing construct, 0 - parallel + 1, // number of reduction objects + &r_var // related data + ); + var++; 
+#pragma omp task /*in_reduction(+:var)*/ shared(var) + { + int gtid = __kmpc_global_thread_num(NULL); + int *p_var = (int *)__kmpc_task_reduction_get_th_data(gtid, tg, &var); + *p_var += 1; + } + if (omp_get_thread_num() > 0) { +#pragma omp task /*in_reduction(+:var)*/ shared(var) + { + int gtid = __kmpc_global_thread_num(NULL); + int *p_var = (int *)__kmpc_task_reduction_get_th_data(gtid, tg, &var); + *p_var += 1; + } + } + __kmpc_task_reduction_modifier_fini(NULL, gtid, 0); + } + if (var == INIT + NT * 3 - 1) { + printf("passed\n"); + return 0; + } else { + printf("failed: var = %d (!= %d)\n", var, INIT + NT * 3 - 1); + return 1; + } +} diff --git a/openmp/runtime/test/tasking/kmp_task_modifier_simple_ws_new.cpp b/openmp/runtime/test/tasking/kmp_task_modifier_simple_ws_new.cpp new file mode 100644 index 00000000000..e66cda91aee --- /dev/null +++ b/openmp/runtime/test/tasking/kmp_task_modifier_simple_ws_new.cpp @@ -0,0 +1,114 @@ +// RUN: %libomp-cxx-compile-and-run + +#include <stdio.h> +#include <omp.h> + +#define NT 4 +#define INIT 10 + +/* +The test emulates code generation needed for reduction with task modifier on +parallel construct. + +Note: tasks could just use in_reduction clause, but compiler does not accept +this because of bug: it mistakenly requires reduction item to be shared, which +is only true for reduction on worksharing and wrong for task reductions. 
+*/ + +//------------------------------------------------ +// OpenMP runtime library routines +#ifdef __cplusplus +extern "C" { +#endif +extern void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void *item); +// extern void* __kmpc_task_reduction_modifier_init(void *loc, int gtid, int +// flags, int num, void* data); +extern void *__kmpc_taskred_modifier_init(void *loc, int gtid, int is_ws, + int num, void *data); +extern void __kmpc_task_reduction_modifier_fini(void *loc, int gtid, int is_ws); +extern int __kmpc_global_thread_num(void *); +#ifdef __cplusplus +} +#endif + +//------------------------------------------------ +// Compiler-generated code + +typedef struct red_input { + void *reduce_shar; /**< shared between tasks item to reduce into */ + void *reduce_orig; /**< original reduction item used for initialization */ + size_t reduce_size; /**< size of data item in bytes */ + // three compiler-generated routines (init, fini are optional): + void *reduce_init; /**< data initialization routine (single paramemter) */ + void *reduce_fini; /**< data finalization routine */ + void *reduce_comb; /**< data combiner routine */ + unsigned flags; /**< flags for additional info from compiler */ +} red_input_t; + +void i_comb(void *lhs, void *rhs) { *(int *)lhs += *(int *)rhs; } + +int main() { + int var = INIT; + int *p_var_orig = &var; + int i; + omp_set_dynamic(0); + omp_set_num_threads(NT); +#pragma omp parallel private(i) shared(p_var_orig) +// #pragma omp for reduction(task,+:var) +#pragma omp for reduction(+ : var) + for (i = 0; i < NT; ++i) // single iteration per thread + { + // generated code, which actually should be placed before + // loop iterations distribution, but placed here just to show the idea, + // and to keep correctness the loop count is equal to number of threads + int gtid = __kmpc_global_thread_num(NULL); + void *tg; // pointer to taskgroup (optional) + red_input_t r_var; + r_var.reduce_shar = &var; + r_var.reduce_orig = + p_var_orig; // 
not used in this test but illustrates codegen + r_var.reduce_size = sizeof(var); + r_var.reduce_init = NULL; + r_var.reduce_fini = NULL; + r_var.reduce_comb = (void *)&i_comb; + tg = __kmpc_taskred_modifier_init( + NULL, // ident_t loc; + gtid, + 1, // 1 - worksharing construct, 0 - parallel + 1, // number of reduction objects + &r_var // related data + ); + // end of generated code + var++; +#pragma omp task /*in_reduction(+:var)*/ shared(var) + { + // emulate task reduction here because of compiler bug: + // it mistakenly declines to accept in_reduction because var is private + // outside. + int gtid = __kmpc_global_thread_num(NULL); + int *p_var = (int *)__kmpc_task_reduction_get_th_data(gtid, tg, &var); + *p_var += 1; + } + if (omp_get_thread_num() > 0) { +#pragma omp task /*in_reduction(+:var)*/ shared(var) + { + int gtid = __kmpc_global_thread_num(NULL); + int *p_var = (int *)__kmpc_task_reduction_get_th_data(gtid, tg, &var); + *p_var += 1; + } + } + // generated code, which actually should be placed after loop completion + // but before barrier and before loop reduction. It placed here just to show + // the idea, + // and to keep correctness the loop count is equal to number of threads + __kmpc_task_reduction_modifier_fini(NULL, gtid, 1); + // end of generated code + } + if (var == INIT + NT * 3 - 1) { + printf("passed\n"); + return 0; + } else { + printf("failed: var = %d (!= %d)\n", var, INIT + NT * 3 - 1); + return 1; + } +} diff --git a/openmp/runtime/test/tasking/kmp_task_modifier_simple_ws_old.cpp b/openmp/runtime/test/tasking/kmp_task_modifier_simple_ws_old.cpp new file mode 100644 index 00000000000..97d5cb5d91c --- /dev/null +++ b/openmp/runtime/test/tasking/kmp_task_modifier_simple_ws_old.cpp @@ -0,0 +1,108 @@ +// RUN: %libomp-cxx-compile-and-run + +#include <stdio.h> +#include <omp.h> + +#define NT 4 +#define INIT 10 + +/* +The test emulates code generation needed for reduction with task modifier on +parallel construct. 
+ +Note: tasks could just use in_reduction clause, but compiler does not accept +this because of bug: it mistakenly requires reduction item to be shared, which +is only true for reduction on worksharing and wrong for task reductions. +*/ + +//------------------------------------------------ +// OpenMP runtime library routines +#ifdef __cplusplus +extern "C" { +#endif +extern void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void *item); +extern void *__kmpc_task_reduction_modifier_init(void *loc, int gtid, int is_ws, + int num, void *data); +extern void __kmpc_task_reduction_modifier_fini(void *loc, int gtid, int is_ws); +extern int __kmpc_global_thread_num(void *); +#ifdef __cplusplus +} +#endif + +//------------------------------------------------ +// Compiler-generated code + +typedef struct red_input { + void *reduce_shar; /**< shared between tasks item to reduce into */ + size_t reduce_size; /**< size of data item in bytes */ + // three compiler-generated routines (init, fini are optional): + void *reduce_init; /**< data initialization routine (single paramemter) */ + void *reduce_fini; /**< data finalization routine */ + void *reduce_comb; /**< data combiner routine */ + unsigned flags; /**< flags for additional info from compiler */ +} red_input_t; + +void i_comb(void *lhs, void *rhs) { *(int *)lhs += *(int *)rhs; } + +int main() { + int var = INIT; + int i; + omp_set_dynamic(0); + omp_set_num_threads(NT); +#pragma omp parallel private(i) +// #pragma omp for reduction(task,+:var) +#pragma omp for reduction(+ : var) + for (i = 0; i < NT; ++i) // single iteration per thread + { + // generated code, which actually should be placed before + // loop iterations distribution, but placed here just to show the idea, + // and to keep correctness the loop count is equal to number of threads + int gtid = __kmpc_global_thread_num(NULL); + void *tg; // pointer to taskgroup (optional) + red_input_t r_var; + r_var.reduce_shar = &var; + r_var.reduce_size = 
sizeof(var); + r_var.reduce_init = NULL; + r_var.reduce_fini = NULL; + r_var.reduce_comb = (void *)&i_comb; + tg = __kmpc_task_reduction_modifier_init( + NULL, // ident_t loc; + gtid, + 1, // 1 - worksharing construct, 0 - parallel + 1, // number of reduction objects + &r_var // related data + ); + // end of generated code + var++; +#pragma omp task /*in_reduction(+:var)*/ shared(var) + { + // emulate task reduction here because of compiler bug: + // it mistakenly declines to accept in_reduction because var is private + // outside. + int gtid = __kmpc_global_thread_num(NULL); + int *p_var = (int *)__kmpc_task_reduction_get_th_data(gtid, tg, &var); + *p_var += 1; + } + if (omp_get_thread_num() > 0) { +#pragma omp task /*in_reduction(+:var)*/ shared(var) + { + int gtid = __kmpc_global_thread_num(NULL); + int *p_var = (int *)__kmpc_task_reduction_get_th_data(gtid, tg, &var); + *p_var += 1; + } + } + // generated code, which actually should be placed after loop completion + // but before barrier and before loop reduction. It placed here just to show + // the idea, + // and to keep correctness the loop count is equal to number of threads + __kmpc_task_reduction_modifier_fini(NULL, gtid, 1); + // end of generated code + } + if (var == INIT + NT * 3 - 1) { + printf("passed\n"); + return 0; + } else { + printf("failed: var = %d (!= %d)\n", var, INIT + NT * 3 - 1); + return 1; + } +} |

