diff options
author | Jonas Hahnfeld <Hahnfeld@itc.rwth-aachen.de> | 2016-01-28 10:39:52 +0000 |
---|---|---|
committer | Jonas Hahnfeld <Hahnfeld@itc.rwth-aachen.de> | 2016-01-28 10:39:52 +0000 |
commit | 39b686248264e7700c48d54a6e01c02563befa68 (patch) | |
tree | d5223f58665a1bffb646a1728a148831d83830b9 /openmp/runtime/src | |
parent | dbf627dbd49bf58c94b45f5f98605b2a951f5fef (diff) | |
download | bcm5719-llvm-39b686248264e7700c48d54a6e01c02563befa68.tar.gz bcm5719-llvm-39b686248264e7700c48d54a6e01c02563befa68.zip |
[OMPT] Add support for ompt_event_task_dependences and ompt_event_task_dependence_pair
The attached patch adds support for ompt_event_task_dependences and
ompt_event_task_dependence_pair events from the OMPT specification [1]. These
events only apply to OpenMP 4.0 and 4.1 (aka 4.5) because task dependencies
were introduced in 4.0.
With respect to the changes:
ompt_event_task_dependences
According to the specification, this event is raised after the task has been
created, therefore this event needs to be raised after ompt_event_task_begin
(in __kmp_task_start). However, the dependencies are known at
__kmpc_omp_task_with_deps which occurs before __kmp_task_start. My modifications
extend the ompt_task_info_t struct in order to store the dependencies of the
task when __kmpc_omp_task_with_deps occurs and then they are emitted in
__kmp_task_start just after raising the ompt_event_task_begin. The deps field
is allocated and valid until the event is raised and it is freed and set
to null afterwards.
ompt_event_task_dependence_pair
The processing of the dependences (i.e. checking whether a dependence is
already satisfied) is done within __kmp_process_deps. That function checks
every dependence and calls the __kmp_track_dependence routine which gives some
support for graphical output. I used that routine to emit the dependence pair
but I also needed to know the sink_task. Although the code within
KMP_SUPPORT_GRAPH_OUTPUT refers to task_sink, it may be null because
sink->dn.task may be null (there's a comment regarding this), and in fact it
does not hold a valid pointer at that point because the value is only set by
node->dn.task = task; after the __kmp_process_deps calls in __kmp_check_deps.
I have therefore extended the
__kmp_process_deps and __kmp_track_dependence parameter list to receive the
sink_task.
[1] https://github.com/OpenMPToolsInterface/OMPT-Technical-Report/blob/target/ompt-tr.pdf
Patch by Harald Servat
Differential Revision: http://reviews.llvm.org/D14746
llvm-svn: 259038
Diffstat (limited to 'openmp/runtime/src')
-rw-r--r-- | openmp/runtime/src/include/40/ompt.h.var | 29 | ||||
-rw-r--r-- | openmp/runtime/src/include/41/ompt.h.var | 29 | ||||
-rw-r--r-- | openmp/runtime/src/kmp_taskdeps.cpp | 75 | ||||
-rw-r--r-- | openmp/runtime/src/kmp_tasking.c | 20 | ||||
-rw-r--r-- | openmp/runtime/src/ompt-event-specific.h | 8 | ||||
-rw-r--r-- | openmp/runtime/src/ompt-internal.h | 20 |
6 files changed, 170 insertions, 11 deletions
diff --git a/openmp/runtime/src/include/40/ompt.h.var b/openmp/runtime/src/include/40/ompt.h.var index a1387fab43b..ff1d86be859 100644 --- a/openmp/runtime/src/include/40/ompt.h.var +++ b/openmp/runtime/src/include/40/ompt.h.var @@ -171,7 +171,10 @@ macro (ompt_event_destroy_lock, ompt_wait_callback_t, 59) /* lock destruction */ \ macro (ompt_event_destroy_nest_lock, ompt_wait_callback_t, 60) /* nest lock destruction */ \ \ - macro (ompt_event_flush, ompt_callback_t, 61) /* after executing flush */ + macro (ompt_event_flush, ompt_callback_t, 61) /* after executing flush */ \ + \ + macro (ompt_event_task_dependences, ompt_task_dependences_callback_t, 69) /* report task dependences */\ + macro (ompt_event_task_dependence_pair, ompt_task_pair_callback_t, 70) /* report task dependence pair */ @@ -206,6 +209,23 @@ typedef struct ompt_frame_s { } ompt_frame_t; +/*--------------------- + * dependences types + *---------------------*/ + +typedef enum ompt_task_dependence_flag_e { + // a two bit field for the dependence type + ompt_task_dependence_type_out = 1, + ompt_task_dependence_type_in = 2, + ompt_task_dependence_type_inout = 3, +} ompt_task_dependence_flag_t; + +typedef struct ompt_task_dependence_s { + void *variable_addr; + uint32_t dependence_flags; +} ompt_task_dependence_t; + + /***************************************************************************** * enumerations for thread states and runtime events *****************************************************************************/ @@ -325,6 +345,13 @@ typedef void (*ompt_new_task_callback_t) ( void *task_function /* pointer to outlined function */ ); +/* task dependences */ +typedef void (*ompt_task_dependences_callback_t) ( + ompt_task_id_t task_id, /* ID of task with dependences */ + const ompt_task_dependence_t *deps,/* vector of task dependences */ + int ndeps /* number of dependences */ +); + /* program */ typedef void (*ompt_control_callback_t) ( uint64_t command, /* command of control call */ diff 
--git a/openmp/runtime/src/include/41/ompt.h.var b/openmp/runtime/src/include/41/ompt.h.var index da7647fc675..ecf313ab890 100644 --- a/openmp/runtime/src/include/41/ompt.h.var +++ b/openmp/runtime/src/include/41/ompt.h.var @@ -171,7 +171,10 @@ macro (ompt_event_destroy_lock, ompt_wait_callback_t, 59) /* lock destruction */ \ macro (ompt_event_destroy_nest_lock, ompt_wait_callback_t, 60) /* nest lock destruction */ \ \ - macro (ompt_event_flush, ompt_callback_t, 61) /* after executing flush */ + macro (ompt_event_flush, ompt_callback_t, 61) /* after executing flush */ \ + \ + macro (ompt_event_task_dependences, ompt_task_dependences_callback_t, 69) /* report task dependences */\ + macro (ompt_event_task_dependence_pair, ompt_task_pair_callback_t, 70) /* report task dependence pair */ @@ -206,6 +209,23 @@ typedef struct ompt_frame_s { } ompt_frame_t; +/*--------------------- + * dependences types + *---------------------*/ + +typedef enum ompt_task_dependence_flag_e { + // a two bit field for the dependence type + ompt_task_dependence_type_out = 1, + ompt_task_dependence_type_in = 2, + ompt_task_dependence_type_inout = 3, +} ompt_task_dependence_flag_t; + +typedef struct ompt_task_dependence_s { + void *variable_addr; + uint32_t dependence_flags; +} ompt_task_dependence_t; + + /***************************************************************************** * enumerations for thread states and runtime events *****************************************************************************/ @@ -325,6 +345,13 @@ typedef void (*ompt_new_task_callback_t) ( void *task_function /* pointer to outlined function */ ); +/* task dependences */ +typedef void (*ompt_task_dependences_callback_t) ( + ompt_task_id_t task_id, /* ID of task with dependences */ + const ompt_task_dependence_t *deps,/* vector of task dependences */ + int ndeps /* number of dependences */ +); + /* program */ typedef void (*ompt_control_callback_t) ( uint64_t command, /* command of control call */ diff --git 
a/openmp/runtime/src/kmp_taskdeps.cpp b/openmp/runtime/src/kmp_taskdeps.cpp index da085ce50cc..c4be40a0f40 100644 --- a/openmp/runtime/src/kmp_taskdeps.cpp +++ b/openmp/runtime/src/kmp_taskdeps.cpp @@ -196,7 +196,8 @@ __kmp_depnode_list_free ( kmp_info_t *thread, kmp_depnode_list *list ) } static inline void -__kmp_track_dependence ( kmp_depnode_t *source, kmp_depnode_t *sink ) +__kmp_track_dependence ( kmp_depnode_t *source, kmp_depnode_t *sink, + kmp_task_t *sink_task ) { #ifdef KMP_SUPPORT_GRAPH_OUTPUT kmp_taskdata_t * task_source = KMP_TASK_TO_TASKDATA(source->dn.task); @@ -204,12 +205,27 @@ __kmp_track_dependence ( kmp_depnode_t *source, kmp_depnode_t *sink ) __kmp_printf("%d(%s) -> %d(%s)\n", source->dn.id, task_source->td_ident->psource, sink->dn.id, task_sink->td_ident->psource); #endif +#if OMPT_SUPPORT && OMPT_TRACE + /* OMPT tracks dependences between task (a=source, b=sink) in which + task a blocks the execution of b through the ompt_new_dependence_callback */ + if (ompt_enabled && + ompt_callbacks.ompt_callback(ompt_event_task_dependence_pair)) + { + kmp_taskdata_t * task_source = KMP_TASK_TO_TASKDATA(source->dn.task); + kmp_taskdata_t * task_sink = KMP_TASK_TO_TASKDATA(sink_task); + + ompt_callbacks.ompt_callback(ompt_event_task_dependence_pair)( + task_source->ompt_task_info.task_id, + task_sink->ompt_task_info.task_id); + } +#endif /* OMPT_SUPPORT && OMPT_TRACE */ } template< bool filter > static inline kmp_int32 __kmp_process_deps ( kmp_int32 gtid, kmp_depnode_t *node, kmp_dephash_t *hash, - bool dep_barrier,kmp_int32 ndeps, kmp_depend_info_t *dep_list) + bool dep_barrier,kmp_int32 ndeps, kmp_depend_info_t *dep_list, + kmp_task_t *task ) { KA_TRACE(30, ("__kmp_process_deps<%d>: T#%d processing %d depencies : dep_barrier = %d\n", filter, gtid, ndeps, dep_barrier ) ); @@ -231,7 +247,7 @@ __kmp_process_deps ( kmp_int32 gtid, kmp_depnode_t *node, kmp_dephash_t *hash, if ( indep->dn.task ) { KMP_ACQUIRE_DEPNODE(gtid,indep); if ( indep->dn.task ) { - 
__kmp_track_dependence(indep,node); + __kmp_track_dependence(indep,node,task); indep->dn.successors = __kmp_add_node(thread, indep->dn.successors, node); KA_TRACE(40,("__kmp_process_deps<%d>: T#%d adding dependence from %p to %p\n", filter,gtid, KMP_TASK_TO_TASKDATA(indep->dn.task), KMP_TASK_TO_TASKDATA(node->dn.task))); @@ -247,7 +263,7 @@ __kmp_process_deps ( kmp_int32 gtid, kmp_depnode_t *node, kmp_dephash_t *hash, } else if ( last_out && last_out->dn.task ) { KMP_ACQUIRE_DEPNODE(gtid,last_out); if ( last_out->dn.task ) { - __kmp_track_dependence(last_out,node); + __kmp_track_dependence(last_out,node,task); last_out->dn.successors = __kmp_add_node(thread, last_out->dn.successors, node); KA_TRACE(40,("__kmp_process_deps<%d>: T#%d adding dependence from %p to %p\n", filter,gtid, KMP_TASK_TO_TASKDATA(last_out->dn.task), KMP_TASK_TO_TASKDATA(node->dn.task))); @@ -312,8 +328,10 @@ __kmp_check_deps ( kmp_int32 gtid, kmp_depnode_t *node, kmp_task_t *task, kmp_de // used to pack all npredecessors additions into a single atomic operation at the end int npredecessors; - npredecessors = __kmp_process_deps<true>(gtid, node, hash, dep_barrier, ndeps, dep_list); - npredecessors += __kmp_process_deps<false>(gtid, node, hash, dep_barrier, ndeps_noalias, noalias_dep_list); + npredecessors = __kmp_process_deps<true>(gtid, node, hash, dep_barrier, + ndeps, dep_list, task); + npredecessors += __kmp_process_deps<false>(gtid, node, hash, dep_barrier, + ndeps_noalias, noalias_dep_list, task); node->dn.task = task; KMP_MB(); @@ -404,6 +422,51 @@ __kmpc_omp_task_with_deps( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_ta kmp_info_t *thread = __kmp_threads[ gtid ]; kmp_taskdata_t * current_task = thread->th.th_current_task; +#if OMPT_SUPPORT && OMPT_TRACE + /* OMPT grab all dependences if requested by the tool */ + if (ompt_enabled && ndeps+ndeps_noalias > 0 && + ompt_callbacks.ompt_callback(ompt_event_task_dependences)) + { + kmp_int32 i; + + new_taskdata->ompt_task_info.ndeps = 
ndeps+ndeps_noalias; + new_taskdata->ompt_task_info.deps = (ompt_task_dependence_t *) + KMP_OMPT_DEPS_ALLOC(thread, + (ndeps+ndeps_noalias)*sizeof(ompt_task_dependence_t)); + + KMP_ASSERT(new_taskdata->ompt_task_info.deps != NULL); + + for (i = 0; i < ndeps; i++) + { + new_taskdata->ompt_task_info.deps[i].variable_addr = + (void*) dep_list[i].base_addr; + if (dep_list[i].flags.in && dep_list[i].flags.out) + new_taskdata->ompt_task_info.deps[i].dependence_flags = + ompt_task_dependence_type_inout; + else if (dep_list[i].flags.out) + new_taskdata->ompt_task_info.deps[i].dependence_flags = + ompt_task_dependence_type_out; + else if (dep_list[i].flags.in) + new_taskdata->ompt_task_info.deps[i].dependence_flags = + ompt_task_dependence_type_in; + } + for (i = 0; i < ndeps_noalias; i++) + { + new_taskdata->ompt_task_info.deps[ndeps+i].variable_addr = + (void*) noalias_dep_list[i].base_addr; + if (noalias_dep_list[i].flags.in && noalias_dep_list[i].flags.out) + new_taskdata->ompt_task_info.deps[ndeps+i].dependence_flags = + ompt_task_dependence_type_inout; + else if (noalias_dep_list[i].flags.out) + new_taskdata->ompt_task_info.deps[ndeps+i].dependence_flags = + ompt_task_dependence_type_out; + else if (noalias_dep_list[i].flags.in) + new_taskdata->ompt_task_info.deps[ndeps+i].dependence_flags = + ompt_task_dependence_type_in; + } + } +#endif /* OMPT_SUPPORT && OMPT_TRACE */ + bool serial = current_task->td_flags.team_serial || current_task->td_flags.tasking_ser || current_task->td_flags.final; #if OMP_41_ENABLED serial = serial && !(new_taskdata->td_flags.proxy == TASK_PROXY); diff --git a/openmp/runtime/src/kmp_tasking.c b/openmp/runtime/src/kmp_tasking.c index fc9348c94e2..3df0c9e73dd 100644 --- a/openmp/runtime/src/kmp_tasking.c +++ b/openmp/runtime/src/kmp_tasking.c @@ -462,6 +462,22 @@ __kmp_task_start( kmp_int32 gtid, kmp_task_t * task, kmp_taskdata_t * current_ta taskdata->ompt_task_info.function); } #endif +#if OMP_40_ENABLED && OMPT_SUPPORT && OMPT_TRACE + /* 
OMPT emit all dependences if requested by the tool */ + if (ompt_enabled && taskdata->ompt_task_info.ndeps > 0 && + ompt_callbacks.ompt_callback(ompt_event_task_dependences)) + { + ompt_callbacks.ompt_callback(ompt_event_task_dependences)( + taskdata->ompt_task_info.task_id, + taskdata->ompt_task_info.deps, + taskdata->ompt_task_info.ndeps + ); + /* We can now free the allocated memory for the dependencies */ + KMP_OMPT_DEPS_FREE (thread, taskdata->ompt_task_info.deps); + taskdata->ompt_task_info.deps = NULL; + taskdata->ompt_task_info.ndeps = 0; + } +#endif /* OMP_40_ENABLED && OMPT_SUPPORT && OMPT_TRACE */ return; } @@ -760,6 +776,10 @@ __kmp_task_init_ompt( kmp_taskdata_t * task, int tid, void * function ) task->ompt_task_info.function = function; task->ompt_task_info.frame.exit_runtime_frame = NULL; task->ompt_task_info.frame.reenter_runtime_frame = NULL; +#if OMP_40_ENABLED + task->ompt_task_info.ndeps = 0; + task->ompt_task_info.deps = NULL; +#endif /* OMP_40_ENABLED */ } } #endif diff --git a/openmp/runtime/src/ompt-event-specific.h b/openmp/runtime/src/ompt-event-specific.h index 28c1512ac26..fdf121371a0 100644 --- a/openmp/runtime/src/ompt-event-specific.h +++ b/openmp/runtime/src/ompt-event-specific.h @@ -141,4 +141,12 @@ #define ompt_event_flush_implemented ompt_event_UNIMPLEMENTED +#if OMP_40_ENABLED +# define ompt_event_task_dependences_implemented ompt_event_MAY_ALWAYS_TRACE +# define ompt_event_task_dependence_pair_implemented ompt_event_MAY_ALWAYS_TRACE +#else +# define ompt_event_task_dependences_implemented ompt_event_UNIMPLEMENTED +# define ompt_event_task_dependence_pair_implemented ompt_event_UNIMPLEMENTED +#endif /* OMP_40_ENABLED */ + #endif diff --git a/openmp/runtime/src/ompt-internal.h b/openmp/runtime/src/ompt-internal.h index 64e8d2e8fd6..42da9d86793 100644 --- a/openmp/runtime/src/ompt-internal.h +++ b/openmp/runtime/src/ompt-internal.h @@ -26,9 +26,13 @@ typedef struct ompt_callbacks_s { typedef struct { - ompt_frame_t frame; - void* 
function; - ompt_task_id_t task_id; + ompt_frame_t frame; + void* function; + ompt_task_id_t task_id; +#if OMP_40_ENABLED + int ndeps; + ompt_task_dependence_t *deps; +#endif /* OMP_40_ENABLED */ } ompt_task_info_t; @@ -62,6 +66,16 @@ typedef struct { extern ompt_callbacks_t ompt_callbacks; +#if OMP_40_ENABLED && OMPT_SUPPORT && OMPT_TRACE +#if USE_FAST_MEMORY +# define KMP_OMPT_DEPS_ALLOC __kmp_fast_allocate +# define KMP_OMPT_DEPS_FREE __kmp_fast_free +# else +# define KMP_OMPT_DEPS_ALLOC __kmp_thread_malloc +# define KMP_OMPT_DEPS_FREE __kmp_thread_free +# endif +#endif /* OMP_40_ENABLED && OMPT_SUPPORT && OMPT_TRACE */ + #ifdef __cplusplus extern "C" { #endif |