Diffstat (limited to 'openmp/runtime/src/ompt-specific.cpp')
| -rw-r--r-- | openmp/runtime/src/ompt-specific.cpp | 377 |
1 file changed, 255 insertions(+), 122 deletions(-)
diff --git a/openmp/runtime/src/ompt-specific.cpp b/openmp/runtime/src/ompt-specific.cpp
index 9e0d1eef031..bdf6bd18580 100644
--- a/openmp/runtime/src/ompt-specific.cpp
+++ b/openmp/runtime/src/ompt-specific.cpp
@@ -3,42 +3,33 @@
 //******************************************************************************
 
 #include "kmp.h"
-#include "ompt-internal.h"
 #include "ompt-specific.h"
 
+#if KMP_OS_UNIX
+#include <dlfcn.h>
+#endif
+
+#if KMP_OS_WINDOWS
+#define THREAD_LOCAL __declspec(thread)
+#else
+#define THREAD_LOCAL __thread
+#endif
+
 //******************************************************************************
 // macros
 //******************************************************************************
 
-#define GTID_TO_OMPT_THREAD_ID(id) ((ompt_thread_id_t)(id >= 0) ? id + 1 : 0)
-
-#define LWT_FROM_TEAM(team) (team)->t.ompt_serialized_team_info;
+#define LWT_FROM_TEAM(team) (team)->t.ompt_serialized_team_info
 
 #define OMPT_THREAD_ID_BITS 16
 
-// 2013 08 24 - John Mellor-Crummey
-// ideally, a thread should assign its own ids based on thread private data.
-// however, the way the intel runtime reinitializes thread data structures
-// when it creates teams makes it difficult to maintain persistent thread
-// data. using a shared variable instead is simple. I leave it to intel to
-// sort out how to implement a higher performance version in their runtime.
-
-// when using fetch_and_add to generate the IDs, there isn't any reason to waste
-// bits for thread id.
-#if 0
-#define NEXT_ID(id_ptr, tid) \
-  ((KMP_TEST_THEN_INC64(id_ptr) << OMPT_THREAD_ID_BITS) | (tid))
-#else
-#define NEXT_ID(id_ptr, tid) (KMP_TEST_THEN_INC64((volatile kmp_int64 *)id_ptr))
-#endif
-
 //******************************************************************************
 // private operations
 //******************************************************************************
 
 //----------------------------------------------------------
 // traverse the team and task hierarchy
-// note: __ompt_get_teaminfo and __ompt_get_taskinfo
+// note: __ompt_get_teaminfo and __ompt_get_task_info_object
 // traverse the hierarchy similarly and need to be
 // kept consistent
 //----------------------------------------------------------
@@ -51,7 +42,7 @@ ompt_team_info_t *__ompt_get_teaminfo(int depth, int *size) {
     if (team == NULL)
       return NULL;
 
-    ompt_lw_taskteam_t *lwt = LWT_FROM_TEAM(team);
+    ompt_lw_taskteam_t *next_lwt = LWT_FROM_TEAM(team), *lwt = NULL;
 
     while (depth > 0) {
       // next lightweight team (if any)
@@ -61,9 +52,14 @@ ompt_team_info_t *__ompt_get_teaminfo(int depth, int *size) {
       // next heavyweight team (if any) after
       // lightweight teams are exhausted
       if (!lwt && team) {
-        team = team->t.t_parent;
-        if (team) {
-          lwt = LWT_FROM_TEAM(team);
+        if (next_lwt) {
+          lwt = next_lwt;
+          next_lwt = NULL;
+        } else {
+          team = team->t.t_parent;
+          if (team) {
+            next_lwt = LWT_FROM_TEAM(team);
+          }
         }
       }
 
@@ -90,13 +86,14 @@ ompt_team_info_t *__ompt_get_teaminfo(int depth, int *size) {
   return NULL;
 }
 
-ompt_task_info_t *__ompt_get_taskinfo(int depth) {
+ompt_task_info_t *__ompt_get_task_info_object(int depth) {
   ompt_task_info_t *info = NULL;
   kmp_info_t *thr = ompt_get_thread();
 
   if (thr) {
     kmp_taskdata_t *taskdata = thr->th.th_current_task;
-    ompt_lw_taskteam_t *lwt = LWT_FROM_TEAM(taskdata->td_team);
+    ompt_lw_taskteam_t *lwt = NULL,
+                       *next_lwt = LWT_FROM_TEAM(taskdata->td_team);
 
     while (depth > 0) {
       // next lightweight team (if any)
@@ -106,9 +103,59 @@ ompt_task_info_t *__ompt_get_taskinfo(int depth) {
       // next heavyweight team (if any) after
       // lightweight teams are exhausted
       if (!lwt && taskdata) {
-        taskdata = taskdata->td_parent;
-        if (taskdata) {
-          lwt = LWT_FROM_TEAM(taskdata->td_team);
+        if (next_lwt) {
+          lwt = next_lwt;
+          next_lwt = NULL;
+        } else {
+          taskdata = taskdata->td_parent;
+          if (taskdata) {
+            next_lwt = LWT_FROM_TEAM(taskdata->td_team);
+          }
+        }
+      }
+      depth--;
+    }
+
+    if (lwt) {
+      info = &lwt->ompt_task_info;
+    } else if (taskdata) {
+      info = &taskdata->ompt_task_info;
+    }
+  }
+
+  return info;
+}
+
+ompt_task_info_t *__ompt_get_scheduling_taskinfo(int depth) {
+  ompt_task_info_t *info = NULL;
+  kmp_info_t *thr = ompt_get_thread();
+
+  if (thr) {
+    kmp_taskdata_t *taskdata = thr->th.th_current_task;
+
+    ompt_lw_taskteam_t *lwt = NULL,
+                       *next_lwt = LWT_FROM_TEAM(taskdata->td_team);
+
+    while (depth > 0) {
+      // next lightweight team (if any)
+      if (lwt)
+        lwt = lwt->parent;
+
+      // next heavyweight team (if any) after
+      // lightweight teams are exhausted
+      if (!lwt && taskdata) {
+        // first try scheduling parent (for explicit task scheduling)
+        if (taskdata->ompt_task_info.scheduling_parent) {
+          taskdata = taskdata->ompt_task_info.scheduling_parent;
+        } else if (next_lwt) {
+          lwt = next_lwt;
+          next_lwt = NULL;
+        } else {
+          // then go for implicit tasks
+          taskdata = taskdata->td_parent;
+          if (taskdata) {
+            next_lwt = LWT_FROM_TEAM(taskdata->td_team);
+          }
         }
       }
       depth--;
@@ -132,29 +179,14 @@ ompt_task_info_t *__ompt_get_taskinfo(int depth) {
 // thread support
 //----------------------------------------------------------
 
-ompt_parallel_id_t __ompt_thread_id_new() {
-  static uint64_t ompt_thread_id = 1;
-  return NEXT_ID(&ompt_thread_id, 0);
-}
-
-void __ompt_thread_begin(ompt_thread_type_t thread_type, int gtid) {
-  ompt_callbacks.ompt_callback(ompt_event_thread_begin)(
-      thread_type, GTID_TO_OMPT_THREAD_ID(gtid));
-}
-
-void __ompt_thread_end(ompt_thread_type_t thread_type, int gtid) {
-  ompt_callbacks.ompt_callback(ompt_event_thread_end)(
-      thread_type, GTID_TO_OMPT_THREAD_ID(gtid));
-}
-
-ompt_thread_id_t __ompt_get_thread_id_internal() {
-  // FIXME: until we have a better way of assigning ids, use __kmp_get_gtid
-  // since the return value might be negative, we need to test that before
-  // assigning it to an ompt_thread_id_t, which is unsigned.
-  int id = __kmp_get_gtid();
-  assert(id >= 0);
-
-  return GTID_TO_OMPT_THREAD_ID(id);
+ompt_data_t *__ompt_get_thread_data_internal() {
+  if (__kmp_get_gtid() >= 0) {
+    kmp_info_t *thread = ompt_get_thread();
+    if (thread == NULL)
+      return NULL;
+    return &(thread->th.ompt_thread_info.thread_data);
+  }
+  return NULL;
 }
 
 //----------------------------------------------------------
@@ -162,13 +194,12 @@ ompt_thread_id_t __ompt_get_thread_id_internal() {
 //----------------------------------------------------------
 
 void __ompt_thread_assign_wait_id(void *variable) {
-  int gtid = __kmp_gtid_get_specific();
-  kmp_info_t *ti = ompt_get_thread_gtid(gtid);
+  kmp_info_t *ti = ompt_get_thread();
 
   ti->th.ompt_thread_info.wait_id = (ompt_wait_id_t)variable;
 }
 
-ompt_state_t __ompt_get_state_internal(ompt_wait_id_t *ompt_wait_id) {
+omp_state_t __ompt_get_state_internal(ompt_wait_id_t *ompt_wait_id) {
   kmp_info_t *ti = ompt_get_thread();
 
   if (ti) {
@@ -176,46 +207,26 @@ ompt_state_t __ompt_get_state_internal(ompt_wait_id_t *ompt_wait_id) {
       *ompt_wait_id = ti->th.ompt_thread_info.wait_id;
     return ti->th.ompt_thread_info.state;
   }
-  return ompt_state_undefined;
-}
-
-//----------------------------------------------------------
-// idle frame support
-//----------------------------------------------------------
-
-void *__ompt_get_idle_frame_internal(void) {
-  kmp_info_t *ti = ompt_get_thread();
-  return ti ? ti->th.ompt_thread_info.idle_frame : NULL;
+  return omp_state_undefined;
 }
 
 //----------------------------------------------------------
 // parallel region support
 //----------------------------------------------------------
 
-ompt_parallel_id_t __ompt_parallel_id_new(int gtid) {
-  static uint64_t ompt_parallel_id = 1;
-  return gtid >= 0 ? NEXT_ID(&ompt_parallel_id, gtid) : 0;
-}
-
-void *__ompt_get_parallel_function_internal(int depth) {
-  ompt_team_info_t *info = __ompt_get_teaminfo(depth, NULL);
-  void *function = info ? info->microtask : NULL;
-  return function;
-}
-
-ompt_parallel_id_t __ompt_get_parallel_id_internal(int depth) {
-  ompt_team_info_t *info = __ompt_get_teaminfo(depth, NULL);
-  ompt_parallel_id_t id = info ? info->parallel_id : 0;
-  return id;
-}
-
-int __ompt_get_parallel_team_size_internal(int depth) {
-  // initialize the return value with the error value.
-  // if there is a team at the specified depth, the default
-  // value will be overwritten the size of that team.
-  int size = -1;
-  (void)__ompt_get_teaminfo(depth, &size);
-  return size;
+int __ompt_get_parallel_info_internal(int ancestor_level,
+                                      ompt_data_t **parallel_data,
+                                      int *team_size) {
+  ompt_team_info_t *info;
+  if (team_size) {
+    info = __ompt_get_teaminfo(ancestor_level, team_size);
+  } else {
+    info = __ompt_get_teaminfo(ancestor_level, NULL);
+  }
+  if (parallel_data) {
+    *parallel_data = info ? &(info->parallel_data) : NULL;
+  }
+  return info ? 2 : 0;
 }
 
 //----------------------------------------------------------
@@ -223,60 +234,182 @@ int __ompt_get_parallel_team_size_internal(int depth) {
 //----------------------------------------------------------
 
 void __ompt_lw_taskteam_init(ompt_lw_taskteam_t *lwt, kmp_info_t *thr, int gtid,
-                             void *microtask, ompt_parallel_id_t ompt_pid) {
-  lwt->ompt_team_info.parallel_id = ompt_pid;
-  lwt->ompt_team_info.microtask = microtask;
-  lwt->ompt_task_info.task_id = 0;
+                             ompt_data_t *ompt_pid, void *codeptr) {
+  // initialize parallel_data with input, return address to parallel_data on
+  // exit
+  lwt->ompt_team_info.parallel_data = *ompt_pid;
+  lwt->ompt_team_info.master_return_address = codeptr;
+  lwt->ompt_task_info.task_data.value = 0;
   lwt->ompt_task_info.frame.reenter_runtime_frame = NULL;
   lwt->ompt_task_info.frame.exit_runtime_frame = NULL;
-  lwt->ompt_task_info.function = NULL;
+  lwt->ompt_task_info.scheduling_parent = NULL;
+  lwt->ompt_task_info.deps = NULL;
+  lwt->ompt_task_info.ndeps = 0;
+  lwt->heap = 0;
   lwt->parent = 0;
 }
 
-void __ompt_lw_taskteam_link(ompt_lw_taskteam_t *lwt, kmp_info_t *thr) {
-  ompt_lw_taskteam_t *my_parent = thr->th.th_team->t.ompt_serialized_team_info;
-  lwt->parent = my_parent;
-  thr->th.th_team->t.ompt_serialized_team_info = lwt;
+void __ompt_lw_taskteam_link(ompt_lw_taskteam_t *lwt, kmp_info_t *thr,
+                             int on_heap) {
+  ompt_lw_taskteam_t *link_lwt = lwt;
+  if (thr->th.th_team->t.t_serialized >
+      1) { // we already have a team, so link the new team and swap values
+    if (on_heap) { // the lw_taskteam cannot stay on stack, allocate it on heap
+      link_lwt =
+          (ompt_lw_taskteam_t *)__kmp_allocate(sizeof(ompt_lw_taskteam_t));
+    }
+    link_lwt->heap = on_heap;
+
+    // would be swap in the (on_stack) case.
+    ompt_team_info_t tmp_team = lwt->ompt_team_info;
+    link_lwt->ompt_team_info = *OMPT_CUR_TEAM_INFO(thr);
+    *OMPT_CUR_TEAM_INFO(thr) = tmp_team;
+
+    ompt_task_info_t tmp_task = lwt->ompt_task_info;
+    link_lwt->ompt_task_info = *OMPT_CUR_TASK_INFO(thr);
+    *OMPT_CUR_TASK_INFO(thr) = tmp_task;
+
+    // link the taskteam into the list of taskteams:
+    ompt_lw_taskteam_t *my_parent =
+        thr->th.th_team->t.ompt_serialized_team_info;
+    link_lwt->parent = my_parent;
+    thr->th.th_team->t.ompt_serialized_team_info = link_lwt;
+  } else {
+    // this is the first serialized team, so we just store the values in the
+    // team and drop the taskteam-object
+    *OMPT_CUR_TEAM_INFO(thr) = lwt->ompt_team_info;
+    *OMPT_CUR_TASK_INFO(thr) = lwt->ompt_task_info;
+  }
 }
 
-ompt_lw_taskteam_t *__ompt_lw_taskteam_unlink(kmp_info_t *thr) {
+void __ompt_lw_taskteam_unlink(kmp_info_t *thr) {
   ompt_lw_taskteam_t *lwtask = thr->th.th_team->t.ompt_serialized_team_info;
-  if (lwtask)
+  if (lwtask) {
    thr->th.th_team->t.ompt_serialized_team_info = lwtask->parent;
-  return lwtask;
+
+    ompt_team_info_t tmp_team = lwtask->ompt_team_info;
+    lwtask->ompt_team_info = *OMPT_CUR_TEAM_INFO(thr);
+    *OMPT_CUR_TEAM_INFO(thr) = tmp_team;
+
+    ompt_task_info_t tmp_task = lwtask->ompt_task_info;
+    lwtask->ompt_task_info = *OMPT_CUR_TASK_INFO(thr);
+    *OMPT_CUR_TASK_INFO(thr) = tmp_task;
+
+    if (lwtask->heap) {
+      __kmp_free(lwtask);
+      lwtask = NULL;
+    }
+  }
+  // return lwtask;
 }
 
 //----------------------------------------------------------
 // task support
 //----------------------------------------------------------
 
-ompt_task_id_t __ompt_task_id_new(int gtid) {
-  static uint64_t ompt_task_id = 1;
-  return NEXT_ID(&ompt_task_id, gtid);
-}
+int __ompt_get_task_info_internal(int ancestor_level, int *type,
+                                  ompt_data_t **task_data,
+                                  ompt_frame_t **task_frame,
+                                  ompt_data_t **parallel_data,
+                                  int *thread_num) {
+  if (ancestor_level < 0)
+    return 0;
 
-ompt_task_id_t __ompt_get_task_id_internal(int depth) {
-  ompt_task_info_t *info = __ompt_get_taskinfo(depth);
-  ompt_task_id_t task_id = info ? info->task_id : 0;
-  return task_id;
-}
+  // copied from __ompt_get_scheduling_taskinfo
+  ompt_task_info_t *info = NULL;
+  ompt_team_info_t *team_info = NULL;
+  kmp_info_t *thr = ompt_get_thread();
 
-void *__ompt_get_task_function_internal(int depth) {
-  ompt_task_info_t *info = __ompt_get_taskinfo(depth);
-  void *function = info ? info->function : NULL;
-  return function;
-}
+  if (thr) {
+    kmp_taskdata_t *taskdata = thr->th.th_current_task;
+    if (taskdata == NULL)
+      return 0;
+    kmp_team *team = thr->th.th_team;
+    if (team == NULL)
+      return 0;
+    ompt_lw_taskteam_t *lwt = NULL,
+                       *next_lwt = LWT_FROM_TEAM(taskdata->td_team);
+
+    while (ancestor_level > 0) {
+      // next lightweight team (if any)
+      if (lwt)
+        lwt = lwt->parent;
+
+      // next heavyweight team (if any) after
+      // lightweight teams are exhausted
+      if (!lwt && taskdata) {
+        // first try scheduling parent (for explicit task scheduling)
+        if (taskdata->ompt_task_info.scheduling_parent) {
+          taskdata = taskdata->ompt_task_info.scheduling_parent;
+        } else if (next_lwt) {
+          lwt = next_lwt;
+          next_lwt = NULL;
+        } else {
+          // then go for implicit tasks
+          taskdata = taskdata->td_parent;
+          if (team == NULL)
+            return 0;
+          team = team->t.t_parent;
+          if (taskdata) {
+            next_lwt = LWT_FROM_TEAM(taskdata->td_team);
+          }
+        }
+      }
+      ancestor_level--;
+    }
 
-ompt_frame_t *__ompt_get_task_frame_internal(int depth) {
-  ompt_task_info_t *info = __ompt_get_taskinfo(depth);
-  ompt_frame_t *frame = info ? frame = &info->frame : NULL;
-  return frame;
+    if (lwt) {
+      info = &lwt->ompt_task_info;
+      team_info = &lwt->ompt_team_info;
+      if (type) {
+        *type = ompt_task_implicit;
+      }
+    } else if (taskdata) {
+      info = &taskdata->ompt_task_info;
+      team_info = &team->t.ompt_team_info;
+      if (type) {
+        if (taskdata->td_parent) {
+          *type = (taskdata->td_flags.tasktype ? ompt_task_explicit
                                               : ompt_task_implicit) |
+                  TASK_TYPE_DETAILS_FORMAT(taskdata);
+        } else {
+          *type = ompt_task_initial;
+        }
+      }
+    }
+    if (task_data) {
+      *task_data = info ? &info->task_data : NULL;
+    }
+    if (task_frame) {
+      // OpenMP spec asks for the scheduling task to be returned.
+      *task_frame = info ? &info->frame : NULL;
+    }
+    if (parallel_data) {
+      *parallel_data = team_info ? &(team_info->parallel_data) : NULL;
+    }
+    return info ? 2 : 0;
+  }
+  return 0;
 }
 
 //----------------------------------------------------------
 // team support
 //----------------------------------------------------------
 
-void __ompt_team_assign_id(kmp_team_t *team, ompt_parallel_id_t ompt_pid) {
-  team->t.ompt_team_info.parallel_id = ompt_pid;
+void __ompt_team_assign_id(kmp_team_t *team, ompt_data_t ompt_pid) {
+  team->t.ompt_team_info.parallel_data = ompt_pid;
+}
+
+//----------------------------------------------------------
+// misc
+//----------------------------------------------------------
+
+static uint64_t __ompt_get_unique_id_internal() {
+  static uint64_t thread = 1;
+  static THREAD_LOCAL uint64_t ID = 0;
+  if (ID == 0) {
+    uint64_t new_thread = KMP_TEST_THEN_INC64((kmp_int64 *)&thread);
+    ID = new_thread << (sizeof(uint64_t) * 8 - OMPT_THREAD_ID_BITS);
+  }
+  return ++ID;
}
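The ancestor walk this change threads through __ompt_get_teaminfo, __ompt_get_task_info_object, __ompt_get_scheduling_taskinfo, and __ompt_get_task_info_internal follows one pattern: exhaust the chain of lightweight (serialized) teams hanging off the current team before stepping up to the heavyweight parent, with next_lwt remembering the chain to enter after the step. A minimal self-contained model of that walk, using hypothetical stand-in types (LwTeam, Team) rather than the runtime's ompt_lw_taskteam_t and kmp_team structures:

#include <cstddef>

// Stand-in shapes for illustration only; not the runtime's real types.
struct LwTeam {
  LwTeam *parent; // next serialized team in this team's chain
};
struct Team {
  LwTeam *serialized; // head of this team's lightweight-team chain
  Team *parent;       // enclosing heavyweight team
};

// Walk `depth` ancestor levels, visiting each team's lightweight chain
// before stepping to its heavyweight parent (mirrors the diff's loops).
void *ancestor(Team *team, int depth) {
  if (team == NULL)
    return NULL;
  LwTeam *lwt = NULL, *next_lwt = team->serialized;
  while (depth > 0) {
    if (lwt)
      lwt = lwt->parent; // next lightweight team (if any)
    if (!lwt && team) {
      if (next_lwt) { // enter the pending lightweight chain first
        lwt = next_lwt;
        next_lwt = NULL;
      } else { // chain exhausted: step to the parent team
        team = team->parent;
        if (team)
          next_lwt = team->serialized;
      }
    }
    depth--;
  }
  if (lwt)
    return lwt;
  return team; // NULL if the hierarchy was exhausted
}

The pre-change code set lwt from the new team immediately on stepping up, which skipped the heavyweight team itself; keeping the chain in next_lwt until the lightweight levels are consumed is what makes each level of the hierarchy count exactly once.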
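The new __ompt_get_unique_id_internal in the misc section replaces the old NEXT_ID fetch-and-add on a single shared counter: each thread atomically claims a block of IDs once, by taking a thread number whose value occupies the top OMPT_THREAD_ID_BITS (16) bits, and then increments a thread-local counter with no further synchronization. A minimal sketch of the same scheme in portable C++, assuming std::atomic and thread_local stand in for KMP_TEST_THEN_INC64 and the THREAD_LOCAL macro:

#include <atomic>
#include <cstdint>

constexpr int kThreadIdBits = 16; // mirrors OMPT_THREAD_ID_BITS

uint64_t next_unique_id() {
  static std::atomic<uint64_t> next_thread{1};
  static thread_local uint64_t id = 0;
  if (id == 0) {
    // One atomic increment per thread, not per ID: claim a 2^48-wide block.
    uint64_t my_thread = next_thread.fetch_add(1);
    id = my_thread << (64 - kThreadIdBits);
  }
  // All later IDs on this thread come from the private block.
  return ++id;
}

The trade-off is contention for uniqueness headroom: the shared counter is touched once per thread instead of once per ID, at the cost of assuming no thread requests more than 2^48 IDs and no more than 2^16 threads claim blocks.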

