Diffstat (limited to 'openmp/runtime/src/kmp_runtime.cpp')
-rw-r--r--  openmp/runtime/src/kmp_runtime.cpp  541
1 file changed, 297 insertions(+), 244 deletions(-)
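The patch below moves kmp_runtime.cpp from the old prototype OMPT interface (a single `ompt_enabled` flag, `ompt_event_*` callbacks, and integer `ompt_parallel_id_t`/`ompt_task_id_t` identifiers) to the TR4-style interface (per-callback enablement bits, `ompt_callback_*` entry points, and tool-owned `ompt_data_t` slots carried in team and task structures). As a reading aid, here is a reduced, self-contained sketch of the guard-and-dispatch shape the new code uses at every site; every declaration in it is a simplified stand-in, not the runtime's real definition:

/* Stand-in illustration of the dispatch pattern the patch adopts; the real
 * types live in kmp.h / ompt-internal.h and are more elaborate. */
#include <stdint.h>
#include <stdio.h>

typedef union my_ompt_data_t { /* stand-in for ompt_data_t */
  uint64_t value; /* owned by the tool, opaque to the runtime */
  void *ptr;
} my_ompt_data_t;

/* One enablement bit per callback replaces the old global ompt_enabled
 * test, so a dispatch site only builds arguments when a tool asked for
 * that specific event. */
static struct {
  unsigned enabled : 1;
  unsigned ompt_callback_parallel_begin : 1;
} my_ompt_enabled;

typedef void (*parallel_begin_t)(my_ompt_data_t *parent_task_data,
                                 my_ompt_data_t *parallel_data, int team_size,
                                 const void *codeptr);
static struct {
  parallel_begin_t parallel_begin;
} my_ompt_callbacks;

/* Shape of the dispatch sites added throughout the diff. */
static void fork_site(my_ompt_data_t *parent_task_data, int team_size,
                      const void *codeptr) {
  my_ompt_data_t parallel_data;
  parallel_data.ptr = NULL; /* a tool may fill this in from the callback */
  if (my_ompt_enabled.enabled && my_ompt_enabled.ompt_callback_parallel_begin)
    my_ompt_callbacks.parallel_begin(parent_task_data, &parallel_data,
                                     team_size, codeptr);
}

int main(void) {
  my_ompt_data_t root_task;
  root_task.ptr = NULL;
  my_ompt_enabled.enabled = 1; /* pretend a tool attached... */
  fork_site(&root_task, 4, NULL); /* ...but not for parallel_begin: no call */
  printf("dispatch fires only when both bits are set\n");
  return 0;
}

This is why nearly every hunk below re-tests `ompt_enabled.ompt_callback_*` immediately before building callback arguments: frame addresses, team sizes, and return addresses are computed only when a tool registered that event.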
diff --git a/openmp/runtime/src/kmp_runtime.cpp b/openmp/runtime/src/kmp_runtime.cpp
index 3c408737ca5..bd3b8fef7ba 100644
--- a/openmp/runtime/src/kmp_runtime.cpp
+++ b/openmp/runtime/src/kmp_runtime.cpp
@@ -722,16 +722,6 @@ void __kmp_parallel_dxo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
   /* TODO replace with general release procedure */
   team->t.t_ordered.dt.t_value = ((tid + 1) % team->t.t_nproc);
 
-#if OMPT_SUPPORT && OMPT_BLAME
-  if (ompt_enabled &&
-      ompt_callbacks.ompt_callback(ompt_event_release_ordered)) {
-    /* accept blame for "ordered" waiting */
-    kmp_info_t *this_thread = __kmp_threads[gtid];
-    ompt_callbacks.ompt_callback(ompt_event_release_ordered)(
-        this_thread->th.ompt_thread_info.wait_id);
-  }
-#endif
-
   KMP_MB(); /* Flush all pending memory write invalidates. */
 }
 #endif /* BUILD_PARALLEL_ORDERED */
@@ -1204,6 +1194,28 @@ void __kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
   this_thr->th.th_set_proc_bind = proc_bind_default;
 #endif /* OMP_40_ENABLED */
 
+#if OMPT_SUPPORT
+  ompt_data_t ompt_parallel_data;
+  ompt_parallel_data.ptr = NULL;
+  ompt_data_t *implicit_task_data;
+  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
+  if (ompt_enabled.enabled &&
+      this_thr->th.ompt_thread_info.state != omp_state_overhead) {
+
+    ompt_task_info_t *parent_task_info;
+    parent_task_info = OMPT_CUR_TASK_INFO(this_thr);
+
+    parent_task_info->frame.reenter_runtime_frame = OMPT_GET_FRAME_ADDRESS(1);
+    if (ompt_enabled.ompt_callback_parallel_begin) {
+      int team_size = 1;
+
+      ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)(
+          &(parent_task_info->task_data), &(parent_task_info->frame),
+          &ompt_parallel_data, team_size, ompt_invoker_program, codeptr);
+    }
+  }
+#endif // OMPT_SUPPORT
+
   if (this_thr->th.th_team != serial_team) {
     // Nested level will be an index in the nested nthreads array
     int level = this_thr->th.th_team->t.t_level;
@@ -1215,13 +1227,9 @@ void __kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
 
     __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
 
-#if OMPT_SUPPORT
-    ompt_parallel_id_t ompt_parallel_id = __ompt_parallel_id_new(global_tid);
-#endif
-
     new_team = __kmp_allocate_team(this_thr->th.th_root, 1, 1,
 #if OMPT_SUPPORT
-                                   ompt_parallel_id,
+                                   ompt_parallel_data,
 #endif
 #if OMP_40_ENABLED
                                    proc_bind,
@@ -1316,11 +1324,6 @@ void __kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
     }
     this_thr->th.th_dispatch = serial_team->t.t_dispatch;
 
-#if OMPT_SUPPORT
-    ompt_parallel_id_t ompt_parallel_id = __ompt_parallel_id_new(global_tid);
-    __ompt_team_assign_id(serial_team, ompt_parallel_id);
-#endif
-
     KMP_MB();
 
   } else {
@@ -1364,17 +1367,41 @@ void __kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
 
   if (__kmp_env_consistency_check)
     __kmp_push_parallel(global_tid, NULL);
+#if OMPT_SUPPORT
+  serial_team->t.ompt_team_info.master_return_address = codeptr;
+  if (ompt_enabled.enabled &&
+      this_thr->th.ompt_thread_info.state != omp_state_overhead) {
+    OMPT_CUR_TASK_INFO(this_thr)
+        ->frame.exit_runtime_frame = OMPT_GET_FRAME_ADDRESS(1);
+
+    ompt_lw_taskteam_t lw_taskteam;
+    __ompt_lw_taskteam_init(&lw_taskteam, this_thr, global_tid,
+                            &ompt_parallel_data, codeptr);
+
+    __ompt_lw_taskteam_link(&lw_taskteam, this_thr, 1);
+    // don't use lw_taskteam after linking. content was swapped
+
+    /* OMPT implicit task begin */
+    implicit_task_data = OMPT_CUR_TASK_DATA(this_thr);
+    if (ompt_enabled.ompt_callback_implicit_task) {
+      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
+          ompt_scope_begin, OMPT_CUR_TEAM_DATA(this_thr),
+          OMPT_CUR_TASK_DATA(this_thr), 1, __kmp_tid_from_gtid(global_tid));
+    }
+
+    /* OMPT state */
+    this_thr->th.ompt_thread_info.state = omp_state_work_parallel;
+    OMPT_CUR_TASK_INFO(this_thr)
+        ->frame.exit_runtime_frame = OMPT_GET_FRAME_ADDRESS(1);
+  }
+#endif
 }
 
 /* most of the work for a fork */
 /* return true if we really went parallel, false if serialized */
 int __kmp_fork_call(ident_t *loc, int gtid,
                     enum fork_context_e call_context, // Intel, GNU, ...
-                    kmp_int32 argc,
-#if OMPT_SUPPORT
-                    void *unwrapped_task,
-#endif
-                    microtask_t microtask, launch_t invoker,
+                    kmp_int32 argc, microtask_t microtask, launch_t invoker,
 /* TODO: revert workaround for Intel(R) 64 tracker #96 */
 #if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
                     va_list *ap
@@ -1432,16 +1459,17 @@ int __kmp_fork_call(ident_t *loc, int gtid,
     master_set_numthreads = master_th->th.th_set_nproc;
 
 #if OMPT_SUPPORT
-    ompt_parallel_id_t ompt_parallel_id;
-    ompt_task_id_t ompt_task_id;
+    ompt_data_t ompt_parallel_data;
+    ompt_parallel_data.ptr = NULL;
+    ompt_data_t *parent_task_data;
     ompt_frame_t *ompt_frame;
-    ompt_task_id_t my_task_id;
-    ompt_parallel_id_t my_parallel_id;
+    ompt_data_t *implicit_task_data;
+    void *return_address = NULL;
 
-    if (ompt_enabled) {
-      ompt_parallel_id = __ompt_parallel_id_new(gtid);
-      ompt_task_id = __ompt_get_task_id_internal(0);
-      ompt_frame = __ompt_get_task_frame_internal(0);
+    if (ompt_enabled.enabled) {
+      __ompt_get_task_info_internal(0, NULL, &parent_task_data, &ompt_frame,
+                                    NULL, NULL);
+      return_address = OMPT_LOAD_RETURN_ADDRESS(gtid);
     }
 #endif
@@ -1465,13 +1493,16 @@ int __kmp_fork_call(ident_t *loc, int gtid,
 #endif
 
 #if OMPT_SUPPORT
-  if (ompt_enabled &&
-      ompt_callbacks.ompt_callback(ompt_event_parallel_begin)) {
-    int team_size = master_set_numthreads;
-
-    ompt_callbacks.ompt_callback(ompt_event_parallel_begin)(
-        ompt_task_id, ompt_frame, ompt_parallel_id, team_size, unwrapped_task,
-        OMPT_INVOKER(call_context));
+  if (ompt_enabled.enabled) {
+    if (ompt_enabled.ompt_callback_parallel_begin) {
+      int team_size = master_set_numthreads
+                          ? master_set_numthreads
+                          : get__nproc_2(parent_team, master_tid);
+      ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)(
+          parent_task_data, ompt_frame, &ompt_parallel_data, team_size,
+          OMPT_INVOKER(call_context), return_address);
+    }
+    master_th->th.ompt_thread_info.state = omp_state_overhead;
   }
 #endif
@@ -1508,27 +1539,25 @@ int __kmp_fork_call(ident_t *loc, int gtid,
 
       ompt_lw_taskteam_t lw_taskteam;
 
-      if (ompt_enabled) {
-        __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid, unwrapped_task,
-                                ompt_parallel_id);
-        lw_taskteam.ompt_task_info.task_id = __ompt_task_id_new(gtid);
+      if (ompt_enabled.enabled) {
+        __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
+                                &ompt_parallel_data, return_address);
         exit_runtime_p = &(lw_taskteam.ompt_task_info.frame.exit_runtime_frame);
 
-        __ompt_lw_taskteam_link(&lw_taskteam, master_th);
+        __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
+        // don't use lw_taskteam after linking. content was swapped
 
-#if OMPT_TRACE
         /* OMPT implicit task begin */
-        my_task_id = lw_taskteam.ompt_task_info.task_id;
-        my_parallel_id = parent_team->t.ompt_team_info.parallel_id;
-        if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) {
-          ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(
-              my_parallel_id, my_task_id);
+        implicit_task_data = OMPT_CUR_TASK_DATA(master_th);
+        if (ompt_enabled.ompt_callback_implicit_task) {
+          ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
+              ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
+              implicit_task_data, 1, __kmp_tid_from_gtid(gtid));
         }
-#endif
 
         /* OMPT state */
-        master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
+        master_th->th.ompt_thread_info.state = omp_state_work_parallel;
       } else {
        exit_runtime_p = &dummy;
       }
@@ -1547,34 +1576,27 @@ int __kmp_fork_call(ident_t *loc, int gtid,
 
 #if OMPT_SUPPORT
       *exit_runtime_p = NULL;
-      if (ompt_enabled) {
-#if OMPT_TRACE
-        lw_taskteam.ompt_task_info.frame.exit_runtime_frame = NULL;
-
-        if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
-          ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
-              ompt_parallel_id, ompt_task_id);
+      if (ompt_enabled.enabled) {
+        OMPT_CUR_TASK_INFO(master_th)->frame.exit_runtime_frame = NULL;
+        if (ompt_enabled.ompt_callback_implicit_task) {
+          ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
+              ompt_scope_end, NULL, implicit_task_data, 1,
+              __kmp_tid_from_gtid(gtid));
        }
-        __ompt_lw_taskteam_unlink(master_th);
-        // reset clear the task id only after unlinking the task
-        lw_taskteam.ompt_task_info.task_id = ompt_task_id_none;
-#endif
-        if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) {
-          ompt_callbacks.ompt_callback(ompt_event_parallel_end)(
-              ompt_parallel_id, ompt_task_id, OMPT_INVOKER(call_context));
+        if (ompt_enabled.ompt_callback_parallel_end) {
+          ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
+              OMPT_CUR_TEAM_DATA(master_th), OMPT_CUR_TASK_DATA(master_th),
+              OMPT_INVOKER(call_context), return_address);
         }
-        master_th->th.ompt_thread_info.state = ompt_state_overhead;
+        master_th->th.ompt_thread_info.state = omp_state_overhead;
       }
 #endif
       return TRUE;
     }
 
     parent_team->t.t_pkfn = microtask;
-#if OMPT_SUPPORT
-    parent_team->t.ompt_team_info.microtask = unwrapped_task;
-#endif
     parent_team->t.t_invoke = invoker;
     KMP_TEST_THEN_INC32((kmp_int32 *)&root->r.r_in_parallel);
     parent_team->t.t_active_level++;
@@ -1726,28 +1748,27 @@ int __kmp_fork_call(ident_t *loc, int gtid,
 #if OMPT_SUPPORT
         void *dummy;
         void **exit_runtime_p;
+        ompt_task_info_t *task_info;
 
         ompt_lw_taskteam_t lw_taskteam;
 
-        if (ompt_enabled) {
+        if (ompt_enabled.enabled) {
           __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
-                                  unwrapped_task, ompt_parallel_id);
-          lw_taskteam.ompt_task_info.task_id = __ompt_task_id_new(gtid);
-          exit_runtime_p =
-              &(lw_taskteam.ompt_task_info.frame.exit_runtime_frame);
-
-          __ompt_lw_taskteam_link(&lw_taskteam, master_th);
-
-#if OMPT_TRACE
-          my_task_id = lw_taskteam.ompt_task_info.task_id;
-          if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) {
-            ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(
-                ompt_parallel_id, my_task_id);
+                                  &ompt_parallel_data, return_address);
+
+          __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
+          // don't use lw_taskteam after linking. content was swapped
+
+          task_info = OMPT_CUR_TASK_INFO(master_th);
+          exit_runtime_p = &(task_info->frame.exit_runtime_frame);
+          if (ompt_enabled.ompt_callback_implicit_task) {
+            ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
+                ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
+                &(task_info->task_data), 1, __kmp_tid_from_gtid(gtid));
           }
-#endif
 
           /* OMPT state */
-          master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
+          master_th->th.ompt_thread_info.state = omp_state_work_parallel;
         } else {
           exit_runtime_p = &dummy;
         }
@@ -1766,26 +1787,21 @@ int __kmp_fork_call(ident_t *loc, int gtid,
         }
 
 #if OMPT_SUPPORT
-        *exit_runtime_p = NULL;
-        if (ompt_enabled) {
-          lw_taskteam.ompt_task_info.frame.exit_runtime_frame = NULL;
-
-#if OMPT_TRACE
-          if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
-            ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
-                ompt_parallel_id, ompt_task_id);
+        if (ompt_enabled.enabled) {
+          exit_runtime_p = NULL;
+          if (ompt_enabled.ompt_callback_implicit_task) {
+            ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
+                ompt_scope_end, NULL, &(task_info->task_data), 1,
+                __kmp_tid_from_gtid(gtid));
           }
-#endif
 
           __ompt_lw_taskteam_unlink(master_th);
-          // reset clear the task id only after unlinking the task
-          lw_taskteam.ompt_task_info.task_id = ompt_task_id_none;
-
-          if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) {
-            ompt_callbacks.ompt_callback(ompt_event_parallel_end)(
-                ompt_parallel_id, ompt_task_id, OMPT_INVOKER(call_context));
+          if (ompt_enabled.ompt_callback_parallel_end) {
+            ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
+                OMPT_CUR_TEAM_DATA(master_th), parent_task_data,
+                OMPT_INVOKER(call_context), return_address);
           }
-          master_th->th.ompt_thread_info.state = ompt_state_overhead;
+          master_th->th.ompt_thread_info.state = omp_state_overhead;
         }
 #endif
       } else if (microtask == (microtask_t)__kmp_teams_master) {
@@ -1834,30 +1850,28 @@ int __kmp_fork_call(ident_t *loc, int gtid,
 #if OMPT_SUPPORT
       void *dummy;
       void **exit_runtime_p;
+      ompt_task_info_t *task_info;
 
       ompt_lw_taskteam_t lw_taskteam;
 
-      if (ompt_enabled) {
+      if (ompt_enabled.enabled) {
         __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
-                                unwrapped_task, ompt_parallel_id);
-        lw_taskteam.ompt_task_info.task_id = __ompt_task_id_new(gtid);
-        exit_runtime_p =
-            &(lw_taskteam.ompt_task_info.frame.exit_runtime_frame);
-
-        __ompt_lw_taskteam_link(&lw_taskteam, master_th);
+                                &ompt_parallel_data, return_address);
+        __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
+        // don't use lw_taskteam after linking. content was swapped
+        task_info = OMPT_CUR_TASK_INFO(master_th);
+        exit_runtime_p = &(task_info->frame.exit_runtime_frame);
 
-#if OMPT_TRACE
         /* OMPT implicit task begin */
-        my_task_id = lw_taskteam.ompt_task_info.task_id;
-        my_parallel_id = ompt_parallel_id;
-        if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) {
-          ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(
-              my_parallel_id, my_task_id);
+        implicit_task_data = OMPT_CUR_TASK_DATA(master_th);
+        if (ompt_enabled.ompt_callback_implicit_task) {
+          ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
+              ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
+              implicit_task_data, 1, __kmp_tid_from_gtid(gtid));
        }
-#endif
 
         /* OMPT state */
-        master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
+        master_th->th.ompt_thread_info.state = omp_state_work_parallel;
       } else {
         exit_runtime_p = &dummy;
       }
@@ -1875,26 +1889,22 @@ int __kmp_fork_call(ident_t *loc, int gtid,
      }
 
 #if OMPT_SUPPORT
-      *exit_runtime_p = NULL;
-      if (ompt_enabled) {
-#if OMPT_TRACE
-        lw_taskteam.ompt_task_info.frame.exit_runtime_frame = NULL;
-
-        if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
-          ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
-              my_parallel_id, my_task_id);
+      if (ompt_enabled.enabled) {
+        *exit_runtime_p = NULL;
+        if (ompt_enabled.ompt_callback_implicit_task) {
+          ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
+              ompt_scope_end, NULL, &(task_info->task_data), 1,
+              __kmp_tid_from_gtid(gtid));
         }
-#endif
 
+        ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
         __ompt_lw_taskteam_unlink(master_th);
-        // reset clear the task id only after unlinking the task
-        lw_taskteam.ompt_task_info.task_id = ompt_task_id_none;
-
-        if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) {
-          ompt_callbacks.ompt_callback(ompt_event_parallel_end)(
-              ompt_parallel_id, ompt_task_id, OMPT_INVOKER(call_context));
+        if (ompt_enabled.ompt_callback_parallel_end) {
+          ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
+              &ompt_parallel_data, parent_task_data,
+              OMPT_INVOKER(call_context), return_address);
         }
-        master_th->th.ompt_thread_info.state = ompt_state_overhead;
+        master_th->th.ompt_thread_info.state = omp_state_overhead;
       }
 #endif
 #if OMP_40_ENABLED
@@ -1902,14 +1912,13 @@ int __kmp_fork_call(ident_t *loc, int gtid,
 #endif /* OMP_40_ENABLED */
   } else if (call_context == fork_context_gnu) {
 #if OMPT_SUPPORT
-    ompt_lw_taskteam_t *lwt =
-        (ompt_lw_taskteam_t *)__kmp_allocate(sizeof(ompt_lw_taskteam_t));
-    __ompt_lw_taskteam_init(lwt, master_th, gtid, unwrapped_task,
-                            ompt_parallel_id);
+    ompt_lw_taskteam_t lwt;
+    __ompt_lw_taskteam_init(&lwt, master_th, gtid, &ompt_parallel_data,
+                            return_address);
 
-    lwt->ompt_task_info.task_id = __ompt_task_id_new(gtid);
-    lwt->ompt_task_info.frame.exit_runtime_frame = NULL;
-    __ompt_lw_taskteam_link(lwt, master_th);
+    lwt.ompt_task_info.frame.exit_runtime_frame = NULL;
+    __ompt_lw_taskteam_link(&lwt, master_th, 1);
+// don't use lw_taskteam after linking. content was swapped
 #endif
 
     // we were called from GNU native code
@@ -2004,7 +2013,7 @@ int __kmp_fork_call(ident_t *loc, int gtid,
       KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n"));
       team = __kmp_allocate_team(root, nthreads, nthreads,
 #if OMPT_SUPPORT
-                                 ompt_parallel_id,
+                                 ompt_parallel_data,
 #endif
 #if OMP_40_ENABLED
                                 proc_bind,
@@ -2015,7 +2024,7 @@ int __kmp_fork_call(ident_t *loc, int gtid,
       KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n"));
       team = __kmp_allocate_team(root, nthreads, nthreads,
 #if OMPT_SUPPORT
-                                 ompt_parallel_id,
+                                 ompt_parallel_data,
 #endif
 #if OMP_40_ENABLED
                                 proc_bind,
@@ -2033,7 +2042,8 @@ int __kmp_fork_call(ident_t *loc, int gtid,
     KMP_CHECK_UPDATE(team->t.t_parent, parent_team);
     KMP_CHECK_UPDATE_SYNC(team->t.t_pkfn, microtask);
 #if OMPT_SUPPORT
-    KMP_CHECK_UPDATE_SYNC(team->t.ompt_team_info.microtask, unwrapped_task);
+    KMP_CHECK_UPDATE_SYNC(team->t.ompt_team_info.master_return_address,
+                          return_address);
 #endif
     KMP_CHECK_UPDATE(team->t.t_invoke, invoker); // TODO move to root, maybe
     // TODO: parent_team->t.t_level == INT_MAX ???
@@ -2167,7 +2177,7 @@ int __kmp_fork_call(ident_t *loc, int gtid,
                             &master_th->th.th_current_task->td_icvs, loc);
 
 #if OMPT_SUPPORT
-  master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
+  master_th->th.ompt_thread_info.state = omp_state_work_parallel;
 #endif
 
   __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
@@ -2251,8 +2261,8 @@ int __kmp_fork_call(ident_t *loc, int gtid,
   KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid));
 
 #if OMPT_SUPPORT
-  if (ompt_enabled) {
-    master_th->th.ompt_thread_info.state = ompt_state_overhead;
+  if (ompt_enabled.enabled) {
+    master_th->th.ompt_thread_info.state = omp_state_overhead;
   }
 #endif
@@ -2264,17 +2274,18 @@ static inline void __kmp_join_restore_state(kmp_info_t *thread,
                                             kmp_team_t *team) {
   // restore state outside the region
   thread->th.ompt_thread_info.state =
-      ((team->t.t_serialized) ? ompt_state_work_serial
-                              : ompt_state_work_parallel);
+      ((team->t.t_serialized) ? omp_state_work_serial
+                              : omp_state_work_parallel);
 }
 
-static inline void __kmp_join_ompt(kmp_info_t *thread, kmp_team_t *team,
-                                   ompt_parallel_id_t parallel_id,
-                                   fork_context_e fork_context) {
-  ompt_task_info_t *task_info = __ompt_get_taskinfo(0);
-  if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) {
-    ompt_callbacks.ompt_callback(ompt_event_parallel_end)(
-        parallel_id, task_info->task_id, OMPT_INVOKER(fork_context));
+static inline void __kmp_join_ompt(int gtid, kmp_info_t *thread,
+                                   kmp_team_t *team, ompt_data_t *parallel_data,
+                                   fork_context_e fork_context, void *codeptr) {
+  ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
+  if (ompt_enabled.ompt_callback_parallel_end) {
+    ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
+        parallel_data, &(task_info->task_data), OMPT_INVOKER(fork_context),
+        codeptr);
   }
 
   task_info->frame.reenter_runtime_frame = NULL;
@@ -2311,8 +2322,8 @@ void __kmp_join_call(ident_t *loc, int gtid
   master_th->th.th_ident = loc;
 
 #if OMPT_SUPPORT
-  if (ompt_enabled) {
-    master_th->th.ompt_thread_info.state = ompt_state_overhead;
+  if (ompt_enabled.enabled) {
+    master_th->th.ompt_thread_info.state = omp_state_overhead;
  }
 #endif
@@ -2349,7 +2360,7 @@ void __kmp_join_call(ident_t *loc, int gtid
     __kmpc_end_serialized_parallel(loc, gtid);
 
 #if OMPT_SUPPORT
-    if (ompt_enabled) {
+    if (ompt_enabled.enabled) {
      __kmp_join_restore_state(master_th, parent_team);
     }
 #endif
@@ -2377,7 +2388,8 @@ void __kmp_join_call(ident_t *loc, int gtid
   KMP_MB();
 
 #if OMPT_SUPPORT
-  ompt_parallel_id_t parallel_id = team->t.ompt_team_info.parallel_id;
+  ompt_data_t *parallel_data = &(team->t.ompt_team_info.parallel_data);
+  void *codeptr = team->t.ompt_team_info.master_return_address;
 #endif
 
 #if USE_ITT_BUILD
@@ -2449,8 +2461,9 @@ void __kmp_join_call(ident_t *loc, int gtid
    }
 
 #if OMPT_SUPPORT
-    if (ompt_enabled) {
-      __kmp_join_ompt(master_th, parent_team, parallel_id, fork_context);
+    if (ompt_enabled.enabled) {
+      __kmp_join_ompt(gtid, master_th, parent_team, parallel_data, fork_context,
+                      codeptr);
    }
 #endif
@@ -2479,15 +2492,18 @@ void __kmp_join_call(ident_t *loc, int gtid
   }
   KMP_DEBUG_ASSERT(root->r.r_in_parallel >= 0);
 
-#if OMPT_SUPPORT && OMPT_TRACE
-  if (ompt_enabled) {
-    ompt_task_info_t *task_info = __ompt_get_taskinfo(0);
-    if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
-      ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
-          parallel_id, task_info->task_id);
+#if OMPT_SUPPORT
+  if (ompt_enabled.enabled) {
+    ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
+    if (ompt_enabled.ompt_callback_implicit_task) {
+      int ompt_team_size = team->t.t_nproc;
+      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
+          ompt_scope_end, NULL, &(task_info->task_data), ompt_team_size,
+          __kmp_tid_from_gtid(gtid));
     }
+
     task_info->frame.exit_runtime_frame = NULL;
-    task_info->task_id = 0;
+    task_info->task_data = ompt_data_none;
   }
 #endif
@@ -2558,8 +2574,9 @@ void __kmp_join_call(ident_t *loc, int gtid
   __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
 
 #if OMPT_SUPPORT
-  if (ompt_enabled) {
-    __kmp_join_ompt(master_th, parent_team, parallel_id, fork_context);
+  if (ompt_enabled.enabled) {
+    __kmp_join_ompt(gtid, master_th, parent_team, parallel_data, fork_context,
+                    codeptr);
   }
 #endif
@@ -3154,7 +3171,7 @@ static void __kmp_initialize_root(kmp_root_t *root) {
                           1, // new_nproc
                           1, // max_nproc
 #if OMPT_SUPPORT
-                          0, // root parallel id
+                          ompt_data_none, // root parallel id
 #endif
 #if OMP_40_ENABLED
                           __kmp_nested_proc_bind.bind_types[0],
@@ -3195,7 +3212,7 @@ static void __kmp_initialize_root(kmp_root_t *root) {
                           1, // new_nproc
                          __kmp_dflt_team_nth_ub * 2, // max_nproc
 #if OMPT_SUPPORT
-                          0, // root parallel id
+                          ompt_data_none, // root parallel id
 #endif
 #if OMP_40_ENABLED
                           __kmp_nested_proc_bind.bind_types[0],
@@ -3734,6 +3751,9 @@ int __kmp_register_root(int initial_thread) {
    __kmp_print_thread_storage_map(root_thread, gtid);
  }
  root_thread->th.th_info.ds.ds_gtid = gtid;
+#if OMPT_SUPPORT
+  root_thread->th.ompt_thread_info.thread_data.ptr = NULL;
+#endif
  root_thread->th.th_root = root;
  if (__kmp_env_consistency_check) {
    root_thread->th.th_cons = __kmp_allocate_cons_stack(gtid);
@@ -3756,7 +3776,7 @@ int __kmp_register_root(int initial_thread) {
    root_thread->th.th_serial_team = __kmp_allocate_team(root, 1, 1,
#if OMPT_SUPPORT
-                                                         0, // root parallel id
+                                                         ompt_data_none, // root parallel id
#endif
#if OMP_40_ENABLED
                                                         proc_bind_default,
@@ -3826,6 +3846,29 @@ int __kmp_register_root(int initial_thread) {
 
  __kmp_root_counter++;
 
+#if OMPT_SUPPORT
+  if (!initial_thread && ompt_enabled.enabled) {
+
+    ompt_thread_t *root_thread = ompt_get_thread();
+
+    ompt_set_thread_state(root_thread, omp_state_overhead);
+
+    if (ompt_enabled.ompt_callback_thread_begin) {
+      ompt_callbacks.ompt_callback(ompt_callback_thread_begin)(
+          ompt_thread_initial, __ompt_get_thread_data_internal());
+    }
+    ompt_data_t *task_data;
+    __ompt_get_task_info_internal(0, NULL, &task_data, NULL, NULL, NULL);
+    if (ompt_enabled.ompt_callback_task_create) {
+      ompt_callbacks.ompt_callback(ompt_callback_task_create)(
+          NULL, NULL, task_data, ompt_task_initial, 0, NULL);
+      // initial task has nothing to return to
+    }
+
+    ompt_set_thread_state(root_thread, omp_state_work_serial);
+  }
+#endif
+
  KMP_MB();
  __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
@@ -3909,9 +3952,9 @@ static int __kmp_reset_root(int gtid, kmp_root_t *root) {
#endif /* KMP_OS_WINDOWS */
 
#if OMPT_SUPPORT
-  if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_thread_end)) {
-    int gtid = __kmp_get_gtid();
-    __ompt_thread_end(ompt_thread_initial, gtid);
+  if (ompt_enabled.ompt_callback_thread_end) {
+    ompt_callbacks.ompt_callback(ompt_callback_thread_end)(
+        &(root->r.r_uber_thread->th.ompt_thread_info.thread_data));
  }
#endif
@@ -3961,7 +4004,7 @@ void __kmp_unregister_root_current_thread(int gtid) {
  if (task_team != NULL && task_team->tt.tt_found_proxy_tasks) {
#if OMPT_SUPPORT
    // the runtime is shutting down so we won't report any events
-    thread->th.ompt_thread_info.state = ompt_state_undefined;
+    thread->th.ompt_thread_info.state = omp_state_undefined;
#endif
    __kmp_task_team_wait(thread, team USE_ITT_BUILD_ARG(NULL));
  }
@@ -4282,7 +4325,7 @@ kmp_info_t *__kmp_allocate_thread(kmp_root_t *root, kmp_team_t *team,
  new_thr->th.th_serial_team = serial_team =
      (kmp_team_t *)__kmp_allocate_team(root, 1, 1,
#if OMPT_SUPPORT
-                                        0, // root parallel id
+                                        ompt_data_none, // root parallel id
#endif
#if OMP_40_ENABLED
                                        proc_bind_default,
@@ -4813,7 +4856,7 @@ static void __kmp_partition_places(kmp_team_t *team, int update_master_only) {
 kmp_team_t *
 __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
#if OMPT_SUPPORT
-                    ompt_parallel_id_t ompt_parallel_id,
+                    ompt_data_t ompt_parallel_data,
#endif
#if OMP_40_ENABLED
                    kmp_proc_bind_t new_proc_bind,
@@ -5180,7 +5223,7 @@ __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
#endif
 
#if OMPT_SUPPORT
-      __ompt_team_assign_id(team, ompt_parallel_id);
+      __ompt_team_assign_id(team, ompt_parallel_data);
#endif
 
      KMP_MB();
@@ -5232,7 +5275,7 @@ __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
                    team->t.t_id));
 
#if OMPT_SUPPORT
-    __ompt_team_assign_id(team, ompt_parallel_id);
+    __ompt_team_assign_id(team, ompt_parallel_data);
#endif
 
    KMP_MB();
@@ -5296,7 +5339,7 @@ __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
#endif
 
#if OMPT_SUPPORT
-  __ompt_team_assign_id(team, ompt_parallel_id);
+  __ompt_team_assign_id(team, ompt_parallel_data);
  team->t.ompt_serialized_team_info = NULL;
#endif
@@ -5563,16 +5606,26 @@ void *__kmp_launch_thread(kmp_info_t *this_thr) {
  }
 
#if OMPT_SUPPORT
-  if (ompt_enabled) {
-    this_thr->th.ompt_thread_info.state = ompt_state_overhead;
+  ompt_data_t *thread_data;
+  if (ompt_enabled.enabled) {
+    thread_data = &(this_thr->th.ompt_thread_info.thread_data);
+    thread_data->ptr = NULL;
+
+    this_thr->th.ompt_thread_info.state = omp_state_overhead;
    this_thr->th.ompt_thread_info.wait_id = 0;
-    this_thr->th.ompt_thread_info.idle_frame = __builtin_frame_address(0);
-    if (ompt_callbacks.ompt_callback(ompt_event_thread_begin)) {
-      __ompt_thread_begin(ompt_thread_worker, gtid);
+    this_thr->th.ompt_thread_info.idle_frame = OMPT_GET_FRAME_ADDRESS(0);
+    if (ompt_enabled.ompt_callback_thread_begin) {
+      ompt_callbacks.ompt_callback(ompt_callback_thread_begin)(
+          ompt_thread_worker, thread_data);
    }
  }
#endif
 
+#if OMPT_SUPPORT
+  if (ompt_enabled.enabled) {
+    this_thr->th.ompt_thread_info.state = omp_state_idle;
+  }
+#endif
+
  /* This is the place where threads wait for work */
  while (!TCR_4(__kmp_global.g.g_done)) {
    KMP_DEBUG_ASSERT(this_thr == __kmp_threads[gtid]);
@@ -5581,18 +5634,12 @@ void *__kmp_launch_thread(kmp_info_t *this_thr) {
    /* wait for work to do */
    KA_TRACE(20, ("__kmp_launch_thread: T#%d waiting for work\n", gtid));
 
-#if OMPT_SUPPORT
-    if (ompt_enabled) {
-      this_thr->th.ompt_thread_info.state = ompt_state_idle;
-    }
-#endif
-
    /* No tid yet since not part of a team */
    __kmp_fork_barrier(gtid, KMP_GTID_DNE);
 
#if OMPT_SUPPORT
-    if (ompt_enabled) {
-      this_thr->th.ompt_thread_info.state = ompt_state_overhead;
+    if (ompt_enabled.enabled) {
+      this_thr->th.ompt_thread_info.state = omp_state_overhead;
    }
#endif
@@ -5600,14 +5647,6 @@ void *__kmp_launch_thread(kmp_info_t *this_thr) {
    /* have we been allocated? */
    if (TCR_SYNC_PTR(*pteam) && !TCR_4(__kmp_global.g.g_done)) {
-#if OMPT_SUPPORT
-      ompt_task_info_t *task_info;
-      ompt_parallel_id_t my_parallel_id;
-      if (ompt_enabled) {
-        task_info = __ompt_get_taskinfo(0);
-        my_parallel_id = (*pteam)->t.ompt_team_info.parallel_id;
-      }
-#endif
      /* we were just woken up, so run our new task */
      if (TCR_SYNC_PTR((*pteam)->t.t_pkfn) != NULL) {
        int rc;
@@ -5619,11 +5658,8 @@ void *__kmp_launch_thread(kmp_info_t *this_thr) {
        updateHWFPControl(*pteam);
 
#if OMPT_SUPPORT
-        if (ompt_enabled) {
-          this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
-          // Initialize OMPT task id for implicit task.
-          int tid = __kmp_tid_from_gtid(gtid);
-          task_info->task_id = __ompt_task_id_new(tid);
+        if (ompt_enabled.enabled) {
+          this_thr->th.ompt_thread_info.state = omp_state_work_parallel;
        }
#endif
@@ -5634,40 +5670,29 @@ void *__kmp_launch_thread(kmp_info_t *this_thr) {
        }
        KMP_ASSERT(rc);
 
-#if OMPT_SUPPORT
-        if (ompt_enabled) {
-          /* no frame set while outside task */
-          task_info->frame.exit_runtime_frame = NULL;
-
-          this_thr->th.ompt_thread_info.state = ompt_state_overhead;
-        }
-#endif
        KMP_MB();
        KA_TRACE(20, ("__kmp_launch_thread: T#%d(%d:%d) done microtask = %p\n",
                      gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid),
                      (*pteam)->t.t_pkfn));
      }
-      /* join barrier after parallel region */
-      __kmp_join_barrier(gtid);
-#if OMPT_SUPPORT && OMPT_TRACE
-      if (ompt_enabled) {
-        if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
-          // don't access *pteam here: it may have already been freed
-          // by the master thread behind the barrier (possible race)
-          ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
-              my_parallel_id, task_info->task_id);
-        }
-        task_info->frame.exit_runtime_frame = NULL;
-        task_info->task_id = 0;
+#if OMPT_SUPPORT
+      if (ompt_enabled.enabled) {
+        /* no frame set while outside task */
+        __ompt_get_task_info_object(0)->frame.exit_runtime_frame = NULL;
+
+        this_thr->th.ompt_thread_info.state = omp_state_overhead;
+        this_thr->th.ompt_thread_info.task_data = *OMPT_CUR_TASK_DATA(this_thr);
      }
#endif
+      /* join barrier after parallel region */
+      __kmp_join_barrier(gtid);
    }
  }
  TCR_SYNC_PTR((intptr_t)__kmp_global.g.g_done);
 
#if OMPT_SUPPORT
-  if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_thread_end)) {
-    __ompt_thread_end(ompt_thread_worker, gtid);
+  if (ompt_enabled.ompt_callback_thread_end) {
+    ompt_callbacks.ompt_callback(ompt_callback_thread_end)(thread_data);
  }
#endif
@@ -6925,26 +6950,27 @@ int __kmp_invoke_task_func(int gtid) {
#if OMPT_SUPPORT
  void *dummy;
  void **exit_runtime_p;
-  ompt_task_id_t my_task_id;
-  ompt_parallel_id_t my_parallel_id;
+  ompt_data_t *my_task_data;
+  ompt_data_t *my_parallel_data;
+  int ompt_team_size;
 
-  if (ompt_enabled) {
+  if (ompt_enabled.enabled) {
    exit_runtime_p = &(team->t.t_implicit_task_taskdata[tid]
                           .ompt_task_info.frame.exit_runtime_frame);
  } else {
    exit_runtime_p = &dummy;
  }
 
-#if OMPT_TRACE
-  my_task_id = team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id;
-  my_parallel_id = team->t.ompt_team_info.parallel_id;
-  if (ompt_enabled &&
-      ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) {
-    ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(my_parallel_id,
-                                                                 my_task_id);
+  my_task_data =
+      &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data);
+  my_parallel_data = &(team->t.ompt_team_info.parallel_data);
+  if (ompt_enabled.ompt_callback_implicit_task) {
+    ompt_team_size = team->t.t_nproc;
+    ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
+        ompt_scope_begin, my_parallel_data, my_task_data, ompt_team_size,
+        __kmp_tid_from_gtid(gtid));
  }
#endif
-#endif
 
  {
    KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
@@ -6991,9 +7017,6 @@ void __kmp_teams_master(int gtid) {
  SSC_MARK_FORKING();
#endif
  __kmp_fork_call(loc, gtid, fork_context_intel, team->t.t_argc,
-#if OMPT_SUPPORT
-                  (void *)thr->th.th_teams_microtask, // "unwrapped" task
-#endif
                  (microtask_t)thr->th.th_teams_microtask, // "wrapped" task
                  VOLATILE_CAST(launch_t) __kmp_invoke_task_func, NULL);
#if INCLUDE_SSC_MARKS
@@ -7170,6 +7193,36 @@ void __kmp_internal_join(ident_t *id, int gtid, kmp_team_t *team) {
#endif /* KMP_DEBUG */
 
  __kmp_join_barrier(gtid); /* wait for everyone */
+#if OMPT_SUPPORT
+  int ds_tid = this_thr->th.th_info.ds.ds_tid;
+  if (this_thr->th.ompt_thread_info.state == omp_state_wait_barrier_implicit) {
+    ompt_data_t *tId = OMPT_CUR_TASK_DATA(this_thr);
+    ompt_data_t *pId = OMPT_CUR_TEAM_DATA(this_thr);
+    this_thr->th.ompt_thread_info.state = omp_state_overhead;
+#if OMPT_OPTIONAL
+    void *codeptr = NULL;
+    if (KMP_MASTER_TID(ds_tid) &&
+        (ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait) ||
+         ompt_callbacks.ompt_callback(ompt_callback_sync_region)))
+      codeptr = OMPT_CUR_TEAM_INFO(this_thr)->master_return_address;
+
+    if (ompt_enabled.ompt_callback_sync_region_wait) {
+      ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
+          ompt_sync_region_barrier, ompt_scope_end, pId, tId, codeptr);
+    }
+    if (ompt_enabled.ompt_callback_sync_region) {
+      ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
+          ompt_sync_region_barrier, ompt_scope_end, pId, tId, codeptr);
+    }
+#endif
+    if (!KMP_MASTER_TID(ds_tid) && ompt_enabled.ompt_callback_implicit_task) {
+      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
+          ompt_scope_end, NULL, tId, 0, ds_tid);
+    }
+    // return to idle state
+    this_thr->th.ompt_thread_info.state = omp_state_overhead;
+  }
+#endif
 
  KMP_MB(); /* Flush all pending memory write invalidates. */
  KMP_ASSERT(this_thr->th.th_team == team);
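To watch these rewritten dispatch sites fire, a first-party tool can be attached to the runtime. The sketch below is written against the OMPT interface as it was finally standardized in OpenMP 5.0 (omp-tools.h); the TR4-era interface implemented by this revision still differs in details (for example, the ompt_invoker_t argument passed to ompt_callback_parallel_begin above, which 5.0 later folded into a flags word), so a tool built against this exact revision would transcribe its prototypes from the dispatch calls in the diff instead. The file name mytool.c is hypothetical.

/* mytool.c -- minimal OMPT tool sketch (OpenMP 5.0 interface). */
#include <omp-tools.h>
#include <stdio.h>

static void on_parallel_begin(ompt_data_t *encountering_task_data,
                              const ompt_frame_t *encountering_task_frame,
                              ompt_data_t *parallel_data,
                              unsigned int requested_parallelism, int flags,
                              const void *codeptr_ra) {
  printf("parallel begin: %u thread(s) requested, codeptr %p\n",
         requested_parallelism, codeptr_ra);
}

static void on_implicit_task(ompt_scope_endpoint_t endpoint,
                             ompt_data_t *parallel_data,
                             ompt_data_t *task_data,
                             unsigned int actual_parallelism,
                             unsigned int index, int flags) {
  printf("implicit task %s: thread %u of %u\n",
         endpoint == ompt_scope_begin ? "begin" : "end", index,
         actual_parallelism);
}

/* Called by the runtime once it has found ompt_start_tool. */
static int tool_initialize(ompt_function_lookup_t lookup,
                           int initial_device_num, ompt_data_t *tool_data) {
  ompt_set_callback_t ompt_set_callback =
      (ompt_set_callback_t)lookup("ompt_set_callback");
  /* Register the two callbacks whose dispatch sites this patch rewires. */
  ompt_set_callback(ompt_callback_parallel_begin,
                    (ompt_callback_t)on_parallel_begin);
  ompt_set_callback(ompt_callback_implicit_task,
                    (ompt_callback_t)on_implicit_task);
  return 1; /* nonzero keeps the tool active */
}

static void tool_finalize(ompt_data_t *tool_data) {}

/* The runtime looks up this symbol at startup, either in its own address
 * space or in the libraries named by OMP_TOOL_LIBRARIES. */
ompt_start_tool_result_t *ompt_start_tool(unsigned int omp_version,
                                          const char *runtime_version) {
  static ompt_start_tool_result_t result = {&tool_initialize, &tool_finalize,
                                            {0}};
  return &result;
}

With an OMPT-capable libomp, something along the lines of `clang -shared -fPIC mytool.c -o libmytool.so` followed by `OMP_TOOL_LIBRARIES=./libmytool.so ./your_omp_app` should print one parallel-begin line per region plus begin/end pairs for each implicit task, including the serialized regions now reported by __kmp_serialized_parallel above.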

