Diffstat (limited to 'openmp/runtime/src/kmp_runtime.cpp')
-rw-r--r--  openmp/runtime/src/kmp_runtime.cpp  |  541
1 file changed, 297 insertions(+), 244 deletions(-)
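This patch migrates the OMPT support in kmp_runtime.cpp from the old draft interface, which identified parallel regions and tasks by 64-bit IDs (ompt_parallel_id_t, ompt_task_id_t) and fired ompt_event_* callbacks, to the TR4-style interface built on tool-owned ompt_data_t slots, ompt_callback_* registration, and return addresses into user code (codeptr_ra). For orientation, a minimal sketch of the central type as the TR4/OpenMP 5.0 tool headers define it:

    typedef union ompt_data_t {
      uint64_t value; /* integer under tool control, initialized to 0 */
      void *ptr;      /* pointer under tool control, initialized to NULL */
    } ompt_data_t;

The runtime keeps one such slot per thread, parallel region, and task, and passes its address to every callback that concerns that entity; the all-zero sentinel ompt_data_none means "no tool data yet". This is why the hunks below stop minting IDs (__ompt_parallel_id_new) and instead thread ompt_data_t pointers through.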
diff --git a/openmp/runtime/src/kmp_runtime.cpp b/openmp/runtime/src/kmp_runtime.cpp
index 3c408737ca5..bd3b8fef7ba 100644
--- a/openmp/runtime/src/kmp_runtime.cpp
+++ b/openmp/runtime/src/kmp_runtime.cpp
@@ -722,16 +722,6 @@ void __kmp_parallel_dxo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
/* TODO replace with general release procedure */
team->t.t_ordered.dt.t_value = ((tid + 1) % team->t.t_nproc);
-#if OMPT_SUPPORT && OMPT_BLAME
- if (ompt_enabled &&
- ompt_callbacks.ompt_callback(ompt_event_release_ordered)) {
- /* accept blame for "ordered" waiting */
- kmp_info_t *this_thread = __kmp_threads[gtid];
- ompt_callbacks.ompt_callback(ompt_event_release_ordered)(
- this_thread->th.ompt_thread_info.wait_id);
- }
-#endif
-
KMP_MB(); /* Flush all pending memory write invalidates. */
}
#endif /* BUILD_PARALLEL_ORDERED */
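The hunk above deletes the old blame-shifting event for ordered waits; ompt_event_release_ordered has no direct equivalent in the new callback set. In the TR4-style interface this information is instead expected to flow through the generic mutex-released callback with kind ompt_mutex_ordered (the re-wiring is not visible in this hunk). A tool-side sketch, using the TR4-era type name ompt_mutex_kind_t (final 5.0 shortens it to ompt_mutex_t):

    #include <inttypes.h>
    #include <stdio.h>
    #include <ompt.h> /* TR4-era header name; <omp-tools.h> in final 5.0 */

    /* fires when a thread releases a lock, critical, atomic, or ordered region */
    static void on_mutex_released(ompt_mutex_kind_t kind, ompt_wait_id_t wait_id,
                                  const void *codeptr_ra) {
      if (kind == ompt_mutex_ordered)
        printf("ordered region released, wait_id=%" PRIu64 "\n",
               (uint64_t)wait_id);
    }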
@@ -1204,6 +1194,28 @@ void __kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
this_thr->th.th_set_proc_bind = proc_bind_default;
#endif /* OMP_40_ENABLED */
+#if OMPT_SUPPORT
+ ompt_data_t ompt_parallel_data;
+ ompt_parallel_data.ptr = NULL;
+ ompt_data_t *implicit_task_data;
+ void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
+ if (ompt_enabled.enabled &&
+ this_thr->th.ompt_thread_info.state != omp_state_overhead) {
+
+ ompt_task_info_t *parent_task_info;
+ parent_task_info = OMPT_CUR_TASK_INFO(this_thr);
+
+ parent_task_info->frame.reenter_runtime_frame = OMPT_GET_FRAME_ADDRESS(1);
+ if (ompt_enabled.ompt_callback_parallel_begin) {
+ int team_size = 1;
+
+ ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)(
+ &(parent_task_info->task_data), &(parent_task_info->frame),
+ &ompt_parallel_data, team_size, ompt_invoker_program, codeptr);
+ }
+ }
+#endif // OMPT_SUPPORT
+
if (this_thr->th.th_team != serial_team) {
// Nested level will be an index in the nested nthreads array
int level = this_thr->th.th_team->t.t_level;
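The call site above fixes the shape of the new parallel-begin callback: parent task data and frame, the tool's slot for the new region, the requested team size, how the region was invoked, and a code pointer. A tool-side sketch with TR4-era names (final 5.0 replaces the invoker argument with a flags word):

    static void on_parallel_begin(ompt_data_t *parent_task_data,
                                  const ompt_frame_t *parent_task_frame,
                                  ompt_data_t *parallel_data,
                                  unsigned int requested_team_size,
                                  ompt_invoker_t invoker,
                                  const void *codeptr_ra) {
      /* tag the region; the runtime hands this slot back at parallel-end */
      parallel_data->value = 42; /* arbitrary tool-defined tag */
    }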
@@ -1215,13 +1227,9 @@ void __kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
__kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
-#if OMPT_SUPPORT
- ompt_parallel_id_t ompt_parallel_id = __ompt_parallel_id_new(global_tid);
-#endif
-
new_team = __kmp_allocate_team(this_thr->th.th_root, 1, 1,
#if OMPT_SUPPORT
- ompt_parallel_id,
+ ompt_parallel_data,
#endif
#if OMP_40_ENABLED
proc_bind,
@@ -1316,11 +1324,6 @@ void __kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
}
this_thr->th.th_dispatch = serial_team->t.t_dispatch;
-#if OMPT_SUPPORT
- ompt_parallel_id_t ompt_parallel_id = __ompt_parallel_id_new(global_tid);
- __ompt_team_assign_id(serial_team, ompt_parallel_id);
-#endif
-
KMP_MB();
} else {
@@ -1364,17 +1367,41 @@ void __kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
if (__kmp_env_consistency_check)
__kmp_push_parallel(global_tid, NULL);
+#if OMPT_SUPPORT
+ serial_team->t.ompt_team_info.master_return_address = codeptr;
+ if (ompt_enabled.enabled &&
+ this_thr->th.ompt_thread_info.state != omp_state_overhead) {
+ OMPT_CUR_TASK_INFO(this_thr)
+ ->frame.exit_runtime_frame = OMPT_GET_FRAME_ADDRESS(1);
+
+ ompt_lw_taskteam_t lw_taskteam;
+ __ompt_lw_taskteam_init(&lw_taskteam, this_thr, global_tid,
+ &ompt_parallel_data, codeptr);
+
+ __ompt_lw_taskteam_link(&lw_taskteam, this_thr, 1);
+ // don't use lw_taskteam after linking. content was swapped
+
+ /* OMPT implicit task begin */
+ implicit_task_data = OMPT_CUR_TASK_DATA(this_thr);
+ if (ompt_enabled.ompt_callback_implicit_task) {
+ ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
+ ompt_scope_begin, OMPT_CUR_TEAM_DATA(this_thr),
+ OMPT_CUR_TASK_DATA(this_thr), 1, __kmp_tid_from_gtid(global_tid));
+ }
+
+ /* OMPT state */
+ this_thr->th.ompt_thread_info.state = omp_state_work_parallel;
+ OMPT_CUR_TASK_INFO(this_thr)
+ ->frame.exit_runtime_frame = OMPT_GET_FRAME_ADDRESS(1);
+ }
+#endif
}
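Both this serialized path and the fork paths below now report implicit tasks through one callback with an explicit scope endpoint, replacing the paired ompt_event_implicit_task_begin/_end events. A tool-side sketch matching the five-argument call sites in this patch (final 5.0 later appended a flags argument):

    static void on_implicit_task(ompt_scope_endpoint_t endpoint,
                                 ompt_data_t *parallel_data, /* NULL at scope_end */
                                 ompt_data_t *task_data,
                                 unsigned int team_size,
                                 unsigned int thread_num) {
      if (endpoint == ompt_scope_begin)
        task_data->value = 1; /* e.g. start a per-task timer */
    }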
/* most of the work for a fork */
/* return true if we really went parallel, false if serialized */
int __kmp_fork_call(ident_t *loc, int gtid,
enum fork_context_e call_context, // Intel, GNU, ...
- kmp_int32 argc,
-#if OMPT_SUPPORT
- void *unwrapped_task,
-#endif
- microtask_t microtask, launch_t invoker,
+ kmp_int32 argc, microtask_t microtask, launch_t invoker,
/* TODO: revert workaround for Intel(R) 64 tracker #96 */
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
va_list *ap
@@ -1432,16 +1459,17 @@ int __kmp_fork_call(ident_t *loc, int gtid,
master_set_numthreads = master_th->th.th_set_nproc;
#if OMPT_SUPPORT
- ompt_parallel_id_t ompt_parallel_id;
- ompt_task_id_t ompt_task_id;
+ ompt_data_t ompt_parallel_data;
+ ompt_parallel_data.ptr = NULL;
+ ompt_data_t *parent_task_data;
ompt_frame_t *ompt_frame;
- ompt_task_id_t my_task_id;
- ompt_parallel_id_t my_parallel_id;
+ ompt_data_t *implicit_task_data;
+ void *return_address = NULL;
- if (ompt_enabled) {
- ompt_parallel_id = __ompt_parallel_id_new(gtid);
- ompt_task_id = __ompt_get_task_id_internal(0);
- ompt_frame = __ompt_get_task_frame_internal(0);
+ if (ompt_enabled.enabled) {
+ __ompt_get_task_info_internal(0, NULL, &parent_task_data, &ompt_frame,
+ NULL, NULL);
+ return_address = OMPT_LOAD_RETURN_ADDRESS(gtid);
}
#endif
@@ -1465,13 +1493,16 @@ int __kmp_fork_call(ident_t *loc, int gtid,
#endif
#if OMPT_SUPPORT
- if (ompt_enabled &&
- ompt_callbacks.ompt_callback(ompt_event_parallel_begin)) {
- int team_size = master_set_numthreads;
-
- ompt_callbacks.ompt_callback(ompt_event_parallel_begin)(
- ompt_task_id, ompt_frame, ompt_parallel_id, team_size, unwrapped_task,
- OMPT_INVOKER(call_context));
+ if (ompt_enabled.enabled) {
+ if (ompt_enabled.ompt_callback_parallel_begin) {
+ int team_size = master_set_numthreads
+ ? master_set_numthreads
+ : get__nproc_2(parent_team, master_tid);
+ ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)(
+ parent_task_data, ompt_frame, &ompt_parallel_data, team_size,
+ OMPT_INVOKER(call_context), return_address);
+ }
+ master_th->th.ompt_thread_info.state = omp_state_overhead;
}
#endif
@@ -1508,27 +1539,25 @@ int __kmp_fork_call(ident_t *loc, int gtid,
ompt_lw_taskteam_t lw_taskteam;
- if (ompt_enabled) {
- __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid, unwrapped_task,
- ompt_parallel_id);
- lw_taskteam.ompt_task_info.task_id = __ompt_task_id_new(gtid);
+ if (ompt_enabled.enabled) {
+ __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
+ &ompt_parallel_data, return_address);
exit_runtime_p =
&(lw_taskteam.ompt_task_info.frame.exit_runtime_frame);
- __ompt_lw_taskteam_link(&lw_taskteam, master_th);
+ __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
+ // don't use lw_taskteam after linking. content was swapped
-#if OMPT_TRACE
/* OMPT implicit task begin */
- my_task_id = lw_taskteam.ompt_task_info.task_id;
- my_parallel_id = parent_team->t.ompt_team_info.parallel_id;
- if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) {
- ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(
- my_parallel_id, my_task_id);
+ implicit_task_data = OMPT_CUR_TASK_DATA(master_th);
+ if (ompt_enabled.ompt_callback_implicit_task) {
+ ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
+ ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
+ implicit_task_data, 1, __kmp_tid_from_gtid(gtid));
}
-#endif
/* OMPT state */
- master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
+ master_th->th.ompt_thread_info.state = omp_state_work_parallel;
} else {
exit_runtime_p = &dummy;
}
@@ -1547,34 +1576,27 @@ int __kmp_fork_call(ident_t *loc, int gtid,
#if OMPT_SUPPORT
*exit_runtime_p = NULL;
- if (ompt_enabled) {
-#if OMPT_TRACE
- lw_taskteam.ompt_task_info.frame.exit_runtime_frame = NULL;
-
- if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
- ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
- ompt_parallel_id, ompt_task_id);
+ if (ompt_enabled.enabled) {
+ OMPT_CUR_TASK_INFO(master_th)->frame.exit_runtime_frame = NULL;
+ if (ompt_enabled.ompt_callback_implicit_task) {
+ ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
+ ompt_scope_end, NULL, implicit_task_data, 1,
+ __kmp_tid_from_gtid(gtid));
}
-
__ompt_lw_taskteam_unlink(master_th);
- // reset clear the task id only after unlinking the task
- lw_taskteam.ompt_task_info.task_id = ompt_task_id_none;
-#endif
- if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) {
- ompt_callbacks.ompt_callback(ompt_event_parallel_end)(
- ompt_parallel_id, ompt_task_id, OMPT_INVOKER(call_context));
+ if (ompt_enabled.ompt_callback_parallel_end) {
+ ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
+ OMPT_CUR_TEAM_DATA(master_th), OMPT_CUR_TASK_DATA(master_th),
+ OMPT_INVOKER(call_context), return_address);
}
- master_th->th.ompt_thread_info.state = ompt_state_overhead;
+ master_th->th.ompt_thread_info.state = omp_state_overhead;
}
#endif
return TRUE;
}
parent_team->t.t_pkfn = microtask;
-#if OMPT_SUPPORT
- parent_team->t.ompt_team_info.microtask = unwrapped_task;
-#endif
parent_team->t.t_invoke = invoker;
KMP_TEST_THEN_INC32((kmp_int32 *)&root->r.r_in_parallel);
parent_team->t.t_active_level++;
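The end-of-region callback likewise trades the old (parallel_id, task_id) pair for the region's and the encountering task's data slots plus the invoker and code pointer. A tool-side sketch under the same TR4 naming assumptions:

    static void on_parallel_end(ompt_data_t *parallel_data,
                                ompt_data_t *task_data,
                                ompt_invoker_t invoker,
                                const void *codeptr_ra) {
      /* parallel_data->value still holds what on_parallel_begin stored */
    }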
@@ -1726,28 +1748,27 @@ int __kmp_fork_call(ident_t *loc, int gtid,
#if OMPT_SUPPORT
void *dummy;
void **exit_runtime_p;
+ ompt_task_info_t *task_info;
ompt_lw_taskteam_t lw_taskteam;
- if (ompt_enabled) {
+ if (ompt_enabled.enabled) {
__ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
- unwrapped_task, ompt_parallel_id);
- lw_taskteam.ompt_task_info.task_id = __ompt_task_id_new(gtid);
- exit_runtime_p =
- &(lw_taskteam.ompt_task_info.frame.exit_runtime_frame);
-
- __ompt_lw_taskteam_link(&lw_taskteam, master_th);
-
-#if OMPT_TRACE
- my_task_id = lw_taskteam.ompt_task_info.task_id;
- if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) {
- ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(
- ompt_parallel_id, my_task_id);
+ &ompt_parallel_data, return_address);
+
+ __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
+ // don't use lw_taskteam after linking. content was swapped
+
+ task_info = OMPT_CUR_TASK_INFO(master_th);
+ exit_runtime_p = &(task_info->frame.exit_runtime_frame);
+ if (ompt_enabled.ompt_callback_implicit_task) {
+ ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
+ ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
+ &(task_info->task_data), 1, __kmp_tid_from_gtid(gtid));
}
-#endif
/* OMPT state */
- master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
+ master_th->th.ompt_thread_info.state = omp_state_work_parallel;
} else {
exit_runtime_p = &dummy;
}
@@ -1766,26 +1787,21 @@ int __kmp_fork_call(ident_t *loc, int gtid,
}
#if OMPT_SUPPORT
- *exit_runtime_p = NULL;
- if (ompt_enabled) {
- lw_taskteam.ompt_task_info.frame.exit_runtime_frame = NULL;
-
-#if OMPT_TRACE
- if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
- ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
- ompt_parallel_id, ompt_task_id);
+ if (ompt_enabled.enabled) {
+ exit_runtime_p = NULL;
+ if (ompt_enabled.ompt_callback_implicit_task) {
+ ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
+ ompt_scope_end, NULL, &(task_info->task_data), 1,
+ __kmp_tid_from_gtid(gtid));
}
-#endif
__ompt_lw_taskteam_unlink(master_th);
- // reset clear the task id only after unlinking the task
- lw_taskteam.ompt_task_info.task_id = ompt_task_id_none;
-
- if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) {
- ompt_callbacks.ompt_callback(ompt_event_parallel_end)(
- ompt_parallel_id, ompt_task_id, OMPT_INVOKER(call_context));
+ if (ompt_enabled.ompt_callback_parallel_end) {
+ ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
+ OMPT_CUR_TEAM_DATA(master_th), parent_task_data,
+ OMPT_INVOKER(call_context), return_address);
}
- master_th->th.ompt_thread_info.state = ompt_state_overhead;
+ master_th->th.ompt_thread_info.state = omp_state_overhead;
}
#endif
} else if (microtask == (microtask_t)__kmp_teams_master) {
@@ -1834,30 +1850,28 @@ int __kmp_fork_call(ident_t *loc, int gtid,
#if OMPT_SUPPORT
void *dummy;
void **exit_runtime_p;
+ ompt_task_info_t *task_info;
ompt_lw_taskteam_t lw_taskteam;
- if (ompt_enabled) {
+ if (ompt_enabled.enabled) {
__ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
- unwrapped_task, ompt_parallel_id);
- lw_taskteam.ompt_task_info.task_id = __ompt_task_id_new(gtid);
- exit_runtime_p =
- &(lw_taskteam.ompt_task_info.frame.exit_runtime_frame);
-
- __ompt_lw_taskteam_link(&lw_taskteam, master_th);
+ &ompt_parallel_data, return_address);
+ __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
+ // don't use lw_taskteam after linking. content was swapped
+ task_info = OMPT_CUR_TASK_INFO(master_th);
+ exit_runtime_p = &(task_info->frame.exit_runtime_frame);
-#if OMPT_TRACE
/* OMPT implicit task begin */
- my_task_id = lw_taskteam.ompt_task_info.task_id;
- my_parallel_id = ompt_parallel_id;
- if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) {
- ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(
- my_parallel_id, my_task_id);
+ implicit_task_data = OMPT_CUR_TASK_DATA(master_th);
+ if (ompt_enabled.ompt_callback_implicit_task) {
+ ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
+ ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
+ implicit_task_data, 1, __kmp_tid_from_gtid(gtid));
}
-#endif
/* OMPT state */
- master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
+ master_th->th.ompt_thread_info.state = omp_state_work_parallel;
} else {
exit_runtime_p = &dummy;
}
@@ -1875,26 +1889,22 @@ int __kmp_fork_call(ident_t *loc, int gtid,
}
#if OMPT_SUPPORT
- *exit_runtime_p = NULL;
- if (ompt_enabled) {
-#if OMPT_TRACE
- lw_taskteam.ompt_task_info.frame.exit_runtime_frame = NULL;
-
- if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
- ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
- my_parallel_id, my_task_id);
+ if (ompt_enabled.enabled) {
+ *exit_runtime_p = NULL;
+ if (ompt_enabled.ompt_callback_implicit_task) {
+ ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
+ ompt_scope_end, NULL, &(task_info->task_data), 1,
+ __kmp_tid_from_gtid(gtid));
}
-#endif
+ ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
__ompt_lw_taskteam_unlink(master_th);
- // reset clear the task id only after unlinking the task
- lw_taskteam.ompt_task_info.task_id = ompt_task_id_none;
-
- if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) {
- ompt_callbacks.ompt_callback(ompt_event_parallel_end)(
- ompt_parallel_id, ompt_task_id, OMPT_INVOKER(call_context));
+ if (ompt_enabled.ompt_callback_parallel_end) {
+ ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
+ &ompt_parallel_data, parent_task_data,
+ OMPT_INVOKER(call_context), return_address);
}
- master_th->th.ompt_thread_info.state = ompt_state_overhead;
+ master_th->th.ompt_thread_info.state = omp_state_overhead;
}
#endif
#if OMP_40_ENABLED
@@ -1902,14 +1912,13 @@ int __kmp_fork_call(ident_t *loc, int gtid,
#endif /* OMP_40_ENABLED */
} else if (call_context == fork_context_gnu) {
#if OMPT_SUPPORT
- ompt_lw_taskteam_t *lwt =
- (ompt_lw_taskteam_t *)__kmp_allocate(sizeof(ompt_lw_taskteam_t));
- __ompt_lw_taskteam_init(lwt, master_th, gtid, unwrapped_task,
- ompt_parallel_id);
+ ompt_lw_taskteam_t lwt;
+ __ompt_lw_taskteam_init(&lwt, master_th, gtid, &ompt_parallel_data,
+ return_address);
- lwt->ompt_task_info.task_id = __ompt_task_id_new(gtid);
- lwt->ompt_task_info.frame.exit_runtime_frame = NULL;
- __ompt_lw_taskteam_link(lwt, master_th);
+ lwt.ompt_task_info.frame.exit_runtime_frame = NULL;
+ __ompt_lw_taskteam_link(&lwt, master_th, 1);
+// don't use lw_taskteam after linking. content was swapped
#endif
// we were called from GNU native code
@@ -2004,7 +2013,7 @@ int __kmp_fork_call(ident_t *loc, int gtid,
KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n"));
team = __kmp_allocate_team(root, nthreads, nthreads,
#if OMPT_SUPPORT
- ompt_parallel_id,
+ ompt_parallel_data,
#endif
#if OMP_40_ENABLED
proc_bind,
@@ -2015,7 +2024,7 @@ int __kmp_fork_call(ident_t *loc, int gtid,
KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n"));
team = __kmp_allocate_team(root, nthreads, nthreads,
#if OMPT_SUPPORT
- ompt_parallel_id,
+ ompt_parallel_data,
#endif
#if OMP_40_ENABLED
proc_bind,
@@ -2033,7 +2042,8 @@ int __kmp_fork_call(ident_t *loc, int gtid,
KMP_CHECK_UPDATE(team->t.t_parent, parent_team);
KMP_CHECK_UPDATE_SYNC(team->t.t_pkfn, microtask);
#if OMPT_SUPPORT
- KMP_CHECK_UPDATE_SYNC(team->t.ompt_team_info.microtask, unwrapped_task);
+ KMP_CHECK_UPDATE_SYNC(team->t.ompt_team_info.master_return_address,
+ return_address);
#endif
KMP_CHECK_UPDATE(team->t.t_invoke, invoker); // TODO move to root, maybe
// TODO: parent_team->t.t_level == INT_MAX ???
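Note that team->t.ompt_team_info now records master_return_address instead of the wrapped microtask pointer: tools are meant to identify regions by the codeptr_ra they receive, not by runtime-internal function pointers. A tool can resolve such an address with ordinary symbol lookup; a glibc-only sketch (report_codeptr is an illustrative helper, not part of OMPT):

    #define _GNU_SOURCE
    #include <dlfcn.h>
    #include <stdio.h>

    static void report_codeptr(const void *codeptr_ra) {
      Dl_info info;
      if (codeptr_ra && dladdr(codeptr_ra, &info) && info.dli_sname)
        printf("region entered from %s+%td\n", info.dli_sname,
               (const char *)codeptr_ra - (const char *)info.dli_saddr);
    }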
@@ -2167,7 +2177,7 @@ int __kmp_fork_call(ident_t *loc, int gtid,
&master_th->th.th_current_task->td_icvs, loc);
#if OMPT_SUPPORT
- master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
+ master_th->th.ompt_thread_info.state = omp_state_work_parallel;
#endif
__kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
@@ -2251,8 +2261,8 @@ int __kmp_fork_call(ident_t *loc, int gtid,
KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid));
#if OMPT_SUPPORT
- if (ompt_enabled) {
- master_th->th.ompt_thread_info.state = ompt_state_overhead;
+ if (ompt_enabled.enabled) {
+ master_th->th.ompt_thread_info.state = omp_state_overhead;
}
#endif
@@ -2264,17 +2274,18 @@ static inline void __kmp_join_restore_state(kmp_info_t *thread,
kmp_team_t *team) {
// restore state outside the region
thread->th.ompt_thread_info.state =
- ((team->t.t_serialized) ? ompt_state_work_serial
- : ompt_state_work_parallel);
+ ((team->t.t_serialized) ? omp_state_work_serial
+ : omp_state_work_parallel);
}
-static inline void __kmp_join_ompt(kmp_info_t *thread, kmp_team_t *team,
- ompt_parallel_id_t parallel_id,
- fork_context_e fork_context) {
- ompt_task_info_t *task_info = __ompt_get_taskinfo(0);
- if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) {
- ompt_callbacks.ompt_callback(ompt_event_parallel_end)(
- parallel_id, task_info->task_id, OMPT_INVOKER(fork_context));
+static inline void __kmp_join_ompt(int gtid, kmp_info_t *thread,
+ kmp_team_t *team, ompt_data_t *parallel_data,
+ fork_context_e fork_context, void *codeptr) {
+ ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
+ if (ompt_enabled.ompt_callback_parallel_end) {
+ ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
+ parallel_data, &(task_info->task_data), OMPT_INVOKER(fork_context),
+ codeptr);
}
task_info->frame.reenter_runtime_frame = NULL;
@@ -2311,8 +2322,8 @@ void __kmp_join_call(ident_t *loc, int gtid
master_th->th.th_ident = loc;
#if OMPT_SUPPORT
- if (ompt_enabled) {
- master_th->th.ompt_thread_info.state = ompt_state_overhead;
+ if (ompt_enabled.enabled) {
+ master_th->th.ompt_thread_info.state = omp_state_overhead;
}
#endif
@@ -2349,7 +2360,7 @@ void __kmp_join_call(ident_t *loc, int gtid
__kmpc_end_serialized_parallel(loc, gtid);
#if OMPT_SUPPORT
- if (ompt_enabled) {
+ if (ompt_enabled.enabled) {
__kmp_join_restore_state(master_th, parent_team);
}
#endif
@@ -2377,7 +2388,8 @@ void __kmp_join_call(ident_t *loc, int gtid
KMP_MB();
#if OMPT_SUPPORT
- ompt_parallel_id_t parallel_id = team->t.ompt_team_info.parallel_id;
+ ompt_data_t *parallel_data = &(team->t.ompt_team_info.parallel_data);
+ void *codeptr = team->t.ompt_team_info.master_return_address;
#endif
#if USE_ITT_BUILD
@@ -2449,8 +2461,9 @@ void __kmp_join_call(ident_t *loc, int gtid
}
#if OMPT_SUPPORT
- if (ompt_enabled) {
- __kmp_join_ompt(master_th, parent_team, parallel_id, fork_context);
+ if (ompt_enabled.enabled) {
+ __kmp_join_ompt(gtid, master_th, parent_team, parallel_data, fork_context,
+ codeptr);
}
#endif
@@ -2479,15 +2492,18 @@ void __kmp_join_call(ident_t *loc, int gtid
}
KMP_DEBUG_ASSERT(root->r.r_in_parallel >= 0);
-#if OMPT_SUPPORT && OMPT_TRACE
- if (ompt_enabled) {
- ompt_task_info_t *task_info = __ompt_get_taskinfo(0);
- if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
- ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
- parallel_id, task_info->task_id);
+#if OMPT_SUPPORT
+ if (ompt_enabled.enabled) {
+ ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
+ if (ompt_enabled.ompt_callback_implicit_task) {
+ int ompt_team_size = team->t.t_nproc;
+ ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
+ ompt_scope_end, NULL, &(task_info->task_data), ompt_team_size,
+ __kmp_tid_from_gtid(gtid));
}
+
task_info->frame.exit_runtime_frame = NULL;
- task_info->task_id = 0;
+ task_info->task_data = ompt_data_none;
}
#endif
@@ -2558,8 +2574,9 @@ void __kmp_join_call(ident_t *loc, int gtid
__kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
#if OMPT_SUPPORT
- if (ompt_enabled) {
- __kmp_join_ompt(master_th, parent_team, parallel_id, fork_context);
+ if (ompt_enabled.enabled) {
+ __kmp_join_ompt(gtid, master_th, parent_team, parallel_data, fork_context,
+ codeptr);
}
#endif
@@ -3154,7 +3171,7 @@ static void __kmp_initialize_root(kmp_root_t *root) {
1, // new_nproc
1, // max_nproc
#if OMPT_SUPPORT
- 0, // root parallel id
+ ompt_data_none, // root parallel id
#endif
#if OMP_40_ENABLED
__kmp_nested_proc_bind.bind_types[0],
@@ -3195,7 +3212,7 @@ static void __kmp_initialize_root(kmp_root_t *root) {
1, // new_nproc
__kmp_dflt_team_nth_ub * 2, // max_nproc
#if OMPT_SUPPORT
- 0, // root parallel id
+ ompt_data_none, // root parallel id
#endif
#if OMP_40_ENABLED
__kmp_nested_proc_bind.bind_types[0],
@@ -3734,6 +3751,9 @@ int __kmp_register_root(int initial_thread) {
__kmp_print_thread_storage_map(root_thread, gtid);
}
root_thread->th.th_info.ds.ds_gtid = gtid;
+#if OMPT_SUPPORT
+ root_thread->th.ompt_thread_info.thread_data.ptr = NULL;
+#endif
root_thread->th.th_root = root;
if (__kmp_env_consistency_check) {
root_thread->th.th_cons = __kmp_allocate_cons_stack(gtid);
@@ -3756,7 +3776,7 @@ int __kmp_register_root(int initial_thread) {
root_thread->th.th_serial_team =
__kmp_allocate_team(root, 1, 1,
#if OMPT_SUPPORT
- 0, // root parallel id
+ ompt_data_none, // root parallel id
#endif
#if OMP_40_ENABLED
proc_bind_default,
@@ -3826,6 +3846,29 @@ int __kmp_register_root(int initial_thread) {
__kmp_root_counter++;
+#if OMPT_SUPPORT
+ if (!initial_thread && ompt_enabled.enabled) {
+
+ ompt_thread_t *root_thread = ompt_get_thread();
+
+ ompt_set_thread_state(root_thread, omp_state_overhead);
+
+ if (ompt_enabled.ompt_callback_thread_begin) {
+ ompt_callbacks.ompt_callback(ompt_callback_thread_begin)(
+ ompt_thread_initial, __ompt_get_thread_data_internal());
+ }
+ ompt_data_t *task_data;
+ __ompt_get_task_info_internal(0, NULL, &task_data, NULL, NULL, NULL);
+ if (ompt_enabled.ompt_callback_task_create) {
+ ompt_callbacks.ompt_callback(ompt_callback_task_create)(
+ NULL, NULL, task_data, ompt_task_initial, 0, NULL);
+ // initial task has nothing to return to
+ }
+
+ ompt_set_thread_state(root_thread, omp_state_work_serial);
+ }
+#endif
+
KMP_MB();
__kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
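Root registration now announces the initial task explicitly: task_create fires with null parent data, a null frame, and a null code pointer, because the initial task has no OpenMP ancestor and nothing to return to. A tool-side sketch matching that argument order (the type parameter became flags in final 5.0):

    static void on_task_create(ompt_data_t *parent_task_data, /* NULL here */
                               const ompt_frame_t *parent_frame, /* NULL here */
                               ompt_data_t *new_task_data,
                               int type, /* ompt_task_initial */
                               int has_dependences,
                               const void *codeptr_ra) {
      if (type == ompt_task_initial)
        new_task_data->value = 1; /* tool-defined id for the initial task */
    }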
@@ -3909,9 +3952,9 @@ static int __kmp_reset_root(int gtid, kmp_root_t *root) {
#endif /* KMP_OS_WINDOWS */
#if OMPT_SUPPORT
- if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_thread_end)) {
- int gtid = __kmp_get_gtid();
- __ompt_thread_end(ompt_thread_initial, gtid);
+ if (ompt_enabled.ompt_callback_thread_end) {
+ ompt_callbacks.ompt_callback(ompt_callback_thread_end)(
+ &(root->r.r_uber_thread->th.ompt_thread_info.thread_data));
}
#endif
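Thread lifetime events drop the __ompt_thread_begin/__ompt_thread_end wrappers and invoke the callbacks directly with the per-thread data slot. A tool-side sketch, assuming the TR4-era ompt_thread_type_t (ompt_thread_t in final 5.0):

    #include <stdlib.h>

    static void on_thread_begin(ompt_thread_type_t type,
                                ompt_data_t *thread_data) {
      /* attach per-thread tool state; released again in on_thread_end */
      thread_data->ptr = calloc(1, sizeof(long)); /* hypothetical record */
    }

    static void on_thread_end(ompt_data_t *thread_data) {
      free(thread_data->ptr); /* same slot the runtime passed at begin */
    }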
@@ -3961,7 +4004,7 @@ void __kmp_unregister_root_current_thread(int gtid) {
if (task_team != NULL && task_team->tt.tt_found_proxy_tasks) {
#if OMPT_SUPPORT
// the runtime is shutting down so we won't report any events
- thread->th.ompt_thread_info.state = ompt_state_undefined;
+ thread->th.ompt_thread_info.state = omp_state_undefined;
#endif
__kmp_task_team_wait(thread, team USE_ITT_BUILD_ARG(NULL));
}
@@ -4282,7 +4325,7 @@ kmp_info_t *__kmp_allocate_thread(kmp_root_t *root, kmp_team_t *team,
new_thr->th.th_serial_team = serial_team =
(kmp_team_t *)__kmp_allocate_team(root, 1, 1,
#if OMPT_SUPPORT
- 0, // root parallel id
+ ompt_data_none, // root parallel id
#endif
#if OMP_40_ENABLED
proc_bind_default,
@@ -4813,7 +4856,7 @@ static void __kmp_partition_places(kmp_team_t *team, int update_master_only) {
kmp_team_t *
__kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
#if OMPT_SUPPORT
- ompt_parallel_id_t ompt_parallel_id,
+ ompt_data_t ompt_parallel_data,
#endif
#if OMP_40_ENABLED
kmp_proc_bind_t new_proc_bind,
@@ -5180,7 +5223,7 @@ __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
#endif
#if OMPT_SUPPORT
- __ompt_team_assign_id(team, ompt_parallel_id);
+ __ompt_team_assign_id(team, ompt_parallel_data);
#endif
KMP_MB();
@@ -5232,7 +5275,7 @@ __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
team->t.t_id));
#if OMPT_SUPPORT
- __ompt_team_assign_id(team, ompt_parallel_id);
+ __ompt_team_assign_id(team, ompt_parallel_data);
#endif
KMP_MB();
@@ -5296,7 +5339,7 @@ __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
#endif
#if OMPT_SUPPORT
- __ompt_team_assign_id(team, ompt_parallel_id);
+ __ompt_team_assign_id(team, ompt_parallel_data);
team->t.ompt_serialized_team_info = NULL;
#endif
@@ -5563,16 +5606,26 @@ void *__kmp_launch_thread(kmp_info_t *this_thr) {
}
#if OMPT_SUPPORT
- if (ompt_enabled) {
- this_thr->th.ompt_thread_info.state = ompt_state_overhead;
+ ompt_data_t *thread_data;
+ if (ompt_enabled.enabled) {
+ thread_data = &(this_thr->th.ompt_thread_info.thread_data);
+ thread_data->ptr = NULL;
+
+ this_thr->th.ompt_thread_info.state = omp_state_overhead;
this_thr->th.ompt_thread_info.wait_id = 0;
- this_thr->th.ompt_thread_info.idle_frame = __builtin_frame_address(0);
- if (ompt_callbacks.ompt_callback(ompt_event_thread_begin)) {
- __ompt_thread_begin(ompt_thread_worker, gtid);
+ this_thr->th.ompt_thread_info.idle_frame = OMPT_GET_FRAME_ADDRESS(0);
+ if (ompt_enabled.ompt_callback_thread_begin) {
+ ompt_callbacks.ompt_callback(ompt_callback_thread_begin)(
+ ompt_thread_worker, thread_data);
}
}
#endif
+#if OMPT_SUPPORT
+ if (ompt_enabled.enabled) {
+ this_thr->th.ompt_thread_info.state = omp_state_idle;
+ }
+#endif
/* This is the place where threads wait for work */
while (!TCR_4(__kmp_global.g.g_done)) {
KMP_DEBUG_ASSERT(this_thr == __kmp_threads[gtid]);
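The launch path now publishes finer-grained states: omp_state_overhead while the thread sets up, then omp_state_idle while it parks between regions (previously set inside the wait loop). That distinction mainly serves sampling tools, which poll the state through the lookup-provided ompt_get_state entry point. A hedged sketch using the TR4-era omp_state_t naming seen in this patch; the exact entry-point signature is an assumption here:

    static ompt_get_state_t my_get_state; /* from lookup("ompt_get_state") */

    /* called from a profiler's timer or signal handler */
    static void on_sample(void) {
      ompt_wait_id_t wait_id;
      omp_state_t state = my_get_state(&wait_id);
      if (state == omp_state_idle) {
        /* parked between parallel regions: not chargeable to user work */
      }
    }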
@@ -5581,18 +5634,12 @@ void *__kmp_launch_thread(kmp_info_t *this_thr) {
/* wait for work to do */
KA_TRACE(20, ("__kmp_launch_thread: T#%d waiting for work\n", gtid));
-#if OMPT_SUPPORT
- if (ompt_enabled) {
- this_thr->th.ompt_thread_info.state = ompt_state_idle;
- }
-#endif
-
/* No tid yet since not part of a team */
__kmp_fork_barrier(gtid, KMP_GTID_DNE);
#if OMPT_SUPPORT
- if (ompt_enabled) {
- this_thr->th.ompt_thread_info.state = ompt_state_overhead;
+ if (ompt_enabled.enabled) {
+ this_thr->th.ompt_thread_info.state = omp_state_overhead;
}
#endif
@@ -5600,14 +5647,6 @@ void *__kmp_launch_thread(kmp_info_t *this_thr) {
/* have we been allocated? */
if (TCR_SYNC_PTR(*pteam) && !TCR_4(__kmp_global.g.g_done)) {
-#if OMPT_SUPPORT
- ompt_task_info_t *task_info;
- ompt_parallel_id_t my_parallel_id;
- if (ompt_enabled) {
- task_info = __ompt_get_taskinfo(0);
- my_parallel_id = (*pteam)->t.ompt_team_info.parallel_id;
- }
-#endif
/* we were just woken up, so run our new task */
if (TCR_SYNC_PTR((*pteam)->t.t_pkfn) != NULL) {
int rc;
@@ -5619,11 +5658,8 @@ void *__kmp_launch_thread(kmp_info_t *this_thr) {
updateHWFPControl(*pteam);
#if OMPT_SUPPORT
- if (ompt_enabled) {
- this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
- // Initialize OMPT task id for implicit task.
- int tid = __kmp_tid_from_gtid(gtid);
- task_info->task_id = __ompt_task_id_new(tid);
+ if (ompt_enabled.enabled) {
+ this_thr->th.ompt_thread_info.state = omp_state_work_parallel;
}
#endif
@@ -5634,40 +5670,29 @@ void *__kmp_launch_thread(kmp_info_t *this_thr) {
}
KMP_ASSERT(rc);
-#if OMPT_SUPPORT
- if (ompt_enabled) {
- /* no frame set while outside task */
- task_info->frame.exit_runtime_frame = NULL;
-
- this_thr->th.ompt_thread_info.state = ompt_state_overhead;
- }
-#endif
KMP_MB();
KA_TRACE(20, ("__kmp_launch_thread: T#%d(%d:%d) done microtask = %p\n",
gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid),
(*pteam)->t.t_pkfn));
}
- /* join barrier after parallel region */
- __kmp_join_barrier(gtid);
-#if OMPT_SUPPORT && OMPT_TRACE
- if (ompt_enabled) {
- if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
- // don't access *pteam here: it may have already been freed
- // by the master thread behind the barrier (possible race)
- ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
- my_parallel_id, task_info->task_id);
- }
- task_info->frame.exit_runtime_frame = NULL;
- task_info->task_id = 0;
+#if OMPT_SUPPORT
+ if (ompt_enabled.enabled) {
+ /* no frame set while outside task */
+ __ompt_get_task_info_object(0)->frame.exit_runtime_frame = NULL;
+
+ this_thr->th.ompt_thread_info.state = omp_state_overhead;
+ this_thr->th.ompt_thread_info.task_data = *OMPT_CUR_TASK_DATA(this_thr);
}
#endif
+ /* join barrier after parallel region */
+ __kmp_join_barrier(gtid);
}
}
TCR_SYNC_PTR((intptr_t)__kmp_global.g.g_done);
#if OMPT_SUPPORT
- if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_thread_end)) {
- __ompt_thread_end(ompt_thread_worker, gtid);
+ if (ompt_enabled.ompt_callback_thread_end) {
+ ompt_callbacks.ompt_callback(ompt_callback_thread_end)(thread_data);
}
#endif
@@ -6925,26 +6950,27 @@ int __kmp_invoke_task_func(int gtid) {
#if OMPT_SUPPORT
void *dummy;
void **exit_runtime_p;
- ompt_task_id_t my_task_id;
- ompt_parallel_id_t my_parallel_id;
+ ompt_data_t *my_task_data;
+ ompt_data_t *my_parallel_data;
+ int ompt_team_size;
- if (ompt_enabled) {
+ if (ompt_enabled.enabled) {
exit_runtime_p = &(team->t.t_implicit_task_taskdata[tid]
.ompt_task_info.frame.exit_runtime_frame);
} else {
exit_runtime_p = &dummy;
}
-#if OMPT_TRACE
- my_task_id = team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id;
- my_parallel_id = team->t.ompt_team_info.parallel_id;
- if (ompt_enabled &&
- ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) {
- ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(my_parallel_id,
- my_task_id);
+ my_task_data =
+ &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data);
+ my_parallel_data = &(team->t.ompt_team_info.parallel_data);
+ if (ompt_enabled.ompt_callback_implicit_task) {
+ ompt_team_size = team->t.t_nproc;
+ ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
+ ompt_scope_begin, my_parallel_data, my_task_data, ompt_team_size,
+ __kmp_tid_from_gtid(gtid));
}
#endif
-#endif
{
KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
@@ -6991,9 +7017,6 @@ void __kmp_teams_master(int gtid) {
SSC_MARK_FORKING();
#endif
__kmp_fork_call(loc, gtid, fork_context_intel, team->t.t_argc,
-#if OMPT_SUPPORT
- (void *)thr->th.th_teams_microtask, // "unwrapped" task
-#endif
(microtask_t)thr->th.th_teams_microtask, // "wrapped" task
VOLATILE_CAST(launch_t) __kmp_invoke_task_func, NULL);
#if INCLUDE_SSC_MARKS
@@ -7170,6 +7193,36 @@ void __kmp_internal_join(ident_t *id, int gtid, kmp_team_t *team) {
#endif /* KMP_DEBUG */
__kmp_join_barrier(gtid); /* wait for everyone */
+#if OMPT_SUPPORT
+ int ds_tid = this_thr->th.th_info.ds.ds_tid;
+ if (this_thr->th.ompt_thread_info.state == omp_state_wait_barrier_implicit) {
+ ompt_data_t *tId = OMPT_CUR_TASK_DATA(this_thr);
+ ompt_data_t *pId = OMPT_CUR_TEAM_DATA(this_thr);
+ this_thr->th.ompt_thread_info.state = omp_state_overhead;
+#if OMPT_OPTIONAL
+ void *codeptr = NULL;
+ if (KMP_MASTER_TID(ds_tid) &&
+ (ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait) ||
+ ompt_callbacks.ompt_callback(ompt_callback_sync_region)))
+ codeptr = OMPT_CUR_TEAM_INFO(this_thr)->master_return_address;
+
+ if (ompt_enabled.ompt_callback_sync_region_wait) {
+ ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
+ ompt_sync_region_barrier, ompt_scope_end, pId, tId, codeptr);
+ }
+ if (ompt_enabled.ompt_callback_sync_region) {
+ ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
+ ompt_sync_region_barrier, ompt_scope_end, pId, tId, codeptr);
+ }
+#endif
+ if (!KMP_MASTER_TID(ds_tid) && ompt_enabled.ompt_callback_implicit_task) {
+ ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
+ ompt_scope_end, NULL, tId, 0, ds_tid);
+ }
+ // return to idle state
+ this_thr->th.ompt_thread_info.state = omp_state_overhead;
+ }
+#endif
KMP_MB(); /* Flush all pending memory write invalidates. */
KMP_ASSERT(this_thr->th.th_team == team);
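The new block in __kmp_internal_join reports the end of the implicit barrier through paired sync-region callbacks (the _wait variant brackets the time actually spent waiting) and then closes the worker's implicit task, computing codeptr only where master_return_address is meaningful. A tool-side sketch of the sync-region callback plus the initializer in which such callbacks are registered; the lookup-based registration follows the final 5.0 interface and may differ in detail from the TR4 draft this patch tracks:

    static void on_sync_region(ompt_sync_region_kind_t kind,
                               ompt_scope_endpoint_t endpoint,
                               ompt_data_t *parallel_data,
                               ompt_data_t *task_data,
                               const void *codeptr_ra) {
      if (kind == ompt_sync_region_barrier && endpoint == ompt_scope_end) {
        /* barrier finished for this task */
      }
    }

    static ompt_set_callback_t ompt_set_callback;

    static int my_initialize(ompt_function_lookup_t lookup, int initial_device_num,
                             ompt_data_t *tool_data) {
      ompt_set_callback = (ompt_set_callback_t)lookup("ompt_set_callback");
      ompt_set_callback(ompt_callback_sync_region, (ompt_callback_t)on_sync_region);
      return 1; /* nonzero keeps the tool active */
    }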