path: root/openmp/runtime/src/kmp_wait_release.h
author     Joachim Protze <protze@itc.rwth-aachen.de>  2017-11-01 10:08:30 +0000
committer  Joachim Protze <protze@itc.rwth-aachen.de>  2017-11-01 10:08:30 +0000
commit     82e94a593433f36734e2d34898d353a2ecb65b8b (patch)
tree       d7ac69e35db74384a401862feb3eb31e78ca94ca /openmp/runtime/src/kmp_wait_release.h
parent     b93c06331ea77953e80bbd5ffb30167eade466f0 (diff)
Update implementation of OMPT to the specification OpenMP 5.0 Preview 1 (TR4).
The code is tested to work with the latest clang, GNU, and Intel compilers. The implementation is optimized for low overhead when no tool is attached, shifting the cost to execution with a tool attached. This patch does not implement OMPT for libomptarget.

Patch by Simon Convent and Joachim Protze

Differential Revision: https://reviews.llvm.org/D38185

llvm-svn: 317085
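For context on the "low overhead when no tool is attached" claim: under TR4, the runtime looks up the symbol ompt_start_tool at startup, and only a non-NULL result switches execution onto the instrumented code paths exercised by this patch. A minimal first-party tool sketch, assuming the TR4-era ompt.h header and the two-argument initialize signature this runtime implements (my_initialize, my_finalize, and on_idle are illustrative names, not part of the patch):

#include <stdio.h>
#include <ompt.h> // TR4-era header; the final 5.0 spec renamed it omp-tools.h

static ompt_set_callback_t set_callback; // runtime entry point, looked up below

// TR4 idle callback: the endpoint says whether the thread enters or leaves idle.
static void on_idle(ompt_scope_endpoint_t endpoint) {
  printf("idle-%s\n", endpoint == ompt_scope_begin ? "begin" : "end");
}

// Invoked by the runtime once the tool is attached.
static int my_initialize(ompt_function_lookup_t lookup, ompt_data_t *tool_data) {
  set_callback = (ompt_set_callback_t)lookup("ompt_set_callback");
  set_callback(ompt_callback_idle, (ompt_callback_t)on_idle);
  return 1; // nonzero keeps the tool active
}

static void my_finalize(ompt_data_t *tool_data) {}

// The runtime resolves this symbol at startup; returning non-NULL is what
// shifts the cost onto the "tool attached" paths mentioned above.
ompt_start_tool_result_t *ompt_start_tool(unsigned int omp_version,
                                          const char *runtime_version) {
  static ompt_start_tool_result_t result = {&my_initialize, &my_finalize, {0}};
  return &result;
}

Linked into the application (or loaded through the OMP_TOOL_LIBRARIES mechanism TR4 introduces), this is enough to observe the idle-begin/idle-end transitions emitted by the hunks below.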
Diffstat (limited to 'openmp/runtime/src/kmp_wait_release.h')
-rw-r--r--  openmp/runtime/src/kmp_wait_release.h | 179
1 file changed, 138 insertions(+), 41 deletions(-)
diff --git a/openmp/runtime/src/kmp_wait_release.h b/openmp/runtime/src/kmp_wait_release.h
index 2ecacefc391..1f519d5b064 100644
--- a/openmp/runtime/src/kmp_wait_release.h
+++ b/openmp/runtime/src/kmp_wait_release.h
@@ -17,6 +17,9 @@
#include "kmp.h"
#include "kmp_itt.h"
#include "kmp_stats.h"
+#if OMPT_SUPPORT
+#include "ompt-specific.h"
+#endif
/*!
@defgroup WAIT_RELEASE Wait/Release operations
@@ -85,6 +88,44 @@ public:
*/
};
+#if OMPT_SUPPORT
+static inline void __ompt_implicit_task_end(kmp_info_t *this_thr,
+ omp_state_t omp_state,
+ ompt_data_t *tId,
+ ompt_data_t *pId) {
+ int ds_tid = this_thr->th.th_info.ds.ds_tid;
+ if (omp_state == omp_state_wait_barrier_implicit) {
+ this_thr->th.ompt_thread_info.state = omp_state_overhead;
+#if OMPT_OPTIONAL
+ void *codeptr = NULL;
+ if (ompt_enabled.ompt_callback_sync_region_wait) {
+ ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
+ ompt_sync_region_barrier, ompt_scope_end, NULL, tId, codeptr);
+ }
+ if (ompt_enabled.ompt_callback_sync_region) {
+ ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
+ ompt_sync_region_barrier, ompt_scope_end, NULL, tId, codeptr);
+ }
+#endif
+ if (!KMP_MASTER_TID(ds_tid)) {
+ if (ompt_enabled.ompt_callback_implicit_task) {
+ ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
+ ompt_scope_end, NULL, tId, 0, ds_tid);
+ }
+#if OMPT_OPTIONAL
+ if (ompt_enabled.ompt_callback_idle) {
+ ompt_callbacks.ompt_callback(ompt_callback_idle)(ompt_scope_begin);
+ }
+#endif
+ // return to idle state
+ this_thr->th.ompt_thread_info.state = omp_state_idle;
+ } else {
+ this_thr->th.ompt_thread_info.state = omp_state_overhead;
+ }
+ }
+}
+#endif
+
/* Spin wait loop that first does pause, then yield, then sleep. A thread that
calls __kmp_wait_* must make certain that another thread calls __kmp_release
to wake it back up to prevent deadlocks! */
@@ -116,30 +157,88 @@ __kmp_wait_template(kmp_info_t *this_thr, C *flag,
stats_state_e thread_state = KMP_GET_THREAD_STATE();
#endif
-#if OMPT_SUPPORT && OMPT_BLAME
- ompt_state_t ompt_state = this_thr->th.ompt_thread_info.state;
- if (ompt_enabled && ompt_state != ompt_state_undefined) {
- if (ompt_state == ompt_state_idle) {
- if (ompt_callbacks.ompt_callback(ompt_event_idle_begin)) {
- ompt_callbacks.ompt_callback(ompt_event_idle_begin)(th_gtid + 1);
- }
- } else if (ompt_callbacks.ompt_callback(ompt_event_wait_barrier_begin)) {
- KMP_DEBUG_ASSERT(ompt_state == ompt_state_wait_barrier ||
- ompt_state == ompt_state_wait_barrier_implicit ||
- ompt_state == ompt_state_wait_barrier_explicit);
-
+/* OMPT Behavior:
+   This function is called from:
+
+   __kmp_barrier (2 times) (implicit or explicit barrier in parallel regions)
+     These have join / fork behavior. In these cases, we don't change the
+     state or trigger events in THIS function. Events are triggered in the
+     calling code (__kmp_barrier):
+
+       state := omp_state_overhead
+       barrier-begin
+       barrier-wait-begin
+       state := omp_state_wait_barrier
+       call join-barrier-implementation (finally arrive here)
+       {}
+       call fork-barrier-implementation (finally arrive here)
+       {}
+       state := omp_state_overhead
+       barrier-wait-end
+       barrier-end
+       state := omp_state_work_parallel
+
+   __kmp_fork_barrier (after thread creation, before executing implicit task)
+       call fork-barrier-implementation (finally arrive here)
+       {} // workers arrive here with state = omp_state_idle
+
+   __kmp_join_barrier (implicit barrier at end of parallel region)
+       state := omp_state_barrier_implicit
+       barrier-begin
+       barrier-wait-begin
+       call join-barrier-implementation (finally arrive here, final_spin=FALSE)
+       {}
+   __kmp_fork_barrier (implicit barrier at end of parallel region)
+       call fork-barrier-implementation (finally arrive here, final_spin=TRUE)
+
+       Worker after task-team is finished:
+         barrier-wait-end
+         barrier-end
+         implicit-task-end
+         idle-begin
+         state := omp_state_idle
+
+       Before leaving, if state = omp_state_idle:
+         idle-end
+         state := omp_state_overhead
+*/
+#if OMPT_SUPPORT
+ omp_state_t ompt_entry_state;
+ ompt_data_t *pId = NULL;
+ ompt_data_t *tId;
+ if (ompt_enabled.enabled) {
+ ompt_entry_state = this_thr->th.ompt_thread_info.state;
+ if (!final_spin || ompt_entry_state != omp_state_wait_barrier_implicit ||
+ KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid)) {
ompt_lw_taskteam_t *team =
this_thr->th.th_team->t.ompt_serialized_team_info;
- ompt_parallel_id_t pId;
- ompt_task_id_t tId;
if (team) {
- pId = team->ompt_team_info.parallel_id;
- tId = team->ompt_task_info.task_id;
+ pId = &(team->ompt_team_info.parallel_data);
+ tId = &(team->ompt_task_info.task_data);
} else {
- pId = this_thr->th.th_team->t.ompt_team_info.parallel_id;
- tId = this_thr->th.th_current_task->ompt_task_info.task_id;
+ pId = OMPT_CUR_TEAM_DATA(this_thr);
+ tId = OMPT_CUR_TASK_DATA(this_thr);
+ }
+ } else {
+ pId = NULL;
+ tId = &(this_thr->th.ompt_thread_info.task_data);
+ }
+#if OMPT_OPTIONAL
+ if (ompt_entry_state == omp_state_idle) {
+ if (ompt_enabled.ompt_callback_idle) {
+ ompt_callbacks.ompt_callback(ompt_callback_idle)(ompt_scope_begin);
}
- ompt_callbacks.ompt_callback(ompt_event_wait_barrier_begin)(pId, tId);
+ } else
+#endif
+ if (final_spin && (__kmp_tasking_mode == tskm_immediate_exec ||
+ this_thr->th.th_task_team == NULL)) {
+ // The implicit task is done: either there is no task queue, or the task-team finished
+ __ompt_implicit_task_end(this_thr, ompt_entry_state, tId, pId);
}
}
#endif
@@ -206,6 +305,11 @@ __kmp_wait_template(kmp_info_t *this_thr, C *flag,
this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
} else {
KMP_DEBUG_ASSERT(!KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid));
+#if OMPT_SUPPORT
+ // task-team is done now; other cases should be caught above
+ if (final_spin && ompt_enabled.enabled)
+ __ompt_implicit_task_end(this_thr, ompt_entry_state, tId, pId);
+#endif
this_thr->th.th_task_team = NULL;
this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
}
@@ -293,29 +397,22 @@ __kmp_wait_template(kmp_info_t *this_thr, C *flag,
// TODO: If thread is done with work and times out, disband/free
}
-#if OMPT_SUPPORT && OMPT_BLAME
- if (ompt_enabled && ompt_state != ompt_state_undefined) {
- if (ompt_state == ompt_state_idle) {
- if (ompt_callbacks.ompt_callback(ompt_event_idle_end)) {
- ompt_callbacks.ompt_callback(ompt_event_idle_end)(th_gtid + 1);
- }
- } else if (ompt_callbacks.ompt_callback(ompt_event_wait_barrier_end)) {
- KMP_DEBUG_ASSERT(ompt_state == ompt_state_wait_barrier ||
- ompt_state == ompt_state_wait_barrier_implicit ||
- ompt_state == ompt_state_wait_barrier_explicit);
-
- ompt_lw_taskteam_t *team =
- this_thr->th.th_team->t.ompt_serialized_team_info;
- ompt_parallel_id_t pId;
- ompt_task_id_t tId;
- if (team) {
- pId = team->ompt_team_info.parallel_id;
- tId = team->ompt_task_info.task_id;
- } else {
- pId = this_thr->th.th_team->t.ompt_team_info.parallel_id;
- tId = this_thr->th.th_current_task->ompt_task_info.task_id;
+#if OMPT_SUPPORT
+ omp_state_t ompt_exit_state = this_thr->th.ompt_thread_info.state;
+ if (ompt_enabled.enabled && ompt_exit_state != omp_state_undefined) {
+#if OMPT_OPTIONAL
+ if (final_spin) {
+ __ompt_implicit_task_end(this_thr, ompt_exit_state, tId, pId);
+ ompt_exit_state = this_thr->th.ompt_thread_info.state;
+ }
+#endif
+ if (ompt_exit_state == omp_state_idle) {
+#if OMPT_OPTIONAL
+ if (ompt_enabled.ompt_callback_idle) {
+ ompt_callbacks.ompt_callback(ompt_callback_idle)(ompt_scope_end);
}
- ompt_callbacks.ompt_callback(ompt_event_wait_barrier_end)(pId, tId);
+#endif
+ this_thr->th.ompt_thread_info.state = omp_state_overhead;
}
}
#endif
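Tying this back to the "OMPT Behavior" comment added above: a worker leaving the implicit barrier at the end of a parallel region should produce barrier-wait-end, barrier-end, implicit-task-end, then idle-begin, in that order. A hedged sketch of a tool observing that sequence; the callback signatures follow the TR4 invocations in __ompt_implicit_task_end, while the names and registration scaffolding are illustrative:

#include <stdio.h>
#include <ompt.h>

// Matches the five-argument implicit_task dispatch above:
// (endpoint, parallel_data, task_data, team_size, thread_num).
static void on_implicit_task(ompt_scope_endpoint_t endpoint,
                             ompt_data_t *parallel_data,
                             ompt_data_t *task_data,
                             unsigned int team_size,
                             unsigned int thread_num) {
  if (endpoint == ompt_scope_end)
    printf("implicit-task-end (thread %u)\n", thread_num);
}

// Matches the sync_region_wait dispatch in __ompt_implicit_task_end:
// (kind, endpoint, parallel_data, task_data, codeptr_ra).
static void on_sync_region_wait(ompt_sync_region_kind_t kind,
                                ompt_scope_endpoint_t endpoint,
                                ompt_data_t *parallel_data,
                                ompt_data_t *task_data,
                                const void *codeptr_ra) {
  if (kind == ompt_sync_region_barrier)
    printf("barrier-wait-%s\n", endpoint == ompt_scope_begin ? "begin" : "end");
}

static int my_initialize(ompt_function_lookup_t lookup, ompt_data_t *tool_data) {
  ompt_set_callback_t set_callback =
      (ompt_set_callback_t)lookup("ompt_set_callback");
  set_callback(ompt_callback_implicit_task, (ompt_callback_t)on_implicit_task);
  set_callback(ompt_callback_sync_region_wait,
               (ompt_callback_t)on_sync_region_wait);
  return 1;
}

static void my_finalize(ompt_data_t *tool_data) {}

ompt_start_tool_result_t *ompt_start_tool(unsigned int omp_version,
                                          const char *runtime_version) {
  static ompt_start_tool_result_t result = {&my_initialize, &my_finalize, {0}};
  return &result;
}

Note that sync_region_wait and idle sit behind OMPT_OPTIONAL while implicit_task is part of basic support, so a tool must tolerate one firing without the other; this is also why each dispatch site in the diff tests its own ompt_enabled flag.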