summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Jonathan Peyton <jonathan.l.peyton@intel.com> 2016-11-14 21:13:44 +0000
committer Jonathan Peyton <jonathan.l.peyton@intel.com> 2016-11-14 21:13:44 +0000
commit 5375fe820cff7ae7f3c5c771f28c6f5518f2ee60 (patch)
tree d1a87287140554083ff8cc8428685f237f2fe899
parent f10a871419c3bfb05690add75201b1cb56c8e96d (diff)
download bcm5719-llvm-5375fe820cff7ae7f3c5c771f28c6f5518f2ee60.tar.gz
bcm5719-llvm-5375fe820cff7ae7f3c5c771f28c6f5518f2ee60.zip
Update stats-gathering code
Have developer timers use the partitioning scheme, which also required that some redundant developer timers be removed in favor of the already existing normal timers. Move per-thread stats initialization to just after global thread id assignment, which is as early as possible. Also put all global stats initialization code in __kmp_stats_init() and all global stats destruction code in __kmp_stats_fini().

Differential Revision: https://reviews.llvm.org/D26361

llvm-svn: 286892
-rw-r--r-- openmp/runtime/src/kmp_barrier.cpp 36
-rw-r--r-- openmp/runtime/src/kmp_global.c 4
-rw-r--r-- openmp/runtime/src/kmp_runtime.c 22
-rw-r--r-- openmp/runtime/src/kmp_stats.cpp 38
-rw-r--r-- openmp/runtime/src/kmp_stats.h 99
-rw-r--r-- openmp/runtime/src/z_Linux_util.c 20
6 files changed, 102 insertions, 117 deletions
diff --git a/openmp/runtime/src/kmp_barrier.cpp b/openmp/runtime/src/kmp_barrier.cpp
index 7843b68ff8d..5e776146cd1 100644
--- a/openmp/runtime/src/kmp_barrier.cpp
+++ b/openmp/runtime/src/kmp_barrier.cpp
@@ -50,7 +50,7 @@ __kmp_linear_barrier_gather(enum barrier_type bt, kmp_info_t *this_thr, int gtid
void (*reduce)(void *, void *)
USE_ITT_BUILD_ARG(void * itt_sync_obj) )
{
- KMP_TIME_DEVELOPER_BLOCK(KMP_linear_gather);
+ KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_linear_gather);
register kmp_team_t *team = this_thr->th.th_team;
register kmp_bstate_t *thr_bar = & this_thr->th.th_bar[bt].bb;
register kmp_info_t **other_threads = team->t.t_threads;
@@ -130,7 +130,7 @@ __kmp_linear_barrier_release(enum barrier_type bt, kmp_info_t *this_thr, int gti
int propagate_icvs
USE_ITT_BUILD_ARG(void *itt_sync_obj) )
{
- KMP_TIME_DEVELOPER_BLOCK(KMP_linear_release);
+ KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_linear_release);
register kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
register kmp_team_t *team;
@@ -149,7 +149,7 @@ __kmp_linear_barrier_release(enum barrier_type bt, kmp_info_t *this_thr, int gti
if (nproc > 1) {
#if KMP_BARRIER_ICV_PUSH
{
- KMP_TIME_DEVELOPER_BLOCK(USER_icv_copy);
+ KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_icv_copy);
if (propagate_icvs) {
ngo_load(&team->t.t_implicit_task_taskdata[0].td_icvs);
for (i=1; i<nproc; ++i) {
@@ -225,7 +225,7 @@ __kmp_tree_barrier_gather(enum barrier_type bt, kmp_info_t *this_thr, int gtid,
void (*reduce)(void *, void *)
USE_ITT_BUILD_ARG(void *itt_sync_obj) )
{
- KMP_TIME_DEVELOPER_BLOCK(KMP_tree_gather);
+ KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_tree_gather);
register kmp_team_t *team = this_thr->th.th_team;
register kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
register kmp_info_t **other_threads = team->t.t_threads;
@@ -323,7 +323,7 @@ __kmp_tree_barrier_release(enum barrier_type bt, kmp_info_t *this_thr, int gtid,
int propagate_icvs
USE_ITT_BUILD_ARG(void *itt_sync_obj) )
{
- KMP_TIME_DEVELOPER_BLOCK(KMP_tree_release);
+ KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_tree_release);
register kmp_team_t *team;
register kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
register kmp_uint32 nproc;
@@ -393,7 +393,7 @@ __kmp_tree_barrier_release(enum barrier_type bt, kmp_info_t *this_thr, int gtid,
#if KMP_BARRIER_ICV_PUSH
{
- KMP_TIME_DEVELOPER_BLOCK(USER_icv_copy);
+ KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_icv_copy);
if (propagate_icvs) {
__kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[child_tid],
team, child_tid, FALSE);
@@ -426,7 +426,7 @@ __kmp_hyper_barrier_gather(enum barrier_type bt, kmp_info_t *this_thr, int gtid,
void (*reduce)(void *, void *)
USE_ITT_BUILD_ARG(void *itt_sync_obj) )
{
- KMP_TIME_DEVELOPER_BLOCK(KMP_hyper_gather);
+ KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_hyper_gather);
register kmp_team_t *team = this_thr->th.th_team;
register kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
register kmp_info_t **other_threads = team->t.t_threads;
@@ -535,7 +535,7 @@ __kmp_hyper_barrier_release(enum barrier_type bt, kmp_info_t *this_thr, int gtid
int propagate_icvs
USE_ITT_BUILD_ARG(void *itt_sync_obj) )
{
- KMP_TIME_DEVELOPER_BLOCK(KMP_hyper_release);
+ KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_hyper_release);
register kmp_team_t *team;
register kmp_bstate_t *thr_bar = & this_thr -> th.th_bar[ bt ].bb;
register kmp_info_t **other_threads;
@@ -742,7 +742,7 @@ __kmp_hierarchical_barrier_gather(enum barrier_type bt, kmp_info_t *this_thr,
int gtid, int tid, void (*reduce) (void *, void *)
USE_ITT_BUILD_ARG(void * itt_sync_obj) )
{
- KMP_TIME_DEVELOPER_BLOCK(KMP_hier_gather);
+ KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_hier_gather);
register kmp_team_t *team = this_thr->th.th_team;
register kmp_bstate_t *thr_bar = & this_thr->th.th_bar[bt].bb;
register kmp_uint32 nproc = this_thr->th.th_team_nproc;
@@ -883,7 +883,7 @@ __kmp_hierarchical_barrier_release(enum barrier_type bt, kmp_info_t *this_thr, i
int propagate_icvs
USE_ITT_BUILD_ARG(void * itt_sync_obj) )
{
- KMP_TIME_DEVELOPER_BLOCK(KMP_hier_release);
+ KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_hier_release);
register kmp_team_t *team;
register kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
register kmp_uint32 nproc;
@@ -1067,9 +1067,8 @@ int
__kmp_barrier(enum barrier_type bt, int gtid, int is_split, size_t reduce_size,
void *reduce_data, void (*reduce)(void *, void *))
{
- KMP_TIME_DEVELOPER_BLOCK(KMP_barrier);
- KMP_SET_THREAD_STATE_BLOCK(PLAIN_BARRIER);
KMP_TIME_PARTITIONED_BLOCK(OMP_plain_barrier);
+ KMP_SET_THREAD_STATE_BLOCK(PLAIN_BARRIER);
register int tid = __kmp_tid_from_gtid(gtid);
register kmp_info_t *this_thr = __kmp_threads[gtid];
register kmp_team_t *team = this_thr->th.th_team;
@@ -1333,7 +1332,8 @@ __kmp_barrier(enum barrier_type bt, int gtid, int is_split, size_t reduce_size,
void
__kmp_end_split_barrier(enum barrier_type bt, int gtid)
{
- KMP_TIME_DEVELOPER_BLOCK(KMP_end_split_barrier);
+ KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_end_split_barrier);
+ KMP_SET_THREAD_STATE_BLOCK(PLAIN_BARRIER);
int tid = __kmp_tid_from_gtid(gtid);
kmp_info_t *this_thr = __kmp_threads[gtid];
kmp_team_t *team = this_thr->th.th_team;
@@ -1376,9 +1376,8 @@ __kmp_end_split_barrier(enum barrier_type bt, int gtid)
void
__kmp_join_barrier(int gtid)
{
- KMP_TIME_PARTITIONED_BLOCK(OMP_fork_join_barrier);
+ KMP_TIME_PARTITIONED_BLOCK(OMP_join_barrier);
KMP_SET_THREAD_STATE_BLOCK(FORK_JOIN_BARRIER);
- KMP_TIME_DEVELOPER_BLOCK(KMP_join_barrier);
register kmp_info_t *this_thr = __kmp_threads[gtid];
register kmp_team_t *team;
register kmp_uint nproc;
@@ -1592,9 +1591,8 @@ __kmp_join_barrier(int gtid)
void
__kmp_fork_barrier(int gtid, int tid)
{
- KMP_TIME_PARTITIONED_BLOCK(OMP_fork_join_barrier);
+ KMP_TIME_PARTITIONED_BLOCK(OMP_fork_barrier);
KMP_SET_THREAD_STATE_BLOCK(FORK_JOIN_BARRIER);
- KMP_TIME_DEVELOPER_BLOCK(KMP_fork_barrier);
kmp_info_t *this_thr = __kmp_threads[gtid];
kmp_team_t *team = (tid == 0) ? this_thr->th.th_team : NULL;
#if USE_ITT_BUILD
@@ -1707,7 +1705,7 @@ __kmp_fork_barrier(int gtid, int tid)
the fixed ICVs in the master's thread struct, because it is not always the case that the
threads arrays have been allocated when __kmp_fork_call() is executed. */
{
- KMP_TIME_DEVELOPER_BLOCK(USER_icv_copy);
+ KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_icv_copy);
if (!KMP_MASTER_TID(tid)) { // master thread already has ICVs
// Copy the initial ICVs from the master's thread struct to the implicit task for this tid.
KA_TRACE(10, ("__kmp_fork_barrier: T#%d(%d) is PULLing ICVs\n", gtid, tid));
@@ -1762,7 +1760,7 @@ __kmp_fork_barrier(int gtid, int tid)
void
__kmp_setup_icv_copy(kmp_team_t *team, int new_nproc, kmp_internal_control_t *new_icvs, ident_t *loc )
{
- KMP_TIME_DEVELOPER_BLOCK(KMP_setup_icv_copy);
+ KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_setup_icv_copy);
KMP_DEBUG_ASSERT(team && new_nproc && new_icvs);
KMP_DEBUG_ASSERT((!TCR_4(__kmp_init_parallel)) || new_icvs->nproc);
diff --git a/openmp/runtime/src/kmp_global.c b/openmp/runtime/src/kmp_global.c
index 5519696f8e3..86cd3d204ee 100644
--- a/openmp/runtime/src/kmp_global.c
+++ b/openmp/runtime/src/kmp_global.c
@@ -28,10 +28,10 @@ kmp_cpuinfo_t __kmp_cpuinfo = { 0 }; // Not initialized
kmp_tas_lock_t __kmp_stats_lock;
// global list of per thread stats, the head is a sentinel node which accumulates all stats produced before __kmp_create_worker is called.
-kmp_stats_list __kmp_stats_list;
+kmp_stats_list* __kmp_stats_list;
// thread local pointer to stats node within list
-__thread kmp_stats_list* __kmp_stats_thread_ptr = &__kmp_stats_list;
+__thread kmp_stats_list* __kmp_stats_thread_ptr = NULL;
// gives reference tick for all events (considered the 0 tick)
tsc_tick_count __kmp_stats_start_time;
diff --git a/openmp/runtime/src/kmp_runtime.c b/openmp/runtime/src/kmp_runtime.c
index e439150d113..c875f5800cc 100644
--- a/openmp/runtime/src/kmp_runtime.c
+++ b/openmp/runtime/src/kmp_runtime.c
@@ -1417,7 +1417,7 @@ __kmp_fork_call(
kmp_hot_team_ptr_t **p_hot_teams;
#endif
{ // KMP_TIME_BLOCK
- KMP_TIME_DEVELOPER_BLOCK(KMP_fork_call);
+ KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_fork_call);
KMP_COUNT_VALUE(OMP_PARALLEL_args, argc);
KA_TRACE( 20, ("__kmp_fork_call: enter T#%d\n", gtid ));
@@ -2199,7 +2199,6 @@ __kmp_fork_call(
{
KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
- // KMP_TIME_DEVELOPER_BLOCK(USER_master_invoke);
if (! team->t.t_invoke( gtid )) {
KMP_ASSERT2( 0, "cannot invoke microtask for MASTER thread" );
}
@@ -2258,7 +2257,7 @@ __kmp_join_call(ident_t *loc, int gtid
#endif /* OMP_40_ENABLED */
)
{
- KMP_TIME_DEVELOPER_BLOCK(KMP_join_call);
+ KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_join_call);
kmp_team_t *team;
kmp_team_t *parent_team;
kmp_info_t *master_th;
@@ -3681,6 +3680,13 @@ __kmp_register_root( int initial_thread )
KMP_DEBUG_ASSERT( ! root->r.r_root_team );
}
+#if KMP_STATS_ENABLED
+ // Initialize stats as soon as possible (right after gtid assignment).
+ __kmp_stats_thread_ptr = __kmp_stats_list->push_back(gtid);
+ KMP_START_EXPLICIT_TIMER(OMP_worker_thread_life);
+ KMP_SET_THREAD_STATE(SERIAL_REGION);
+ KMP_INIT_PARTITIONED_TIMERS(OMP_serial);
+#endif
__kmp_initialize_root( root );
/* setup new root thread structure */
@@ -4748,7 +4754,7 @@ __kmp_allocate_team( kmp_root_t *root, int new_nproc, int max_nproc,
kmp_internal_control_t *new_icvs,
int argc USE_NESTED_HOT_ARG(kmp_info_t *master) )
{
- KMP_TIME_DEVELOPER_BLOCK(KMP_allocate_team);
+ KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_allocate_team);
int f;
kmp_team_t *team;
int use_hot_team = ! root->r.r_active;
@@ -5504,14 +5510,11 @@ __kmp_launch_thread( kmp_info_t *this_thr )
}
#endif
- KMP_STOP_DEVELOPER_EXPLICIT_TIMER(USER_launch_thread_loop);
{
- KMP_TIME_DEVELOPER_BLOCK(USER_worker_invoke);
KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
rc = (*pteam)->t.t_invoke( gtid );
}
- KMP_START_DEVELOPER_EXPLICIT_TIMER(USER_launch_thread_loop);
KMP_ASSERT( rc );
#if OMPT_SUPPORT
@@ -6332,7 +6335,7 @@ __kmp_do_serial_initialize( void )
#endif
#endif
#if KMP_STATS_ENABLED
- __kmp_init_tas_lock( & __kmp_stats_lock );
+ __kmp_stats_init();
#endif
__kmp_init_lock( & __kmp_global_lock );
__kmp_init_queuing_lock( & __kmp_dispatch_lock );
@@ -7293,8 +7296,7 @@ __kmp_cleanup( void )
__kmp_i18n_catclose();
#if KMP_STATS_ENABLED
- __kmp_accumulate_stats_at_exit();
- __kmp_stats_list.deallocate();
+ __kmp_stats_fini();
#endif
KA_TRACE( 10, ("__kmp_cleanup: exit\n" ) );
diff --git a/openmp/runtime/src/kmp_stats.cpp b/openmp/runtime/src/kmp_stats.cpp
index a65a481dc17..3ae25d5a46f 100644
--- a/openmp/runtime/src/kmp_stats.cpp
+++ b/openmp/runtime/src/kmp_stats.cpp
@@ -29,11 +29,11 @@
#define expandName(name,flags,ignore) {STRINGIZE(name),flags},
statInfo timeStat::timerInfo[] = {
KMP_FOREACH_TIMER(expandName,0)
- {0,0}
+ {"TIMER_LAST", 0}
};
const statInfo counter::counterInfo[] = {
KMP_FOREACH_COUNTER(expandName,0)
- {0,0}
+ {"COUNTER_LAST", 0}
};
#undef expandName
@@ -71,7 +71,7 @@ const kmp_stats_output_module::rgb_color kmp_stats_output_module::globalColorArr
static uint32_t statsPrinted = 0;
// output interface
-static kmp_stats_output_module __kmp_stats_global_output;
+static kmp_stats_output_module* __kmp_stats_global_output = NULL;
/* ****************************************************** */
/* ************* statistic member functions ************* */
@@ -164,7 +164,7 @@ void explicitTimer::start(timer_e timerEnumValue) {
return;
}
-void explicitTimer::stop(timer_e timerEnumValue) {
+void explicitTimer::stop(timer_e timerEnumValue, kmp_stats_list* stats_ptr /* = nullptr */) {
if (startTime.getValue() == 0)
return;
@@ -174,8 +174,10 @@ void explicitTimer::stop(timer_e timerEnumValue) {
stat->addSample(((finishTime - startTime) - totalPauseTime).ticks());
if(timeStat::logEvent(timerEnumValue)) {
- __kmp_stats_thread_ptr->push_event(startTime.getValue() - __kmp_stats_start_time.getValue(), finishTime.getValue() - __kmp_stats_start_time.getValue(), __kmp_stats_thread_ptr->getNestValue(), timerEnumValue);
- __kmp_stats_thread_ptr->decrementNestValue();
+ if(!stats_ptr)
+ stats_ptr = __kmp_stats_thread_ptr;
+ stats_ptr->push_event(startTime.getValue() - __kmp_stats_start_time.getValue(), finishTime.getValue() - __kmp_stats_start_time.getValue(), __kmp_stats_thread_ptr->getNestValue(), timerEnumValue);
+ stats_ptr->decrementNestValue();
}
/* We accept the risk that we drop a sample because it really did start at t==0. */
@@ -481,18 +483,18 @@ void kmp_stats_output_module::windupExplicitTimers()
// and say "it's over".
// If the timer wasn't running, this won't record anything anyway.
kmp_stats_list::iterator it;
- for(it = __kmp_stats_list.begin(); it != __kmp_stats_list.end(); it++) {
+ for(it = __kmp_stats_list->begin(); it != __kmp_stats_list->end(); it++) {
kmp_stats_list* ptr = *it;
ptr->getPartitionedTimers()->windup();
for (int timer=0; timer<EXPLICIT_TIMER_LAST; timer++) {
- ptr->getExplicitTimer(explicit_timer_e(timer))->stop((timer_e)timer);
+ ptr->getExplicitTimer(explicit_timer_e(timer))->stop((timer_e)timer, ptr);
}
}
}
void kmp_stats_output_module::printPloticusFile() {
int i;
- int size = __kmp_stats_list.size();
+ int size = __kmp_stats_list->size();
FILE* plotOut = fopen(plotFileName, "w+");
fprintf(plotOut, "#proc page\n"
@@ -602,7 +604,7 @@ void kmp_stats_output_module::outputStats(const char* heading)
fprintf(statsOut, "%s\n",heading);
// Accumulate across threads.
kmp_stats_list::iterator it;
- for (it = __kmp_stats_list.begin(); it != __kmp_stats_list.end(); it++) {
+ for (it = __kmp_stats_list->begin(); it != __kmp_stats_list->end(); it++) {
int t = (*it)->getGtid();
// Output per thread stats if requested.
if (printPerThreadFlag) {
@@ -666,7 +668,7 @@ extern "C" {
void __kmp_reset_stats()
{
kmp_stats_list::iterator it;
- for(it = __kmp_stats_list.begin(); it != __kmp_stats_list.end(); it++) {
+ for(it = __kmp_stats_list->begin(); it != __kmp_stats_list->end(); it++) {
timeStat * timers = (*it)->getTimers();
counter * counters = (*it)->getCounters();
explicitTimer * eTimers = (*it)->getExplicitTimers();
@@ -688,7 +690,7 @@ void __kmp_reset_stats()
// This function will reset all stats and stop all threads' explicit timers if they haven't been stopped already.
void __kmp_output_stats(const char * heading)
{
- __kmp_stats_global_output.outputStats(heading);
+ __kmp_stats_global_output->outputStats(heading);
__kmp_reset_stats();
}
@@ -703,6 +705,18 @@ void __kmp_accumulate_stats_at_exit(void)
void __kmp_stats_init(void)
{
+ __kmp_init_tas_lock( & __kmp_stats_lock );
+ __kmp_stats_start_time = tsc_tick_count::now();
+ __kmp_stats_global_output = new kmp_stats_output_module();
+ __kmp_stats_list = new kmp_stats_list();
+}
+
+void __kmp_stats_fini(void)
+{
+ __kmp_accumulate_stats_at_exit();
+ __kmp_stats_list->deallocate();
+ delete __kmp_stats_global_output;
+ delete __kmp_stats_list;
}
} // extern "C"
diff --git a/openmp/runtime/src/kmp_stats.h b/openmp/runtime/src/kmp_stats.h
index b767fdf8516..1521d219141 100644
--- a/openmp/runtime/src/kmp_stats.h
+++ b/openmp/runtime/src/kmp_stats.h
@@ -104,8 +104,7 @@ enum stats_state_e {
macro (OMP_TASKLOOP, 0, arg) \
macro (TASK_executed, 0, arg) \
macro (TASK_cancelled, 0, arg) \
- macro (TASK_stolen, 0, arg) \
- macro (LAST,0,arg)
+ macro (TASK_stolen, 0, arg)
/*!
* \brief Add new timers under KMP_FOREACH_TIMER() macro in kmp_stats.h
@@ -123,31 +122,31 @@ enum stats_state_e {
* @ingroup STATS_GATHERING2
*/
#define KMP_FOREACH_TIMER(macro, arg) \
- macro (OMP_worker_thread_life, 0, arg) \
+ macro (OMP_worker_thread_life, stats_flags_e::logEvent, arg) \
macro (FOR_static_scheduling, 0, arg) \
macro (FOR_dynamic_scheduling, 0, arg) \
macro (OMP_critical, 0, arg) \
macro (OMP_critical_wait, 0, arg) \
macro (OMP_single, 0, arg) \
macro (OMP_master, 0, arg) \
- macro (OMP_idle, 0, arg) \
- macro (OMP_plain_barrier, 0, arg) \
- macro (OMP_fork_join_barrier, 0, arg) \
- macro (OMP_parallel, 0, arg) \
+ macro (OMP_idle, stats_flags_e::logEvent, arg) \
+ macro (OMP_plain_barrier, stats_flags_e::logEvent, arg) \
+ macro (OMP_fork_barrier, stats_flags_e::logEvent, arg) \
+ macro (OMP_join_barrier, stats_flags_e::logEvent, arg) \
+ macro (OMP_parallel, stats_flags_e::logEvent, arg) \
macro (OMP_task_immediate, 0, arg) \
macro (OMP_task_taskwait, 0, arg) \
macro (OMP_task_taskyield, 0, arg) \
macro (OMP_task_taskgroup, 0, arg) \
macro (OMP_task_join_bar, 0, arg) \
macro (OMP_task_plain_bar, 0, arg) \
- macro (OMP_serial, 0, arg) \
+ macro (OMP_serial, stats_flags_e::logEvent, arg) \
macro (OMP_taskloop_scheduling, 0, arg) \
macro (OMP_set_numthreads, stats_flags_e::noUnits | stats_flags_e::noTotal, arg) \
macro (OMP_PARALLEL_args, stats_flags_e::noUnits | stats_flags_e::noTotal, arg) \
macro (FOR_static_iterations, stats_flags_e::noUnits | stats_flags_e::noTotal, arg) \
macro (FOR_dynamic_iterations,stats_flags_e::noUnits | stats_flags_e::noTotal, arg) \
- KMP_FOREACH_DEVELOPER_TIMER(macro, arg) \
- macro (LAST,0, arg)
+ KMP_FOREACH_DEVELOPER_TIMER(macro, arg)
// OMP_start_end -- Time from when OpenMP is initialized until the stats are printed at exit
@@ -190,28 +189,22 @@ enum stats_state_e {
// KMP_tree_release -- time in __kmp_tree_barrier_release
// KMP_hyper_gather -- time in __kmp_hyper_barrier_gather
// KMP_hyper_release -- time in __kmp_hyper_barrier_release
-# define KMP_FOREACH_DEVELOPER_TIMER(macro, arg) \
- macro (KMP_fork_call, 0, arg) \
- macro (KMP_join_call, 0, arg) \
- macro (KMP_fork_barrier, stats_flags_e::logEvent, arg) \
- macro (KMP_join_barrier, stats_flags_e::logEvent, arg) \
- macro (KMP_barrier, 0, arg) \
- macro (KMP_end_split_barrier, 0, arg) \
- macro (KMP_hier_gather, 0, arg) \
- macro (KMP_hier_release, 0, arg) \
- macro (KMP_hyper_gather, stats_flags_e::logEvent, arg) \
- macro (KMP_hyper_release, stats_flags_e::logEvent, arg) \
- macro (KMP_linear_gather, 0, arg) \
- macro (KMP_linear_release, 0, arg) \
- macro (KMP_tree_gather, 0, arg) \
- macro (KMP_tree_release, 0, arg) \
- macro (USER_master_invoke, stats_flags_e::logEvent, arg) \
- macro (USER_worker_invoke, stats_flags_e::logEvent, arg) \
- macro (USER_resume, stats_flags_e::logEvent, arg) \
- macro (USER_suspend, stats_flags_e::logEvent, arg) \
- macro (USER_launch_thread_loop, stats_flags_e::logEvent, arg) \
- macro (KMP_allocate_team, 0, arg) \
- macro (KMP_setup_icv_copy, 0, arg) \
+# define KMP_FOREACH_DEVELOPER_TIMER(macro, arg) \
+ macro (KMP_fork_call, 0, arg) \
+ macro (KMP_join_call, 0, arg) \
+ macro (KMP_end_split_barrier, 0, arg) \
+ macro (KMP_hier_gather, 0, arg) \
+ macro (KMP_hier_release, 0, arg) \
+ macro (KMP_hyper_gather, 0, arg) \
+ macro (KMP_hyper_release, 0, arg) \
+ macro (KMP_linear_gather, 0, arg) \
+ macro (KMP_linear_release, 0, arg) \
+ macro (KMP_tree_gather, 0, arg) \
+ macro (KMP_tree_release, 0, arg) \
+ macro (USER_resume, 0, arg) \
+ macro (USER_suspend, 0, arg) \
+ macro (KMP_allocate_team, 0, arg) \
+ macro (KMP_setup_icv_copy, 0, arg) \
macro (USER_icv_copy, 0, arg)
#else
# define KMP_FOREACH_DEVELOPER_TIMER(macro, arg)
@@ -233,47 +226,23 @@ enum stats_state_e {
*
* @ingroup STATS_GATHERING
*/
-#define KMP_FOREACH_EXPLICIT_TIMER(macro, arg) \
- macro(OMP_worker_thread_life, 0, arg) \
- macro(FOR_static_scheduling, 0, arg) \
- macro(FOR_dynamic_scheduling, 0, arg) \
- macro(OMP_critical, 0, arg) \
- macro(OMP_critical_wait, 0, arg) \
- macro(OMP_single, 0, arg) \
- macro(OMP_master, 0, arg) \
- macro(OMP_idle, 0, arg) \
- macro(OMP_plain_barrier, 0, arg) \
- macro(OMP_fork_join_barrier, 0, arg) \
- macro(OMP_parallel, 0, arg) \
- macro(OMP_task_immediate, 0, arg) \
- macro(OMP_task_taskwait, 0, arg) \
- macro(OMP_task_taskyield, 0, arg) \
- macro(OMP_task_taskgroup, 0, arg) \
- macro(OMP_task_join_bar, 0, arg) \
- macro(OMP_task_plain_bar, 0, arg) \
- macro(OMP_serial, 0, arg) \
- macro(OMP_taskloop_scheduling, 0, arg) \
- KMP_FOREACH_EXPLICIT_DEVELOPER_TIMER(macro,arg) \
- macro(LAST, 0, arg)
-
-#if (KMP_DEVELOPER_STATS)
-# define KMP_FOREACH_EXPLICIT_DEVELOPER_TIMER(macro, arg) \
- macro(USER_launch_thread_loop, stats_flags_e::logEvent, arg)
-#else
-# define KMP_FOREACH_EXPLICIT_DEVELOPER_TIMER(macro, arg)
-#endif
+#define KMP_FOREACH_EXPLICIT_TIMER(macro, arg) \
+ KMP_FOREACH_TIMER(macro, arg)
#define ENUMERATE(name,ignore,prefix) prefix##name,
enum timer_e {
KMP_FOREACH_TIMER(ENUMERATE, TIMER_)
+ TIMER_LAST
};
enum explicit_timer_e {
KMP_FOREACH_EXPLICIT_TIMER(ENUMERATE, EXPLICIT_TIMER_)
+ EXPLICIT_TIMER_LAST
};
enum counter_e {
KMP_FOREACH_COUNTER(ENUMERATE, COUNTER_)
+ COUNTER_LAST
};
#undef ENUMERATE
@@ -370,7 +339,7 @@ class explicitTimer
void start(timer_e timerEnumValue);
void pause() { pauseStartTime = tsc_tick_count::now(); }
void resume() { totalPauseTime += (tsc_tick_count::now() - pauseStartTime); }
- void stop(timer_e timerEnumValue);
+ void stop(timer_e timerEnumValue, kmp_stats_list* stats_ptr = nullptr);
void reset() { startTime = 0; pauseStartTime = 0; totalPauseTime = 0; }
};
@@ -716,13 +685,14 @@ class kmp_stats_output_module {
extern "C" {
#endif
void __kmp_stats_init();
+void __kmp_stats_fini();
void __kmp_reset_stats();
void __kmp_output_stats(const char *);
void __kmp_accumulate_stats_at_exit(void);
// thread local pointer to stats node within list
extern __thread kmp_stats_list* __kmp_stats_thread_ptr;
// head to stats list.
-extern kmp_stats_list __kmp_stats_list;
+extern kmp_stats_list* __kmp_stats_list;
// lock for __kmp_stats_list
extern kmp_tas_lock_t __kmp_stats_lock;
// reference start time
@@ -866,6 +836,7 @@ extern kmp_stats_output_module __kmp_stats_output;
# define KMP_COUNT_DEVELOPER_BLOCK(n) KMP_COUNT_BLOCK(n)
# define KMP_START_DEVELOPER_EXPLICIT_TIMER(n) KMP_START_EXPLICIT_TIMER(n)
# define KMP_STOP_DEVELOPER_EXPLICIT_TIMER(n) KMP_STOP_EXPLICIT_TIMER(n)
+# define KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(n) KMP_TIME_PARTITIONED_BLOCK(n)
#else
// Null definitions
# define KMP_TIME_DEVELOPER_BLOCK(n) ((void)0)
@@ -873,6 +844,7 @@ extern kmp_stats_output_module __kmp_stats_output;
# define KMP_COUNT_DEVELOPER_BLOCK(n) ((void)0)
# define KMP_START_DEVELOPER_EXPLICIT_TIMER(n) ((void)0)
# define KMP_STOP_DEVELOPER_EXPLICIT_TIMER(n) ((void)0)
+# define KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(n) ((void)0)
#endif
#else // KMP_STATS_ENABLED
@@ -894,6 +866,7 @@ extern kmp_stats_output_module __kmp_stats_output;
#define KMP_STOP_DEVELOPER_EXPLICIT_TIMER(n) ((void)0)
#define KMP_INIT_PARTITIONED_TIMERS(name) ((void)0)
#define KMP_TIME_PARTITIONED_BLOCK(name) ((void)0)
+#define KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(n) ((void)0)
#define KMP_PUSH_PARTITIONED_TIMER(name) ((void)0)
#define KMP_POP_PARTITIONED_TIMER() ((void)0)
#define KMP_SET_THREAD_STATE(state_name) ((void)0)
diff --git a/openmp/runtime/src/z_Linux_util.c b/openmp/runtime/src/z_Linux_util.c
index 389f4d543db..4a26d4b65d5 100644
--- a/openmp/runtime/src/z_Linux_util.c
+++ b/openmp/runtime/src/z_Linux_util.c
@@ -866,14 +866,12 @@ __kmp_create_worker( int gtid, kmp_info_t *th, size_t stack_size )
// th->th.th_stats is used to transfer thread specific stats-pointer to __kmp_launch_worker
// So when thread is created (goes into __kmp_launch_worker) it will
// set it's __thread local pointer to th->th.th_stats
- th->th.th_stats = __kmp_stats_list.push_back(gtid);
- if(KMP_UBER_GTID(gtid)) {
- __kmp_stats_start_time = tsc_tick_count::now();
- __kmp_stats_thread_ptr = th->th.th_stats;
- __kmp_stats_init();
- KMP_START_EXPLICIT_TIMER(OMP_worker_thread_life);
- KMP_SET_THREAD_STATE(SERIAL_REGION);
- KMP_INIT_PARTITIONED_TIMERS(OMP_serial);
+ if(!KMP_UBER_GTID(gtid)) {
+ th->th.th_stats = __kmp_stats_list->push_back(gtid);
+ } else {
+ // For root threads, the __kmp_stats_thread_ptr is set in __kmp_register_root(), so
+ // set the th->th.th_stats field to it.
+ th->th.th_stats = __kmp_stats_thread_ptr;
}
__kmp_release_tas_lock(&__kmp_stats_lock, gtid);
@@ -1541,7 +1539,7 @@ __kmp_suspend_uninitialize_thread( kmp_info_t *th )
template <class C>
static inline void __kmp_suspend_template( int th_gtid, C *flag )
{
- KMP_TIME_DEVELOPER_BLOCK(USER_suspend);
+ KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_suspend);
kmp_info_t *th = __kmp_threads[th_gtid];
int status;
typename C::flag_t old_spin;
@@ -1675,7 +1673,7 @@ void __kmp_suspend_oncore(int th_gtid, kmp_flag_oncore *flag) {
template <class C>
static inline void __kmp_resume_template( int target_gtid, C *flag )
{
- KMP_TIME_DEVELOPER_BLOCK(USER_resume);
+ KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_resume);
kmp_info_t *th = __kmp_threads[target_gtid];
int status;
@@ -1750,7 +1748,7 @@ void __kmp_resume_oncore(int target_gtid, kmp_flag_oncore *flag) {
void
__kmp_resume_monitor()
{
- KMP_TIME_DEVELOPER_BLOCK(USER_resume);
+ KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_resume);
int status;
#ifdef KMP_DEBUG
int gtid = TCR_4(__kmp_init_gtid) ? __kmp_get_gtid() : -1;
OpenPOWER on IntegriCloud