summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--openmp/runtime/src/kmp_barrier.cpp18
-rw-r--r--openmp/runtime/src/kmp_csupport.c19
-rw-r--r--openmp/runtime/src/kmp_dispatch.cpp2
-rw-r--r--openmp/runtime/src/kmp_runtime.c23
-rw-r--r--openmp/runtime/src/kmp_sched.cpp2
-rw-r--r--openmp/runtime/src/kmp_stats.cpp79
-rw-r--r--openmp/runtime/src/kmp_stats.h205
-rw-r--r--openmp/runtime/src/kmp_stats_timing.h24
-rw-r--r--openmp/runtime/src/kmp_tasking.c26
-rw-r--r--openmp/runtime/src/kmp_wait_release.h30
-rw-r--r--openmp/runtime/src/z_Linux_util.c9
11 files changed, 369 insertions, 68 deletions
diff --git a/openmp/runtime/src/kmp_barrier.cpp b/openmp/runtime/src/kmp_barrier.cpp
index 6fef085f6b6..b51fdeea6b3 100644
--- a/openmp/runtime/src/kmp_barrier.cpp
+++ b/openmp/runtime/src/kmp_barrier.cpp
@@ -1048,6 +1048,8 @@ __kmp_barrier(enum barrier_type bt, int gtid, int is_split, size_t reduce_size,
void *reduce_data, void (*reduce)(void *, void *))
{
KMP_TIME_DEVELOPER_BLOCK(KMP_barrier);
+ KMP_SET_THREAD_STATE_BLOCK(PLAIN_BARRIER);
+ KMP_TIME_PARTITIONED_BLOCK(OMP_plain_barrier);
register int tid = __kmp_tid_from_gtid(gtid);
register kmp_info_t *this_thr = __kmp_threads[gtid];
register kmp_team_t *team = this_thr->th.th_team;
@@ -1348,6 +1350,8 @@ __kmp_end_split_barrier(enum barrier_type bt, int gtid)
void
__kmp_join_barrier(int gtid)
{
+ KMP_TIME_PARTITIONED_BLOCK(OMP_fork_join_barrier);
+ KMP_SET_THREAD_STATE_BLOCK(FORK_JOIN_BARRIER);
KMP_TIME_DEVELOPER_BLOCK(KMP_join_barrier);
register kmp_info_t *this_thr = __kmp_threads[gtid];
register kmp_team_t *team;
@@ -1463,6 +1467,18 @@ __kmp_join_barrier(int gtid)
__kmp_task_team_wait(this_thr, team
USE_ITT_BUILD_ARG(itt_sync_obj) );
}
+#if KMP_STATS_ENABLED
+ // Have master thread flag the workers to indicate they are now waiting for
+ // next parallel region, Also wake them up so they switch their timers to idle.
+ for (int i=0; i<team->t.t_nproc; ++i) {
+ kmp_info_t* team_thread = team->t.t_threads[i];
+ if (team_thread == this_thr)
+ continue;
+ team_thread->th.th_stats->setIdleFlag();
+ if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME && team_thread->th.th_sleep_loc != NULL)
+ __kmp_null_resume_wrapper(__kmp_gtid_from_thread(team_thread), team_thread->th.th_sleep_loc);
+ }
+#endif
#if USE_ITT_BUILD
if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
__kmp_itt_barrier_middle(gtid, itt_sync_obj);
@@ -1546,6 +1562,8 @@ __kmp_join_barrier(int gtid)
void
__kmp_fork_barrier(int gtid, int tid)
{
+ KMP_TIME_PARTITIONED_BLOCK(OMP_fork_join_barrier);
+ KMP_SET_THREAD_STATE_BLOCK(FORK_JOIN_BARRIER);
KMP_TIME_DEVELOPER_BLOCK(KMP_fork_barrier);
kmp_info_t *this_thr = __kmp_threads[gtid];
kmp_team_t *team = (tid == 0) ? this_thr->th.th_team : NULL;
diff --git a/openmp/runtime/src/kmp_csupport.c b/openmp/runtime/src/kmp_csupport.c
index aa934ae26b6..2c2a08f8307 100644
--- a/openmp/runtime/src/kmp_csupport.c
+++ b/openmp/runtime/src/kmp_csupport.c
@@ -290,7 +290,6 @@ __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...)
}
else
{
- KMP_STOP_EXPLICIT_TIMER(OMP_serial);
KMP_COUNT_BLOCK(OMP_PARALLEL);
}
#endif
@@ -345,10 +344,6 @@ __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...)
}
#endif
}
-#if (KMP_STATS_ENABLED)
- if (!inParallel)
- KMP_START_EXPLICIT_TIMER(OMP_serial);
-#endif
}
#if OMP_40_ENABLED
@@ -669,7 +664,6 @@ void
__kmpc_barrier(ident_t *loc, kmp_int32 global_tid)
{
KMP_COUNT_BLOCK(OMP_BARRIER);
- KMP_TIME_BLOCK(OMP_barrier);
KC_TRACE( 10, ("__kmpc_barrier: called T#%d\n", global_tid ) );
if (! TCR_4(__kmp_init_parallel))
@@ -713,7 +707,7 @@ __kmpc_master(ident_t *loc, kmp_int32 global_tid)
if( KMP_MASTER_GTID( global_tid )) {
KMP_COUNT_BLOCK(OMP_MASTER);
- KMP_START_EXPLICIT_TIMER(OMP_master);
+ KMP_PUSH_PARTITIONED_TIMER(OMP_master);
status = 1;
}
@@ -763,7 +757,7 @@ __kmpc_end_master(ident_t *loc, kmp_int32 global_tid)
KC_TRACE( 10, ("__kmpc_end_master: called T#%d\n", global_tid ) );
KMP_DEBUG_ASSERT( KMP_MASTER_GTID( global_tid ));
- KMP_STOP_EXPLICIT_TIMER(OMP_master);
+ KMP_POP_PARTITIONED_TIMER();
#if OMPT_SUPPORT && OMPT_TRACE
kmp_info_t *this_thr = __kmp_threads[ global_tid ];
@@ -1088,7 +1082,7 @@ __kmpc_critical( ident_t * loc, kmp_int32 global_tid, kmp_critical_name * crit )
__kmpc_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none);
#else
KMP_COUNT_BLOCK(OMP_CRITICAL);
- KMP_TIME_BLOCK(OMP_critical_wait); /* Time spent waiting to enter the critical section */
+ KMP_TIME_PARTITIONED_BLOCK(OMP_critical_wait); /* Time spent waiting to enter the critical section */
kmp_user_lock_p lck;
KC_TRACE( 10, ("__kmpc_critical: called T#%d\n", global_tid ) );
@@ -1250,6 +1244,7 @@ __kmpc_critical_with_hint( ident_t * loc, kmp_int32 global_tid, kmp_critical_nam
__kmp_itt_critical_acquired( lck );
#endif /* USE_ITT_BUILD */
+ KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
KA_TRACE( 15, ("__kmpc_critical: done T#%d\n", global_tid ));
} // __kmpc_critical_with_hint
@@ -1342,7 +1337,7 @@ __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, kmp_critical_name *crit)
#endif
#endif // KMP_USE_DYNAMIC_LOCK
- KMP_STOP_EXPLICIT_TIMER(OMP_critical);
+ KMP_POP_PARTITIONED_TIMER();
KA_TRACE( 15, ("__kmpc_end_critical: done T#%d\n", global_tid ));
}
@@ -1464,7 +1459,7 @@ __kmpc_single(ident_t *loc, kmp_int32 global_tid)
if (rc) {
// We are going to execute the single statement, so we should count it.
KMP_COUNT_BLOCK(OMP_SINGLE);
- KMP_START_EXPLICIT_TIMER(OMP_single);
+ KMP_PUSH_PARTITIONED_TIMER(OMP_single);
}
#if OMPT_SUPPORT && OMPT_TRACE
@@ -1507,7 +1502,7 @@ void
__kmpc_end_single(ident_t *loc, kmp_int32 global_tid)
{
__kmp_exit_single( global_tid );
- KMP_STOP_EXPLICIT_TIMER(OMP_single);
+ KMP_POP_PARTITIONED_TIMER();
#if OMPT_SUPPORT && OMPT_TRACE
kmp_info_t *this_thr = __kmp_threads[ global_tid ];
diff --git a/openmp/runtime/src/kmp_dispatch.cpp b/openmp/runtime/src/kmp_dispatch.cpp
index 9c3974a2f4b..6b8fbeea0b1 100644
--- a/openmp/runtime/src/kmp_dispatch.cpp
+++ b/openmp/runtime/src/kmp_dispatch.cpp
@@ -1424,7 +1424,7 @@ __kmp_dispatch_next(
// This is potentially slightly misleading, schedule(runtime) will appear here even if the actual runtme schedule
// is static. (Which points out a disadavantage of schedule(runtime): even when static scheduling is used it costs
// more than a compile time choice to use static scheduling would.)
- KMP_TIME_BLOCK(FOR_dynamic_scheduling);
+ KMP_TIME_PARTITIONED_BLOCK(FOR_dynamic_scheduling);
int status;
dispatch_private_info_template< T > * pr;
diff --git a/openmp/runtime/src/kmp_runtime.c b/openmp/runtime/src/kmp_runtime.c
index 092d913bbbb..c11cb625026 100644
--- a/openmp/runtime/src/kmp_runtime.c
+++ b/openmp/runtime/src/kmp_runtime.c
@@ -1543,7 +1543,8 @@ __kmp_fork_call(
#endif
{
- KMP_TIME_BLOCK(OMP_work);
+ KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
+ KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
__kmp_invoke_microtask( microtask, gtid, 0, argc, parent_team->t.t_argv
#if OMPT_SUPPORT
, exit_runtime_p
@@ -1618,7 +1619,8 @@ __kmp_fork_call(
gtid, parent_team->t.t_id, parent_team->t.t_pkfn ) );
{
- KMP_TIME_BLOCK(OMP_work);
+ KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
+ KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
if (! parent_team->t.t_invoke( gtid )) {
KMP_ASSERT2( 0, "cannot invoke microtask for MASTER thread" );
}
@@ -1738,7 +1740,8 @@ __kmp_fork_call(
#endif
{
- KMP_TIME_BLOCK(OMP_work);
+ KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
+ KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
__kmp_invoke_microtask( microtask, gtid, 0, argc, parent_team->t.t_argv
#if OMPT_SUPPORT
, exit_runtime_p
@@ -1795,7 +1798,8 @@ __kmp_fork_call(
team->t.t_level--;
// AC: call special invoker for outer "parallel" of the teams construct
{
- KMP_TIME_BLOCK(OMP_work);
+ KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
+ KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
invoker(gtid);
}
} else {
@@ -1842,7 +1846,8 @@ __kmp_fork_call(
#endif
{
- KMP_TIME_BLOCK(OMP_work);
+ KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
+ KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
__kmp_invoke_microtask( microtask, gtid, 0, argc, args
#if OMPT_SUPPORT
, exit_runtime_p
@@ -2178,7 +2183,8 @@ __kmp_fork_call(
} // END of timer KMP_fork_call block
{
- KMP_TIME_BLOCK(OMP_work);
+ KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
+ KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
// KMP_TIME_DEVELOPER_BLOCK(USER_master_invoke);
if (! team->t.t_invoke( gtid )) {
KMP_ASSERT2( 0, "cannot invoke microtask for MASTER thread" );
@@ -5448,6 +5454,8 @@ __kmp_launch_thread( kmp_info_t *this_thr )
KMP_STOP_DEVELOPER_EXPLICIT_TIMER(USER_launch_thread_loop);
{
KMP_TIME_DEVELOPER_BLOCK(USER_worker_invoke);
+ KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
+ KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
rc = (*pteam)->t.t_invoke( gtid );
}
KMP_START_DEVELOPER_EXPLICIT_TIMER(USER_launch_thread_loop);
@@ -6783,7 +6791,8 @@ __kmp_invoke_task_func( int gtid )
#endif
{
- KMP_TIME_BLOCK(OMP_work);
+ KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
+ KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
rc = __kmp_invoke_microtask( (microtask_t) TCR_SYNC_PTR(team->t.t_pkfn),
gtid, tid, (int) team->t.t_argc, (void **) team->t.t_argv
#if OMPT_SUPPORT
diff --git a/openmp/runtime/src/kmp_sched.cpp b/openmp/runtime/src/kmp_sched.cpp
index a09aa868173..eb2ee6792d0 100644
--- a/openmp/runtime/src/kmp_sched.cpp
+++ b/openmp/runtime/src/kmp_sched.cpp
@@ -84,7 +84,7 @@ __kmp_for_static_init(
typename traits_t< T >::signed_t chunk
) {
KMP_COUNT_BLOCK(OMP_FOR_static);
- KMP_TIME_BLOCK (FOR_static_scheduling);
+ KMP_TIME_PARTITIONED_BLOCK(FOR_static_scheduling);
typedef typename traits_t< T >::unsigned_t UT;
typedef typename traits_t< T >::signed_t ST;
diff --git a/openmp/runtime/src/kmp_stats.cpp b/openmp/runtime/src/kmp_stats.cpp
index a4c626b988d..8100f3bf4ae 100644
--- a/openmp/runtime/src/kmp_stats.cpp
+++ b/openmp/runtime/src/kmp_stats.cpp
@@ -157,6 +157,7 @@ std::string statistic::format(char unit, bool total) const
void explicitTimer::start(timer_e timerEnumValue) {
startTime = tsc_tick_count::now();
+ totalPauseTime = 0;
if(timeStat::logEvent(timerEnumValue)) {
__kmp_stats_thread_ptr->incrementNestValue();
}
@@ -170,7 +171,7 @@ void explicitTimer::stop(timer_e timerEnumValue) {
tsc_tick_count finishTime = tsc_tick_count::now();
//stat->addSample ((tsc_tick_count::now() - startTime).ticks());
- stat->addSample ((finishTime - startTime).ticks());
+ stat->addSample(((finishTime - startTime) - totalPauseTime).ticks());
if(timeStat::logEvent(timerEnumValue)) {
__kmp_stats_thread_ptr->push_event(startTime.getValue() - __kmp_stats_start_time.getValue(), finishTime.getValue() - __kmp_stats_start_time.getValue(), __kmp_stats_thread_ptr->getNestValue(), timerEnumValue);
@@ -182,6 +183,74 @@ void explicitTimer::stop(timer_e timerEnumValue) {
return;
}
+/* ************************************************************** */
+/* ************* partitionedTimers member functions ************* */
+partitionedTimers::partitionedTimers() {
+ timer_stack.reserve(8);
+}
+
+// add a timer to this collection of partitioned timers.
+void partitionedTimers::add_timer(explicit_timer_e timer_index, explicitTimer* timer_pointer) {
+ KMP_DEBUG_ASSERT((int)timer_index < (int)EXPLICIT_TIMER_LAST+1);
+ timers[timer_index] = timer_pointer;
+}
+
+// initialize the paritioned timers to an initial timer
+void partitionedTimers::init(timerPair init_timer_pair) {
+ KMP_DEBUG_ASSERT(this->timer_stack.size() == 0);
+ timer_stack.push_back(init_timer_pair);
+ timers[init_timer_pair.get_index()]->start(init_timer_pair.get_timer());
+}
+
+// stop/save the current timer, and start the new timer (timer_pair)
+// There is a special condition where if the current timer is equal to
+// the one you are trying to push, then it only manipulates the stack,
+// and it won't stop/start the currently running timer.
+void partitionedTimers::push(timerPair timer_pair) {
+ // get the current timer
+ // stop current timer
+ // push new timer
+ // start the new timer
+ KMP_DEBUG_ASSERT(this->timer_stack.size() > 0);
+ timerPair current_timer = timer_stack.back();
+ timer_stack.push_back(timer_pair);
+ if(current_timer != timer_pair) {
+ timers[current_timer.get_index()]->pause();
+ timers[timer_pair.get_index()]->start(timer_pair.get_timer());
+ }
+}
+
+// stop/discard the current timer, and start the previously saved timer
+void partitionedTimers::pop() {
+ // get the current timer
+ // stop current timer
+ // pop current timer
+ // get the new current timer and start it back up
+ KMP_DEBUG_ASSERT(this->timer_stack.size() > 1);
+ timerPair current_timer = timer_stack.back();
+ timer_stack.pop_back();
+ timerPair new_timer = timer_stack.back();
+ if(current_timer != new_timer) {
+ timers[current_timer.get_index()]->stop(current_timer.get_timer());
+ timers[new_timer.get_index()]->resume();
+ }
+}
+
+// Wind up all the currently running timers.
+// This pops off all the timers from the stack and clears the stack
+// After this is called, init() must be run again to initialize the
+// stack of timers
+void partitionedTimers::windup() {
+ while(timer_stack.size() > 1) {
+ this->pop();
+ }
+ if(timer_stack.size() > 0) {
+ timerPair last_timer = timer_stack.back();
+ timer_stack.pop_back();
+ timers[last_timer.get_index()]->stop(last_timer.get_timer());
+ }
+}
+
/* ******************************************************************* */
/* ************* kmp_stats_event_vector member functions ************* */
@@ -397,8 +466,10 @@ void kmp_stats_output_module::windupExplicitTimers()
// If the timer wasn't running, this won't record anything anyway.
kmp_stats_list::iterator it;
for(it = __kmp_stats_list.begin(); it != __kmp_stats_list.end(); it++) {
+ kmp_stats_list* ptr = *it;
+ ptr->getPartitionedTimers()->windup();
for (int timer=0; timer<EXPLICIT_TIMER_LAST; timer++) {
- (*it)->getExplicitTimer(explicit_timer_e(timer))->stop((timer_e)timer);
+ ptr->getExplicitTimer(explicit_timer_e(timer))->stop((timer_e)timer);
}
}
}
@@ -595,11 +666,7 @@ void __kmp_reset_stats()
// reset the event vector so all previous events are "erased"
(*it)->resetEventVector();
-
- // May need to restart the explicit timers in thread zero?
}
- KMP_START_EXPLICIT_TIMER(OMP_serial);
- KMP_START_EXPLICIT_TIMER(OMP_start_end);
}
// This function will reset all stats and stop all threads' explicit timers if they haven't been stopped already.
diff --git a/openmp/runtime/src/kmp_stats.h b/openmp/runtime/src/kmp_stats.h
index 0377fcf9ed9..e317e7cbf80 100644
--- a/openmp/runtime/src/kmp_stats.h
+++ b/openmp/runtime/src/kmp_stats.h
@@ -27,6 +27,7 @@
#include <limits>
#include <math.h>
+#include <vector>
#include <string>
#include <stdint.h>
#include <new> // placement new
@@ -52,6 +53,23 @@ enum stats_flags_e {
};
/*!
+ * @ingroup STATS_GATHERING
+ * \brief the states which a thread can be in
+ *
+ */
+enum stats_state_e {
+ IDLE,
+ SERIAL_REGION,
+ FORK_JOIN_BARRIER,
+ PLAIN_BARRIER,
+ TASKWAIT,
+ TASKYIELD,
+ TASKGROUP,
+ IMPLICIT_TASK,
+ EXPLICIT_TASK
+};
+
+/*!
* \brief Add new counters under KMP_FOREACH_COUNTER() macro in kmp_stats.h
*
* @param macro a user defined macro that takes three arguments - macro(COUNTER_NAME, flags, arg)
@@ -103,18 +121,25 @@ enum stats_flags_e {
*
* @ingroup STATS_GATHERING2
*/
-#define KMP_FOREACH_TIMER(macro, arg) \
- macro (OMP_start_end, stats_flags_e::onlyInMaster | stats_flags_e::noTotal, arg) \
- macro (OMP_serial, stats_flags_e::onlyInMaster | stats_flags_e::noTotal, arg) \
- macro (OMP_work, 0, arg) \
- macro (OMP_barrier, 0, arg) \
- macro (FOR_static_scheduling, 0, arg) \
- macro (FOR_dynamic_scheduling, 0, arg) \
- macro (OMP_task, 0, arg) \
- macro (OMP_critical, 0, arg) \
- macro (OMP_critical_wait, 0, arg) \
- macro (OMP_single, 0, arg) \
- macro (OMP_master, 0, arg) \
+#define KMP_FOREACH_TIMER(macro, arg) \
+ macro (OMP_worker_thread_life, 0, arg) \
+ macro (FOR_static_scheduling, 0, arg) \
+ macro (FOR_dynamic_scheduling, 0, arg) \
+ macro (OMP_critical, 0, arg) \
+ macro (OMP_critical_wait, 0, arg) \
+ macro (OMP_single, 0, arg) \
+ macro (OMP_master, 0, arg) \
+ macro (OMP_idle, 0, arg) \
+ macro (OMP_plain_barrier, 0, arg) \
+ macro (OMP_fork_join_barrier, 0, arg) \
+ macro (OMP_parallel, 0, arg) \
+ macro (OMP_task_immediate, 0, arg) \
+ macro (OMP_task_taskwait, 0, arg) \
+ macro (OMP_task_taskyield, 0, arg) \
+ macro (OMP_task_taskgroup, 0, arg) \
+ macro (OMP_task_join_bar, 0, arg) \
+ macro (OMP_task_plain_bar, 0, arg) \
+ macro (OMP_serial, 0, arg) \
macro (OMP_set_numthreads, stats_flags_e::noUnits | stats_flags_e::noTotal, arg) \
macro (OMP_PARALLEL_args, stats_flags_e::noUnits | stats_flags_e::noTotal, arg) \
macro (FOR_static_iterations, stats_flags_e::noUnits | stats_flags_e::noTotal, arg) \
@@ -129,7 +154,16 @@ enum stats_flags_e {
// OMP_barrier -- Time at "real" barriers (includes task time)
// FOR_static_scheduling -- Time spent doing scheduling for a static "for"
// FOR_dynamic_scheduling -- Time spent doing scheduling for a dynamic "for"
-// OMP_task -- Time spent executing tasks
+// OMP_idle -- Worker threads time spent waiting for inclusion in a parallel region
+// OMP_plain_barrier -- Time spent in a barrier construct
+// OMP_fork_join_barrier -- Time spent in a the fork-join barrier surrounding a parallel region
+// OMP_parallel -- Time spent inside a parallel construct
+// OMP_task_immediate -- Time spent executing non-deferred tasks
+// OMP_task_taskwait -- Time spent executing tasks inside a taskwait construct
+// OMP_task_taskyield -- Time spent executing tasks inside a taskyield construct
+// OMP_task_taskgroup -- Time spent executing tasks inside a taskygroup construct
+// OMP_task_join_bar -- Time spent executing tasks inside a join barrier
+// OMP_task_plain_bar -- Time spent executing tasks inside a barrier construct
// OMP_single -- Time spent executing a "single" region
// OMP_master -- Time spent executing a "master" region
// OMP_set_numthreads -- Values passed to omp_set_num_threads
@@ -197,12 +231,25 @@ enum stats_flags_e {
*
* @ingroup STATS_GATHERING
*/
-#define KMP_FOREACH_EXPLICIT_TIMER(macro, arg) \
- macro(OMP_serial, 0, arg) \
- macro(OMP_start_end, 0, arg) \
- macro(OMP_critical, 0, arg) \
- macro(OMP_single, 0, arg) \
- macro(OMP_master, 0, arg) \
+#define KMP_FOREACH_EXPLICIT_TIMER(macro, arg) \
+ macro(OMP_worker_thread_life, 0, arg) \
+ macro(FOR_static_scheduling, 0, arg) \
+ macro(FOR_dynamic_scheduling, 0, arg) \
+ macro(OMP_critical, 0, arg) \
+ macro(OMP_critical_wait, 0, arg) \
+ macro(OMP_single, 0, arg) \
+ macro(OMP_master, 0, arg) \
+ macro(OMP_idle, 0, arg) \
+ macro(OMP_plain_barrier, 0, arg) \
+ macro(OMP_fork_join_barrier, 0, arg) \
+ macro(OMP_parallel, 0, arg) \
+ macro(OMP_task_immediate, 0, arg) \
+ macro(OMP_task_taskwait, 0, arg) \
+ macro(OMP_task_taskyield, 0, arg) \
+ macro(OMP_task_taskgroup, 0, arg) \
+ macro(OMP_task_join_bar, 0, arg) \
+ macro(OMP_task_plain_bar, 0, arg) \
+ macro(OMP_serial, 0, arg) \
KMP_FOREACH_EXPLICIT_DEVELOPER_TIMER(macro,arg) \
macro(LAST, 0, arg)
@@ -227,6 +274,21 @@ enum counter_e {
};
#undef ENUMERATE
+class timerPair {
+ explicit_timer_e timer_index;
+ timer_e timer;
+ public:
+ timerPair(explicit_timer_e ti, timer_e t) : timer_index(ti), timer(t) {}
+ inline explicit_timer_e get_index() const { return timer_index; }
+ inline timer_e get_timer() const { return timer; }
+ bool operator==(const timerPair & rhs) {
+ return this->get_index() == rhs.get_index();
+ }
+ bool operator!=(const timerPair & rhs) {
+ return !(*this == rhs);
+ }
+};
+
class statistic
{
double minVal;
@@ -294,15 +356,19 @@ class explicitTimer
{
timeStat * stat;
tsc_tick_count startTime;
+ tsc_tick_count pauseStartTime;
+ tsc_tick_count::tsc_interval_t totalPauseTime;
public:
- explicitTimer () : stat(0), startTime(0) { }
- explicitTimer (timeStat * s) : stat(s), startTime() { }
+ explicitTimer () : stat(0), startTime(0), pauseStartTime(0), totalPauseTime() { }
+ explicitTimer (timeStat * s) : stat(s), startTime(), pauseStartTime(0), totalPauseTime() { }
void setStat (timeStat *s) { stat = s; }
void start(timer_e timerEnumValue);
+ void pause() { pauseStartTime = tsc_tick_count::now(); }
+ void resume() { totalPauseTime += (tsc_tick_count::now() - pauseStartTime); }
void stop(timer_e timerEnumValue);
- void reset() { startTime = 0; }
+ void reset() { startTime = 0; pauseStartTime = 0; totalPauseTime = 0; }
};
// Where all you need is to time a block, this is enough.
@@ -315,6 +381,49 @@ class blockTimer : public explicitTimer
~blockTimer() { stop(timerEnumValue); }
};
+// Where you need to partition a threads clock ticks into separate states
+// e.g., a partitionedTimers class with two timers of EXECUTING_TASK, and
+// DOING_NOTHING would render these conditions:
+// time(EXECUTING_TASK) + time(DOING_NOTHING) = total time thread is alive
+// No clock tick in the EXECUTING_TASK is a member of DOING_NOTHING and vice versa
+class partitionedTimers
+{
+ private:
+ explicitTimer* timers[EXPLICIT_TIMER_LAST+1];
+ std::vector<timerPair> timer_stack;
+ public:
+ partitionedTimers();
+ void add_timer(explicit_timer_e timer_index, explicitTimer* timer_pointer);
+ void init(timerPair timer_index);
+ void push(timerPair timer_index);
+ void pop();
+ void windup();
+};
+
+// Special wrapper around the partioned timers to aid timing code blocks
+// It avoids the need to have an explicit end, leaving the scope suffices.
+class blockPartitionedTimer
+{
+ partitionedTimers* part_timers;
+ timerPair timer_pair;
+ public:
+ blockPartitionedTimer(partitionedTimers* pt, timerPair tp) : part_timers(pt), timer_pair(tp) { part_timers->push(timer_pair); }
+ ~blockPartitionedTimer() { part_timers->pop(); }
+};
+
+// Special wrapper around the thread state to aid in keeping state in code blocks
+// It avoids the need to have an explicit end, leaving the scope suffices.
+class blockThreadState
+{
+ stats_state_e* state_pointer;
+ stats_state_e old_state;
+ public:
+ blockThreadState(stats_state_e* thread_state_pointer, stats_state_e new_state) : state_pointer(thread_state_pointer), old_state(*thread_state_pointer) {
+ *state_pointer = new_state;
+ }
+ ~blockThreadState() { *state_pointer = old_state; }
+};
+
// If all you want is a count, then you can use this...
// The individual per-thread counts will be aggregated into a statistic at program exit.
class counter
@@ -473,14 +582,19 @@ class kmp_stats_list {
timeStat _timers[TIMER_LAST+1];
counter _counters[COUNTER_LAST+1];
explicitTimer _explicitTimers[EXPLICIT_TIMER_LAST+1];
+ partitionedTimers _partitionedTimers;
int _nestLevel; // one per thread
kmp_stats_event_vector _event_vector;
kmp_stats_list* next;
kmp_stats_list* prev;
+ stats_state_e state;
+ int thread_is_idle_flag;
public:
- kmp_stats_list() : next(this) , prev(this) , _event_vector(), _nestLevel(0) {
+ kmp_stats_list() : _nestLevel(0), _event_vector(), next(this), prev(this),
+ state(IDLE), thread_is_idle_flag(0) {
#define doInit(name,ignore1,ignore2) \
- getExplicitTimer(EXPLICIT_TIMER_##name)->setStat(getTimer(TIMER_##name));
+ getExplicitTimer(EXPLICIT_TIMER_##name)->setStat(getTimer(TIMER_##name)); \
+ _partitionedTimers.add_timer(EXPLICIT_TIMER_##name, getExplicitTimer(EXPLICIT_TIMER_##name));
KMP_FOREACH_EXPLICIT_TIMER(doInit,0);
#undef doInit
}
@@ -488,6 +602,7 @@ class kmp_stats_list {
inline timeStat * getTimer(timer_e idx) { return &_timers[idx]; }
inline counter * getCounter(counter_e idx) { return &_counters[idx]; }
inline explicitTimer * getExplicitTimer(explicit_timer_e idx) { return &_explicitTimers[idx]; }
+ inline partitionedTimers * getPartitionedTimers() { return &_partitionedTimers; }
inline timeStat * getTimers() { return _timers; }
inline counter * getCounters() { return _counters; }
inline explicitTimer * getExplicitTimers() { return _explicitTimers; }
@@ -498,6 +613,12 @@ class kmp_stats_list {
inline void decrementNestValue() { _nestLevel--; }
inline int getGtid() const { return gtid; }
inline void setGtid(int newgtid) { gtid = newgtid; }
+ inline void setState(stats_state_e newstate) { state = newstate; }
+ inline stats_state_e getState() const { return state; }
+ inline stats_state_e * getStatePointer() { return &state; }
+ inline bool isIdle() { return thread_is_idle_flag==1; }
+ inline void setIdleFlag() { thread_is_idle_flag = 1; }
+ inline void resetIdleFlag() { thread_is_idle_flag = 0; }
kmp_stats_list* push_back(int gtid); // returns newly created list node
inline void push_event(uint64_t start_time, uint64_t stop_time, int nest_level, timer_e name) {
_event_vector.push_back(start_time, stop_time, nest_level, name);
@@ -699,6 +820,35 @@ extern kmp_stats_output_module __kmp_stats_output;
__kmp_output_stats(heading_string)
/*!
+ * \brief Initializes the paritioned timers to begin with name.
+ *
+ * @param name timer which you want this thread to begin with
+ *
+ * @ingroup STATS_GATHERING
+*/
+#define KMP_INIT_PARTITIONED_TIMERS(name) \
+ __kmp_stats_thread_ptr->getPartitionedTimers()->init(timerPair(EXPLICIT_TIMER_##name, TIMER_##name))
+
+#define KMP_TIME_PARTITIONED_BLOCK(name) \
+ blockPartitionedTimer __PBLOCKTIME__(__kmp_stats_thread_ptr->getPartitionedTimers(), \
+ timerPair(EXPLICIT_TIMER_##name, TIMER_##name))
+
+#define KMP_PUSH_PARTITIONED_TIMER(name) \
+ __kmp_stats_thread_ptr->getPartitionedTimers()->push(timerPair(EXPLICIT_TIMER_##name, TIMER_##name))
+
+#define KMP_POP_PARTITIONED_TIMER() \
+ __kmp_stats_thread_ptr->getPartitionedTimers()->pop()
+
+#define KMP_SET_THREAD_STATE(state_name) \
+ __kmp_stats_thread_ptr->setState(state_name)
+
+#define KMP_GET_THREAD_STATE() \
+ __kmp_stats_thread_ptr->getState()
+
+#define KMP_SET_THREAD_STATE_BLOCK(state_name) \
+ blockThreadState __BTHREADSTATE__(__kmp_stats_thread_ptr->getStatePointer(), state_name)
+
+/*!
* \brief resets all stats (counters to 0, timers to 0 elapsed ticks)
*
* \details Reset all stats for all threads.
@@ -739,6 +889,13 @@ extern kmp_stats_output_module __kmp_stats_output;
#define KMP_COUNT_DEVELOPER_BLOCK(n) ((void)0)
#define KMP_START_DEVELOPER_EXPLICIT_TIMER(n) ((void)0)
#define KMP_STOP_DEVELOPER_EXPLICIT_TIMER(n) ((void)0)
+#define KMP_INIT_PARTITIONED_TIMERS(name) ((void)0)
+#define KMP_TIME_PARTITIONED_BLOCK(name) ((void)0)
+#define KMP_PUSH_PARTITIONED_TIMER(name) ((void)0)
+#define KMP_POP_PARTITIONED_TIMER() ((void)0)
+#define KMP_SET_THREAD_STATE(state_name) ((void)0)
+#define KMP_GET_THREAD_STATE() ((void)0)
+#define KMP_SET_THREAD_STATE_BLOCK(state_name) ((void)0)
#endif // KMP_STATS_ENABLED
#endif // KMP_STATS_H
diff --git a/openmp/runtime/src/kmp_stats_timing.h b/openmp/runtime/src/kmp_stats_timing.h
index 0ce0da99a5e..0605d9456da 100644
--- a/openmp/runtime/src/kmp_stats_timing.h
+++ b/openmp/runtime/src/kmp_stats_timing.h
@@ -40,11 +40,16 @@ class tsc_tick_count {
#endif
double ticks() const { return double(value); }
int64_t getValue() const { return value; }
+ tsc_interval_t& operator=(int64_t nvalue) { value = nvalue; return *this; }
friend class tsc_tick_count;
- friend tsc_interval_t operator-(
- const tsc_tick_count t1, const tsc_tick_count t0);
+ friend tsc_interval_t operator-(const tsc_tick_count& t1,
+ const tsc_tick_count& t0);
+ friend tsc_interval_t operator-(const tsc_tick_count::tsc_interval_t& i1,
+ const tsc_tick_count::tsc_interval_t& i0);
+ friend tsc_interval_t& operator+=(tsc_tick_count::tsc_interval_t& i1,
+ const tsc_tick_count::tsc_interval_t& i0);
};
#if KMP_HAVE___BUILTIN_READCYCLECOUNTER
@@ -66,14 +71,25 @@ class tsc_tick_count {
static double tick_time(); // returns seconds per cycle (period) of clock
#endif
static tsc_tick_count now() { return tsc_tick_count(); } // returns the rdtsc register value
- friend tsc_tick_count::tsc_interval_t operator-(const tsc_tick_count t1, const tsc_tick_count t0);
+ friend tsc_tick_count::tsc_interval_t operator-(const tsc_tick_count& t1, const tsc_tick_count& t0);
};
-inline tsc_tick_count::tsc_interval_t operator-(const tsc_tick_count t1, const tsc_tick_count t0)
+inline tsc_tick_count::tsc_interval_t operator-(const tsc_tick_count& t1, const tsc_tick_count& t0)
{
return tsc_tick_count::tsc_interval_t( t1.my_count-t0.my_count );
}
+inline tsc_tick_count::tsc_interval_t operator-(const tsc_tick_count::tsc_interval_t& i1, const tsc_tick_count::tsc_interval_t& i0)
+{
+ return tsc_tick_count::tsc_interval_t( i1.value-i0.value );
+}
+
+inline tsc_tick_count::tsc_interval_t& operator+=(tsc_tick_count::tsc_interval_t& i1, const tsc_tick_count::tsc_interval_t& i0)
+{
+ i1.value += i0.value;
+ return i1;
+}
+
#if KMP_HAVE_TICK_TIME
inline double tsc_tick_count::tsc_interval_t::seconds() const
{
diff --git a/openmp/runtime/src/kmp_tasking.c b/openmp/runtime/src/kmp_tasking.c
index 2758027f572..1b89f0213d8 100644
--- a/openmp/runtime/src/kmp_tasking.c
+++ b/openmp/runtime/src/kmp_tasking.c
@@ -36,16 +36,6 @@ static int __kmp_realloc_task_threads_data( kmp_info_t *thread, kmp_task_team_t
static void __kmp_bottom_half_finish_proxy( kmp_int32 gtid, kmp_task_t * ptask );
#endif
-static inline void __kmp_null_resume_wrapper(int gtid, volatile void *flag) {
- if (!flag) return;
- // Attempt to wake up a thread: examine its type and call appropriate template
- switch (((kmp_flag_64 *)flag)->get_type()) {
- case flag32: __kmp_resume_32(gtid, NULL); break;
- case flag64: __kmp_resume_64(gtid, NULL); break;
- case flag_oncore: __kmp_resume_oncore(gtid, NULL); break;
- }
-}
-
#ifdef BUILD_TIED_TASK_STACK
//---------------------------------------------------------------------------
@@ -1207,8 +1197,17 @@ __kmp_invoke_task( kmp_int32 gtid, kmp_task_t *task, kmp_taskdata_t * current_ta
// Thunks generated by gcc take a different argument list.
//
if (!discard) {
+#if KMP_STATS_ENABLED
KMP_COUNT_BLOCK(TASK_executed);
- KMP_TIME_BLOCK (OMP_task);
+ switch(KMP_GET_THREAD_STATE()) {
+ case FORK_JOIN_BARRIER: KMP_PUSH_PARTITIONED_TIMER(OMP_task_join_bar); break;
+ case PLAIN_BARRIER: KMP_PUSH_PARTITIONED_TIMER(OMP_task_plain_bar); break;
+ case TASKYIELD: KMP_PUSH_PARTITIONED_TIMER(OMP_task_taskyield); break;
+ case TASKWAIT: KMP_PUSH_PARTITIONED_TIMER(OMP_task_taskwait); break;
+ case TASKGROUP: KMP_PUSH_PARTITIONED_TIMER(OMP_task_taskgroup); break;
+ default: KMP_PUSH_PARTITIONED_TIMER(OMP_task_immediate); break;
+ }
+#endif // KMP_STATS_ENABLED
#endif // OMP_40_ENABLED
#if OMPT_SUPPORT && OMPT_TRACE
@@ -1231,6 +1230,7 @@ __kmp_invoke_task( kmp_int32 gtid, kmp_task_t *task, kmp_taskdata_t * current_ta
{
(*(task->routine))(gtid, task);
}
+ KMP_POP_PARTITIONED_TIMER();
#if OMPT_SUPPORT && OMPT_TRACE
/* let OMPT know that we're returning to the callee task */
@@ -1369,6 +1369,7 @@ kmp_int32
__kmpc_omp_task( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task)
{
kmp_int32 res;
+ KMP_SET_THREAD_STATE_BLOCK(EXPLICIT_TASK);
#if KMP_DEBUG
kmp_taskdata_t * new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
@@ -1392,6 +1393,7 @@ __kmpc_omp_taskwait( ident_t *loc_ref, kmp_int32 gtid )
kmp_taskdata_t * taskdata;
kmp_info_t * thread;
int thread_finished = FALSE;
+ KMP_SET_THREAD_STATE_BLOCK(TASKWAIT);
KA_TRACE(10, ("__kmpc_omp_taskwait(enter): T#%d loc=%p\n", gtid, loc_ref) );
@@ -1481,6 +1483,7 @@ __kmpc_omp_taskyield( ident_t *loc_ref, kmp_int32 gtid, int end_part )
int thread_finished = FALSE;
KMP_COUNT_BLOCK(OMP_TASKYIELD);
+ KMP_SET_THREAD_STATE_BLOCK(TASKYIELD);
KA_TRACE(10, ("__kmpc_omp_taskyield(enter): T#%d loc=%p end_part = %d\n",
gtid, loc_ref, end_part) );
@@ -1561,6 +1564,7 @@ __kmpc_end_taskgroup( ident_t* loc, int gtid )
KA_TRACE(10, ("__kmpc_end_taskgroup(enter): T#%d loc=%p\n", gtid, loc) );
KMP_DEBUG_ASSERT( taskgroup != NULL );
+ KMP_SET_THREAD_STATE_BLOCK(TASKGROUP);
if ( __kmp_tasking_mode != tskm_immediate_exec ) {
#if USE_ITT_BUILD
diff --git a/openmp/runtime/src/kmp_wait_release.h b/openmp/runtime/src/kmp_wait_release.h
index bed40170e03..a277fbf2725 100644
--- a/openmp/runtime/src/kmp_wait_release.h
+++ b/openmp/runtime/src/kmp_wait_release.h
@@ -18,6 +18,7 @@
#include "kmp.h"
#include "kmp_itt.h"
+#include "kmp_stats.h"
/*!
@defgroup WAIT_RELEASE Wait/Release operations
@@ -104,6 +105,9 @@ __kmp_wait_template(kmp_info_t *this_thr, C *flag, int final_spin
}
th_gtid = this_thr->th.th_info.ds.ds_gtid;
KA_TRACE(20, ("__kmp_wait_sleep: T#%d waiting for flag(%p)\n", th_gtid, flag));
+#if KMP_STATS_ENABLED
+ stats_state_e thread_state = KMP_GET_THREAD_STATE();
+#endif
#if OMPT_SUPPORT && OMPT_BLAME
ompt_state_t ompt_state = this_thr->th.ompt_thread_info.state;
@@ -223,6 +227,15 @@ __kmp_wait_template(kmp_info_t *this_thr, C *flag, int final_spin
}
}
+#if KMP_STATS_ENABLED
+ // Check if thread has been signalled to idle state
+ // This indicates that the logical "join-barrier" has finished
+ if (this_thr->th.th_stats->isIdle() && KMP_GET_THREAD_STATE() == FORK_JOIN_BARRIER) {
+ KMP_SET_THREAD_STATE(IDLE);
+ KMP_PUSH_PARTITIONED_TIMER(OMP_idle);
+ }
+#endif
+
// Don't suspend if KMP_BLOCKTIME is set to "infinite"
if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME)
continue;
@@ -273,6 +286,14 @@ __kmp_wait_template(kmp_info_t *this_thr, C *flag, int final_spin
}
}
#endif
+#if KMP_STATS_ENABLED
+ // If we were put into idle state, pop that off the state stack
+ if (KMP_GET_THREAD_STATE() == IDLE) {
+ KMP_POP_PARTITIONED_TIMER();
+ KMP_SET_THREAD_STATE(thread_state);
+ this_thr->th.th_stats->resetIdleFlag();
+ }
+#endif
KMP_FSYNC_SPIN_ACQUIRED(spin);
}
@@ -556,6 +577,15 @@ public:
flag_type get_ptr_type() { return flag_oncore; }
};
+// Used to wake up threads, volatile void* flag is usually the th_sleep_loc associated
+// with int gtid.
+static inline void __kmp_null_resume_wrapper(int gtid, volatile void *flag) {
+ switch (((kmp_flag_64 *)flag)->get_type()) {
+ case flag32: __kmp_resume_32(gtid, NULL); break;
+ case flag64: __kmp_resume_64(gtid, NULL); break;
+ case flag_oncore: __kmp_resume_oncore(gtid, NULL); break;
+ }
+}
/*!
@}
diff --git a/openmp/runtime/src/z_Linux_util.c b/openmp/runtime/src/z_Linux_util.c
index 1e7683d2402..5d61cd177fe 100644
--- a/openmp/runtime/src/z_Linux_util.c
+++ b/openmp/runtime/src/z_Linux_util.c
@@ -697,6 +697,9 @@ __kmp_launch_worker( void *thr )
#if KMP_STATS_ENABLED
// set __thread local index to point to thread-specific stats
__kmp_stats_thread_ptr = ((kmp_info_t*)thr)->th.th_stats;
+ KMP_START_EXPLICIT_TIMER(OMP_worker_thread_life);
+ KMP_SET_THREAD_STATE(IDLE);
+ KMP_INIT_PARTITIONED_TIMERS(OMP_idle);
#endif
#if USE_ITT_BUILD
@@ -972,8 +975,9 @@ __kmp_create_worker( int gtid, kmp_info_t *th, size_t stack_size )
__kmp_stats_start_time = tsc_tick_count::now();
__kmp_stats_thread_ptr = th->th.th_stats;
__kmp_stats_init();
- KMP_START_EXPLICIT_TIMER(OMP_serial);
- KMP_START_EXPLICIT_TIMER(OMP_start_end);
+ KMP_START_EXPLICIT_TIMER(OMP_worker_thread_life);
+ KMP_SET_THREAD_STATE(SERIAL_REGION);
+ KMP_INIT_PARTITIONED_TIMERS(OMP_serial);
}
__kmp_release_tas_lock(&__kmp_stats_lock, gtid);
@@ -1856,6 +1860,7 @@ void __kmp_resume_oncore(int target_gtid, kmp_flag_oncore *flag) {
void
__kmp_resume_monitor()
{
+ KMP_TIME_DEVELOPER_BLOCK(USER_resume);
int status;
#ifdef KMP_DEBUG
int gtid = TCR_4(__kmp_init_gtid) ? __kmp_get_gtid() : -1;
OpenPOWER on IntegriCloud