diff options
| author | Jonathan Peyton <jonathan.l.peyton@intel.com> | 2016-05-05 16:15:57 +0000 |
|---|---|---|
| committer | Jonathan Peyton <jonathan.l.peyton@intel.com> | 2016-05-05 16:15:57 +0000 |
| commit | 11dc82fa83b9e058dee690584d7ba4c0380fbf64 (patch) | |
| tree | a7eda7b548953e1949e86cbe62a0cd5f63366023 /openmp/runtime/src/kmp_stats.h | |
| parent | 3e0b7837bfcabb877e5f1a6747335409d367be76 (diff) | |
| download | bcm5719-llvm-11dc82fa83b9e058dee690584d7ba4c0380fbf64.tar.gz bcm5719-llvm-11dc82fa83b9e058dee690584d7ba4c0380fbf64.zip | |
[STATS] Use partitioned timer scheme
This change replaces the current timers with ones that partition time properly.
The current timers are nested, so that if a new timer, B, starts when the
current timer, A, is already timing, A's time will include B's. To eliminate
this problem, the partitioned timers are designed to stop the current timer (A),
let the new timer run (B), and when the new timer is finished, restart the
previously running timer (A). With this partitioning of time, a thread's timers
all sum up to the OMP_worker_thread_life time and can now easily show the
percentage of time a thread is spending in different parts of the runtime or
user code.
There is also a new state variable associated with each thread which tells where
it is executing a task. This corresponds with the timers: OMP_task_*, e.g., if
time is spent in OMP_task_taskwait, then that thread executed tasks inside a
#pragma omp taskwait construct.
The changes are mostly changing the MACROs to use the new PARTITIONED_* macros,
the new partitionedTimers class and its methods, and new state logic.
Differential Revision: http://reviews.llvm.org/D19229
llvm-svn: 268640
Diffstat (limited to 'openmp/runtime/src/kmp_stats.h')
| -rw-r--r-- | openmp/runtime/src/kmp_stats.h | 205 |
1 files changed, 181 insertions, 24 deletions
diff --git a/openmp/runtime/src/kmp_stats.h b/openmp/runtime/src/kmp_stats.h index 0377fcf9ed9..e317e7cbf80 100644 --- a/openmp/runtime/src/kmp_stats.h +++ b/openmp/runtime/src/kmp_stats.h @@ -27,6 +27,7 @@ #include <limits> #include <math.h> +#include <vector> #include <string> #include <stdint.h> #include <new> // placement new @@ -52,6 +53,23 @@ enum stats_flags_e { }; /*! + * @ingroup STATS_GATHERING + * \brief the states which a thread can be in + * + */ +enum stats_state_e { + IDLE, + SERIAL_REGION, + FORK_JOIN_BARRIER, + PLAIN_BARRIER, + TASKWAIT, + TASKYIELD, + TASKGROUP, + IMPLICIT_TASK, + EXPLICIT_TASK +}; + +/*! * \brief Add new counters under KMP_FOREACH_COUNTER() macro in kmp_stats.h * * @param macro a user defined macro that takes three arguments - macro(COUNTER_NAME, flags, arg) @@ -103,18 +121,25 @@ enum stats_flags_e { * * @ingroup STATS_GATHERING2 */ -#define KMP_FOREACH_TIMER(macro, arg) \ - macro (OMP_start_end, stats_flags_e::onlyInMaster | stats_flags_e::noTotal, arg) \ - macro (OMP_serial, stats_flags_e::onlyInMaster | stats_flags_e::noTotal, arg) \ - macro (OMP_work, 0, arg) \ - macro (OMP_barrier, 0, arg) \ - macro (FOR_static_scheduling, 0, arg) \ - macro (FOR_dynamic_scheduling, 0, arg) \ - macro (OMP_task, 0, arg) \ - macro (OMP_critical, 0, arg) \ - macro (OMP_critical_wait, 0, arg) \ - macro (OMP_single, 0, arg) \ - macro (OMP_master, 0, arg) \ +#define KMP_FOREACH_TIMER(macro, arg) \ + macro (OMP_worker_thread_life, 0, arg) \ + macro (FOR_static_scheduling, 0, arg) \ + macro (FOR_dynamic_scheduling, 0, arg) \ + macro (OMP_critical, 0, arg) \ + macro (OMP_critical_wait, 0, arg) \ + macro (OMP_single, 0, arg) \ + macro (OMP_master, 0, arg) \ + macro (OMP_idle, 0, arg) \ + macro (OMP_plain_barrier, 0, arg) \ + macro (OMP_fork_join_barrier, 0, arg) \ + macro (OMP_parallel, 0, arg) \ + macro (OMP_task_immediate, 0, arg) \ + macro (OMP_task_taskwait, 0, arg) \ + macro (OMP_task_taskyield, 0, arg) \ + macro (OMP_task_taskgroup, 0, arg) \ + 
macro (OMP_task_join_bar, 0, arg) \ + macro (OMP_task_plain_bar, 0, arg) \ + macro (OMP_serial, 0, arg) \ macro (OMP_set_numthreads, stats_flags_e::noUnits | stats_flags_e::noTotal, arg) \ macro (OMP_PARALLEL_args, stats_flags_e::noUnits | stats_flags_e::noTotal, arg) \ macro (FOR_static_iterations, stats_flags_e::noUnits | stats_flags_e::noTotal, arg) \ @@ -129,7 +154,16 @@ enum stats_flags_e { // OMP_barrier -- Time at "real" barriers (includes task time) // FOR_static_scheduling -- Time spent doing scheduling for a static "for" // FOR_dynamic_scheduling -- Time spent doing scheduling for a dynamic "for" -// OMP_task -- Time spent executing tasks +// OMP_idle -- Worker threads time spent waiting for inclusion in a parallel region +// OMP_plain_barrier -- Time spent in a barrier construct +// OMP_fork_join_barrier -- Time spent in a the fork-join barrier surrounding a parallel region +// OMP_parallel -- Time spent inside a parallel construct +// OMP_task_immediate -- Time spent executing non-deferred tasks +// OMP_task_taskwait -- Time spent executing tasks inside a taskwait construct +// OMP_task_taskyield -- Time spent executing tasks inside a taskyield construct +// OMP_task_taskgroup -- Time spent executing tasks inside a taskygroup construct +// OMP_task_join_bar -- Time spent executing tasks inside a join barrier +// OMP_task_plain_bar -- Time spent executing tasks inside a barrier construct // OMP_single -- Time spent executing a "single" region // OMP_master -- Time spent executing a "master" region // OMP_set_numthreads -- Values passed to omp_set_num_threads @@ -197,12 +231,25 @@ enum stats_flags_e { * * @ingroup STATS_GATHERING */ -#define KMP_FOREACH_EXPLICIT_TIMER(macro, arg) \ - macro(OMP_serial, 0, arg) \ - macro(OMP_start_end, 0, arg) \ - macro(OMP_critical, 0, arg) \ - macro(OMP_single, 0, arg) \ - macro(OMP_master, 0, arg) \ +#define KMP_FOREACH_EXPLICIT_TIMER(macro, arg) \ + macro(OMP_worker_thread_life, 0, arg) \ + macro(FOR_static_scheduling, 
0, arg) \ + macro(FOR_dynamic_scheduling, 0, arg) \ + macro(OMP_critical, 0, arg) \ + macro(OMP_critical_wait, 0, arg) \ + macro(OMP_single, 0, arg) \ + macro(OMP_master, 0, arg) \ + macro(OMP_idle, 0, arg) \ + macro(OMP_plain_barrier, 0, arg) \ + macro(OMP_fork_join_barrier, 0, arg) \ + macro(OMP_parallel, 0, arg) \ + macro(OMP_task_immediate, 0, arg) \ + macro(OMP_task_taskwait, 0, arg) \ + macro(OMP_task_taskyield, 0, arg) \ + macro(OMP_task_taskgroup, 0, arg) \ + macro(OMP_task_join_bar, 0, arg) \ + macro(OMP_task_plain_bar, 0, arg) \ + macro(OMP_serial, 0, arg) \ KMP_FOREACH_EXPLICIT_DEVELOPER_TIMER(macro,arg) \ macro(LAST, 0, arg) @@ -227,6 +274,21 @@ enum counter_e { }; #undef ENUMERATE +class timerPair { + explicit_timer_e timer_index; + timer_e timer; + public: + timerPair(explicit_timer_e ti, timer_e t) : timer_index(ti), timer(t) {} + inline explicit_timer_e get_index() const { return timer_index; } + inline timer_e get_timer() const { return timer; } + bool operator==(const timerPair & rhs) { + return this->get_index() == rhs.get_index(); + } + bool operator!=(const timerPair & rhs) { + return !(*this == rhs); + } +}; + class statistic { double minVal; @@ -294,15 +356,19 @@ class explicitTimer { timeStat * stat; tsc_tick_count startTime; + tsc_tick_count pauseStartTime; + tsc_tick_count::tsc_interval_t totalPauseTime; public: - explicitTimer () : stat(0), startTime(0) { } - explicitTimer (timeStat * s) : stat(s), startTime() { } + explicitTimer () : stat(0), startTime(0), pauseStartTime(0), totalPauseTime() { } + explicitTimer (timeStat * s) : stat(s), startTime(), pauseStartTime(0), totalPauseTime() { } void setStat (timeStat *s) { stat = s; } void start(timer_e timerEnumValue); + void pause() { pauseStartTime = tsc_tick_count::now(); } + void resume() { totalPauseTime += (tsc_tick_count::now() - pauseStartTime); } void stop(timer_e timerEnumValue); - void reset() { startTime = 0; } + void reset() { startTime = 0; pauseStartTime = 0; totalPauseTime = 
0; } }; // Where all you need is to time a block, this is enough. @@ -315,6 +381,49 @@ class blockTimer : public explicitTimer ~blockTimer() { stop(timerEnumValue); } }; +// Where you need to partition a threads clock ticks into separate states +// e.g., a partitionedTimers class with two timers of EXECUTING_TASK, and +// DOING_NOTHING would render these conditions: +// time(EXECUTING_TASK) + time(DOING_NOTHING) = total time thread is alive +// No clock tick in the EXECUTING_TASK is a member of DOING_NOTHING and vice versa +class partitionedTimers +{ + private: + explicitTimer* timers[EXPLICIT_TIMER_LAST+1]; + std::vector<timerPair> timer_stack; + public: + partitionedTimers(); + void add_timer(explicit_timer_e timer_index, explicitTimer* timer_pointer); + void init(timerPair timer_index); + void push(timerPair timer_index); + void pop(); + void windup(); +}; + +// Special wrapper around the partioned timers to aid timing code blocks +// It avoids the need to have an explicit end, leaving the scope suffices. +class blockPartitionedTimer +{ + partitionedTimers* part_timers; + timerPair timer_pair; + public: + blockPartitionedTimer(partitionedTimers* pt, timerPair tp) : part_timers(pt), timer_pair(tp) { part_timers->push(timer_pair); } + ~blockPartitionedTimer() { part_timers->pop(); } +}; + +// Special wrapper around the thread state to aid in keeping state in code blocks +// It avoids the need to have an explicit end, leaving the scope suffices. +class blockThreadState +{ + stats_state_e* state_pointer; + stats_state_e old_state; + public: + blockThreadState(stats_state_e* thread_state_pointer, stats_state_e new_state) : state_pointer(thread_state_pointer), old_state(*thread_state_pointer) { + *state_pointer = new_state; + } + ~blockThreadState() { *state_pointer = old_state; } +}; + // If all you want is a count, then you can use this... // The individual per-thread counts will be aggregated into a statistic at program exit. 
class counter @@ -473,14 +582,19 @@ class kmp_stats_list { timeStat _timers[TIMER_LAST+1]; counter _counters[COUNTER_LAST+1]; explicitTimer _explicitTimers[EXPLICIT_TIMER_LAST+1]; + partitionedTimers _partitionedTimers; int _nestLevel; // one per thread kmp_stats_event_vector _event_vector; kmp_stats_list* next; kmp_stats_list* prev; + stats_state_e state; + int thread_is_idle_flag; public: - kmp_stats_list() : next(this) , prev(this) , _event_vector(), _nestLevel(0) { + kmp_stats_list() : _nestLevel(0), _event_vector(), next(this), prev(this), + state(IDLE), thread_is_idle_flag(0) { #define doInit(name,ignore1,ignore2) \ - getExplicitTimer(EXPLICIT_TIMER_##name)->setStat(getTimer(TIMER_##name)); + getExplicitTimer(EXPLICIT_TIMER_##name)->setStat(getTimer(TIMER_##name)); \ + _partitionedTimers.add_timer(EXPLICIT_TIMER_##name, getExplicitTimer(EXPLICIT_TIMER_##name)); KMP_FOREACH_EXPLICIT_TIMER(doInit,0); #undef doInit } @@ -488,6 +602,7 @@ class kmp_stats_list { inline timeStat * getTimer(timer_e idx) { return &_timers[idx]; } inline counter * getCounter(counter_e idx) { return &_counters[idx]; } inline explicitTimer * getExplicitTimer(explicit_timer_e idx) { return &_explicitTimers[idx]; } + inline partitionedTimers * getPartitionedTimers() { return &_partitionedTimers; } inline timeStat * getTimers() { return _timers; } inline counter * getCounters() { return _counters; } inline explicitTimer * getExplicitTimers() { return _explicitTimers; } @@ -498,6 +613,12 @@ class kmp_stats_list { inline void decrementNestValue() { _nestLevel--; } inline int getGtid() const { return gtid; } inline void setGtid(int newgtid) { gtid = newgtid; } + inline void setState(stats_state_e newstate) { state = newstate; } + inline stats_state_e getState() const { return state; } + inline stats_state_e * getStatePointer() { return &state; } + inline bool isIdle() { return thread_is_idle_flag==1; } + inline void setIdleFlag() { thread_is_idle_flag = 1; } + inline void resetIdleFlag() { 
thread_is_idle_flag = 0; } kmp_stats_list* push_back(int gtid); // returns newly created list node inline void push_event(uint64_t start_time, uint64_t stop_time, int nest_level, timer_e name) { _event_vector.push_back(start_time, stop_time, nest_level, name); @@ -699,6 +820,35 @@ extern kmp_stats_output_module __kmp_stats_output; __kmp_output_stats(heading_string) /*! + * \brief Initializes the paritioned timers to begin with name. + * + * @param name timer which you want this thread to begin with + * + * @ingroup STATS_GATHERING +*/ +#define KMP_INIT_PARTITIONED_TIMERS(name) \ + __kmp_stats_thread_ptr->getPartitionedTimers()->init(timerPair(EXPLICIT_TIMER_##name, TIMER_##name)) + +#define KMP_TIME_PARTITIONED_BLOCK(name) \ + blockPartitionedTimer __PBLOCKTIME__(__kmp_stats_thread_ptr->getPartitionedTimers(), \ + timerPair(EXPLICIT_TIMER_##name, TIMER_##name)) + +#define KMP_PUSH_PARTITIONED_TIMER(name) \ + __kmp_stats_thread_ptr->getPartitionedTimers()->push(timerPair(EXPLICIT_TIMER_##name, TIMER_##name)) + +#define KMP_POP_PARTITIONED_TIMER() \ + __kmp_stats_thread_ptr->getPartitionedTimers()->pop() + +#define KMP_SET_THREAD_STATE(state_name) \ + __kmp_stats_thread_ptr->setState(state_name) + +#define KMP_GET_THREAD_STATE() \ + __kmp_stats_thread_ptr->getState() + +#define KMP_SET_THREAD_STATE_BLOCK(state_name) \ + blockThreadState __BTHREADSTATE__(__kmp_stats_thread_ptr->getStatePointer(), state_name) + +/*! * \brief resets all stats (counters to 0, timers to 0 elapsed ticks) * * \details Reset all stats for all threads. 
@@ -739,6 +889,13 @@ extern kmp_stats_output_module __kmp_stats_output; #define KMP_COUNT_DEVELOPER_BLOCK(n) ((void)0) #define KMP_START_DEVELOPER_EXPLICIT_TIMER(n) ((void)0) #define KMP_STOP_DEVELOPER_EXPLICIT_TIMER(n) ((void)0) +#define KMP_INIT_PARTITIONED_TIMERS(name) ((void)0) +#define KMP_TIME_PARTITIONED_BLOCK(name) ((void)0) +#define KMP_PUSH_PARTITIONED_TIMER(name) ((void)0) +#define KMP_POP_PARTITIONED_TIMER() ((void)0) +#define KMP_SET_THREAD_STATE(state_name) ((void)0) +#define KMP_GET_THREAD_STATE() ((void)0) +#define KMP_SET_THREAD_STATE_BLOCK(state_name) ((void)0) #endif // KMP_STATS_ENABLED #endif // KMP_STATS_H |

