diff options
Diffstat (limited to 'openmp/runtime/src/kmp_stats.h')
| -rw-r--r-- | openmp/runtime/src/kmp_stats.h | 205 |
1 files changed, 181 insertions, 24 deletions
diff --git a/openmp/runtime/src/kmp_stats.h b/openmp/runtime/src/kmp_stats.h index 0377fcf9ed9..e317e7cbf80 100644 --- a/openmp/runtime/src/kmp_stats.h +++ b/openmp/runtime/src/kmp_stats.h @@ -27,6 +27,7 @@ #include <limits> #include <math.h> +#include <vector> #include <string> #include <stdint.h> #include <new> // placement new @@ -52,6 +53,23 @@ enum stats_flags_e { }; /*! + * @ingroup STATS_GATHERING + * \brief the states which a thread can be in + * + */ +enum stats_state_e { + IDLE, + SERIAL_REGION, + FORK_JOIN_BARRIER, + PLAIN_BARRIER, + TASKWAIT, + TASKYIELD, + TASKGROUP, + IMPLICIT_TASK, + EXPLICIT_TASK +}; + +/*! * \brief Add new counters under KMP_FOREACH_COUNTER() macro in kmp_stats.h * * @param macro a user defined macro that takes three arguments - macro(COUNTER_NAME, flags, arg) @@ -103,18 +121,25 @@ enum stats_flags_e { * * @ingroup STATS_GATHERING2 */ -#define KMP_FOREACH_TIMER(macro, arg) \ - macro (OMP_start_end, stats_flags_e::onlyInMaster | stats_flags_e::noTotal, arg) \ - macro (OMP_serial, stats_flags_e::onlyInMaster | stats_flags_e::noTotal, arg) \ - macro (OMP_work, 0, arg) \ - macro (OMP_barrier, 0, arg) \ - macro (FOR_static_scheduling, 0, arg) \ - macro (FOR_dynamic_scheduling, 0, arg) \ - macro (OMP_task, 0, arg) \ - macro (OMP_critical, 0, arg) \ - macro (OMP_critical_wait, 0, arg) \ - macro (OMP_single, 0, arg) \ - macro (OMP_master, 0, arg) \ +#define KMP_FOREACH_TIMER(macro, arg) \ + macro (OMP_worker_thread_life, 0, arg) \ + macro (FOR_static_scheduling, 0, arg) \ + macro (FOR_dynamic_scheduling, 0, arg) \ + macro (OMP_critical, 0, arg) \ + macro (OMP_critical_wait, 0, arg) \ + macro (OMP_single, 0, arg) \ + macro (OMP_master, 0, arg) \ + macro (OMP_idle, 0, arg) \ + macro (OMP_plain_barrier, 0, arg) \ + macro (OMP_fork_join_barrier, 0, arg) \ + macro (OMP_parallel, 0, arg) \ + macro (OMP_task_immediate, 0, arg) \ + macro (OMP_task_taskwait, 0, arg) \ + macro (OMP_task_taskyield, 0, arg) \ + macro (OMP_task_taskgroup, 0, arg) \ + macro (OMP_task_join_bar, 0, arg) \ + macro (OMP_task_plain_bar, 0, arg) \ + macro (OMP_serial, 0, arg) \ macro (OMP_set_numthreads, stats_flags_e::noUnits | stats_flags_e::noTotal, arg) \ macro (OMP_PARALLEL_args, stats_flags_e::noUnits | stats_flags_e::noTotal, arg) \ macro (FOR_static_iterations, stats_flags_e::noUnits | stats_flags_e::noTotal, arg) \ @@ -129,7 +154,16 @@ enum stats_flags_e { // OMP_barrier -- Time at "real" barriers (includes task time) // FOR_static_scheduling -- Time spent doing scheduling for a static "for" // FOR_dynamic_scheduling -- Time spent doing scheduling for a dynamic "for" -// OMP_task -- Time spent executing tasks +// OMP_idle -- Worker threads time spent waiting for inclusion in a parallel region +// OMP_plain_barrier -- Time spent in a barrier construct +// OMP_fork_join_barrier -- Time spent in a the fork-join barrier surrounding a parallel region +// OMP_parallel -- Time spent inside a parallel construct +// OMP_task_immediate -- Time spent executing non-deferred tasks +// OMP_task_taskwait -- Time spent executing tasks inside a taskwait construct +// OMP_task_taskyield -- Time spent executing tasks inside a taskyield construct +// OMP_task_taskgroup -- Time spent executing tasks inside a taskygroup construct +// OMP_task_join_bar -- Time spent executing tasks inside a join barrier +// OMP_task_plain_bar -- Time spent executing tasks inside a barrier construct // OMP_single -- Time spent executing a "single" region // OMP_master -- Time spent executing a "master" region // OMP_set_numthreads -- Values passed to omp_set_num_threads @@ -197,12 +231,25 @@ enum stats_flags_e { * * @ingroup STATS_GATHERING */ -#define KMP_FOREACH_EXPLICIT_TIMER(macro, arg) \ - macro(OMP_serial, 0, arg) \ - macro(OMP_start_end, 0, arg) \ - macro(OMP_critical, 0, arg) \ - macro(OMP_single, 0, arg) \ - macro(OMP_master, 0, arg) \ +#define KMP_FOREACH_EXPLICIT_TIMER(macro, arg) \ + macro(OMP_worker_thread_life, 0, arg) \ + macro(FOR_static_scheduling, 0, arg) \ + macro(FOR_dynamic_scheduling, 0, arg) \ + macro(OMP_critical, 0, arg) \ + macro(OMP_critical_wait, 0, arg) \ + macro(OMP_single, 0, arg) \ + macro(OMP_master, 0, arg) \ + macro(OMP_idle, 0, arg) \ + macro(OMP_plain_barrier, 0, arg) \ + macro(OMP_fork_join_barrier, 0, arg) \ + macro(OMP_parallel, 0, arg) \ + macro(OMP_task_immediate, 0, arg) \ + macro(OMP_task_taskwait, 0, arg) \ + macro(OMP_task_taskyield, 0, arg) \ + macro(OMP_task_taskgroup, 0, arg) \ + macro(OMP_task_join_bar, 0, arg) \ + macro(OMP_task_plain_bar, 0, arg) \ + macro(OMP_serial, 0, arg) \ KMP_FOREACH_EXPLICIT_DEVELOPER_TIMER(macro,arg) \ macro(LAST, 0, arg) @@ -227,6 +274,21 @@ enum counter_e { }; #undef ENUMERATE +class timerPair { + explicit_timer_e timer_index; + timer_e timer; + public: + timerPair(explicit_timer_e ti, timer_e t) : timer_index(ti), timer(t) {} + inline explicit_timer_e get_index() const { return timer_index; } + inline timer_e get_timer() const { return timer; } + bool operator==(const timerPair & rhs) { + return this->get_index() == rhs.get_index(); + } + bool operator!=(const timerPair & rhs) { + return !(*this == rhs); + } +}; + class statistic { double minVal; @@ -294,15 +356,19 @@ class explicitTimer { timeStat * stat; tsc_tick_count startTime; + tsc_tick_count pauseStartTime; + tsc_tick_count::tsc_interval_t totalPauseTime; public: - explicitTimer () : stat(0), startTime(0) { } - explicitTimer (timeStat * s) : stat(s), startTime() { } + explicitTimer () : stat(0), startTime(0), pauseStartTime(0), totalPauseTime() { } + explicitTimer (timeStat * s) : stat(s), startTime(), pauseStartTime(0), totalPauseTime() { } void setStat (timeStat *s) { stat = s; } void start(timer_e timerEnumValue); + void pause() { pauseStartTime = tsc_tick_count::now(); } + void resume() { totalPauseTime += (tsc_tick_count::now() - pauseStartTime); } void stop(timer_e timerEnumValue); - void reset() { startTime = 0; } + void reset() { startTime = 0; pauseStartTime = 0; totalPauseTime = 0; } }; // Where all you need is to time a block, this is enough. @@ -315,6 +381,49 @@ class blockTimer : public explicitTimer ~blockTimer() { stop(timerEnumValue); } }; +// Where you need to partition a threads clock ticks into separate states +// e.g., a partitionedTimers class with two timers of EXECUTING_TASK, and +// DOING_NOTHING would render these conditions: +// time(EXECUTING_TASK) + time(DOING_NOTHING) = total time thread is alive +// No clock tick in the EXECUTING_TASK is a member of DOING_NOTHING and vice versa +class partitionedTimers +{ + private: + explicitTimer* timers[EXPLICIT_TIMER_LAST+1]; + std::vector<timerPair> timer_stack; + public: + partitionedTimers(); + void add_timer(explicit_timer_e timer_index, explicitTimer* timer_pointer); + void init(timerPair timer_index); + void push(timerPair timer_index); + void pop(); + void windup(); +}; + +// Special wrapper around the partioned timers to aid timing code blocks +// It avoids the need to have an explicit end, leaving the scope suffices. +class blockPartitionedTimer +{ + partitionedTimers* part_timers; + timerPair timer_pair; + public: + blockPartitionedTimer(partitionedTimers* pt, timerPair tp) : part_timers(pt), timer_pair(tp) { part_timers->push(timer_pair); } + ~blockPartitionedTimer() { part_timers->pop(); } +}; + +// Special wrapper around the thread state to aid in keeping state in code blocks +// It avoids the need to have an explicit end, leaving the scope suffices. +class blockThreadState +{ + stats_state_e* state_pointer; + stats_state_e old_state; + public: + blockThreadState(stats_state_e* thread_state_pointer, stats_state_e new_state) : state_pointer(thread_state_pointer), old_state(*thread_state_pointer) { + *state_pointer = new_state; + } + ~blockThreadState() { *state_pointer = old_state; } +}; + // If all you want is a count, then you can use this... // The individual per-thread counts will be aggregated into a statistic at program exit. class counter @@ -473,14 +582,19 @@ class kmp_stats_list { timeStat _timers[TIMER_LAST+1]; counter _counters[COUNTER_LAST+1]; explicitTimer _explicitTimers[EXPLICIT_TIMER_LAST+1]; + partitionedTimers _partitionedTimers; int _nestLevel; // one per thread kmp_stats_event_vector _event_vector; kmp_stats_list* next; kmp_stats_list* prev; + stats_state_e state; + int thread_is_idle_flag; public: - kmp_stats_list() : next(this) , prev(this) , _event_vector(), _nestLevel(0) { + kmp_stats_list() : _nestLevel(0), _event_vector(), next(this), prev(this), + state(IDLE), thread_is_idle_flag(0) { #define doInit(name,ignore1,ignore2) \ - getExplicitTimer(EXPLICIT_TIMER_##name)->setStat(getTimer(TIMER_##name)); + getExplicitTimer(EXPLICIT_TIMER_##name)->setStat(getTimer(TIMER_##name)); \ + _partitionedTimers.add_timer(EXPLICIT_TIMER_##name, getExplicitTimer(EXPLICIT_TIMER_##name)); KMP_FOREACH_EXPLICIT_TIMER(doInit,0); #undef doInit } @@ -488,6 +602,7 @@ class kmp_stats_list { inline timeStat * getTimer(timer_e idx) { return &_timers[idx]; } inline counter * getCounter(counter_e idx) { return &_counters[idx]; } inline explicitTimer * getExplicitTimer(explicit_timer_e idx) { return &_explicitTimers[idx]; } + inline partitionedTimers * getPartitionedTimers() { return &_partitionedTimers; } inline timeStat * getTimers() { return _timers; } inline counter * getCounters() { return _counters; } inline explicitTimer * getExplicitTimers() { return _explicitTimers; } @@ -498,6 +613,12 @@ class kmp_stats_list { inline void decrementNestValue() { _nestLevel--; } inline int getGtid() const { return gtid; } inline void setGtid(int newgtid) { gtid = newgtid; } + inline void setState(stats_state_e newstate) { state = newstate; } + inline stats_state_e getState() const { return state; } + inline stats_state_e * getStatePointer() { return &state; } + inline bool isIdle() { return thread_is_idle_flag==1; } + inline void setIdleFlag() { thread_is_idle_flag = 1; } + inline void resetIdleFlag() { thread_is_idle_flag = 0; } kmp_stats_list* push_back(int gtid); // returns newly created list node inline void push_event(uint64_t start_time, uint64_t stop_time, int nest_level, timer_e name) { _event_vector.push_back(start_time, stop_time, nest_level, name); @@ -699,6 +820,35 @@ extern kmp_stats_output_module __kmp_stats_output; __kmp_output_stats(heading_string) /*! + * \brief Initializes the paritioned timers to begin with name. + * + * @param name timer which you want this thread to begin with + * + * @ingroup STATS_GATHERING +*/ +#define KMP_INIT_PARTITIONED_TIMERS(name) \ + __kmp_stats_thread_ptr->getPartitionedTimers()->init(timerPair(EXPLICIT_TIMER_##name, TIMER_##name)) + +#define KMP_TIME_PARTITIONED_BLOCK(name) \ + blockPartitionedTimer __PBLOCKTIME__(__kmp_stats_thread_ptr->getPartitionedTimers(), \ + timerPair(EXPLICIT_TIMER_##name, TIMER_##name)) + +#define KMP_PUSH_PARTITIONED_TIMER(name) \ + __kmp_stats_thread_ptr->getPartitionedTimers()->push(timerPair(EXPLICIT_TIMER_##name, TIMER_##name)) + +#define KMP_POP_PARTITIONED_TIMER() \ + __kmp_stats_thread_ptr->getPartitionedTimers()->pop() + +#define KMP_SET_THREAD_STATE(state_name) \ + __kmp_stats_thread_ptr->setState(state_name) + +#define KMP_GET_THREAD_STATE() \ + __kmp_stats_thread_ptr->getState() + +#define KMP_SET_THREAD_STATE_BLOCK(state_name) \ + blockThreadState __BTHREADSTATE__(__kmp_stats_thread_ptr->getStatePointer(), state_name) + +/*! * \brief resets all stats (counters to 0, timers to 0 elapsed ticks) * * \details Reset all stats for all threads. @@ -739,6 +889,13 @@ extern kmp_stats_output_module __kmp_stats_output; #define KMP_COUNT_DEVELOPER_BLOCK(n) ((void)0) #define KMP_START_DEVELOPER_EXPLICIT_TIMER(n) ((void)0) #define KMP_STOP_DEVELOPER_EXPLICIT_TIMER(n) ((void)0) +#define KMP_INIT_PARTITIONED_TIMERS(name) ((void)0) +#define KMP_TIME_PARTITIONED_BLOCK(name) ((void)0) +#define KMP_PUSH_PARTITIONED_TIMER(name) ((void)0) +#define KMP_POP_PARTITIONED_TIMER() ((void)0) +#define KMP_SET_THREAD_STATE(state_name) ((void)0) +#define KMP_GET_THREAD_STATE() ((void)0) +#define KMP_SET_THREAD_STATE_BLOCK(state_name) ((void)0) #endif // KMP_STATS_ENABLED #endif // KMP_STATS_H |

