summaryrefslogtreecommitdiffstats
path: root/openmp/runtime/src/kmp_stats.h
diff options
context:
space:
mode:
Diffstat (limited to 'openmp/runtime/src/kmp_stats.h')
-rw-r--r--openmp/runtime/src/kmp_stats.h205
1 files changed, 181 insertions, 24 deletions
diff --git a/openmp/runtime/src/kmp_stats.h b/openmp/runtime/src/kmp_stats.h
index 0377fcf9ed9..e317e7cbf80 100644
--- a/openmp/runtime/src/kmp_stats.h
+++ b/openmp/runtime/src/kmp_stats.h
@@ -27,6 +27,7 @@
#include <limits>
#include <math.h>
+#include <vector>
#include <string>
#include <stdint.h>
#include <new> // placement new
@@ -52,6 +53,23 @@ enum stats_flags_e {
};
/*!
+ * @ingroup STATS_GATHERING
+ * \brief the states which a thread can be in
+ *
+ */
+enum stats_state_e {
+ IDLE,
+ SERIAL_REGION,
+ FORK_JOIN_BARRIER,
+ PLAIN_BARRIER,
+ TASKWAIT,
+ TASKYIELD,
+ TASKGROUP,
+ IMPLICIT_TASK,
+ EXPLICIT_TASK
+};
+
+/*!
* \brief Add new counters under KMP_FOREACH_COUNTER() macro in kmp_stats.h
*
* @param macro a user defined macro that takes three arguments - macro(COUNTER_NAME, flags, arg)
@@ -103,18 +121,25 @@ enum stats_flags_e {
*
* @ingroup STATS_GATHERING2
*/
-#define KMP_FOREACH_TIMER(macro, arg) \
- macro (OMP_start_end, stats_flags_e::onlyInMaster | stats_flags_e::noTotal, arg) \
- macro (OMP_serial, stats_flags_e::onlyInMaster | stats_flags_e::noTotal, arg) \
- macro (OMP_work, 0, arg) \
- macro (OMP_barrier, 0, arg) \
- macro (FOR_static_scheduling, 0, arg) \
- macro (FOR_dynamic_scheduling, 0, arg) \
- macro (OMP_task, 0, arg) \
- macro (OMP_critical, 0, arg) \
- macro (OMP_critical_wait, 0, arg) \
- macro (OMP_single, 0, arg) \
- macro (OMP_master, 0, arg) \
+#define KMP_FOREACH_TIMER(macro, arg) \
+ macro (OMP_worker_thread_life, 0, arg) \
+ macro (FOR_static_scheduling, 0, arg) \
+ macro (FOR_dynamic_scheduling, 0, arg) \
+ macro (OMP_critical, 0, arg) \
+ macro (OMP_critical_wait, 0, arg) \
+ macro (OMP_single, 0, arg) \
+ macro (OMP_master, 0, arg) \
+ macro (OMP_idle, 0, arg) \
+ macro (OMP_plain_barrier, 0, arg) \
+ macro (OMP_fork_join_barrier, 0, arg) \
+ macro (OMP_parallel, 0, arg) \
+ macro (OMP_task_immediate, 0, arg) \
+ macro (OMP_task_taskwait, 0, arg) \
+ macro (OMP_task_taskyield, 0, arg) \
+ macro (OMP_task_taskgroup, 0, arg) \
+ macro (OMP_task_join_bar, 0, arg) \
+ macro (OMP_task_plain_bar, 0, arg) \
+ macro (OMP_serial, 0, arg) \
macro (OMP_set_numthreads, stats_flags_e::noUnits | stats_flags_e::noTotal, arg) \
macro (OMP_PARALLEL_args, stats_flags_e::noUnits | stats_flags_e::noTotal, arg) \
macro (FOR_static_iterations, stats_flags_e::noUnits | stats_flags_e::noTotal, arg) \
@@ -129,7 +154,16 @@ enum stats_flags_e {
// OMP_barrier -- Time at "real" barriers (includes task time)
// FOR_static_scheduling -- Time spent doing scheduling for a static "for"
// FOR_dynamic_scheduling -- Time spent doing scheduling for a dynamic "for"
-// OMP_task -- Time spent executing tasks
+// OMP_idle -- Worker threads time spent waiting for inclusion in a parallel region
+// OMP_plain_barrier -- Time spent in a barrier construct
+// OMP_fork_join_barrier -- Time spent in a the fork-join barrier surrounding a parallel region
+// OMP_parallel -- Time spent inside a parallel construct
+// OMP_task_immediate -- Time spent executing non-deferred tasks
+// OMP_task_taskwait -- Time spent executing tasks inside a taskwait construct
+// OMP_task_taskyield -- Time spent executing tasks inside a taskyield construct
+// OMP_task_taskgroup -- Time spent executing tasks inside a taskygroup construct
+// OMP_task_join_bar -- Time spent executing tasks inside a join barrier
+// OMP_task_plain_bar -- Time spent executing tasks inside a barrier construct
// OMP_single -- Time spent executing a "single" region
// OMP_master -- Time spent executing a "master" region
// OMP_set_numthreads -- Values passed to omp_set_num_threads
@@ -197,12 +231,25 @@ enum stats_flags_e {
*
* @ingroup STATS_GATHERING
*/
-#define KMP_FOREACH_EXPLICIT_TIMER(macro, arg) \
- macro(OMP_serial, 0, arg) \
- macro(OMP_start_end, 0, arg) \
- macro(OMP_critical, 0, arg) \
- macro(OMP_single, 0, arg) \
- macro(OMP_master, 0, arg) \
+#define KMP_FOREACH_EXPLICIT_TIMER(macro, arg) \
+ macro(OMP_worker_thread_life, 0, arg) \
+ macro(FOR_static_scheduling, 0, arg) \
+ macro(FOR_dynamic_scheduling, 0, arg) \
+ macro(OMP_critical, 0, arg) \
+ macro(OMP_critical_wait, 0, arg) \
+ macro(OMP_single, 0, arg) \
+ macro(OMP_master, 0, arg) \
+ macro(OMP_idle, 0, arg) \
+ macro(OMP_plain_barrier, 0, arg) \
+ macro(OMP_fork_join_barrier, 0, arg) \
+ macro(OMP_parallel, 0, arg) \
+ macro(OMP_task_immediate, 0, arg) \
+ macro(OMP_task_taskwait, 0, arg) \
+ macro(OMP_task_taskyield, 0, arg) \
+ macro(OMP_task_taskgroup, 0, arg) \
+ macro(OMP_task_join_bar, 0, arg) \
+ macro(OMP_task_plain_bar, 0, arg) \
+ macro(OMP_serial, 0, arg) \
KMP_FOREACH_EXPLICIT_DEVELOPER_TIMER(macro,arg) \
macro(LAST, 0, arg)
@@ -227,6 +274,21 @@ enum counter_e {
};
#undef ENUMERATE
+class timerPair {
+ explicit_timer_e timer_index;
+ timer_e timer;
+ public:
+ timerPair(explicit_timer_e ti, timer_e t) : timer_index(ti), timer(t) {}
+ inline explicit_timer_e get_index() const { return timer_index; }
+ inline timer_e get_timer() const { return timer; }
+ bool operator==(const timerPair & rhs) {
+ return this->get_index() == rhs.get_index();
+ }
+ bool operator!=(const timerPair & rhs) {
+ return !(*this == rhs);
+ }
+};
+
class statistic
{
double minVal;
@@ -294,15 +356,19 @@ class explicitTimer
{
timeStat * stat;
tsc_tick_count startTime;
+ tsc_tick_count pauseStartTime;
+ tsc_tick_count::tsc_interval_t totalPauseTime;
public:
- explicitTimer () : stat(0), startTime(0) { }
- explicitTimer (timeStat * s) : stat(s), startTime() { }
+ explicitTimer () : stat(0), startTime(0), pauseStartTime(0), totalPauseTime() { }
+ explicitTimer (timeStat * s) : stat(s), startTime(), pauseStartTime(0), totalPauseTime() { }
void setStat (timeStat *s) { stat = s; }
void start(timer_e timerEnumValue);
+ void pause() { pauseStartTime = tsc_tick_count::now(); }
+ void resume() { totalPauseTime += (tsc_tick_count::now() - pauseStartTime); }
void stop(timer_e timerEnumValue);
- void reset() { startTime = 0; }
+ void reset() { startTime = 0; pauseStartTime = 0; totalPauseTime = 0; }
};
// Where all you need is to time a block, this is enough.
@@ -315,6 +381,49 @@ class blockTimer : public explicitTimer
~blockTimer() { stop(timerEnumValue); }
};
+// Where you need to partition a threads clock ticks into separate states
+// e.g., a partitionedTimers class with two timers of EXECUTING_TASK, and
+// DOING_NOTHING would render these conditions:
+// time(EXECUTING_TASK) + time(DOING_NOTHING) = total time thread is alive
+// No clock tick in the EXECUTING_TASK is a member of DOING_NOTHING and vice versa
+class partitionedTimers
+{
+ private:
+ explicitTimer* timers[EXPLICIT_TIMER_LAST+1];
+ std::vector<timerPair> timer_stack;
+ public:
+ partitionedTimers();
+ void add_timer(explicit_timer_e timer_index, explicitTimer* timer_pointer);
+ void init(timerPair timer_index);
+ void push(timerPair timer_index);
+ void pop();
+ void windup();
+};
+
+// Special wrapper around the partioned timers to aid timing code blocks
+// It avoids the need to have an explicit end, leaving the scope suffices.
+class blockPartitionedTimer
+{
+ partitionedTimers* part_timers;
+ timerPair timer_pair;
+ public:
+ blockPartitionedTimer(partitionedTimers* pt, timerPair tp) : part_timers(pt), timer_pair(tp) { part_timers->push(timer_pair); }
+ ~blockPartitionedTimer() { part_timers->pop(); }
+};
+
+// Special wrapper around the thread state to aid in keeping state in code blocks
+// It avoids the need to have an explicit end, leaving the scope suffices.
+class blockThreadState
+{
+ stats_state_e* state_pointer;
+ stats_state_e old_state;
+ public:
+ blockThreadState(stats_state_e* thread_state_pointer, stats_state_e new_state) : state_pointer(thread_state_pointer), old_state(*thread_state_pointer) {
+ *state_pointer = new_state;
+ }
+ ~blockThreadState() { *state_pointer = old_state; }
+};
+
// If all you want is a count, then you can use this...
// The individual per-thread counts will be aggregated into a statistic at program exit.
class counter
@@ -473,14 +582,19 @@ class kmp_stats_list {
timeStat _timers[TIMER_LAST+1];
counter _counters[COUNTER_LAST+1];
explicitTimer _explicitTimers[EXPLICIT_TIMER_LAST+1];
+ partitionedTimers _partitionedTimers;
int _nestLevel; // one per thread
kmp_stats_event_vector _event_vector;
kmp_stats_list* next;
kmp_stats_list* prev;
+ stats_state_e state;
+ int thread_is_idle_flag;
public:
- kmp_stats_list() : next(this) , prev(this) , _event_vector(), _nestLevel(0) {
+ kmp_stats_list() : _nestLevel(0), _event_vector(), next(this), prev(this),
+ state(IDLE), thread_is_idle_flag(0) {
#define doInit(name,ignore1,ignore2) \
- getExplicitTimer(EXPLICIT_TIMER_##name)->setStat(getTimer(TIMER_##name));
+ getExplicitTimer(EXPLICIT_TIMER_##name)->setStat(getTimer(TIMER_##name)); \
+ _partitionedTimers.add_timer(EXPLICIT_TIMER_##name, getExplicitTimer(EXPLICIT_TIMER_##name));
KMP_FOREACH_EXPLICIT_TIMER(doInit,0);
#undef doInit
}
@@ -488,6 +602,7 @@ class kmp_stats_list {
inline timeStat * getTimer(timer_e idx) { return &_timers[idx]; }
inline counter * getCounter(counter_e idx) { return &_counters[idx]; }
inline explicitTimer * getExplicitTimer(explicit_timer_e idx) { return &_explicitTimers[idx]; }
+ inline partitionedTimers * getPartitionedTimers() { return &_partitionedTimers; }
inline timeStat * getTimers() { return _timers; }
inline counter * getCounters() { return _counters; }
inline explicitTimer * getExplicitTimers() { return _explicitTimers; }
@@ -498,6 +613,12 @@ class kmp_stats_list {
inline void decrementNestValue() { _nestLevel--; }
inline int getGtid() const { return gtid; }
inline void setGtid(int newgtid) { gtid = newgtid; }
+ inline void setState(stats_state_e newstate) { state = newstate; }
+ inline stats_state_e getState() const { return state; }
+ inline stats_state_e * getStatePointer() { return &state; }
+ inline bool isIdle() { return thread_is_idle_flag==1; }
+ inline void setIdleFlag() { thread_is_idle_flag = 1; }
+ inline void resetIdleFlag() { thread_is_idle_flag = 0; }
kmp_stats_list* push_back(int gtid); // returns newly created list node
inline void push_event(uint64_t start_time, uint64_t stop_time, int nest_level, timer_e name) {
_event_vector.push_back(start_time, stop_time, nest_level, name);
@@ -699,6 +820,35 @@ extern kmp_stats_output_module __kmp_stats_output;
__kmp_output_stats(heading_string)
/*!
+ * \brief Initializes the paritioned timers to begin with name.
+ *
+ * @param name timer which you want this thread to begin with
+ *
+ * @ingroup STATS_GATHERING
+*/
+#define KMP_INIT_PARTITIONED_TIMERS(name) \
+ __kmp_stats_thread_ptr->getPartitionedTimers()->init(timerPair(EXPLICIT_TIMER_##name, TIMER_##name))
+
+#define KMP_TIME_PARTITIONED_BLOCK(name) \
+ blockPartitionedTimer __PBLOCKTIME__(__kmp_stats_thread_ptr->getPartitionedTimers(), \
+ timerPair(EXPLICIT_TIMER_##name, TIMER_##name))
+
+#define KMP_PUSH_PARTITIONED_TIMER(name) \
+ __kmp_stats_thread_ptr->getPartitionedTimers()->push(timerPair(EXPLICIT_TIMER_##name, TIMER_##name))
+
+#define KMP_POP_PARTITIONED_TIMER() \
+ __kmp_stats_thread_ptr->getPartitionedTimers()->pop()
+
+#define KMP_SET_THREAD_STATE(state_name) \
+ __kmp_stats_thread_ptr->setState(state_name)
+
+#define KMP_GET_THREAD_STATE() \
+ __kmp_stats_thread_ptr->getState()
+
+#define KMP_SET_THREAD_STATE_BLOCK(state_name) \
+ blockThreadState __BTHREADSTATE__(__kmp_stats_thread_ptr->getStatePointer(), state_name)
+
+/*!
* \brief resets all stats (counters to 0, timers to 0 elapsed ticks)
*
* \details Reset all stats for all threads.
@@ -739,6 +889,13 @@ extern kmp_stats_output_module __kmp_stats_output;
#define KMP_COUNT_DEVELOPER_BLOCK(n) ((void)0)
#define KMP_START_DEVELOPER_EXPLICIT_TIMER(n) ((void)0)
#define KMP_STOP_DEVELOPER_EXPLICIT_TIMER(n) ((void)0)
+#define KMP_INIT_PARTITIONED_TIMERS(name) ((void)0)
+#define KMP_TIME_PARTITIONED_BLOCK(name) ((void)0)
+#define KMP_PUSH_PARTITIONED_TIMER(name) ((void)0)
+#define KMP_POP_PARTITIONED_TIMER() ((void)0)
+#define KMP_SET_THREAD_STATE(state_name) ((void)0)
+#define KMP_GET_THREAD_STATE() ((void)0)
+#define KMP_SET_THREAD_STATE_BLOCK(state_name) ((void)0)
#endif // KMP_STATS_ENABLED
#endif // KMP_STATS_H
OpenPOWER on IntegriCloud