summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--openmp/runtime/CMakeLists.txt4
-rw-r--r--openmp/runtime/src/CMakeLists.txt5
-rw-r--r--openmp/runtime/src/kmp_barrier.cpp77
-rw-r--r--openmp/runtime/src/kmp_cancel.cpp4
-rw-r--r--openmp/runtime/src/kmp_csupport.c33
-rw-r--r--openmp/runtime/src/kmp_dispatch.cpp29
-rw-r--r--openmp/runtime/src/kmp_runtime.c39
-rw-r--r--openmp/runtime/src/kmp_sched.cpp7
-rw-r--r--openmp/runtime/src/kmp_stats.cpp8
-rw-r--r--openmp/runtime/src/kmp_stats.h147
-rw-r--r--openmp/runtime/src/kmp_tasking.c7
-rw-r--r--openmp/runtime/src/z_Linux_util.c4
12 files changed, 233 insertions, 131 deletions
diff --git a/openmp/runtime/CMakeLists.txt b/openmp/runtime/CMakeLists.txt
index 3371248caca..1d814f022ee 100644
--- a/openmp/runtime/CMakeLists.txt
+++ b/openmp/runtime/CMakeLists.txt
@@ -254,6 +254,10 @@ set(LIBOMP_STATS FALSE CACHE BOOL
if(LIBOMP_STATS AND (NOT LIBOMP_HAVE_STATS))
libomp_error_say("Stats-gathering functionality requested but not available")
endif()
+# The stats functionality requires the std c++ library
+if(LIBOMP_STATS)
+ set(LIBOMP_USE_STDCPPLIB TRUE)
+endif()
# OMPT-support
# TODO: Make this a real feature check
diff --git a/openmp/runtime/src/CMakeLists.txt b/openmp/runtime/src/CMakeLists.txt
index a4e84513544..637f45b75d9 100644
--- a/openmp/runtime/src/CMakeLists.txt
+++ b/openmp/runtime/src/CMakeLists.txt
@@ -149,7 +149,10 @@ endif()
# Remove any cmake-automatic linking of the standard C++ library.
# We neither need (nor want) the standard C++ library dependency even though we compile c++ files.
if(NOT ${LIBOMP_USE_STDCPPLIB})
+ set(LIBOMP_LINKER_LANGUAGE C)
set(CMAKE_CXX_IMPLICIT_LINK_LIBRARIES)
+else()
+ set(LIBOMP_LINKER_LANGUAGE CXX)
endif()
# Add the OpenMP library
@@ -158,7 +161,7 @@ add_library(omp SHARED ${LIBOMP_SOURCE_FILES})
set_target_properties(omp PROPERTIES
PREFIX "" SUFFIX "" OUTPUT_NAME "${LIBOMP_LIB_FILE}"
LINK_FLAGS "${LIBOMP_CONFIGURED_LDFLAGS}"
- LINKER_LANGUAGE C # use C Compiler for linking step
+ LINKER_LANGUAGE ${LIBOMP_LINKER_LANGUAGE}
SKIP_BUILD_RPATH true # have Mac linker -install_name just be "-install_name libomp.dylib"
)
diff --git a/openmp/runtime/src/kmp_barrier.cpp b/openmp/runtime/src/kmp_barrier.cpp
index e6c4e8adfb9..0d9c7668b01 100644
--- a/openmp/runtime/src/kmp_barrier.cpp
+++ b/openmp/runtime/src/kmp_barrier.cpp
@@ -46,7 +46,7 @@ __kmp_linear_barrier_gather(enum barrier_type bt, kmp_info_t *this_thr, int gtid
void (*reduce)(void *, void *)
USE_ITT_BUILD_ARG(void * itt_sync_obj) )
{
- KMP_TIME_BLOCK(KMP_linear_gather);
+ KMP_TIME_DEVELOPER_BLOCK(KMP_linear_gather);
register kmp_team_t *team = this_thr->th.th_team;
register kmp_bstate_t *thr_bar = & this_thr->th.th_bar[bt].bb;
register kmp_info_t **other_threads = team->t.t_threads;
@@ -123,7 +123,7 @@ __kmp_linear_barrier_release(enum barrier_type bt, kmp_info_t *this_thr, int gti
int propagate_icvs
USE_ITT_BUILD_ARG(void *itt_sync_obj) )
{
- KMP_TIME_BLOCK(KMP_linear_release);
+ KMP_TIME_DEVELOPER_BLOCK(KMP_linear_release);
register kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
register kmp_team_t *team;
@@ -141,17 +141,18 @@ __kmp_linear_barrier_release(enum barrier_type bt, kmp_info_t *this_thr, int gti
if (nproc > 1) {
#if KMP_BARRIER_ICV_PUSH
- KMP_START_EXPLICIT_TIMER(USER_icv_copy);
- if (propagate_icvs) {
- ngo_load(&team->t.t_implicit_task_taskdata[0].td_icvs);
- for (i=1; i<nproc; ++i) {
- __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[i], team, i, FALSE);
- ngo_store_icvs(&team->t.t_implicit_task_taskdata[i].td_icvs,
- &team->t.t_implicit_task_taskdata[0].td_icvs);
+ {
+ KMP_TIME_DEVELOPER_BLOCK(USER_icv_copy);
+ if (propagate_icvs) {
+ ngo_load(&team->t.t_implicit_task_taskdata[0].td_icvs);
+ for (i=1; i<nproc; ++i) {
+ __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[i], team, i, FALSE);
+ ngo_store_icvs(&team->t.t_implicit_task_taskdata[i].td_icvs,
+ &team->t.t_implicit_task_taskdata[0].td_icvs);
+ }
+ ngo_sync();
}
- ngo_sync();
}
- KMP_STOP_EXPLICIT_TIMER(USER_icv_copy);
#endif // KMP_BARRIER_ICV_PUSH
// Now, release all of the worker threads
@@ -217,7 +218,7 @@ __kmp_tree_barrier_gather(enum barrier_type bt, kmp_info_t *this_thr, int gtid,
void (*reduce)(void *, void *)
USE_ITT_BUILD_ARG(void *itt_sync_obj) )
{
- KMP_TIME_BLOCK(KMP_tree_gather);
+ KMP_TIME_DEVELOPER_BLOCK(KMP_tree_gather);
register kmp_team_t *team = this_thr->th.th_team;
register kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
register kmp_info_t **other_threads = team->t.t_threads;
@@ -312,7 +313,7 @@ __kmp_tree_barrier_release(enum barrier_type bt, kmp_info_t *this_thr, int gtid,
int propagate_icvs
USE_ITT_BUILD_ARG(void *itt_sync_obj) )
{
- KMP_TIME_BLOCK(KMP_tree_release);
+ KMP_TIME_DEVELOPER_BLOCK(KMP_tree_release);
register kmp_team_t *team;
register kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
register kmp_uint32 nproc;
@@ -381,14 +382,15 @@ __kmp_tree_barrier_release(enum barrier_type bt, kmp_info_t *this_thr, int gtid,
#endif /* KMP_CACHE_MANAGE */
#if KMP_BARRIER_ICV_PUSH
- KMP_START_EXPLICIT_TIMER(USER_icv_copy);
- if (propagate_icvs) {
- __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[child_tid],
- team, child_tid, FALSE);
- copy_icvs(&team->t.t_implicit_task_taskdata[child_tid].td_icvs,
- &team->t.t_implicit_task_taskdata[0].td_icvs);
+ {
+ KMP_TIME_DEVELOPER_BLOCK(USER_icv_copy);
+ if (propagate_icvs) {
+ __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[child_tid],
+ team, child_tid, FALSE);
+ copy_icvs(&team->t.t_implicit_task_taskdata[child_tid].td_icvs,
+ &team->t.t_implicit_task_taskdata[0].td_icvs);
+ }
}
- KMP_STOP_EXPLICIT_TIMER(USER_icv_copy);
#endif // KMP_BARRIER_ICV_PUSH
KA_TRACE(20, ("__kmp_tree_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%u)"
"go(%p): %u => %u\n", gtid, team->t.t_id, tid,
@@ -414,7 +416,7 @@ __kmp_hyper_barrier_gather(enum barrier_type bt, kmp_info_t *this_thr, int gtid,
void (*reduce)(void *, void *)
USE_ITT_BUILD_ARG(void *itt_sync_obj) )
{
- KMP_TIME_BLOCK(KMP_hyper_gather);
+ KMP_TIME_DEVELOPER_BLOCK(KMP_hyper_gather);
register kmp_team_t *team = this_thr->th.th_team;
register kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
register kmp_info_t **other_threads = team->t.t_threads;
@@ -520,7 +522,7 @@ __kmp_hyper_barrier_release(enum barrier_type bt, kmp_info_t *this_thr, int gtid
int propagate_icvs
USE_ITT_BUILD_ARG(void *itt_sync_obj) )
{
- KMP_TIME_BLOCK(KMP_hyper_release);
+ KMP_TIME_DEVELOPER_BLOCK(KMP_hyper_release);
register kmp_team_t *team;
register kmp_bstate_t *thr_bar = & this_thr -> th.th_bar[ bt ].bb;
register kmp_info_t **other_threads;
@@ -725,7 +727,7 @@ __kmp_hierarchical_barrier_gather(enum barrier_type bt, kmp_info_t *this_thr,
int gtid, int tid, void (*reduce) (void *, void *)
USE_ITT_BUILD_ARG(void * itt_sync_obj) )
{
- KMP_TIME_BLOCK(KMP_hier_gather);
+ KMP_TIME_DEVELOPER_BLOCK(KMP_hier_gather);
register kmp_team_t *team = this_thr->th.th_team;
register kmp_bstate_t *thr_bar = & this_thr->th.th_bar[bt].bb;
register kmp_uint32 nproc = this_thr->th.th_team_nproc;
@@ -853,7 +855,7 @@ __kmp_hierarchical_barrier_release(enum barrier_type bt, kmp_info_t *this_thr, i
int propagate_icvs
USE_ITT_BUILD_ARG(void * itt_sync_obj) )
{
- KMP_TIME_BLOCK(KMP_hier_release);
+ KMP_TIME_DEVELOPER_BLOCK(KMP_hier_release);
register kmp_team_t *team;
register kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
register kmp_uint32 nproc;
@@ -1035,7 +1037,7 @@ int
__kmp_barrier(enum barrier_type bt, int gtid, int is_split, size_t reduce_size,
void *reduce_data, void (*reduce)(void *, void *))
{
- KMP_TIME_BLOCK(KMP_barrier);
+ KMP_TIME_DEVELOPER_BLOCK(KMP_barrier);
register int tid = __kmp_tid_from_gtid(gtid);
register kmp_info_t *this_thr = __kmp_threads[gtid];
register kmp_team_t *team = this_thr->th.th_team;
@@ -1294,7 +1296,7 @@ __kmp_barrier(enum barrier_type bt, int gtid, int is_split, size_t reduce_size,
void
__kmp_end_split_barrier(enum barrier_type bt, int gtid)
{
- KMP_TIME_BLOCK(KMP_end_split_barrier);
+ KMP_TIME_DEVELOPER_BLOCK(KMP_end_split_barrier);
int tid = __kmp_tid_from_gtid(gtid);
kmp_info_t *this_thr = __kmp_threads[gtid];
kmp_team_t *team = this_thr->th.th_team;
@@ -1335,7 +1337,7 @@ __kmp_end_split_barrier(enum barrier_type bt, int gtid)
void
__kmp_join_barrier(int gtid)
{
- KMP_TIME_BLOCK(KMP_join_barrier);
+ KMP_TIME_DEVELOPER_BLOCK(KMP_join_barrier);
register kmp_info_t *this_thr = __kmp_threads[gtid];
register kmp_team_t *team;
register kmp_uint nproc;
@@ -1533,7 +1535,7 @@ __kmp_join_barrier(int gtid)
void
__kmp_fork_barrier(int gtid, int tid)
{
- KMP_TIME_BLOCK(KMP_fork_barrier);
+ KMP_TIME_DEVELOPER_BLOCK(KMP_fork_barrier);
kmp_info_t *this_thr = __kmp_threads[gtid];
kmp_team_t *team = (tid == 0) ? this_thr->th.th_team : NULL;
#if USE_ITT_BUILD
@@ -1648,15 +1650,16 @@ __kmp_fork_barrier(int gtid, int tid)
this data before this function is called. We cannot modify __kmp_fork_call() to look at
the fixed ICVs in the master's thread struct, because it is not always the case that the
threads arrays have been allocated when __kmp_fork_call() is executed. */
- KMP_START_EXPLICIT_TIMER(USER_icv_copy);
- if (!KMP_MASTER_TID(tid)) { // master thread already has ICVs
- // Copy the initial ICVs from the master's thread struct to the implicit task for this tid.
- KA_TRACE(10, ("__kmp_fork_barrier: T#%d(%d) is PULLing ICVs\n", gtid, tid));
- __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[tid], team, tid, FALSE);
- copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs,
- &team->t.t_threads[0]->th.th_bar[bs_forkjoin_barrier].bb.th_fixed_icvs);
+ {
+ KMP_TIME_DEVELOPER_BLOCK(USER_icv_copy);
+ if (!KMP_MASTER_TID(tid)) { // master thread already has ICVs
+ // Copy the initial ICVs from the master's thread struct to the implicit task for this tid.
+ KA_TRACE(10, ("__kmp_fork_barrier: T#%d(%d) is PULLing ICVs\n", gtid, tid));
+ __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[tid], team, tid, FALSE);
+ copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs,
+ &team->t.t_threads[0]->th.th_bar[bs_forkjoin_barrier].bb.th_fixed_icvs);
+ }
}
- KMP_STOP_EXPLICIT_TIMER(USER_icv_copy);
#endif // KMP_BARRIER_ICV_PULL
if (__kmp_tasking_mode != tskm_immediate_exec) {
@@ -1702,7 +1705,7 @@ __kmp_fork_barrier(int gtid, int tid)
void
__kmp_setup_icv_copy(kmp_team_t *team, int new_nproc, kmp_internal_control_t *new_icvs, ident_t *loc )
{
- KMP_TIME_BLOCK(KMP_setup_icv_copy);
+ KMP_TIME_DEVELOPER_BLOCK(KMP_setup_icv_copy);
KMP_DEBUG_ASSERT(team && new_nproc && new_icvs);
KMP_DEBUG_ASSERT((!TCR_4(__kmp_init_parallel)) || new_icvs->nproc);
diff --git a/openmp/runtime/src/kmp_cancel.cpp b/openmp/runtime/src/kmp_cancel.cpp
index e5a76d26951..d1eb00c6649 100644
--- a/openmp/runtime/src/kmp_cancel.cpp
+++ b/openmp/runtime/src/kmp_cancel.cpp
@@ -58,7 +58,7 @@ kmp_int32 __kmpc_cancel(ident_t* loc_ref, kmp_int32 gtid, kmp_int32 cncl_kind) {
break;
}
case cancel_taskgroup:
- // cancellation requests for parallel and worksharing constructs
+ // cancellation requests for a task group
// are handled through the taskgroup structure
{
kmp_taskdata_t* task;
@@ -141,7 +141,7 @@ kmp_int32 __kmpc_cancellationpoint(ident_t* loc_ref, kmp_int32 gtid, kmp_int32 c
break;
}
case cancel_taskgroup:
- // cancellation requests for parallel and worksharing constructs
+ // cancellation requests for a task group
// are handled through the taskgroup structure
{
kmp_taskdata_t* task;
diff --git a/openmp/runtime/src/kmp_csupport.c b/openmp/runtime/src/kmp_csupport.c
index 1c8c5fb7d1b..e6e98130ca4 100644
--- a/openmp/runtime/src/kmp_csupport.c
+++ b/openmp/runtime/src/kmp_csupport.c
@@ -280,9 +280,21 @@ Do the actual fork and call the microtask in the relevant number of threads.
void
__kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...)
{
- KMP_STOP_EXPLICIT_TIMER(OMP_serial);
- KMP_COUNT_BLOCK(OMP_PARALLEL);
int gtid = __kmp_entry_gtid();
+
+#if (KMP_STATS_ENABLED)
+ int inParallel = __kmpc_in_parallel(loc);
+ if (inParallel)
+ {
+ KMP_COUNT_BLOCK(OMP_NESTED_PARALLEL);
+ }
+ else
+ {
+ KMP_STOP_EXPLICIT_TIMER(OMP_serial);
+ KMP_COUNT_BLOCK(OMP_PARALLEL);
+ }
+#endif
+
// maybe to save thr_state is enough here
{
va_list ap;
@@ -329,7 +341,10 @@ __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...)
}
#endif
}
- KMP_START_EXPLICIT_TIMER(OMP_serial);
+#if (KMP_STATS_ENABLED)
+ if (!inParallel)
+ KMP_START_EXPLICIT_TIMER(OMP_serial);
+#endif
}
#if OMP_40_ENABLED
@@ -370,6 +385,8 @@ __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...)
va_list ap;
va_start( ap, microtask );
+ KMP_COUNT_BLOCK(OMP_TEAMS);
+
// remember teams entry point and nesting level
this_thr->th.th_teams_microtask = microtask;
this_thr->th.th_teams_level = this_thr->th.th_team->t.t_level; // AC: can be >0 on host
@@ -715,8 +732,10 @@ __kmpc_master(ident_t *loc, kmp_int32 global_tid)
if( ! TCR_4( __kmp_init_parallel ) )
__kmp_parallel_initialize();
- if( KMP_MASTER_GTID( global_tid ))
+ if( KMP_MASTER_GTID( global_tid )) {
+ KMP_START_EXPLICIT_TIMER(OMP_master);
status = 1;
+ }
#if OMPT_SUPPORT && OMPT_TRACE
if (status) {
@@ -764,6 +783,7 @@ __kmpc_end_master(ident_t *loc, kmp_int32 global_tid)
KC_TRACE( 10, ("__kmpc_end_master: called T#%d\n", global_tid ) );
KMP_DEBUG_ASSERT( KMP_MASTER_GTID( global_tid ));
+ KMP_STOP_EXPLICIT_TIMER(OMP_master);
#if OMPT_SUPPORT && OMPT_TRACE
kmp_info_t *this_thr = __kmp_threads[ global_tid ];
@@ -1386,6 +1406,9 @@ __kmpc_single(ident_t *loc, kmp_int32 global_tid)
{
KMP_COUNT_BLOCK(OMP_SINGLE);
kmp_int32 rc = __kmp_enter_single( global_tid, loc, TRUE );
+ if(rc == TRUE) {
+ KMP_START_EXPLICIT_TIMER(OMP_single);
+ }
#if OMPT_SUPPORT && OMPT_TRACE
kmp_info_t *this_thr = __kmp_threads[ global_tid ];
@@ -1427,6 +1450,7 @@ void
__kmpc_end_single(ident_t *loc, kmp_int32 global_tid)
{
__kmp_exit_single( global_tid );
+ KMP_STOP_EXPLICIT_TIMER(OMP_single);
#if OMPT_SUPPORT && OMPT_TRACE
kmp_info_t *this_thr = __kmp_threads[ global_tid ];
@@ -2191,7 +2215,6 @@ int
__kmpc_test_lock( ident_t *loc, kmp_int32 gtid, void **user_lock )
{
KMP_COUNT_BLOCK(OMP_test_lock);
- KMP_TIME_BLOCK(OMP_test_lock);
#if KMP_USE_DYNAMIC_LOCK
int rc;
diff --git a/openmp/runtime/src/kmp_dispatch.cpp b/openmp/runtime/src/kmp_dispatch.cpp
index 65abcf7fc43..ba87a5407cb 100644
--- a/openmp/runtime/src/kmp_dispatch.cpp
+++ b/openmp/runtime/src/kmp_dispatch.cpp
@@ -670,6 +670,7 @@ __kmp_dispatch_init(
} else {
pr->ordered = FALSE;
}
+
if ( schedule == kmp_sch_static ) {
schedule = __kmp_static;
} else {
@@ -761,6 +762,19 @@ __kmp_dispatch_init(
tc = 0; // zero-trip
}
+ // Any half-decent optimizer will remove this test when the blocks are empty since the macros expand to nothing
+ // when statistics are disabled.
+ if (schedule == __kmp_static)
+ {
+ KMP_COUNT_BLOCK(OMP_FOR_static);
+ KMP_COUNT_VALUE(FOR_static_iterations, tc);
+ }
+ else
+ {
+ KMP_COUNT_BLOCK(OMP_FOR_dynamic);
+ KMP_COUNT_VALUE(FOR_dynamic_iterations, tc);
+ }
+
pr->u.p.lb = lb;
pr->u.p.ub = ub;
pr->u.p.st = st;
@@ -1384,6 +1398,11 @@ __kmp_dispatch_next(
static const int ___kmp_size_type = sizeof( UT );
#endif
+ // This is potentially slightly misleading, schedule(runtime) will appear here even if the actual runtime schedule
+ // is static. (Which points out a disadvantage of schedule(runtime): even when static scheduling is used it costs
+ // more than a compile time choice to use static scheduling would.)
+ KMP_TIME_BLOCK(FOR_dynamic_scheduling);
+
int status;
dispatch_private_info_template< T > * pr;
kmp_info_t * th = __kmp_threads[ gtid ];
@@ -2164,7 +2183,6 @@ __kmp_dist_get_bounds(
T *pupper,
typename traits_t< T >::signed_t incr
) {
- KMP_COUNT_BLOCK(OMP_DISTR_FOR_dynamic);
typedef typename traits_t< T >::unsigned_t UT;
typedef typename traits_t< T >::signed_t ST;
register kmp_uint32 team_id;
@@ -2222,6 +2240,7 @@ __kmp_dist_get_bounds(
} else {
trip_count = (ST)(*pupper - *plower) / incr + 1; // cast to signed to cover incr<0 case
}
+
if( trip_count <= nteams ) {
KMP_DEBUG_ASSERT(
__kmp_static == kmp_sch_static_greedy || \
@@ -2297,7 +2316,6 @@ void
__kmpc_dispatch_init_4( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
kmp_int32 lb, kmp_int32 ub, kmp_int32 st, kmp_int32 chunk )
{
- KMP_COUNT_BLOCK(OMP_FOR_dynamic);
KMP_DEBUG_ASSERT( __kmp_init_serial );
__kmp_dispatch_init< kmp_int32 >( loc, gtid, schedule, lb, ub, st, chunk, true );
}
@@ -2308,7 +2326,6 @@ void
__kmpc_dispatch_init_4u( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
kmp_uint32 lb, kmp_uint32 ub, kmp_int32 st, kmp_int32 chunk )
{
- KMP_COUNT_BLOCK(OMP_FOR_dynamic);
KMP_DEBUG_ASSERT( __kmp_init_serial );
__kmp_dispatch_init< kmp_uint32 >( loc, gtid, schedule, lb, ub, st, chunk, true );
}
@@ -2321,7 +2338,6 @@ __kmpc_dispatch_init_8( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
kmp_int64 lb, kmp_int64 ub,
kmp_int64 st, kmp_int64 chunk )
{
- KMP_COUNT_BLOCK(OMP_FOR_dynamic);
KMP_DEBUG_ASSERT( __kmp_init_serial );
__kmp_dispatch_init< kmp_int64 >( loc, gtid, schedule, lb, ub, st, chunk, true );
}
@@ -2334,7 +2350,6 @@ __kmpc_dispatch_init_8u( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
kmp_uint64 lb, kmp_uint64 ub,
kmp_int64 st, kmp_int64 chunk )
{
- KMP_COUNT_BLOCK(OMP_FOR_dynamic);
KMP_DEBUG_ASSERT( __kmp_init_serial );
__kmp_dispatch_init< kmp_uint64 >( loc, gtid, schedule, lb, ub, st, chunk, true );
}
@@ -2352,7 +2367,6 @@ void
__kmpc_dist_dispatch_init_4( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
kmp_int32 *p_last, kmp_int32 lb, kmp_int32 ub, kmp_int32 st, kmp_int32 chunk )
{
- KMP_COUNT_BLOCK(OMP_FOR_dynamic);
KMP_DEBUG_ASSERT( __kmp_init_serial );
__kmp_dist_get_bounds< kmp_int32 >( loc, gtid, p_last, &lb, &ub, st );
__kmp_dispatch_init< kmp_int32 >( loc, gtid, schedule, lb, ub, st, chunk, true );
@@ -2362,7 +2376,6 @@ void
__kmpc_dist_dispatch_init_4u( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
kmp_int32 *p_last, kmp_uint32 lb, kmp_uint32 ub, kmp_int32 st, kmp_int32 chunk )
{
- KMP_COUNT_BLOCK(OMP_FOR_dynamic);
KMP_DEBUG_ASSERT( __kmp_init_serial );
__kmp_dist_get_bounds< kmp_uint32 >( loc, gtid, p_last, &lb, &ub, st );
__kmp_dispatch_init< kmp_uint32 >( loc, gtid, schedule, lb, ub, st, chunk, true );
@@ -2372,7 +2385,6 @@ void
__kmpc_dist_dispatch_init_8( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
kmp_int32 *p_last, kmp_int64 lb, kmp_int64 ub, kmp_int64 st, kmp_int64 chunk )
{
- KMP_COUNT_BLOCK(OMP_FOR_dynamic);
KMP_DEBUG_ASSERT( __kmp_init_serial );
__kmp_dist_get_bounds< kmp_int64 >( loc, gtid, p_last, &lb, &ub, st );
__kmp_dispatch_init< kmp_int64 >( loc, gtid, schedule, lb, ub, st, chunk, true );
@@ -2382,7 +2394,6 @@ void
__kmpc_dist_dispatch_init_8u( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
kmp_int32 *p_last, kmp_uint64 lb, kmp_uint64 ub, kmp_int64 st, kmp_int64 chunk )
{
- KMP_COUNT_BLOCK(OMP_FOR_dynamic);
KMP_DEBUG_ASSERT( __kmp_init_serial );
__kmp_dist_get_bounds< kmp_uint64 >( loc, gtid, p_last, &lb, &ub, st );
__kmp_dispatch_init< kmp_uint64 >( loc, gtid, schedule, lb, ub, st, chunk, true );
diff --git a/openmp/runtime/src/kmp_runtime.c b/openmp/runtime/src/kmp_runtime.c
index 8b90177a32a..70e4150487c 100644
--- a/openmp/runtime/src/kmp_runtime.c
+++ b/openmp/runtime/src/kmp_runtime.c
@@ -1495,7 +1495,8 @@ __kmp_fork_call(
kmp_hot_team_ptr_t **p_hot_teams;
#endif
{ // KMP_TIME_BLOCK
- KMP_TIME_BLOCK(KMP_fork_call);
+ KMP_TIME_DEVELOPER_BLOCK(KMP_fork_call);
+ KMP_COUNT_VALUE(OMP_PARALLEL_args, argc);
KA_TRACE( 20, ("__kmp_fork_call: enter T#%d\n", gtid ));
if ( __kmp_stkpadding > 0 && __kmp_root[gtid] != NULL ) {
@@ -1620,12 +1621,14 @@ __kmp_fork_call(
}
#endif
- KMP_TIME_BLOCK(OMP_work);
- __kmp_invoke_microtask( microtask, gtid, 0, argc, parent_team->t.t_argv
+ {
+ KMP_TIME_BLOCK(OMP_work);
+ __kmp_invoke_microtask( microtask, gtid, 0, argc, parent_team->t.t_argv
#if OMPT_SUPPORT
- , exit_runtime_p
+ , exit_runtime_p
#endif
- );
+ );
+ }
#if OMPT_SUPPORT
if (ompt_status & ompt_status_track) {
@@ -2224,8 +2227,8 @@ __kmp_fork_call(
} // END of timer KMP_fork_call block
{
- //KMP_TIME_BLOCK(OMP_work);
- KMP_TIME_BLOCK(USER_master_invoke);
+ KMP_TIME_BLOCK(OMP_work);
+ // KMP_TIME_DEVELOPER_BLOCK(USER_master_invoke);
if (! team->t.t_invoke( gtid )) {
KMP_ASSERT2( 0, "cannot invoke microtask for MASTER thread" );
}
@@ -2280,7 +2283,7 @@ __kmp_join_call(ident_t *loc, int gtid, enum fork_context_e fork_context
#endif /* OMP_40_ENABLED */
)
{
- KMP_TIME_BLOCK(KMP_join_call);
+ KMP_TIME_DEVELOPER_BLOCK(KMP_join_call);
kmp_team_t *team;
kmp_team_t *parent_team;
kmp_info_t *master_th;
@@ -2582,6 +2585,7 @@ __kmp_set_num_threads( int new_nth, int gtid )
else if (new_nth > __kmp_max_nth)
new_nth = __kmp_max_nth;
+ KMP_COUNT_VALUE(OMP_set_numthreads, new_nth);
thread = __kmp_threads[gtid];
__kmp_save_internal_controls( thread );
@@ -4790,7 +4794,7 @@ __kmp_allocate_team( kmp_root_t *root, int new_nproc, int max_nproc,
kmp_internal_control_t *new_icvs,
int argc USE_NESTED_HOT_ARG(kmp_info_t *master) )
{
- KMP_TIME_BLOCK(KMP_allocate_team);
+ KMP_TIME_DEVELOPER_BLOCK(KMP_allocate_team);
int f;
kmp_team_t *team;
int use_hot_team = ! root->r.r_active;
@@ -5577,12 +5581,12 @@ __kmp_launch_thread( kmp_info_t *this_thr )
}
#endif
- KMP_STOP_EXPLICIT_TIMER(USER_launch_thread_loop);
+ KMP_STOP_DEVELOPER_EXPLICIT_TIMER(USER_launch_thread_loop);
{
- KMP_TIME_BLOCK(USER_worker_invoke);
+ KMP_TIME_DEVELOPER_BLOCK(USER_worker_invoke);
rc = (*pteam)->t.t_invoke( gtid );
}
- KMP_START_EXPLICIT_TIMER(USER_launch_thread_loop);
+ KMP_START_DEVELOPER_EXPLICIT_TIMER(USER_launch_thread_loop);
KMP_ASSERT( rc );
#if OMPT_SUPPORT
@@ -6910,12 +6914,15 @@ __kmp_invoke_task_func( int gtid )
#endif
#endif
- rc = __kmp_invoke_microtask( (microtask_t) TCR_SYNC_PTR(team->t.t_pkfn),
- gtid, tid, (int) team->t.t_argc, (void **) team->t.t_argv
+ {
+ KMP_TIME_BLOCK(OMP_work);
+ rc = __kmp_invoke_microtask( (microtask_t) TCR_SYNC_PTR(team->t.t_pkfn),
+ gtid, tid, (int) team->t.t_argc, (void **) team->t.t_argv
#if OMPT_SUPPORT
- , exit_runtime_p
+ , exit_runtime_p
#endif
- );
+ );
+ }
#if OMPT_SUPPORT && OMPT_TRACE
if (ompt_status & ompt_status_track) {
diff --git a/openmp/runtime/src/kmp_sched.cpp b/openmp/runtime/src/kmp_sched.cpp
index 0821f3805a4..4eec41b9d44 100644
--- a/openmp/runtime/src/kmp_sched.cpp
+++ b/openmp/runtime/src/kmp_sched.cpp
@@ -84,6 +84,8 @@ __kmp_for_static_init(
typename traits_t< T >::signed_t chunk
) {
KMP_COUNT_BLOCK(OMP_FOR_static);
+ KMP_TIME_BLOCK (FOR_static_scheduling);
+
typedef typename traits_t< T >::unsigned_t UT;
typedef typename traits_t< T >::signed_t ST;
/* this all has to be changed back to TID and such.. */
@@ -151,6 +153,7 @@ __kmp_for_static_init(
team_info->microtask);
}
#endif
+ KMP_COUNT_VALUE (FOR_static_iterations, 0);
return;
}
@@ -246,6 +249,7 @@ __kmp_for_static_init(
__kmp_error_construct( kmp_i18n_msg_CnsIterationRangeTooLarge, ct_pdo, loc );
}
}
+ KMP_COUNT_VALUE (FOR_static_iterations, trip_count);
/* compute remaining parameters */
switch ( schedtype ) {
@@ -372,7 +376,7 @@ __kmp_dist_for_static_init(
typename traits_t< T >::signed_t incr,
typename traits_t< T >::signed_t chunk
) {
- KMP_COUNT_BLOCK(OMP_DISTR_FOR_static);
+ KMP_COUNT_BLOCK(OMP_DISTRIBUTE);
typedef typename traits_t< T >::unsigned_t UT;
typedef typename traits_t< T >::signed_t ST;
register kmp_uint32 tid;
@@ -437,6 +441,7 @@ __kmp_dist_for_static_init(
} else {
trip_count = (ST)(*pupper - *plower) / incr + 1; // cast to signed to cover incr<0 case
}
+
*pstride = *pupper - *plower; // just in case (can be unused)
if( trip_count <= nteams ) {
KMP_DEBUG_ASSERT(
diff --git a/openmp/runtime/src/kmp_stats.cpp b/openmp/runtime/src/kmp_stats.cpp
index 9750f7b3636..0167d3666a0 100644
--- a/openmp/runtime/src/kmp_stats.cpp
+++ b/openmp/runtime/src/kmp_stats.cpp
@@ -521,16 +521,14 @@ void kmp_stats_output_module::outputStats(const char* heading)
// Special handling for synthesized statistics.
// These just have to be coded specially here for now.
- // At present we only have one: the total parallel work done in each thread.
+ // At present we only have a few:
+ // The total parallel work done in each thread.
// The variance here makes it easy to see load imbalance over the whole program (though, of course,
// it's possible to have a code with awful load balance in every parallel region but perfect load
// balance oever the whole program.)
+ // The time spent in barriers in each thread.
allStats[TIMER_Total_work].addSample ((*it)->getTimer(TIMER_OMP_work)->getTotal());
- // Time waiting for work (synthesized)
- if ((t != 0) || !timeStat::workerOnly(timer_e(TIMER_OMP_await_work)))
- allStats[TIMER_Total_await_work].addSample ((*it)->getTimer(TIMER_OMP_await_work)->getTotal());
-
// Time in explicit barriers.
allStats[TIMER_Total_barrier].addSample ((*it)->getTimer(TIMER_OMP_barrier)->getTotal());
diff --git a/openmp/runtime/src/kmp_stats.h b/openmp/runtime/src/kmp_stats.h
index 9189b8015fd..9922bfc7961 100644
--- a/openmp/runtime/src/kmp_stats.h
+++ b/openmp/runtime/src/kmp_stats.h
@@ -31,6 +31,11 @@
#include <new> // placement new
#include "kmp_stats_timing.h"
+/*
+ * Enable developer statistics here if you want them. They are more detailed than is useful for application characterisation and
+ * are intended for the runtime library developer.
+ */
+// #define KMP_DEVELOPER_STATS 1
/*!
* @ingroup STATS_GATHERING
@@ -56,7 +61,7 @@ class stats_flags_e {
* Each thread accumulates its own count, at the end of execution the counts are aggregated treating each thread
* as a separate measurement. (Unless onlyInMaster is set, in which case there's only a single measurement).
* The min,mean,max are therefore the values for the threads.
- * Adding the counter here and then putting in a KMP_BLOCK_COUNTER(name) is all you need to do.
+ * Adding the counter here and then putting a KMP_COUNT_BLOCK(name) at the point you want to count is all you need to do.
* All of the tables and printing is generated from this macro.
* Format is "macro(name, flags, arg)"
*
@@ -64,21 +69,30 @@ class stats_flags_e {
*/
#define KMP_FOREACH_COUNTER(macro, arg) \
macro (OMP_PARALLEL, stats_flags_e::onlyInMaster, arg) \
+ macro (OMP_NESTED_PARALLEL, 0, arg) \
macro (OMP_FOR_static, 0, arg) \
macro (OMP_FOR_dynamic, 0, arg) \
- macro (OMP_DISTR_FOR_static, 0, arg) \
- macro (OMP_DISTR_FOR_dynamic, 0, arg) \
+ macro (OMP_DISTRIBUTE, 0, arg) \
macro (OMP_BARRIER, 0, arg) \
macro (OMP_CRITICAL,0, arg) \
macro (OMP_SINGLE, 0, arg) \
macro (OMP_MASTER, 0, arg) \
+ macro (OMP_TEAMS, 0, arg) \
macro (OMP_set_lock, 0, arg) \
macro (OMP_test_lock, 0, arg) \
- macro (OMP_test_lock_failure, 0, arg) \
macro (REDUCE_wait, 0, arg) \
macro (REDUCE_nowait, 0, arg) \
+ macro (OMP_TASKYIELD, 0, arg) \
+ macro (TASK_executed, 0, arg) \
+ macro (TASK_cancelled, 0, arg) \
+ macro (TASK_stolen, 0, arg) \
macro (LAST,0,arg)
+// OMP_PARALLEL_args -- the number of arguments passed to a fork
+// FOR_static_iterations -- Number of available parallel chunks of work in a static for
+// FOR_dynamic_iterations -- Number of available parallel chunks of work in a dynamic for
+// Both adjust for any chunking, so if there were an iteration count of 20 but a chunk size of 10, we'd record 2.
+
/*!
* \brief Add new timers under KMP_FOREACH_TIMER() macro in kmp_stats.h
*
@@ -87,72 +101,45 @@ class stats_flags_e {
*
* \details A timer collects multiple samples of some count in each thread and then finally aggregates over all the threads.
* The count is normally a time (in ticks), hence the name "timer". (But can be any value, so we use this for "number of arguments passed to fork"
- * as well, or we could collect "loop iteration count" if we wanted to).
+ * as well).
* For timers the threads are not significant, it's the individual observations that count, so the statistics are at that level.
* Format is "macro(name, flags, arg)"
*
- * @ingroup STATS_GATHERING
+ * @ingroup STATS_GATHERING2
*/
-#define KMP_FOREACH_TIMER(macro, arg) \
- macro (OMP_PARALLEL_args, stats_flags_e::onlyInMaster | stats_flags_e::noUnits, arg) \
- macro (FOR_static_iterations, stats_flags_e::onlyInMaster | stats_flags_e::noUnits, arg) \
- macro (FOR_dynamic_iterations, stats_flags_e::noUnits, arg) \
+#define KMP_FOREACH_TIMER(macro, arg) \
macro (OMP_start_end, stats_flags_e::onlyInMaster, arg) \
macro (OMP_serial, stats_flags_e::onlyInMaster, arg) \
macro (OMP_work, 0, arg) \
macro (Total_work, stats_flags_e::synthesized, arg) \
- macro (OMP_await_work, stats_flags_e::notInMaster, arg) \
- macro (Total_await_work, stats_flags_e::synthesized, arg) \
macro (OMP_barrier, 0, arg) \
macro (Total_barrier, stats_flags_e::synthesized, arg) \
- macro (OMP_test_lock, 0, arg) \
+ macro (FOR_static_iterations, stats_flags_e::noUnits, arg) \
macro (FOR_static_scheduling, 0, arg) \
+ macro (FOR_dynamic_iterations, stats_flags_e::noUnits, arg) \
macro (FOR_dynamic_scheduling, 0, arg) \
- macro (KMP_fork_call, 0, arg) \
- macro (KMP_join_call, 0, arg) \
- macro (KMP_fork_barrier, stats_flags_e::logEvent, arg) \
- macro (KMP_join_barrier, stats_flags_e::logEvent, arg) \
- macro (KMP_barrier, 0, arg) \
- macro (KMP_end_split_barrier, 0, arg) \
- macro (KMP_wait_sleep, 0, arg) \
- macro (KMP_release, 0, arg) \
- macro (KMP_hier_gather, 0, arg) \
- macro (KMP_hier_release, 0, arg) \
- macro (KMP_hyper_gather, stats_flags_e::logEvent, arg) \
- macro (KMP_hyper_release, stats_flags_e::logEvent, arg) \
- macro (KMP_linear_gather, 0, arg) \
- macro (KMP_linear_release, 0, arg) \
- macro (KMP_tree_gather, 0, arg) \
- macro (KMP_tree_release, 0, arg) \
- macro (USER_master_invoke, stats_flags_e::logEvent, arg) \
- macro (USER_worker_invoke, stats_flags_e::logEvent, arg) \
- macro (USER_resume, stats_flags_e::logEvent, arg) \
- macro (USER_suspend, stats_flags_e::logEvent, arg) \
- macro (USER_launch_thread_loop, stats_flags_e::logEvent, arg) \
- macro (KMP_allocate_team, 0, arg) \
- macro (KMP_setup_icv_copy, 0, arg) \
- macro (USER_icv_copy, 0, arg) \
+ macro (TASK_execution, 0, arg) \
+ macro (OMP_set_numthreads, stats_flags_e::noUnits, arg) \
+ macro (OMP_PARALLEL_args, stats_flags_e::noUnits, arg) \
+ macro (OMP_single, 0, arg) \
+ macro (OMP_master, 0, arg) \
+ KMP_FOREACH_DEVELOPER_TIMER(macro, arg) \
macro (LAST,0, arg)
-
-// OMP_PARALLEL_args -- the number of arguments passed to a fork
-// FOR_static_iterations -- Number of available parallel chunks of work in a static for
-// FOR_dynamic_iterations -- Number of available parallel chunks of work in a dynamic for
-// Both adjust for any chunking, so if there were an iteration count of 20 but a chunk size of 10, we'd record 2.
-// OMP_serial -- thread zero time executing serial code
// OMP_start_end -- time from when OpenMP is initialized until the stats are printed at exit
+// OMP_serial -- thread zero time executing serial code
// OMP_work -- elapsed time in code dispatched by a fork (measured in the thread)
// Total_work -- a synthesized statistic summarizing how much parallel work each thread executed.
// OMP_barrier -- time at "real" barriers
// Total_barrier -- a synthesized statistic summarizing how much time at real barriers in each thread
-// OMP_set_lock -- time in lock setting
-// OMP_test_lock -- time in testing a lock
-// LOCK_WAIT -- time waiting for a lock
// FOR_static_scheduling -- time spent doing scheduling for a static "for"
// FOR_dynamic_scheduling -- time spent doing scheduling for a dynamic "for"
-// KMP_wait_sleep -- time in __kmp_wait_sleep
-// KMP_release -- time in __kmp_release
+
+#if (KMP_DEVELOPER_STATS)
+// Timers which are of interest to runtime library developers, not end users.
+// These have to be explicitly enabled in addition to the other stats.
+
// KMP_fork_barrier -- time in __kmp_fork_barrier
// KMP_join_barrier -- time in __kmp_join_barrier
// KMP_barrier -- time in __kmp_barrier
@@ -165,6 +152,32 @@ class stats_flags_e {
// KMP_tree_release -- time in __kmp_tree_barrier_release
// KMP_hyper_gather -- time in __kmp_hyper_barrier_gather
// KMP_hyper_release -- time in __kmp_hyper_barrier_release
+# define KMP_FOREACH_DEVELOPER_TIMER(macro, arg) \
+ macro (KMP_fork_call, 0, arg) \
+ macro (KMP_join_call, 0, arg) \
+ macro (KMP_fork_barrier, stats_flags_e::logEvent, arg) \
+ macro (KMP_join_barrier, stats_flags_e::logEvent, arg) \
+ macro (KMP_barrier, 0, arg) \
+ macro (KMP_end_split_barrier, 0, arg) \
+ macro (KMP_hier_gather, 0, arg) \
+ macro (KMP_hier_release, 0, arg) \
+ macro (KMP_hyper_gather, stats_flags_e::logEvent, arg) \
+ macro (KMP_hyper_release, stats_flags_e::logEvent, arg) \
+ macro (KMP_linear_gather, 0, arg) \
+ macro (KMP_linear_release, 0, arg) \
+ macro (KMP_tree_gather, 0, arg) \
+ macro (KMP_tree_release, 0, arg) \
+ macro (USER_master_invoke, stats_flags_e::logEvent, arg) \
+ macro (USER_worker_invoke, stats_flags_e::logEvent, arg) \
+ macro (USER_resume, stats_flags_e::logEvent, arg) \
+ macro (USER_suspend, stats_flags_e::logEvent, arg) \
+ macro (USER_launch_thread_loop, stats_flags_e::logEvent, arg) \
+ macro (KMP_allocate_team, 0, arg) \
+ macro (KMP_setup_icv_copy, 0, arg) \
+ macro (USER_icv_copy, 0, arg)
+#else
+# define KMP_FOREACH_DEVELOPER_TIMER(macro, arg)
+#endif
/*!
* \brief Add new explicit timers under KMP_FOREACH_EXPLICIT_TIMER() macro.
@@ -182,13 +195,21 @@ class stats_flags_e {
*
* @ingroup STATS_GATHERING
*/
-#define KMP_FOREACH_EXPLICIT_TIMER(macro, arg) \
- macro(OMP_serial, 0, arg) \
- macro(OMP_start_end, 0, arg) \
- macro(USER_icv_copy, 0, arg) \
- macro(USER_launch_thread_loop, stats_flags_e::logEvent, arg) \
+#define KMP_FOREACH_EXPLICIT_TIMER(macro, arg) \
+ macro(OMP_serial, 0, arg) \
+ macro(OMP_start_end, 0, arg) \
+ macro(OMP_single, 0, arg) \
+ macro(OMP_master, 0, arg) \
+ KMP_FOREACH_EXPLICIT_DEVELOPER_TIMER(macro,arg) \
macro(LAST, 0, arg)
+#if (KMP_DEVELOPER_STATS)
+# define KMP_FOREACH_EXPLICIT_DEVELOPER_TIMER(macro, arg) \
+ macro(USER_launch_thread_loop, stats_flags_e::logEvent, arg)
+#else
+# define KMP_FOREACH_EXPLICIT_DEVELOPER_TIMER(macro, arg)
+#endif
+
#define ENUMERATE(name,ignore,prefix) prefix##name,
enum timer_e {
KMP_FOREACH_TIMER(ENUMERATE, TIMER_)
@@ -689,6 +710,21 @@ extern kmp_stats_output_module __kmp_stats_output;
*/
#define KMP_RESET_STATS() __kmp_reset_stats()
+#if (KMP_DEVELOPER_STATS)
+# define KMP_TIME_DEVELOPER_BLOCK(n) KMP_TIME_BLOCK(n)
+# define KMP_COUNT_DEVELOPER_VALUE(n,v) KMP_COUNT_VALUE(n,v)
+# define KMP_COUNT_DEVELOPER_BLOCK(n) KMP_COUNT_BLOCK(n)
+# define KMP_START_DEVELOPER_EXPLICIT_TIMER(n) KMP_START_EXPLICIT_TIMER(n)
+# define KMP_STOP_DEVELOPER_EXPLICIT_TIMER(n) KMP_STOP_EXPLICIT_TIMER(n)
+#else
+// Null definitions
+# define KMP_TIME_DEVELOPER_BLOCK(n) ((void)0)
+# define KMP_COUNT_DEVELOPER_VALUE(n,v) ((void)0)
+# define KMP_COUNT_DEVELOPER_BLOCK(n) ((void)0)
+# define KMP_START_DEVELOPER_EXPLICIT_TIMER(n) ((void)0)
+# define KMP_STOP_DEVELOPER_EXPLICIT_TIMER(n) ((void)0)
+#endif
+
#else // KMP_STATS_ENABLED
// Null definitions
@@ -701,6 +737,11 @@ extern kmp_stats_output_module __kmp_stats_output;
#define KMP_OUTPUT_STATS(heading_string) ((void)0)
#define KMP_RESET_STATS() ((void)0)
+#define KMP_TIME_DEVELOPER_BLOCK(n) ((void)0)
+#define KMP_COUNT_DEVELOPER_VALUE(n,v) ((void)0)
+#define KMP_COUNT_DEVELOPER_BLOCK(n) ((void)0)
+#define KMP_START_DEVELOPER_EXPLICIT_TIMER(n) ((void)0)
+#define KMP_STOP_DEVELOPER_EXPLICIT_TIMER(n) ((void)0)
#endif // KMP_STATS_ENABLED
#endif // KMP_STATS_H
diff --git a/openmp/runtime/src/kmp_tasking.c b/openmp/runtime/src/kmp_tasking.c
index d1a94f690aa..683cbb23df7 100644
--- a/openmp/runtime/src/kmp_tasking.c
+++ b/openmp/runtime/src/kmp_tasking.c
@@ -17,6 +17,7 @@
#include "kmp_i18n.h"
#include "kmp_itt.h"
#include "kmp_wait_release.h"
+#include "kmp_stats.h"
#if OMPT_SUPPORT
#include "ompt-specific.h"
@@ -1136,6 +1137,7 @@ __kmp_invoke_task( kmp_int32 gtid, kmp_task_t *task, kmp_taskdata_t * current_ta
kmp_team_t * this_team = this_thr->th.th_team;
kmp_taskgroup_t * taskgroup = taskdata->td_taskgroup;
if ((taskgroup && taskgroup->cancel_request) || (this_team->t.t_cancel_request == cancel_parallel)) {
+ KMP_COUNT_BLOCK(TASK_cancelled);
// this task belongs to a task group and we need to cancel it
discard = 1 /* true */;
}
@@ -1146,6 +1148,8 @@ __kmp_invoke_task( kmp_int32 gtid, kmp_task_t *task, kmp_taskdata_t * current_ta
// Thunks generated by gcc take a different argument list.
//
if (!discard) {
+ KMP_COUNT_BLOCK(TASK_executed);
+ KMP_TIME_BLOCK (TASK_execution);
#endif // OMP_40_ENABLED
#ifdef KMP_GOMP_COMPAT
if (taskdata->td_flags.native) {
@@ -1356,6 +1360,8 @@ __kmpc_omp_taskyield( ident_t *loc_ref, kmp_int32 gtid, int end_part )
kmp_info_t * thread;
int thread_finished = FALSE;
+ KMP_COUNT_BLOCK(OMP_TASKYIELD);
+
KA_TRACE(10, ("__kmpc_omp_taskyield(enter): T#%d loc=%p end_part = %d\n",
gtid, loc_ref, end_part) );
@@ -1648,6 +1654,7 @@ __kmp_steal_task( kmp_info_t *victim, kmp_int32 gtid, kmp_task_team_t *task_team
__kmp_release_bootstrap_lock( & victim_td -> td.td_deque_lock );
+ KMP_COUNT_BLOCK(TASK_stolen);
KA_TRACE(10, ("__kmp_steal_task(exit #3): T#%d stole task %p from T#%d: task_team=%p "
"ntasks=%d head=%u tail=%u\n",
gtid, taskdata, __kmp_gtid_from_thread( victim ), task_team,
diff --git a/openmp/runtime/src/z_Linux_util.c b/openmp/runtime/src/z_Linux_util.c
index 41e6273796b..c476ebff785 100644
--- a/openmp/runtime/src/z_Linux_util.c
+++ b/openmp/runtime/src/z_Linux_util.c
@@ -1688,7 +1688,7 @@ __kmp_suspend_uninitialize_thread( kmp_info_t *th )
template <class C>
static inline void __kmp_suspend_template( int th_gtid, C *flag )
{
- KMP_TIME_BLOCK(USER_suspend);
+ KMP_TIME_DEVELOPER_BLOCK(USER_suspend);
kmp_info_t *th = __kmp_threads[th_gtid];
int status;
typename C::flag_t old_spin;
@@ -1826,6 +1826,7 @@ void __kmp_suspend_oncore(int th_gtid, kmp_flag_oncore *flag) {
template <class C>
static inline void __kmp_resume_template( int target_gtid, C *flag )
{
+ KMP_TIME_DEVELOPER_BLOCK(USER_resume);
kmp_info_t *th = __kmp_threads[target_gtid];
int status;
@@ -1900,7 +1901,6 @@ void __kmp_resume_oncore(int target_gtid, kmp_flag_oncore *flag) {
void
__kmp_resume_monitor()
{
- KMP_TIME_BLOCK(USER_resume);
int status;
#ifdef KMP_DEBUG
int gtid = TCR_4(__kmp_init_gtid) ? __kmp_get_gtid() : -1;
OpenPOWER on IntegriCloud