diff options
Diffstat (limited to 'openmp/runtime/src')
-rw-r--r-- | openmp/runtime/src/kmp_barrier.cpp | 42 | ||||
-rw-r--r-- | openmp/runtime/src/kmp_csupport.c | 37 | ||||
-rw-r--r-- | openmp/runtime/src/kmp_dispatch.cpp | 23 | ||||
-rw-r--r-- | openmp/runtime/src/kmp_runtime.c | 122 | ||||
-rw-r--r-- | openmp/runtime/src/kmp_sched.cpp | 14 |
5 files changed, 125 insertions, 113 deletions
diff --git a/openmp/runtime/src/kmp_barrier.cpp b/openmp/runtime/src/kmp_barrier.cpp index 429390b48cd..d72939870fa 100644 --- a/openmp/runtime/src/kmp_barrier.cpp +++ b/openmp/runtime/src/kmp_barrier.cpp @@ -57,7 +57,7 @@ __kmp_linear_barrier_gather(enum barrier_type bt, kmp_info_t *this_thr, int gtid #if USE_ITT_BUILD && USE_ITT_NOTIFY // Barrier imbalance - save arrive time to the thread - if(__kmp_forkjoin_frames_mode == 2 || __kmp_forkjoin_frames_mode == 3) { + if(__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) { this_thr->th.th_bar_arrive_time = this_thr->th.th_bar_min_time = __itt_get_timestamp(); } #endif @@ -97,7 +97,7 @@ __kmp_linear_barrier_gather(enum barrier_type bt, kmp_info_t *this_thr, int gtid USE_ITT_BUILD_ARG(itt_sync_obj) ); #if USE_ITT_BUILD && USE_ITT_NOTIFY // Barrier imbalance - write min of the thread time and the other thread time to the thread. - if (__kmp_forkjoin_frames_mode == 2 || __kmp_forkjoin_frames_mode == 3) { + if (__kmp_forkjoin_frames_mode == 2) { this_thr->th.th_bar_min_time = KMP_MIN(this_thr->th.th_bar_min_time, other_threads[i]->th.th_bar_min_time); } @@ -234,7 +234,7 @@ __kmp_tree_barrier_gather(enum barrier_type bt, kmp_info_t *this_thr, int gtid, #if USE_ITT_BUILD && USE_ITT_NOTIFY // Barrier imbalance - save arrive time to the thread - if(__kmp_forkjoin_frames_mode == 2 || __kmp_forkjoin_frames_mode == 3) { + if(__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) { this_thr->th.th_bar_arrive_time = this_thr->th.th_bar_min_time = __itt_get_timestamp(); } #endif @@ -262,7 +262,7 @@ __kmp_tree_barrier_gather(enum barrier_type bt, kmp_info_t *this_thr, int gtid, USE_ITT_BUILD_ARG(itt_sync_obj) ); #if USE_ITT_BUILD && USE_ITT_NOTIFY // Barrier imbalance - write min of the thread time and a child time to the thread. - if (__kmp_forkjoin_frames_mode == 2 || __kmp_forkjoin_frames_mode == 3) { + if (__kmp_forkjoin_frames_mode == 2) { this_thr->th.th_bar_min_time = KMP_MIN(this_thr->th.th_bar_min_time, child_thr->th.th_bar_min_time); } @@ -432,7 +432,7 @@ __kmp_hyper_barrier_gather(enum barrier_type bt, kmp_info_t *this_thr, int gtid, #if USE_ITT_BUILD && USE_ITT_NOTIFY // Barrier imbalance - save arrive time to the thread - if(__kmp_forkjoin_frames_mode == 2 || __kmp_forkjoin_frames_mode == 3) { + if(__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) { this_thr->th.th_bar_arrive_time = this_thr->th.th_bar_min_time = __itt_get_timestamp(); } #endif @@ -485,7 +485,7 @@ __kmp_hyper_barrier_gather(enum barrier_type bt, kmp_info_t *this_thr, int gtid, USE_ITT_BUILD_ARG(itt_sync_obj) ); #if USE_ITT_BUILD && USE_ITT_NOTIFY // Barrier imbalance - write min of the thread time and a child time to the thread. - if (__kmp_forkjoin_frames_mode == 2 || __kmp_forkjoin_frames_mode == 3) { + if (__kmp_forkjoin_frames_mode == 2) { this_thr->th.th_bar_min_time = KMP_MIN(this_thr->th.th_bar_min_time, child_thr->th.th_bar_min_time); } @@ -1147,24 +1147,29 @@ __kmp_barrier(enum barrier_type bt, int gtid, int is_split, size_t reduce_size, __kmp_itt_barrier_middle(gtid, itt_sync_obj); #endif /* USE_ITT_BUILD */ #if USE_ITT_BUILD && USE_ITT_NOTIFY - // Barrier - report frame end - if (__itt_frame_submit_v3_ptr && __kmp_forkjoin_frames_mode) { + // Barrier - report frame end (only if active_level == 1) + if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) && __kmp_forkjoin_frames_mode && +#if OMP_40_ENABLED + this_thr->th.th_teams_microtask == NULL && +#endif + team->t.t_active_level == 1) + { kmp_uint64 cur_time = __itt_get_timestamp(); - kmp_info_t **other_threads = this_thr->th.th_team->t.t_threads; + kmp_info_t **other_threads = team->t.t_threads; int nproc = this_thr->th.th_team_nproc; int i; - // Initialize with master's wait time - kmp_uint64 delta = cur_time - this_thr->th.th_bar_arrive_time; switch(__kmp_forkjoin_frames_mode) { case 1: __kmp_itt_frame_submit(gtid, this_thr->th.th_frame_time, cur_time, 0, loc, nproc); this_thr->th.th_frame_time = cur_time; break; - case 2: + case 2: // AC 2015-01-19: currently does not work for hierarchical (to be fixed) __kmp_itt_frame_submit(gtid, this_thr->th.th_bar_min_time, cur_time, 1, loc, nproc); break; case 3: if( __itt_metadata_add_ptr ) { + // Initialize with master's wait time + kmp_uint64 delta = cur_time - this_thr->th.th_bar_arrive_time; for (i=1; i<nproc; ++i) { delta += ( cur_time - other_threads[i]->th.th_bar_arrive_time ); } @@ -1413,14 +1418,17 @@ __kmp_join_barrier(int gtid) # if USE_ITT_BUILD && USE_ITT_NOTIFY // Join barrier - report frame end - if (__itt_frame_submit_v3_ptr && __kmp_forkjoin_frames_mode) { + if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) && __kmp_forkjoin_frames_mode && +#if OMP_40_ENABLED + this_thr->th.th_teams_microtask == NULL && +#endif + team->t.t_active_level == 1) + { kmp_uint64 cur_time = __itt_get_timestamp(); ident_t * loc = team->t.t_ident; - kmp_info_t **other_threads = this_thr->th.th_team->t.t_threads; + kmp_info_t **other_threads = team->t.t_threads; int nproc = this_thr->th.th_team_nproc; int i; - // Initialize with master's wait time - kmp_uint64 delta = cur_time - this_thr->th.th_bar_arrive_time; switch(__kmp_forkjoin_frames_mode) { case 1: __kmp_itt_frame_submit(gtid, this_thr->th.th_frame_time, cur_time, 0, loc, nproc); @@ -1430,6 +1438,8 @@ __kmp_join_barrier(int gtid) break; case 3: if( __itt_metadata_add_ptr ) { + // Initialize with master's wait time + kmp_uint64 delta = cur_time - this_thr->th.th_bar_arrive_time; for (i=1; i<nproc; ++i) { delta += ( cur_time - other_threads[i]->th.th_bar_arrive_time ); } diff --git a/openmp/runtime/src/kmp_csupport.c b/openmp/runtime/src/kmp_csupport.c index f5451e90b42..6f782edcf82 100644 --- a/openmp/runtime/src/kmp_csupport.c +++ b/openmp/runtime/src/kmp_csupport.c @@ -535,27 +535,30 @@ __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) #if USE_ITT_BUILD kmp_uint64 cur_time = 0; #if USE_ITT_NOTIFY - if( __itt_get_timestamp_ptr ) { + if ( __itt_get_timestamp_ptr ) { cur_time = __itt_get_timestamp(); } #endif /* USE_ITT_NOTIFY */ - // Report the barrier - if( ( __kmp_forkjoin_frames_mode == 1 || __kmp_forkjoin_frames_mode == 3 ) && __itt_frame_submit_v3_ptr ) { - if( this_thr->th.th_team->t.t_level == 0 ) { - __kmp_itt_frame_submit( global_tid, this_thr->th.th_frame_time_serialized, cur_time, 0, loc, this_thr->th.th_team_nproc, 0 ); - } - } - // Mark the end of the "parallel" region for VTune. Only use one of frame notification scheme at the moment. - if ( ( __itt_frame_end_v3_ptr && __kmp_forkjoin_frames && ! __kmp_forkjoin_frames_mode ) || KMP_ITT_DEBUG ) - { - this_thr->th.th_ident = loc; - __kmp_itt_region_joined( global_tid, 1 ); - } - if ( ( __itt_frame_submit_v3_ptr && __kmp_forkjoin_frames_mode == 3 ) || KMP_ITT_DEBUG ) - { + if ( this_thr->th.th_team->t.t_level == 0 +#if OMP_40_ENABLED + && this_thr->th.th_teams_microtask == NULL +#endif + ) { + // Report the barrier this_thr->th.th_ident = loc; - // Since barrier frame for serialized region is equal to the region we use the same begin timestamp as for the barrier. - __kmp_itt_frame_submit( global_tid, serial_team->t.t_region_time, cur_time, 0, loc, this_thr->th.th_team_nproc, 2 ); + if ( ( __itt_frame_submit_v3_ptr || KMP_ITT_DEBUG ) && + ( __kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 1 ) ) + { + __kmp_itt_frame_submit( global_tid, this_thr->th.th_frame_time_serialized, + cur_time, 0, loc, this_thr->th.th_team_nproc, 0 ); + if ( __kmp_forkjoin_frames_mode == 3 ) + // Since barrier frame for serialized region is equal to the region we use the same begin timestamp as for the barrier. + __kmp_itt_frame_submit( global_tid, serial_team->t.t_region_time, + cur_time, 0, loc, this_thr->th.th_team_nproc, 2 ); + } else if ( ( __itt_frame_end_v3_ptr || KMP_ITT_DEBUG ) && + ! __kmp_forkjoin_frames_mode && __kmp_forkjoin_frames ) + // Mark the end of the "parallel" region for VTune. Only use one of frame notification scheme at the moment. + __kmp_itt_region_joined( global_tid, 1 ); } #endif /* USE_ITT_BUILD */ diff --git a/openmp/runtime/src/kmp_dispatch.cpp b/openmp/runtime/src/kmp_dispatch.cpp index 2cf3d7f42cd..6e93049b9d1 100644 --- a/openmp/runtime/src/kmp_dispatch.cpp +++ b/openmp/runtime/src/kmp_dispatch.cpp @@ -633,6 +633,12 @@ __kmp_dispatch_init( #if USE_ITT_BUILD kmp_uint64 cur_chunk = chunk; + int itt_need_metadata_reporting = __itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 && + KMP_MASTER_GTID(gtid) && +#if OMP_40_ENABLED + th->th.th_teams_microtask == NULL && +#endif + team->t.t_active_level == 1; #endif if ( ! active ) { pr = reinterpret_cast< dispatch_private_info_template< T >* > @@ -869,9 +875,8 @@ __kmp_dispatch_init( } #if USE_ITT_BUILD // Calculate chunk for metadata report - if( __itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 ) { + if ( itt_need_metadata_reporting ) cur_chunk = limit - init + 1; - } #endif if ( st == 1 ) { pr->u.p.lb = lb + init; @@ -1124,16 +1129,10 @@ __kmp_dispatch_init( if ( pr->ordered ) { __kmp_itt_ordered_init( gtid ); }; // if -#endif /* USE_ITT_BUILD */ - }; // if - -#if USE_ITT_BUILD - // Report loop metadata - if( __itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 ) { - kmp_uint32 tid = __kmp_tid_from_gtid( gtid ); - if (KMP_MASTER_TID(tid)) { + // Report loop metadata + if ( itt_need_metadata_reporting ) { + // Only report metadata by master of active team at level 1 kmp_uint64 schedtype = 0; - switch ( schedule ) { case kmp_sch_static_chunked: case kmp_sch_static_balanced:// Chunk is calculated in the switch above @@ -1156,8 +1155,8 @@ __kmp_dispatch_init( } __kmp_itt_metadata_loop(loc, schedtype, tc, cur_chunk); } - } #endif /* USE_ITT_BUILD */ + }; // if #ifdef KMP_DEBUG { diff --git a/openmp/runtime/src/kmp_runtime.c b/openmp/runtime/src/kmp_runtime.c index e530011a924..2c1234ff3a6 100644 --- a/openmp/runtime/src/kmp_runtime.c +++ b/openmp/runtime/src/kmp_runtime.c @@ -814,6 +814,16 @@ __kmp_enter_single( int gtid, ident_t *id_ref, int push_ws ) /* TODO: Should this be acquire or release? */ status = KMP_COMPARE_AND_STORE_ACQ32(&team->t.t_construct, old_this, th->th.th_local.this_construct); +#if USE_ITT_BUILD + if ( __itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 && KMP_MASTER_GTID(gtid) && +#if OMP_40_ENABLED + th->th.th_teams_microtask == NULL && +#endif + team->t.t_active_level == 1 ) + { // Only report metadata by master of active team at level 1 + __kmp_itt_metadata_single( id_ref ); + } +#endif /* USE_ITT_BUILD */ } if( __kmp_env_consistency_check ) { @@ -827,10 +837,6 @@ __kmp_enter_single( int gtid, ident_t *id_ref, int push_ws ) if ( status ) { __kmp_itt_single_start( gtid ); } - if( __itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 && KMP_MASTER_GTID(gtid)) { - __kmp_itt_metadata_single( id_ref ); - } - #endif /* USE_ITT_BUILD */ return status; } @@ -1420,22 +1426,26 @@ __kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid) #if USE_ITT_BUILD // Mark the start of the "parallel" region for VTune. Only use one of frame notification scheme at the moment - if ( ( __itt_frame_begin_v3_ptr && __kmp_forkjoin_frames && ! __kmp_forkjoin_frames_mode ) || KMP_ITT_DEBUG ) - { - this_thr->th.th_ident = loc; - // 0 - no barriers; 1 - serialized parallel - __kmp_itt_region_forking( global_tid, this_thr->th.th_team_nproc, 0, 1 ); - } - // Save the start of the "parallel" region for VTune. This is the join barrier begin at the same time. - if( ( ( __kmp_forkjoin_frames_mode == 1 || __kmp_forkjoin_frames_mode == 3 ) && - __itt_frame_submit_v3_ptr && __itt_get_timestamp_ptr ) || KMP_ITT_DEBUG ) - { - this_thr->th.th_ident = loc; + if ( serial_team->t.t_level == 1 +#if OMP_40_ENABLED + && this_thr->th.th_teams_microtask == NULL +#endif + ) { #if USE_ITT_NOTIFY - if( this_thr->th.th_team->t.t_level == 1 ) { - serial_team->t.t_region_time = this_thr->th.th_frame_time_serialized = __itt_get_timestamp(); - } + // Save the start of the "parallel" region for VTune. This is the frame begin at the same time. + if ( ( __itt_get_timestamp_ptr || KMP_ITT_DEBUG ) && + ( __kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 1 ) ) + { + serial_team->t.t_region_time = this_thr->th.th_frame_time_serialized = __itt_get_timestamp(); + } else // only one notification scheme (either "submit" or "forking/joined", not both) #endif + if ( ( __itt_frame_begin_v3_ptr || KMP_ITT_DEBUG ) && + __kmp_forkjoin_frames && ! __kmp_forkjoin_frames_mode ) + { + this_thr->th.th_ident = loc; + // 0 - no barriers; 1 - serialized parallel + __kmp_itt_region_forking( global_tid, this_thr->th.th_team_nproc, 0, 1 ); + } } #endif /* USE_ITT_BUILD */ } @@ -2137,36 +2147,30 @@ __kmp_fork_call( #if USE_ITT_BUILD - // Mark start of "parallel" region for VTune. Only use one of frame notification scheme at the moment. - if ((__itt_frame_begin_v3_ptr && __kmp_forkjoin_frames && !__kmp_forkjoin_frames_mode) || KMP_ITT_DEBUG) { + if ( team->t.t_active_level == 1 // only report frames at level 1 # if OMP_40_ENABLED - if (!master_th->th.th_teams_microtask || microtask == (microtask_t)__kmp_teams_master) - // Either not in teams or the outer fork of the teams construct + && !master_th->th.th_teams_microtask // not in teams construct # endif /* OMP_40_ENABLED */ - { - __kmp_itt_region_forking(gtid, team->t.t_nproc, 0); - } - } + ) { #if USE_ITT_NOTIFY - kmp_uint64 tmp_time = 0; - if (((__kmp_forkjoin_frames_mode == 1 || __kmp_forkjoin_frames_mode == 3) && __itt_frame_submit_v3_ptr) || KMP_ITT_DEBUG) { - if (!(team->t.t_active_level > 1)) { -# if OMP_40_ENABLED - if (!master_th->th.th_teams_microtask || microtask == (microtask_t)__kmp_teams_master) { - // Either not in teams or the outer fork of the teams construct -# endif /* OMP_40_ENABLED */ - if ( __itt_get_timestamp_ptr ) - tmp_time = __itt_get_timestamp(); - // Internal fork - report frame begin + if ( ( __itt_frame_submit_v3_ptr || KMP_ITT_DEBUG ) && + ( __kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 1 ) ) + { + kmp_uint64 tmp_time = 0; + if ( __itt_get_timestamp_ptr ) + tmp_time = __itt_get_timestamp(); + // Internal fork - report frame begin master_th->th.th_frame_time = tmp_time; - if ( __kmp_forkjoin_frames_mode==3 ) - team->t.t_region_time = tmp_time; -# if OMP_40_ENABLED - } -# endif /* OMP_40_ENABLED */ + if ( __kmp_forkjoin_frames_mode == 3 ) + team->t.t_region_time = tmp_time; + } else // only one notification scheme (either "submit" or "forking/joined", not both) +#endif /* USE_ITT_NOTIFY */ + if ( ( __itt_frame_begin_v3_ptr || KMP_ITT_DEBUG ) && + __kmp_forkjoin_frames && !__kmp_forkjoin_frames_mode ) + { // Mark start of "parallel" region for VTune. + __kmp_itt_region_forking(gtid, team->t.t_nproc, 0); } } -#endif /* USE_ITT_NOTIFY */ #endif /* USE_ITT_BUILD */ /* now go on and do the work */ @@ -2342,31 +2346,21 @@ __kmp_join_call(ident_t *loc, int gtid __kmp_itt_stack_caller_destroy( (__itt_caller)team->t.t_stack_id ); // destroy the stack stitching id after join barrier } - // Mark end of "parallel" region for VTune. Only use one of frame notification scheme at the moment. - if ( ( __itt_frame_end_v3_ptr && __kmp_forkjoin_frames && ! __kmp_forkjoin_frames_mode ) || KMP_ITT_DEBUG ) { + // Mark end of "parallel" region for VTune. + if ( team->t.t_active_level == 1 # if OMP_40_ENABLED - if ( !master_th->th.th_teams_microtask /* not in teams */ || - ( !exit_teams && team->t.t_level == master_th->th.th_teams_level ) ) - // Either not in teams or exiting teams region - // (teams is a frame and no other frames inside the teams) + && !master_th->th.th_teams_microtask /* not in teams construct */ # endif /* OMP_40_ENABLED */ - { - master_th->th.th_ident = loc; - __kmp_itt_region_joined( gtid ); - } - } - if ( ( __itt_frame_submit_v3_ptr && __kmp_forkjoin_frames_mode == 3 ) || KMP_ITT_DEBUG ) { -# if OMP_40_ENABLED - if ( !master_th->th.th_teams_microtask /* not in teams */ || - ( !exit_teams && team->t.t_level == master_th->th.th_teams_level ) ) - // Either not in teams or exiting teams region - // (teams is a frame and no other frames inside the teams) -# endif /* OMP_40_ENABLED */ - { + ) { master_th->th.th_ident = loc; - __kmp_itt_frame_submit( gtid, team->t.t_region_time, master_th->th.th_frame_time, 0, loc, master_th->th.th_team_nproc, 1 ); - } - } + // only one notification scheme (either "submit" or "forking/joined", not both) + if ( ( __itt_frame_submit_v3_ptr || KMP_ITT_DEBUG ) && __kmp_forkjoin_frames_mode == 3 ) + __kmp_itt_frame_submit( gtid, team->t.t_region_time, master_th->th.th_frame_time, + 0, loc, master_th->th.th_team_nproc, 1 ); + else if ( ( __itt_frame_end_v3_ptr || KMP_ITT_DEBUG ) && + ! __kmp_forkjoin_frames_mode && __kmp_forkjoin_frames ) + __kmp_itt_region_joined( gtid ); + } // active_level == 1 #endif /* USE_ITT_BUILD */ #if OMP_40_ENABLED diff --git a/openmp/runtime/src/kmp_sched.cpp b/openmp/runtime/src/kmp_sched.cpp index 375cacbc835..46b5987cf56 100644 --- a/openmp/runtime/src/kmp_sched.cpp +++ b/openmp/runtime/src/kmp_sched.cpp @@ -92,6 +92,7 @@ __kmp_for_static_init( register kmp_uint32 nth; register UT trip_count; register kmp_team_t *team; + register kmp_info_t *th = __kmp_threads[ gtid ]; #if OMPT_SUPPORT && OMPT_TRACE ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL); @@ -157,13 +158,13 @@ __kmp_for_static_init( if ( schedtype > kmp_ord_upper ) { // we are in DISTRIBUTE construct schedtype += kmp_sch_static - kmp_distribute_static; // AC: convert to usual schedule type - tid = __kmp_threads[ gtid ]->th.th_team->t.t_master_tid; - team = __kmp_threads[ gtid ]->th.th_team->t.t_parent; + tid = th->th.th_team->t.t_master_tid; + team = th->th.th_team->t.t_parent; } else #endif { tid = __kmp_tid_from_gtid( global_tid ); - team = __kmp_threads[ gtid ]->th.th_team; + team = th->th.th_team; } /* determine if "for" loop is an active worksharing construct */ @@ -318,7 +319,12 @@ __kmp_for_static_init( #if USE_ITT_BUILD // Report loop metadata - if ( KMP_MASTER_TID(tid) && __itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 ) { + if ( KMP_MASTER_TID(tid) && __itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 && +#if OMP_40_ENABLED + th->th.th_teams_microtask == NULL && +#endif + team->t.t_active_level == 1 ) + { kmp_uint64 cur_chunk = chunk; // Calculate chunk in case it was not specified; it is specified for kmp_sch_static_chunked if ( schedtype == kmp_sch_static ) { |