| author | Andrey Churbanov <Andrey.Churbanov@intel.com> | 2015-02-10 18:37:43 +0000 |
|---|---|---|
| committer | Andrey Churbanov <Andrey.Churbanov@intel.com> | 2015-02-10 18:37:43 +0000 |
| commit | 6d224dbda70cf6d88832f770efe12a558ba5bf10 (patch) | |
| tree | 24ff62f5d9d696f741a216e0b8f8d3817d1eb178 | |
| parent | d003ab32079bc2d5c8c67e5efd1f6532a386087b (diff) | |
The use of the tt_state flag is replaced by an array of two task_team pointers.
llvm-svn: 228718
| -rw-r--r-- | openmp/runtime/src/kmp.h | 15 |
| -rw-r--r-- | openmp/runtime/src/kmp_barrier.cpp | 10 |
| -rw-r--r-- | openmp/runtime/src/kmp_csupport.c | 16 |
| -rw-r--r-- | openmp/runtime/src/kmp_omp.h | 1 |
| -rw-r--r-- | openmp/runtime/src/kmp_runtime.c | 271 |
| -rw-r--r-- | openmp/runtime/src/kmp_tasking.c | 91 |
| -rw-r--r-- | openmp/runtime/src/kmp_wait_release.h | 2 |
7 files changed, 222 insertions, 184 deletions
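For context on what the diff below implements: each team now keeps two task_team pointers instead of one, and each thread carries a 0/1 th_task_state index that is toggled at every barrier to select the current task team, replacing the old tt_state flag. The following is a minimal, stand-alone sketch of that alternating-index pattern; the names echo the patch (t_task_team[2], th_task_state, __kmp_task_team_sync), but the structs and the main() driver are simplified stand-ins for illustration, not the runtime's actual definitions.

```c
/* Sketch of the alternating task-team index introduced by this patch.
 * The kmp-style names mirror the diff; the types are simplified stand-ins. */
#include <stdio.h>

typedef struct task_team { int id; } task_team_t;

typedef struct team {
    task_team_t *t_task_team[2];   /* two task teams; threads alternate between them */
} team_t;

typedef struct thread_info {
    task_team_t *th_task_team;     /* task team this thread currently uses */
    unsigned char th_task_state;   /* 0/1 index selecting t_task_team[] */
} thread_info_t;

/* Analogous to __kmp_task_team_sync(): toggle the index, then pick up the
 * corresponding task team from the team struct. */
static void task_team_sync(thread_info_t *thr, team_t *team) {
    thr->th_task_state = 1 - thr->th_task_state;
    thr->th_task_team = team->t_task_team[thr->th_task_state];
}

int main(void) {
    task_team_t tt0 = { 0 }, tt1 = { 1 };
    team_t team = { { &tt0, &tt1 } };
    thread_info_t thr = { NULL, 0 };

    for (int barrier = 0; barrier < 4; ++barrier) {
        task_team_sync(&thr, &team);
        printf("after barrier %d: using task_team %d\n", barrier, thr.th_task_team->id);
    }
    return 0;
}
```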
diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h index 97d9a6632db..f6735554fd8 100644 --- a/openmp/runtime/src/kmp.h +++ b/openmp/runtime/src/kmp.h @@ -1852,10 +1852,8 @@ extern kmp_int32 __kmp_task_stealing_constraint; // The tt_found_tasks flag is a signal to all threads in the team that tasks were spawned and // queued since the previous barrier release. -// State is used to alternate task teams for successive barriers -#define KMP_TASKING_ENABLED(task_team,state) \ - ((TCR_SYNC_4((task_team)->tt.tt_found_tasks) == TRUE) && \ - (TCR_4((task_team)->tt.tt_state) == (state))) +#define KMP_TASKING_ENABLED(task_team) \ + (TCR_SYNC_4((task_team)->tt.tt_found_tasks) == TRUE) /*! @ingroup BASIC_TYPES @{ @@ -2071,8 +2069,6 @@ typedef struct kmp_base_task_team { volatile kmp_uint32 tt_ref_ct; /* #threads accessing struct */ /* (not incl. master) */ - kmp_int32 tt_state; /* alternating 0/1 for task team identification */ - /* Note: VERY sensitive to padding! */ } kmp_base_task_team_t; union KMP_ALIGN_CACHE kmp_task_team { @@ -2195,6 +2191,9 @@ typedef struct KMP_ALIGN_CACHE kmp_base_info { kmp_task_team_t * th_task_team; // Task team struct kmp_taskdata_t * th_current_task; // Innermost Task being executed kmp_uint8 th_task_state; // alternating 0/1 for task team identification + kmp_uint8 * th_task_state_memo_stack; // Stack holding memos of th_task_state at nested levels + kmp_uint32 th_task_state_top; // Top element of th_task_state_memo_stack + kmp_uint32 th_task_state_stack_sz; // Size of th_task_state_memo_stack /* * More stuff for keeping track of active/sleeping threads @@ -2294,7 +2293,7 @@ typedef struct KMP_ALIGN_CACHE kmp_base_team { kmp_team_p *t_parent; // parent team kmp_team_p *t_next_pool; // next free team in the team pool kmp_disp_t *t_dispatch; // thread's dispatch data - kmp_task_team_t *t_task_team; // Task team struct + kmp_task_team_t *t_task_team[2]; // Task team struct; switch between 2 #if OMP_40_ENABLED kmp_proc_bind_t t_proc_bind; // bind type for par region #endif // OMP_40_ENABLED @@ -3100,7 +3099,7 @@ int __kmp_execute_tasks_oncore(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_onco extern void __kmp_reap_task_teams( void ); extern void __kmp_unref_task_team( kmp_task_team_t *task_team, kmp_info_t *thread ); extern void __kmp_wait_to_unref_task_teams( void ); -extern void __kmp_task_team_setup ( kmp_info_t *this_thr, kmp_team_t *team ); +extern void __kmp_task_team_setup ( kmp_info_t *this_thr, kmp_team_t *team, int both ); extern void __kmp_task_team_sync ( kmp_info_t *this_thr, kmp_team_t *team ); extern void __kmp_task_team_wait ( kmp_info_t *this_thr, kmp_team_t *team #if USE_ITT_BUILD diff --git a/openmp/runtime/src/kmp_barrier.cpp b/openmp/runtime/src/kmp_barrier.cpp index 5ece174128e..eb3c21a673d 100644 --- a/openmp/runtime/src/kmp_barrier.cpp +++ b/openmp/runtime/src/kmp_barrier.cpp @@ -1101,7 +1101,7 @@ __kmp_barrier(enum barrier_type bt, int gtid, int is_split, size_t reduce_size, if (__kmp_tasking_mode != tskm_immediate_exec) { __kmp_task_team_wait(this_thr, team USE_ITT_BUILD_ARG(itt_sync_obj) ); - __kmp_task_team_setup(this_thr, team); + __kmp_task_team_setup(this_thr, team, 0); // use 0 to only setup the current team } @@ -1189,7 +1189,7 @@ __kmp_barrier(enum barrier_type bt, int gtid, int is_split, size_t reduce_size, status = 0; if (__kmp_tasking_mode != tskm_immediate_exec) { // The task team should be NULL for serialized code (tasks will be executed immediately) - KMP_DEBUG_ASSERT(team->t.t_task_team == NULL); + 
KMP_DEBUG_ASSERT(team->t.t_task_team[this_thr->th.th_task_state] == NULL); KMP_DEBUG_ASSERT(this_thr->th.th_task_team == NULL); } } @@ -1293,9 +1293,9 @@ __kmp_join_barrier(int gtid) # ifdef KMP_DEBUG if (__kmp_tasking_mode != tskm_immediate_exec) { KA_TRACE(20, ( "__kmp_join_barrier: T#%d, old team = %d, old task_team = %p, th_task_team = %p\n", - __kmp_gtid_from_thread(this_thr), team_id, team->t.t_task_team, + __kmp_gtid_from_thread(this_thr), team_id, team->t.t_task_team[this_thr->th.th_task_state], this_thr->th.th_task_team)); - KMP_DEBUG_ASSERT(this_thr->th.th_task_team == team->t.t_task_team); + KMP_DEBUG_ASSERT(this_thr->th.th_task_team == team->t.t_task_team[this_thr->th.th_task_state]); } # endif /* KMP_DEBUG */ @@ -1448,7 +1448,7 @@ __kmp_fork_barrier(int gtid, int tid) #endif if (__kmp_tasking_mode != tskm_immediate_exec) { - __kmp_task_team_setup(this_thr, team); + __kmp_task_team_setup(this_thr, team, 1); // 1 indicates setup both task teams } /* The master thread may have changed its blocktime between the join barrier and the diff --git a/openmp/runtime/src/kmp_csupport.c b/openmp/runtime/src/kmp_csupport.c index 4909bd9ed00..b88c0690640 100644 --- a/openmp/runtime/src/kmp_csupport.c +++ b/openmp/runtime/src/kmp_csupport.c @@ -494,13 +494,8 @@ __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) this_thr -> th.th_current_task -> td_flags.executing = 1; if ( __kmp_tasking_mode != tskm_immediate_exec ) { - // - // Copy the task team from the new child / old parent team - // to the thread. If non-NULL, copy the state flag also. - // - if ( ( this_thr -> th.th_task_team = this_thr -> th.th_team -> t.t_task_team ) != NULL ) { - this_thr -> th.th_task_state = this_thr -> th.th_task_team -> tt.tt_state; - } + // Copy the task team from the new child / old parent team to the thread. 
+ this_thr->th.th_task_team = this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state]; KA_TRACE( 20, ( "__kmpc_end_serialized_parallel: T#%d restoring task_team %p / team %p\n", global_tid, this_thr -> th.th_task_team, this_thr -> th.th_team ) ); } @@ -1821,11 +1816,10 @@ __kmpc_reduce_nowait( teams_swapped = 1; th->th.th_info.ds.ds_tid = team->t.t_master_tid; th->th.th_team = team->t.t_parent; - th->th.th_task_team = th->th.th_team->t.t_task_team; th->th.th_team_nproc = th->th.th_team->t.t_nproc; + th->th.th_task_team = th->th.th_team->t.t_task_team[0]; task_state = th->th.th_task_state; - if( th->th.th_task_team ) - th->th.th_task_state = th->th.th_task_team->tt.tt_state; + th->th.th_task_state = 0; } } #endif // OMP_40_ENABLED @@ -1899,8 +1893,8 @@ __kmpc_reduce_nowait( // Restore thread structure th->th.th_info.ds.ds_tid = 0; th->th.th_team = team; - th->th.th_task_team = team->t.t_task_team; th->th.th_team_nproc = team->t.t_nproc; + th->th.th_task_team = team->t.t_task_team[task_state]; th->th.th_task_state = task_state; } #endif diff --git a/openmp/runtime/src/kmp_omp.h b/openmp/runtime/src/kmp_omp.h index 9657f643de7..7bc11498e18 100644 --- a/openmp/runtime/src/kmp_omp.h +++ b/openmp/runtime/src/kmp_omp.h @@ -162,7 +162,6 @@ typedef struct { offset_and_size_t tt_nproc; offset_and_size_t tt_unfinished_threads; offset_and_size_t tt_active; - offset_and_size_t tt_state; /* kmp_taskdata_t */ kmp_int32 td_sizeof_struct; diff --git a/openmp/runtime/src/kmp_runtime.c b/openmp/runtime/src/kmp_runtime.c index 31c8ebddfa5..b486bd54b21 100644 --- a/openmp/runtime/src/kmp_runtime.c +++ b/openmp/runtime/src/kmp_runtime.c @@ -1229,8 +1229,8 @@ __kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid) KMP_MB(); if ( __kmp_tasking_mode != tskm_immediate_exec ) { - KMP_DEBUG_ASSERT( this_thr->th.th_task_team == this_thr->th.th_team->t.t_task_team ); - KMP_DEBUG_ASSERT( serial_team->t.t_task_team == NULL ); + KMP_DEBUG_ASSERT(this_thr->th.th_task_team == this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state]); + KMP_DEBUG_ASSERT( serial_team->t.t_task_team[this_thr->th.th_task_state] == NULL ); KA_TRACE( 20, ( "__kmpc_serialized_parallel: T#%d pushing task_team %p / team %p, new task_team = NULL\n", global_tid, this_thr->th.th_task_team, this_thr->th.th_team ) ); this_thr->th.th_task_team = NULL; @@ -1565,7 +1565,7 @@ __kmp_fork_call( #if KMP_DEBUG if ( __kmp_tasking_mode != tskm_immediate_exec ) { - KMP_DEBUG_ASSERT( master_th->th.th_task_team == parent_team->t.t_task_team ); + KMP_DEBUG_ASSERT(master_th->th.th_task_team == parent_team->t.t_task_team[master_th->th.th_task_state]); } #endif @@ -1797,11 +1797,31 @@ __kmp_fork_call( if ( __kmp_tasking_mode != tskm_immediate_exec ) { // Set master's task team to team's task team. Unless this is hot team, it should be NULL. 
- KMP_DEBUG_ASSERT( master_th->th.th_task_team == parent_team->t.t_task_team ); + KMP_DEBUG_ASSERT(master_th->th.th_task_team == parent_team->t.t_task_team[master_th->th.th_task_state]); KA_TRACE( 20, ( "__kmp_fork_call: Master T#%d pushing task_team %p / team %p, new task_team %p / team %p\n", __kmp_gtid_from_thread( master_th ), master_th->th.th_task_team, - parent_team, team->t.t_task_team, team ) ); - master_th->th.th_task_team = team->t.t_task_team; + parent_team, team->t.t_task_team[master_th->th.th_task_state], team ) ); + if (level) { + // Take a memo of master's task_state + KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack); + if (master_th->th.th_task_state_top >= master_th->th.th_task_state_stack_sz) { // increase size + kmp_uint8 *old_stack, *new_stack = (kmp_uint8 *) __kmp_allocate( 2*master_th->th.th_task_state_stack_sz ); + kmp_uint32 i; + for (i=0; i<master_th->th.th_task_state_stack_sz; ++i) { + new_stack[i] = master_th->th.th_task_state_memo_stack[i]; + } + old_stack = master_th->th.th_task_state_memo_stack; + master_th->th.th_task_state_memo_stack = new_stack; + master_th->th.th_task_state_stack_sz *= 2; + __kmp_free(old_stack); + } + // Store master's task_state on stack + master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top] = master_th->th.th_task_state; + master_th->th.th_task_state_top++; + master_th->th.th_task_state = 0; + } + master_th->th.th_task_team = team->t.t_task_team[master_th->th.th_task_state]; + #if !KMP_NESTED_HOT_TEAMS KMP_DEBUG_ASSERT((master_th->th.th_task_team == NULL) || (team == root->r.r_hot_team)); #endif @@ -1955,8 +1975,8 @@ __kmp_join_call(ident_t *loc, int gtid if ( __kmp_tasking_mode != tskm_immediate_exec ) { KA_TRACE( 20, ( "__kmp_join_call: T#%d, old team = %p old task_team = %p, th_task_team = %p\n", __kmp_gtid_from_thread( master_th ), team, - team->t.t_task_team, master_th->th.th_task_team) ); - KMP_DEBUG_ASSERT( master_th->th.th_task_team == team->t.t_task_team ); + team->t.t_task_team[master_th->th.th_task_state], master_th->th.th_task_team) ); + KMP_DEBUG_ASSERT( master_th->th.th_task_team == team->t.t_task_team[master_th->th.th_task_state] ); } #endif @@ -1991,6 +2011,10 @@ __kmp_join_call(ident_t *loc, int gtid // But there is barrier for external team (league). __kmp_internal_join( loc, gtid, team ); } + else { + master_th->th.th_task_state = 0; // AC: no tasking in teams (out of any parallel) + } + KMP_MB(); #if USE_ITT_BUILD @@ -2062,8 +2086,10 @@ __kmp_join_call(ident_t *loc, int gtid balign[ b ].bb.b_arrived = team->t.t_bar[ b ].b_arrived; KMP_DEBUG_ASSERT(balign[ b ].bb.wait_flag != KMP_BARRIER_PARENT_FLAG); } - // Synchronize thread's task state - other_threads[i]->th.th_task_state = master_th->th.th_task_state; + if ( __kmp_tasking_mode != tskm_immediate_exec ) { + // Synchronize thread's task state + other_threads[i]->th.th_task_state = master_th->th.th_task_state; + } } } return; @@ -2112,9 +2138,9 @@ __kmp_join_call(ident_t *loc, int gtid __kmp_free_team( root, team USE_NESTED_HOT_ARG(master_th) ); // this will free worker threads /* this race was fun to find. make sure the following is in the critical - * region otherwise assertions may fail occasiounally since the old team + * region otherwise assertions may fail occasionally since the old team * may be reallocated and the hierarchy appears inconsistent. it is - * actually safe to run and won't cause any bugs, but will cause thoose + * actually safe to run and won't cause any bugs, but will cause those * assertion failures. 
it's only one deref&assign so might as well put this * in the critical region */ master_th->th.th_team = parent_team; @@ -2131,13 +2157,15 @@ __kmp_join_call(ident_t *loc, int gtid } if ( __kmp_tasking_mode != tskm_immediate_exec ) { - // - // Copy the task team from the new child / old parent team - // to the thread. If non-NULL, copy the state flag also. - // - if ( ( master_th->th.th_task_team = parent_team->t.t_task_team ) != NULL ) { - master_th->th.th_task_state = master_th->th.th_task_team->tt.tt_state; + // Restore task state from memo stack + KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack); + if (master_th->th.th_task_state_top > 0) { + --master_th->th.th_task_state_top; // pop + master_th->th.th_task_state = master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top]; } + // Copy the first task team from the new child / old parent team to the thread and reset state flag. + master_th->th.th_task_team = parent_team->t.t_task_team[master_th->th.th_task_state]; + KA_TRACE( 20, ( "__kmp_join_call: Master T#%d restoring task_team %p / team %p\n", __kmp_gtid_from_thread( master_th ), master_th->th.th_task_team, parent_team ) ); @@ -2229,24 +2257,22 @@ __kmp_set_num_threads( int new_nth, int gtid ) if ( __kmp_tasking_mode != tskm_immediate_exec ) { - kmp_task_team_t *task_team = hot_team->t.t_task_team; - if ( ( task_team != NULL ) && TCR_SYNC_4(task_team->tt.tt_active) ) { - // - // Signal the worker threads (esp. the extra ones) to stop - // looking for tasks while spin waiting. The task teams - // are reference counted and will be deallocated by the - // last worker thread. - // - KMP_DEBUG_ASSERT( hot_team->t.t_nproc > 1 ); - TCW_SYNC_4( task_team->tt.tt_active, FALSE ); - KMP_MB(); - - KA_TRACE( 20, ( "__kmp_set_num_threads: setting task_team %p to NULL\n", - &hot_team->t.t_task_team ) ); - hot_team->t.t_task_team = NULL; - } - else { - KMP_DEBUG_ASSERT( task_team == NULL ); + int tt_idx; + for (tt_idx=0; tt_idx<2; ++tt_idx) { + kmp_task_team_t *task_team = hot_team->t.t_task_team[tt_idx]; + if ( ( task_team != NULL ) && TCR_SYNC_4(task_team->tt.tt_active) ) { + // Signal worker threads (esp. the extra ones) to stop looking for tasks while spin waiting. + // The task teams are reference counted and will be deallocated by the last worker thread. 
+ KMP_DEBUG_ASSERT( hot_team->t.t_nproc > 1 ); + TCW_SYNC_4( task_team->tt.tt_active, FALSE ); + KMP_MB(); + KA_TRACE( 20, ( "__kmp_set_num_threads: setting task_team %p to NULL\n", + &hot_team->t.t_task_team[tt_idx] ) ); + hot_team->t.t_task_team[tt_idx] = NULL; + } + else { + KMP_DEBUG_ASSERT( task_team == NULL ); + } } } @@ -3617,7 +3643,7 @@ void __kmp_task_info() { #endif // KMP_DEBUG /* TODO optimize with one big memclr, take out what isn't needed, - * split responsility to workers as much as possible, and delay + * split responsibility to workers as much as possible, and delay * initialization of features as much as possible */ static void __kmp_initialize_info( kmp_info_t *this_thr, kmp_team_t *team, int tid, int gtid ) @@ -3723,6 +3749,12 @@ __kmp_initialize_info( kmp_info_t *this_thr, kmp_team_t *team, int tid, int gtid this_thr->th.th_next_pool = NULL; + if (!this_thr->th.th_task_state_memo_stack) { + this_thr->th.th_task_state_memo_stack = (kmp_uint8 *) __kmp_allocate( 4*sizeof(kmp_uint8) ); + this_thr->th.th_task_state_top = 0; + this_thr->th.th_task_state_stack_sz = 4; + } + KMP_DEBUG_ASSERT( !this_thr->th.th_spin_here ); KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 ); @@ -3778,6 +3810,9 @@ __kmp_allocate_thread( kmp_root_t *root, kmp_team_t *team, int new_tid ) TCW_4(__kmp_nth, __kmp_nth + 1); + new_thr->th.th_task_state_top = 0; + new_thr->th.th_task_state_stack_sz = 4; + #ifdef KMP_ADJUST_BLOCKTIME /* Adjust blocktime back to zero if necessar y */ /* Middle initialization might not have occurred yet */ @@ -4367,6 +4402,7 @@ __kmp_allocate_team( kmp_root_t *root, int new_nproc, int max_nproc, char *ptr; size_t size; int use_hot_team = ! root->r.r_active; + int level = 0; KA_TRACE( 20, ("__kmp_allocate_team: called\n")); KMP_DEBUG_ASSERT( new_nproc >=1 && argc >=0 ); @@ -4374,7 +4410,6 @@ __kmp_allocate_team( kmp_root_t *root, int new_nproc, int max_nproc, KMP_MB(); #if KMP_NESTED_HOT_TEAMS - int level; kmp_hot_team_ptr_t *hot_teams; if( master ) { team = master->th.th_team; @@ -4405,8 +4440,8 @@ __kmp_allocate_team( kmp_root_t *root, int new_nproc, int max_nproc, #endif #if KMP_DEBUG if ( __kmp_tasking_mode != tskm_immediate_exec ) { - KA_TRACE( 20, ("__kmp_allocate_team: hot team task_team = %p before reinit\n", - team->t.t_task_team )); + KA_TRACE( 20, ("__kmp_allocate_team: hot team task_team[0] = %p task_team[1] = %p before reinit\n", + team->t.t_task_team[0], team->t.t_task_team[1] )); } #endif @@ -4449,30 +4484,35 @@ __kmp_allocate_team( kmp_root_t *root, int new_nproc, int max_nproc, } # endif /* KMP_AFFINITY_SUPPORTED */ #endif /* OMP_40_ENABLED */ + + if (level) { + for(f = 0; f < new_nproc; ++f) { + team->t.t_threads[f]->th.th_task_state = 0; + } + } } else if( team->t.t_nproc > new_nproc ) { KA_TRACE( 20, ("__kmp_allocate_team: decreasing hot team thread count to %d\n", new_nproc )); team->t.t_size_changed = 1; if ( __kmp_tasking_mode != tskm_immediate_exec ) { - kmp_task_team_t *task_team = team->t.t_task_team; - if ( ( task_team != NULL ) && TCR_SYNC_4(task_team->tt.tt_active) ) { - // - // Signal the worker threads (esp. the extra ones) to stop - // looking for tasks while spin waiting. The task teams - // are reference counted and will be deallocated by the - // last worker thread. 
- // - KMP_DEBUG_ASSERT( team->t.t_nproc > 1 ); - TCW_SYNC_4( task_team->tt.tt_active, FALSE ); - KMP_MB(); - - KA_TRACE( 20, ( "__kmp_allocate_team: setting task_team %p to NULL\n", - &team->t.t_task_team ) ); - team->t.t_task_team = NULL; - } - else { - KMP_DEBUG_ASSERT( task_team == NULL ); + // Signal the worker threads (esp. extra ones) to stop looking for tasks while spin waiting. + // The task teams are reference counted and will be deallocated by the last worker thread. + int tt_idx; + for (tt_idx=0; tt_idx<2; ++tt_idx) { + // We don't know which of the two task teams workers are waiting on, so deactivate both. + kmp_task_team_t *task_team = team->t.t_task_team[tt_idx]; + if ( ( task_team != NULL ) && TCR_SYNC_4(task_team->tt.tt_active) ) { + KMP_DEBUG_ASSERT( team->t.t_nproc > 1 ); + TCW_SYNC_4( task_team->tt.tt_active, FALSE ); + KMP_MB(); + KA_TRACE(20, ("__kmp_allocate_team: setting task_team %p to NULL\n", + &team->t.t_task_team[tt_idx])); + team->t.t_task_team[tt_idx] = NULL; + } + else { + KMP_DEBUG_ASSERT( task_team == NULL ); + } } } #if KMP_NESTED_HOT_TEAMS @@ -4497,20 +4537,31 @@ __kmp_allocate_team( kmp_root_t *root, int new_nproc, int max_nproc, __kmp_reinitialize_team( team, new_icvs, root->r.r_uber_thread->th.th_ident ); if ( __kmp_tasking_mode != tskm_immediate_exec ) { - kmp_task_team_t *task_team = team->t.t_task_team; - if ( task_team != NULL ) { - KMP_DEBUG_ASSERT( ! TCR_4(task_team->tt.tt_found_tasks) ); - task_team->tt.tt_nproc = new_nproc; - task_team->tt.tt_unfinished_threads = new_nproc; - task_team->tt.tt_ref_ct = new_nproc - 1; + // Init both task teams + int tt_idx; + for (tt_idx=0; tt_idx<2; ++tt_idx) { + kmp_task_team_t *task_team = team->t.t_task_team[tt_idx]; + if ( task_team != NULL ) { + KMP_DEBUG_ASSERT( ! TCR_4(task_team->tt.tt_found_tasks) ); + task_team->tt.tt_nproc = new_nproc; + task_team->tt.tt_unfinished_threads = new_nproc; + task_team->tt.tt_ref_ct = new_nproc - 1; + } } } /* update the remaining threads */ - for(f = 0; f < new_nproc; ++f) { - team->t.t_threads[f]->th.th_team_nproc = new_nproc; + if (level) { + for(f = 0; f < new_nproc; ++f) { + team->t.t_threads[f]->th.th_team_nproc = new_nproc; + team->t.t_threads[f]->th.th_task_state = 0; + } + } + else { + for(f = 0; f < new_nproc; ++f) { + team->t.t_threads[f]->th.th_team_nproc = new_nproc; + } } - // restore the current task state of the master thread: should be the implicit task KF_TRACE( 10, ("__kmp_allocate_team: T#%d, this_thread=%p team=%p\n", 0, team->t.t_threads[0], team ) ); @@ -4621,19 +4672,33 @@ __kmp_allocate_team( kmp_root_t *root, int new_nproc, int max_nproc, __kmp_initialize_team( team, new_nproc, new_icvs, root->r.r_uber_thread->th.th_ident ); if ( __kmp_tasking_mode != tskm_immediate_exec ) { - kmp_task_team_t *task_team = team->t.t_task_team; - if ( task_team != NULL ) { - KMP_DEBUG_ASSERT( ! TCR_4(task_team->tt.tt_found_tasks) ); - task_team->tt.tt_nproc = new_nproc; - task_team->tt.tt_unfinished_threads = new_nproc; - task_team->tt.tt_ref_ct = new_nproc - 1; + int tt_idx; + for (tt_idx=0; tt_idx<2; ++tt_idx) { + kmp_task_team_t *task_team = team->t.t_task_team[tt_idx]; + if ( task_team != NULL ) { + KMP_DEBUG_ASSERT( ! 
TCR_4(task_team->tt.tt_found_tasks) ); + task_team->tt.tt_nproc = new_nproc; + task_team->tt.tt_unfinished_threads = new_nproc; + task_team->tt.tt_ref_ct = new_nproc - 1; + } } } /* reinitialize the old threads */ - for( f = 0 ; f < team->t.t_nproc ; f++ ) - __kmp_initialize_info( team->t.t_threads[ f ], team, f, - __kmp_gtid_from_tid( f, team ) ); + if (level) { + for( f = 0 ; f < team->t.t_nproc ; f++ ) { + __kmp_initialize_info( team->t.t_threads[ f ], team, f, + __kmp_gtid_from_tid( f, team ) ); + } + } + else { + int old_state = team->t.t_threads[0]->th.th_task_state; + for (f=0; f < team->t.t_nproc; ++f) { + __kmp_initialize_info( team->t.t_threads[ f ], team, f, __kmp_gtid_from_tid( f, team ) ); + team->t.t_threads[f]->th.th_task_state = old_state; + } + } + #ifdef KMP_DEBUG for ( f = 0; f < team->t.t_nproc; ++ f ) { KMP_DEBUG_ASSERT( team->t.t_threads[f] && @@ -4666,7 +4731,7 @@ __kmp_allocate_team( kmp_root_t *root, int new_nproc, int max_nproc, // Sync task (TODO: and barrier?) state for nested hot teams, not needed for outermost hot team. for( f = 1; f < new_nproc; ++f ) { kmp_info_t *thr = team->t.t_threads[f]; - thr->th.th_task_state = master->th.th_task_state; + thr->th.th_task_state = 0; int b; kmp_balign_t * balign = thr->th.th_bar; for( b = 0; b < bs_last_barrier; ++ b ) { @@ -4689,8 +4754,8 @@ __kmp_allocate_team( kmp_root_t *root, int new_nproc, int max_nproc, #if KMP_DEBUG if ( __kmp_tasking_mode != tskm_immediate_exec ) { - KA_TRACE( 20, ("__kmp_allocate_team: hot team task_team = %p after reinit\n", - team->t.t_task_team )); + KA_TRACE( 20, ("__kmp_allocate_team: hot team task_team[0] = %p task_team[1] = %p after reinit\n", + team->t.t_task_team[0], team->t.t_task_team[1] )); } #endif @@ -4711,9 +4776,10 @@ __kmp_allocate_team( kmp_root_t *root, int new_nproc, int max_nproc, /* setup the team for fresh use */ __kmp_initialize_team( team, new_nproc, new_icvs, NULL ); - KA_TRACE( 20, ( "__kmp_allocate_team: setting task_team %p to NULL\n", - &team->t.t_task_team ) ); - team->t.t_task_team = NULL; + KA_TRACE( 20, ( "__kmp_allocate_team: setting task_team[0] %p and task_team[1] %p to NULL\n", + &team->t.t_task_team[0], &team->t.t_task_team[1]) ); + team->t.t_task_team[0] = NULL; + team->t.t_task_team[1] = NULL; /* reallocate space for arguments if necessary */ __kmp_alloc_argv_entries( argc, team, TRUE ); @@ -4759,9 +4825,10 @@ __kmp_allocate_team( kmp_root_t *root, int new_nproc, int max_nproc, KA_TRACE( 20, ( "__kmp_allocate_team: making a new team\n" ) ); __kmp_initialize_team( team, new_nproc, new_icvs, NULL ); - KA_TRACE( 20, ( "__kmp_allocate_team: setting task_team %p to NULL\n", - &team->t.t_task_team ) ); - team->t.t_task_team = NULL; // to be removed, as __kmp_allocate zeroes memory, no need to duplicate + KA_TRACE( 20, ( "__kmp_allocate_team: setting task_team[0] %p and task_team[1] %p to NULL\n", + &team->t.t_task_team[0], &team->t.t_task_team[1] ) ); + team->t.t_task_team[0] = NULL; // to be removed, as __kmp_allocate zeroes memory, no need to duplicate + team->t.t_task_team[1] = NULL; // to be removed, as __kmp_allocate zeroes memory, no need to duplicate if ( __kmp_storage_map ) { __kmp_print_team_storage_map( "team", team, team->t.t_id, new_nproc ); @@ -4838,22 +4905,21 @@ __kmp_free_team( kmp_root_t *root, kmp_team_t *team USE_NESTED_HOT_ARG(kmp_info /* if we are non-hot team, release our threads */ if( ! 
use_hot_team ) { - if ( __kmp_tasking_mode != tskm_immediate_exec ) { - kmp_task_team_t *task_team = team->t.t_task_team; - if ( task_team != NULL ) { - // - // Signal the worker threads to stop looking for tasks while - // spin waiting. The task teams are reference counted and will - // be deallocated by the last worker thread via the thread's - // pointer to the task team. - // - KA_TRACE( 20, ( "__kmp_free_team: deactivating task_team %p\n", - task_team ) ); - KMP_DEBUG_ASSERT( team->t.t_nproc > 1 ); - TCW_SYNC_4( task_team->tt.tt_active, FALSE ); - KMP_MB(); - team->t.t_task_team = NULL; + int tt_idx; + for (tt_idx=0; tt_idx<2; ++tt_idx) { + // We don't know which of the two task teams workers are waiting on, so deactivate both. + kmp_task_team_t *task_team = team->t.t_task_team[tt_idx]; + if ( task_team != NULL ) { + // Signal the worker threads to stop looking for tasks while spin waiting. The task + // teams are reference counted and will be deallocated by the last worker thread via the + // thread's pointer to the task team. + KA_TRACE( 20, ( "__kmp_free_team: deactivating task_team %p\n", task_team ) ); + KMP_DEBUG_ASSERT( team->t.t_nproc > 1 ); + TCW_SYNC_4( task_team->tt.tt_active, FALSE ); + KMP_MB(); + team->t.t_task_team[tt_idx] = NULL; + } } } @@ -5263,6 +5329,11 @@ __kmp_reap_thread( thread->th.th_pri_common = NULL; }; // if + if (thread->th.th_task_state_memo_stack != NULL) { + __kmp_free(thread->th.th_task_state_memo_stack); + thread->th.th_task_state_memo_stack = NULL; + } + #if KMP_USE_BGET if ( thread->th.th_local.bget_data != NULL ) { __kmp_finalize_bget( thread ); diff --git a/openmp/runtime/src/kmp_tasking.c b/openmp/runtime/src/kmp_tasking.c index ad121ad0c1e..3d5a50467c0 100644 --- a/openmp/runtime/src/kmp_tasking.c +++ b/openmp/runtime/src/kmp_tasking.c @@ -284,7 +284,7 @@ __kmp_push_task(kmp_int32 gtid, kmp_task_t * task ) // Now that serialized tasks have returned, we can assume that we are not in immediate exec mode KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec ); - if ( ! KMP_TASKING_ENABLED( task_team, thread->th.th_task_state ) ) { + if ( ! KMP_TASKING_ENABLED(task_team) ) { __kmp_enable_tasking( task_team, thread ); } KMP_DEBUG_ASSERT( TCR_4(task_team -> tt.tt_found_tasks) == TRUE ); @@ -1180,7 +1180,7 @@ __kmpc_omp_taskyield( ident_t *loc_ref, kmp_int32 gtid, int end_part ) if ( ! taskdata->td_flags.team_serial ) { kmp_task_team_t * task_team = thread->th.th_task_team; if (task_team != NULL) { - if (KMP_TASKING_ENABLED(task_team, thread->th.th_task_state)) { + if (KMP_TASKING_ENABLED(task_team)) { __kmp_execute_tasks_32( thread, gtid, NULL, FALSE, &thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), __kmp_task_stealing_constraint ); } @@ -2101,7 +2101,6 @@ __kmp_allocate_task_team( kmp_info_t *thread, kmp_team_t *team ) TCW_4(task_team -> tt.tt_found_tasks, FALSE); task_team -> tt.tt_nproc = nthreads = team->t.t_nproc; - task_team -> tt.tt_state = 0; TCW_4( task_team -> tt.tt_unfinished_threads, nthreads ); TCW_4( task_team -> tt.tt_active, TRUE ); TCW_4( task_team -> tt.tt_ref_ct, nthreads - 1); @@ -2270,13 +2269,12 @@ __kmp_wait_to_unref_task_teams(void) // __kmp_task_team_setup: Create a task_team for the current team, but use // an already created, unused one if it already exists. // This may be called by any thread, but only for teams with # threads >1. 
- void -__kmp_task_team_setup( kmp_info_t *this_thr, kmp_team_t *team ) +__kmp_task_team_setup( kmp_info_t *this_thr, kmp_team_t *team, int both ) { KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec ); - if ( ( team->t.t_task_team == NULL ) && ( team->t.t_nproc > 1 ) ) { + if ( ( team->t.t_task_team[this_thr->th.th_task_state] == NULL ) && ( team->t.t_nproc > 1 ) ) { // Allocate a new task team, which will be propagated to // all of the worker threads after the barrier. As they // spin in the barrier release phase, then will continue @@ -2284,22 +2282,24 @@ __kmp_task_team_setup( kmp_info_t *this_thr, kmp_team_t *team ) // the signal to stop checking for tasks (they can't safely // reference the kmp_team_t struct, which could be reallocated // by the master thread). - team->t.t_task_team = __kmp_allocate_task_team( this_thr, team ); - KA_TRACE( 20, ( "__kmp_task_team_setup: Master T#%d created new " - "task_team %p for team %d\n", - __kmp_gtid_from_thread( this_thr ), team->t.t_task_team, + team->t.t_task_team[this_thr->th.th_task_state] = __kmp_allocate_task_team( this_thr, team ); + KA_TRACE(20, ("__kmp_task_team_setup: Master T#%d created new task_team %p for team %d\n", + __kmp_gtid_from_thread(this_thr), team->t.t_task_team[this_thr->th.th_task_state], ((team != NULL) ? team->t.t_id : -1)) ); } - else { + //else // All threads have reported in, and no tasks were spawned // for this release->gather region. Leave the old task // team struct in place for the upcoming region. No task // teams are formed for serialized teams. + if (both) { + int other_team = 1 - this_thr->th.th_task_state; + if ( ( team->t.t_task_team[other_team] == NULL ) && ( team->t.t_nproc > 1 ) ) { // setup other team as well + team->t.t_task_team[other_team] = __kmp_allocate_task_team( this_thr, team ); + KA_TRACE( 20, ( "__kmp_task_team_setup: Master T#%d created new task_team %p for team %d\n", + __kmp_gtid_from_thread( this_thr ), team->t.t_task_team[other_team], + ((team != NULL) ? team->t.t_id : -1)) ); } - if ( team->t.t_task_team != NULL ) { - // Toggle the state flag so that we can tell which side of - // the barrier we are on. - team->t.t_task_team->tt.tt_state = 1 - this_thr->th.th_task_state; } } @@ -2314,35 +2314,20 @@ __kmp_task_team_sync( kmp_info_t *this_thr, kmp_team_t *team ) { KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec ); - // On the rare chance that this thread never saw that the task - // team was no longer active, then unref/deallocate it now. + // In case this thread never saw that the task team was no longer active, unref/deallocate it now. if ( this_thr->th.th_task_team != NULL ) { if ( ! TCR_SYNC_4( this_thr->th.th_task_team->tt.tt_active ) ) { KMP_DEBUG_ASSERT( ! KMP_MASTER_TID( __kmp_tid_from_gtid( __kmp_gtid_from_thread( this_thr ) ) ) ); __kmp_unref_task_team( this_thr->th.th_task_team, this_thr ); - } else { - // - // We are re-using a task team that was never enabled. - // - KMP_DEBUG_ASSERT( this_thr->th.th_task_team == team->t.t_task_team ); + } else { // We are re-using a task team that was never enabled. + KMP_DEBUG_ASSERT(this_thr->th.th_task_team == team->t.t_task_team[this_thr->th.th_task_state]); } } - // - // It is now safe to propagate the task team pointer from the - // team struct to the current thread. - // - TCW_PTR(this_thr->th.th_task_team, team->t.t_task_team); - if ( this_thr->th.th_task_team != NULL ) { - // - // Toggle the th_task_state field, instead of reading it from - // the task team. 
Reading the tt_state field at this point - // causes a 30% regression on EPCC parallel - toggling it - // is much cheaper. - // + // Toggle the th_task_state field, to switch which task_team this thread refers to this_thr->th.th_task_state = 1 - this_thr->th.th_task_state; - KMP_DEBUG_ASSERT( this_thr->th.th_task_state == TCR_4(team->t.t_task_team->tt.tt_state) ); - } + // It is now safe to propagate the task team pointer from the team struct to the current thread. + TCW_PTR(this_thr->th.th_task_team, team->t.t_task_team[this_thr->th.th_task_state]); KA_TRACE( 20, ( "__kmp_task_team_sync: Thread T#%d task team assigned pointer (%p) from Team #%d task team\n", __kmp_gtid_from_thread( this_thr ), &this_thr->th.th_task_team, this_thr->th.th_task_team, ((team != NULL) ? (team->t.t_id) : -1) ) ); @@ -2350,41 +2335,31 @@ __kmp_task_team_sync( kmp_info_t *this_thr, kmp_team_t *team ) //------------------------------------------------------------------------------ -// __kmp_task_team_wait: Master thread waits for outstanding tasks after -// the barrier gather phase. Only called by master thread if #threads -// in team > 1 ! - +// __kmp_task_team_wait: Master thread waits for outstanding tasks after the +// barrier gather phase. Only called by master thread if #threads in team > 1 ! void -__kmp_task_team_wait( kmp_info_t *this_thr, - kmp_team_t *team +__kmp_task_team_wait( kmp_info_t *this_thr, kmp_team_t *team USE_ITT_BUILD_ARG(void * itt_sync_obj) ) { - kmp_task_team_t *task_team = team->t.t_task_team; + kmp_task_team_t *task_team = team->t.t_task_team[this_thr->th.th_task_state]; KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec ); KMP_DEBUG_ASSERT( task_team == this_thr->th.th_task_team ); - if ( ( task_team != NULL ) && KMP_TASKING_ENABLED( task_team, this_thr->th.th_task_state ) ) { + if ( ( task_team != NULL ) && KMP_TASKING_ENABLED(task_team) ) { KA_TRACE( 20, ( "__kmp_task_team_wait: Master T#%d waiting for all tasks: task_team = %p\n", __kmp_gtid_from_thread( this_thr ), task_team ) ); - // - // All worker threads might have dropped through to the - // release phase, but could still be executing tasks. - // Wait here for all tasks to complete. To avoid memory - // contention, only the master thread checks for the - // termination condition. - // + // All worker threads might have dropped through to the release phase, but could still + // be executing tasks. Wait here for all tasks to complete. To avoid memory contention, + // only the master thread checks for the termination condition. kmp_flag_32 flag(&task_team->tt.tt_unfinished_threads, 0U); flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(itt_sync_obj)); - // - // Kill the old task team, so that the worker threads will - // stop referencing it while spinning. They will - // deallocate it when the reference count reaches zero. + // Kill the old task team, so that the worker threads will stop referencing it while spinning. + // They will deallocate it when the reference count reaches zero. // The master thread is not included in the ref count. 
- // KA_TRACE( 20, ( "__kmp_task_team_wait: Master T#%d deactivating task_team %p\n", __kmp_gtid_from_thread( this_thr ), task_team ) ); KMP_DEBUG_ASSERT( task_team->tt.tt_nproc > 1 ); @@ -2392,7 +2367,7 @@ __kmp_task_team_wait( kmp_info_t *this_thr, KMP_MB(); TCW_PTR(this_thr->th.th_task_team, NULL); - team->t.t_task_team = NULL; + team->t.t_task_team[this_thr->th.th_task_state] = NULL; } } @@ -2408,7 +2383,7 @@ __kmp_task_team_wait( kmp_info_t *this_thr, void __kmp_tasking_barrier( kmp_team_t *team, kmp_info_t *thread, int gtid ) { - volatile kmp_uint32 *spin = &team->t.t_task_team->tt.tt_unfinished_threads; + volatile kmp_uint32 *spin = &team->t.t_task_team[thread->th.th_task_state]->tt.tt_unfinished_threads; int flag = FALSE; KMP_DEBUG_ASSERT( __kmp_tasking_mode == tskm_extra_barrier ); diff --git a/openmp/runtime/src/kmp_wait_release.h b/openmp/runtime/src/kmp_wait_release.h index 84cb8bed70e..d517af2262c 100644 --- a/openmp/runtime/src/kmp_wait_release.h +++ b/openmp/runtime/src/kmp_wait_release.h @@ -143,7 +143,7 @@ static inline void __kmp_wait_template(kmp_info_t *this_thr, C *flag, int final_ if (!TCR_SYNC_4(task_team->tt.tt_active)) { KMP_DEBUG_ASSERT(!KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid)); __kmp_unref_task_team(task_team, this_thr); - } else if (KMP_TASKING_ENABLED(task_team, this_thr->th.th_task_state)) { + } else if (KMP_TASKING_ENABLED(task_team)) { flag->execute_tasks(this_thr, th_gtid, final_spin, &tasks_completed USE_ITT_BUILD_ARG(itt_sync_obj), 0); } |
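The patch also gives each thread a small memo stack (th_task_state_memo_stack / th_task_state_top / th_task_state_stack_sz) that saves th_task_state when a nested parallel region is forked and restores it at the join, doubling the backing array when it fills (initial size 4, as in __kmp_initialize_info). Below is a hedged sketch of that push/pop-with-growth behavior, with simplified types and plain malloc-family allocation standing in for the runtime's __kmp_allocate/__kmp_free.

```c
/* Sketch of the th_task_state memo stack added by this patch: push on fork of a
 * nested region, pop on join, double the backing array when full. The kmp-style
 * field names follow the diff; allocation and error handling are simplified. */
#include <stdlib.h>
#include <string.h>

typedef struct {
    unsigned char *memo_stack;   /* th_task_state_memo_stack */
    unsigned int   top;          /* th_task_state_top */
    unsigned int   stack_sz;     /* th_task_state_stack_sz */
    unsigned char  task_state;   /* th_task_state */
} thread_state_t;

static void state_stack_init(thread_state_t *th) {
    th->stack_sz = 4;            /* initial size used by the patch */
    th->memo_stack = calloc(th->stack_sz, sizeof(unsigned char));
    th->top = 0;
    th->task_state = 0;
}

/* On fork of a nested parallel region: save the current state on the stack,
 * then start the inner region with state 0. */
static void state_push(thread_state_t *th) {
    if (th->top >= th->stack_sz) {                      /* grow by doubling */
        unsigned char *bigger = calloc(2 * th->stack_sz, sizeof(unsigned char));
        memcpy(bigger, th->memo_stack, th->stack_sz);
        free(th->memo_stack);
        th->memo_stack = bigger;
        th->stack_sz *= 2;
    }
    th->memo_stack[th->top++] = th->task_state;
    th->task_state = 0;
}

/* On join: restore the state saved for the enclosing region. */
static void state_pop(thread_state_t *th) {
    if (th->top > 0)
        th->task_state = th->memo_stack[--th->top];
}

int main(void) {
    thread_state_t th;
    state_stack_init(&th);
    th.task_state = 1;
    state_push(&th);    /* enter nested region: 1 is saved, current state becomes 0 */
    state_pop(&th);     /* leave nested region: state restored to 1 */
    free(th.memo_stack);
    return 0;
}
```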

