Diffstat (limited to 'openmp/runtime/src/kmp_runtime.c')
-rw-r--r--  openmp/runtime/src/kmp_runtime.c  181
1 file changed, 55 insertions(+), 126 deletions(-)
diff --git a/openmp/runtime/src/kmp_runtime.c b/openmp/runtime/src/kmp_runtime.c
index b5b09284975..ecab297c6f8 100644
--- a/openmp/runtime/src/kmp_runtime.c
+++ b/openmp/runtime/src/kmp_runtime.c
@@ -2104,23 +2104,31 @@ __kmp_fork_call(
// Take a memo of master's task_state
KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
if (master_th->th.th_task_state_top >= master_th->th.th_task_state_stack_sz) { // increase size
- kmp_uint8 *old_stack, *new_stack = (kmp_uint8 *) __kmp_allocate( 2*master_th->th.th_task_state_stack_sz );
+ kmp_uint32 new_size = 2*master_th->th.th_task_state_stack_sz;
+ kmp_uint8 *old_stack, *new_stack;
kmp_uint32 i;
+ new_stack = (kmp_uint8 *)__kmp_allocate(new_size);
for (i=0; i<master_th->th.th_task_state_stack_sz; ++i) {
new_stack[i] = master_th->th.th_task_state_memo_stack[i];
}
+ for (i=master_th->th.th_task_state_stack_sz; i<new_size; ++i) { // zero-init rest of stack
+ new_stack[i] = 0;
+ }
old_stack = master_th->th.th_task_state_memo_stack;
master_th->th.th_task_state_memo_stack = new_stack;
- master_th->th.th_task_state_stack_sz *= 2;
+ master_th->th.th_task_state_stack_sz = new_size;
__kmp_free(old_stack);
}
// Store master's task_state on stack
master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top] = master_th->th.th_task_state;
master_th->th.th_task_state_top++;
- master_th->th.th_task_state = 0;
+ if (team == master_th->th.th_hot_teams[level].hot_team) { // Restore master's nested state if nested hot team
+ master_th->th.th_task_state = master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top];
+ }
+ else {
+ master_th->th.th_task_state = 0;
+ }
}
- master_th->th.th_task_team = team->t.t_task_team[master_th->th.th_task_state];
-
#if !KMP_NESTED_HOT_TEAMS
KMP_DEBUG_ASSERT((master_th->th.th_task_team == NULL) || (team == root->r.r_hot_team));
#endif
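
The hunk above grows the master's task_state memo stack on demand and pushes the current state before entering the parallel region. Below is a minimal standalone sketch of that push path, using simplified stand-in types rather than the real kmp_info_t fields; it is an illustration of the mechanism, not the runtime's code.

#include <stdlib.h>
#include <string.h>

/* Simplified stand-ins for the th_task_state_* fields used above. */
typedef struct {
    unsigned char *memo_stack;  /* one saved task_state per nesting level */
    unsigned int   top;         /* next free slot */
    unsigned int   stack_sz;    /* current capacity */
    unsigned char  task_state;  /* selects which of the two task teams is current */
} task_state_stack_t;

static void push_task_state(task_state_stack_t *s, int reusing_nested_hot_team)
{
    if (s->top >= s->stack_sz) {                   /* full: double the capacity */
        unsigned int new_size = 2 * s->stack_sz;
        unsigned char *new_stack = (unsigned char *)malloc(new_size);
        memcpy(new_stack, s->memo_stack, s->stack_sz);
        memset(new_stack + s->stack_sz, 0, new_size - s->stack_sz); /* zero-init the new tail */
        free(s->memo_stack);
        s->memo_stack = new_stack;
        s->stack_sz = new_size;
    }
    s->memo_stack[s->top++] = s->task_state;       /* remember state at this level */
    if (reusing_nested_hot_team)
        s->task_state = s->memo_stack[s->top];     /* pick up the state saved at the last join */
    else
        s->task_state = 0;                         /* a fresh team starts at state 0 */
}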
@@ -2410,12 +2418,7 @@ __kmp_join_call(ident_t *loc, int gtid
int old_num = master_th->th.th_team_nproc;
int new_num = master_th->th.th_teams_size.nth;
kmp_info_t **other_threads = team->t.t_threads;
- kmp_task_team_t * task_team = master_th->th.th_task_team;
team->t.t_nproc = new_num;
- if ( task_team ) { // task team might have lesser value of counters
- task_team->tt.tt_ref_ct = new_num - 1;
- task_team->tt.tt_unfinished_threads = new_num;
- }
for ( i = 0; i < old_num; ++i ) {
other_threads[i]->th.th_team_nproc = new_num;
}
@@ -2509,18 +2512,18 @@ __kmp_join_call(ident_t *loc, int gtid
}
if ( __kmp_tasking_mode != tskm_immediate_exec ) {
- // Restore task state from memo stack
- KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
- if (master_th->th.th_task_state_top > 0) {
+ if (master_th->th.th_task_state_top > 0) { // Restore task state from memo stack
+ KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
+ // Remember master's state if we re-use this nested hot team
+ master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top] = master_th->th.th_task_state;
--master_th->th.th_task_state_top; // pop
+ // Now restore state at this level
master_th->th.th_task_state = master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top];
}
- // Copy the first task team from the new child / old parent team to the thread and reset state flag.
+ // Copy the task team from the parent team to the master thread
master_th->th.th_task_team = parent_team->t.t_task_team[master_th->th.th_task_state];
-
KA_TRACE( 20, ( "__kmp_join_call: Master T#%d restoring task_team %p / team %p\n",
- __kmp_gtid_from_thread( master_th ), master_th->th.th_task_team,
- parent_team ) );
+ __kmp_gtid_from_thread( master_th ), master_th->th.th_task_team, parent_team ) );
}
// TODO: GEH - cannot do this assertion because root thread not set up as executing
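
The join-side hunk is the mirror image: before popping, the master's current state is written back into the slot just above the saved entry so that a re-used nested hot team can recover it in __kmp_fork_call. A companion sketch using the same stand-in struct as above:

static void pop_task_state(task_state_stack_t *s)
{
    if (s->top > 0) {
        s->memo_stack[s->top] = s->task_state;  /* save so a re-used nested hot team can restore it */
        --s->top;                               /* pop */
        s->task_state = s->memo_stack[s->top];  /* back to the parent level's state */
    }
}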
@@ -2615,31 +2618,13 @@ __kmp_set_num_threads( int new_nth, int gtid )
__kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
- if ( __kmp_tasking_mode != tskm_immediate_exec ) {
- int tt_idx;
- for (tt_idx=0; tt_idx<2; ++tt_idx) {
- kmp_task_team_t *task_team = hot_team->t.t_task_team[tt_idx];
- if ( ( task_team != NULL ) && TCR_SYNC_4(task_team->tt.tt_active) ) {
- // Signal worker threads (esp. the extra ones) to stop looking for tasks while spin waiting.
- // The task teams are reference counted and will be deallocated by the last worker thread.
- KMP_DEBUG_ASSERT( hot_team->t.t_nproc > 1 );
- TCW_SYNC_4( task_team->tt.tt_active, FALSE );
- KMP_MB();
- KA_TRACE( 20, ( "__kmp_set_num_threads: setting task_team %p to NULL\n",
- &hot_team->t.t_task_team[tt_idx] ) );
- hot_team->t.t_task_team[tt_idx] = NULL;
- }
- else {
- KMP_DEBUG_ASSERT( task_team == NULL );
- }
- }
- }
-
- //
// Release the extra threads we don't need any more.
- //
for ( f = new_nth; f < hot_team->t.t_nproc; f++ ) {
KMP_DEBUG_ASSERT( hot_team->t.t_threads[f] != NULL );
+ if ( __kmp_tasking_mode != tskm_immediate_exec) {
+ // When decreasing team size, threads no longer in the team should unref task team.
+ hot_team->t.t_threads[f]->th.th_task_team = NULL;
+ }
__kmp_free_thread( hot_team->t.t_threads[f] );
hot_team->t.t_threads[f] = NULL;
}
@@ -4081,7 +4066,6 @@ __kmp_initialize_info( kmp_info_t *this_thr, kmp_team_t *team, int tid, int gtid
TCW_PTR(this_thr->th.th_sleep_loc, NULL);
KMP_DEBUG_ASSERT( team->t.t_implicit_task_taskdata );
- this_thr->th.th_task_state = 0;
KF_TRACE( 10, ( "__kmp_initialize_info1: T#%d:%d this_thread=%p curtask=%p\n",
tid, gtid, this_thr, this_thr->th.th_current_task ) );
@@ -4151,9 +4135,12 @@ __kmp_initialize_info( kmp_info_t *this_thr, kmp_team_t *team, int tid, int gtid
this_thr->th.th_next_pool = NULL;
if (!this_thr->th.th_task_state_memo_stack) {
+ size_t i;
this_thr->th.th_task_state_memo_stack = (kmp_uint8 *) __kmp_allocate( 4*sizeof(kmp_uint8) );
this_thr->th.th_task_state_top = 0;
this_thr->th.th_task_state_stack_sz = 4;
+ for (i=0; i<this_thr->th.th_task_state_stack_sz; ++i) // zero init the stack
+ this_thr->th.th_task_state_memo_stack[i] = 0;
}
KMP_DEBUG_ASSERT( !this_thr->th.th_spin_here );
@@ -4211,6 +4198,7 @@ __kmp_allocate_thread( kmp_root_t *root, kmp_team_t *team, int new_tid )
TCW_4(__kmp_nth, __kmp_nth + 1);
+ new_thr->th.th_task_state = 0;
new_thr->th.th_task_state_top = 0;
new_thr->th.th_task_state_stack_sz = 4;
@@ -4896,26 +4884,6 @@ __kmp_allocate_team( kmp_root_t *root, int new_nproc, int max_nproc,
KA_TRACE( 20, ("__kmp_allocate_team: decreasing hot team thread count to %d\n", new_nproc ));
team->t.t_size_changed = 1;
- if ( __kmp_tasking_mode != tskm_immediate_exec ) {
- // Signal the worker threads (esp. extra ones) to stop looking for tasks while spin waiting.
- // The task teams are reference counted and will be deallocated by the last worker thread.
- int tt_idx;
- for (tt_idx=0; tt_idx<2; ++tt_idx) {
- // We don't know which of the two task teams workers are waiting on, so deactivate both.
- kmp_task_team_t *task_team = team->t.t_task_team[tt_idx];
- if ( ( task_team != NULL ) && TCR_SYNC_4(task_team->tt.tt_active) ) {
- KMP_DEBUG_ASSERT( team->t.t_nproc > 1 );
- TCW_SYNC_4( task_team->tt.tt_active, FALSE );
- KMP_MB();
- KA_TRACE(20, ("__kmp_allocate_team: setting task_team %p to NULL\n",
- &team->t.t_task_team[tt_idx]));
- team->t.t_task_team[tt_idx] = NULL;
- }
- else {
- KMP_DEBUG_ASSERT( task_team == NULL );
- }
- }
- }
#if KMP_NESTED_HOT_TEAMS
if( __kmp_hot_teams_mode == 0 ) {
// AC: saved number of threads should correspond to team's value in this mode,
@@ -4926,6 +4894,10 @@ __kmp_allocate_team( kmp_root_t *root, int new_nproc, int max_nproc,
/* release the extra threads we don't need any more */
for( f = new_nproc ; f < team->t.t_nproc ; f++ ) {
KMP_DEBUG_ASSERT( team->t.t_threads[ f ] );
+ if ( __kmp_tasking_mode != tskm_immediate_exec) {
+ // When decreasing team size, threads no longer in the team should unref task team.
+ team->t.t_threads[f]->th.th_task_team = NULL;
+ }
__kmp_free_thread( team->t.t_threads[ f ] );
team->t.t_threads[ f ] = NULL;
}
@@ -4937,32 +4909,9 @@ __kmp_allocate_team( kmp_root_t *root, int new_nproc, int max_nproc,
team->t.t_sched = new_icvs->sched;
__kmp_reinitialize_team( team, new_icvs, root->r.r_uber_thread->th.th_ident );
- if ( __kmp_tasking_mode != tskm_immediate_exec ) {
- // Init both task teams
- int tt_idx;
- for (tt_idx=0; tt_idx<2; ++tt_idx) {
- kmp_task_team_t *task_team = team->t.t_task_team[tt_idx];
- if ( task_team != NULL ) {
- KMP_DEBUG_ASSERT( ! TCR_4(task_team->tt.tt_found_tasks) );
- task_team->tt.tt_nproc = new_nproc;
- task_team->tt.tt_unfinished_threads = new_nproc;
- task_team->tt.tt_ref_ct = new_nproc - 1;
- }
- }
- }
-
/* update the remaining threads */
- if (level) {
- team->t.t_threads[0]->th.th_team_nproc = new_nproc;
- for(f = 1; f < new_nproc; ++f) {
- team->t.t_threads[f]->th.th_team_nproc = new_nproc;
- team->t.t_threads[f]->th.th_task_state = 0;
- }
- }
- else {
- for(f = 0; f < new_nproc; ++f) {
- team->t.t_threads[f]->th.th_team_nproc = new_nproc;
- }
+ for(f = 0; f < new_nproc; ++f) {
+ team->t.t_threads[f]->th.th_team_nproc = new_nproc;
}
// restore the current task state of the master thread: should be the implicit task
KF_TRACE( 10, ("__kmp_allocate_team: T#%d, this_thread=%p team=%p\n",
@@ -5076,39 +5025,24 @@ __kmp_allocate_team( kmp_root_t *root, int new_nproc, int max_nproc,
} // end of check of t_nproc vs. new_nproc vs. hot_team_nth
#endif // KMP_NESTED_HOT_TEAMS
/* make sure everyone is synchronized */
+ int old_nproc = team->t.t_nproc; // save old value and use to update only new threads below
__kmp_initialize_team( team, new_nproc, new_icvs, root->r.r_uber_thread->th.th_ident );
- if ( __kmp_tasking_mode != tskm_immediate_exec ) {
- // Signal the worker threads to stop looking for tasks while spin waiting.
- // The task teams are reference counted and will be deallocated by the last worker thread.
- int tt_idx;
- for (tt_idx=0; tt_idx<2; ++tt_idx) {
- // We don't know which of the two task teams workers are waiting on, so deactivate both.
- kmp_task_team_t *task_team = team->t.t_task_team[tt_idx];
- if ( (task_team != NULL) && TCR_SYNC_4(task_team->tt.tt_active) ) {
- TCW_SYNC_4( task_team->tt.tt_active, FALSE );
- team->t.t_task_team[tt_idx] = NULL;
- }
- }
- }
-
/* reinitialize the threads */
KMP_DEBUG_ASSERT(team->t.t_nproc == new_nproc);
- if (level) {
- int old_state = team->t.t_threads[0]->th.th_task_state;
- for (f=0; f < team->t.t_nproc; ++f)
- __kmp_initialize_info( team->t.t_threads[ f ], team, f, __kmp_gtid_from_tid( f, team ) );
- // th_task_state for master thread will be put in stack of states in __kmp_fork_call()
- // before zeroing, for workers it was just zeroed in __kmp_initialize_info()
- team->t.t_threads[0]->th.th_task_state = old_state;
+ for (f=0; f < team->t.t_nproc; ++f)
+ __kmp_initialize_info( team->t.t_threads[ f ], team, f, __kmp_gtid_from_tid( f, team ) );
+ if (level) { // set th_task_state for new threads in nested hot team
+ // __kmp_initialize_info() no longer zeroes th_task_state, so we should only need to set the
+ // th_task_state for the new threads. th_task_state for master thread will not be accurate until
+ // after this in __kmp_fork_call(), so we look to the master's memo_stack to get the correct value.
+ for (f=old_nproc; f < team->t.t_nproc; ++f)
+ team->t.t_threads[f]->th.th_task_state = team->t.t_threads[0]->th.th_task_state_memo_stack[level];
}
- else {
- int old_state = team->t.t_threads[0]->th.th_task_state;
- for (f=0; f<team->t.t_nproc; ++f) {
- __kmp_initialize_info( team->t.t_threads[ f ], team, f, __kmp_gtid_from_tid( f, team ) );
+ else { // set th_task_state for new threads in non-nested hot team
+ int old_state = team->t.t_threads[0]->th.th_task_state; // copy master's state
+ for (f=old_nproc; f < team->t.t_nproc; ++f)
team->t.t_threads[f]->th.th_task_state = old_state;
- team->t.t_threads[f]->th.th_task_team = team->t.t_task_team[old_state];
- }
}
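
In the resize path above, only threads added beyond the old team size need th_task_state set, since __kmp_initialize_info() no longer zeroes it. A hypothetical sketch of that selection; the function and parameter names are illustrative, not the runtime's:

static void set_new_worker_task_states(unsigned char *worker_task_state,
                                       int old_nproc, int new_nproc, int level,
                                       const unsigned char *master_memo_stack,
                                       unsigned char master_task_state)
{
    /* Nested hot team: the master's live state is not settled until __kmp_fork_call,
     * so read the value it parked in its memo stack at this nesting level.
     * Outer hot team: just copy the master's current state. */
    unsigned char state = level ? master_memo_stack[level] : master_task_state;
    int f;
    for (f = old_nproc; f < new_nproc; ++f)
        worker_task_state[f] = state;
}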
#ifdef KMP_DEBUG
@@ -5342,18 +5276,17 @@ __kmp_free_team( kmp_root_t *root, kmp_team_t *team USE_NESTED_HOT_ARG(kmp_info
/* if we are non-hot team, release our threads */
if( ! use_hot_team ) {
if ( __kmp_tasking_mode != tskm_immediate_exec ) {
+ // Delete task teams
int tt_idx;
for (tt_idx=0; tt_idx<2; ++tt_idx) {
- // We don't know which of the two task teams workers are waiting on, so deactivate both.
kmp_task_team_t *task_team = team->t.t_task_team[tt_idx];
if ( task_team != NULL ) {
- // Signal the worker threads to stop looking for tasks while spin waiting. The task
- // teams are reference counted and will be deallocated by the last worker thread via the
- // thread's pointer to the task team.
- KA_TRACE( 20, ( "__kmp_free_team: deactivating task_team %p\n", task_team ) );
+ for (f=0; f<team->t.t_nproc; ++f) { // Have all threads unref task teams
+ team->t.t_threads[f]->th.th_task_team = NULL;
+ }
+ KA_TRACE( 20, ( "__kmp_free_team: T#%d deactivating task_team %p on team %d\n", __kmp_get_gtid(), task_team, team->t.t_id ) );
KMP_DEBUG_ASSERT( team->t.t_nproc > 1 );
- TCW_SYNC_4( task_team->tt.tt_active, FALSE );
- KMP_MB();
+ __kmp_free_task_team( master, task_team );
team->t.t_task_team[tt_idx] = NULL;
}
}
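
This hunk captures the ownership change the patch makes overall: instead of deactivating the task team and letting the last worker free it via reference counting, the team release path clears every member's pointer and frees the task team directly. A simplified, self-contained sketch of that pattern, with stand-in types and free() standing in for __kmp_free_task_team (which in the real code also takes the master thread):

#include <stdlib.h>

typedef struct { int dummy; } task_team_t;
typedef struct { task_team_t *task_team; } worker_t;
typedef struct {
    int           nproc;
    worker_t    **threads;
    task_team_t  *task_team[2];
} team_t;

/* Stand-in for __kmp_free_task_team(). */
static void free_task_team(task_team_t *tt) { free(tt); }

static void release_team_task_teams(team_t *team)
{
    int tt_idx, f;
    for (tt_idx = 0; tt_idx < 2; ++tt_idx) {
        task_team_t *tt = team->task_team[tt_idx];
        if (tt == NULL)
            continue;
        for (f = 0; f < team->nproc; ++f)       /* every member drops its pointer first */
            team->threads[f]->task_team = NULL;
        free_task_team(tt);                     /* then the owner frees it directly */
        team->task_team[tt_idx] = NULL;
    }
}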
@@ -5452,6 +5385,7 @@ __kmp_free_thread( kmp_info_t *this_th )
balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
balign[b].bb.team = NULL;
}
+ this_th->th.th_task_state = 0;
/* put thread back on the free pool */
@@ -5622,9 +5556,7 @@ __kmp_launch_thread( kmp_info_t *this_thr )
}
#endif
- if ( TCR_PTR( this_thr->th.th_task_team ) != NULL ) {
- __kmp_unref_task_team( this_thr->th.th_task_team, this_thr );
- }
+ this_thr->th.th_task_team = NULL;
/* run the destructors for the threadprivate data for this thread */
__kmp_common_destroy_gtid( gtid );
@@ -6120,10 +6052,7 @@ __kmp_internal_end_thread( int gtid_req )
KA_TRACE( 10, ("__kmp_internal_end_thread: worker thread T#%d\n", gtid ));
if ( gtid >= 0 ) {
- kmp_info_t *this_thr = __kmp_threads[ gtid ];
- if (TCR_PTR(this_thr->th.th_task_team) != NULL) {
- __kmp_unref_task_team(this_thr->th.th_task_team, this_thr);
- }
+ __kmp_threads[gtid]->th.th_task_team = NULL;
}
KA_TRACE( 10, ("__kmp_internal_end_thread: worker thread done, exiting T#%d\n", gtid ));