-rw-r--r--  openmp/runtime/src/kmp_tasking.c | 333
-rw-r--r--  openmp/runtime/test/lit.cfg      |   3
2 files changed, 108 insertions, 228 deletions
diff --git a/openmp/runtime/src/kmp_tasking.c b/openmp/runtime/src/kmp_tasking.c
index 361fce93bc5..888386a6548 100644
--- a/openmp/runtime/src/kmp_tasking.c
+++ b/openmp/runtime/src/kmp_tasking.c
@@ -1806,17 +1806,17 @@ static inline int __kmp_execute_tasks_template(kmp_info_t *thread, kmp_int32 gti
                                                int *thread_finished
                                                USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained)
 {
-    kmp_task_team_t *     task_team;
+    kmp_task_team_t *     task_team = thread->th.th_task_team;
     kmp_thread_data_t *   threads_data;
     kmp_task_t *          task;
+    kmp_info_t *          other_thread;
     kmp_taskdata_t *      current_task = thread -> th.th_current_task;
     volatile kmp_uint32 * unfinished_threads;
-    kmp_int32             nthreads, last_stolen, k, tid;
+    kmp_int32             nthreads, victim=-2, use_own_tasks=1, new_victim=0, tid=thread->th.th_info.ds.ds_tid;
 
     KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
     KMP_DEBUG_ASSERT( thread == __kmp_threads[ gtid ] );
 
-    task_team = thread -> th.th_task_team;
     if (task_team == NULL) return FALSE;
 
     KA_TRACE(15, ("__kmp_execute_tasks_template(enter): T#%d final_spin=%d *thread_finished=%d\n",
@@ -1834,277 +1834,154 @@ static inline int __kmp_execute_tasks_template(kmp_info_t *thread, kmp_int32 gti
 #endif
 
     KMP_DEBUG_ASSERT( (int)(TCR_4(*unfinished_threads)) >= 0 );
 
-    // Choose tasks from our own work queue.
-    start:
-    while (( task = __kmp_remove_my_task( thread, gtid, task_team, is_constrained )) != NULL ) {
-#if USE_ITT_BUILD && USE_ITT_NOTIFY
-        if ( __itt_sync_create_ptr || KMP_ITT_DEBUG ) {
-            if ( itt_sync_obj == NULL ) {
-                // we are at fork barrier where we could not get the object reliably
-                itt_sync_obj  = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
+    while (1) { // Outer loop keeps trying to find tasks in case of single thread getting tasks from target constructs
+        while (1) { // Inner loop to find a task and execute it
+            task = NULL;
+            if (use_own_tasks) { // check on own queue first
+                task = __kmp_remove_my_task( thread, gtid, task_team, is_constrained );
             }
-            __kmp_itt_task_starting( itt_sync_obj );
-        }
-#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
-        __kmp_invoke_task( gtid, task, current_task );
-#if USE_ITT_BUILD
-        if ( itt_sync_obj != NULL )
-            __kmp_itt_task_finished( itt_sync_obj );
-#endif /* USE_ITT_BUILD */
-
-        // If this thread is only partway through the barrier and the condition
-        // is met, then return now, so that the barrier gather/release pattern can proceed.
-        // If this thread is in the last spin loop in the barrier, waiting to be
-        // released, we know that the termination condition will not be satisified,
-        // so don't waste any cycles checking it.
-        if (flag == NULL || (!final_spin && flag->done_check())) {
-            KA_TRACE(15, ("__kmp_execute_tasks_template(exit #1): T#%d spin condition satisfied\n", gtid) );
-            return TRUE;
-        }
-        if (thread->th.th_task_team == NULL) break;
-        KMP_YIELD( __kmp_library == library_throughput );   // Yield before executing next task
-    }
-
-    // This thread's work queue is empty.  If we are in the final spin loop
-    // of the barrier, check and see if the termination condition is satisfied.
-#if OMP_41_ENABLED
-    // The work queue may be empty but there might be proxy tasks still executing
-    if (final_spin && TCR_4(current_task -> td_incomplete_child_tasks) == 0)
-#else
-    if (final_spin)
-#endif
-    {
-        // First, decrement the #unfinished threads, if that has not already
-        // been done.  This decrement might be to the spin location, and
-        // result in the termination condition being satisfied.
-        if (! *thread_finished) {
-            kmp_uint32 count;
-
-            count = KMP_TEST_THEN_DEC32( (kmp_int32 *)unfinished_threads ) - 1;
-            KA_TRACE(20, ("__kmp_execute_tasks_template(dec #1): T#%d dec unfinished_threads to %d task_team=%p\n",
-                          gtid, count, task_team) );
-            *thread_finished = TRUE;
-        }
-
-        // It is now unsafe to reference thread->th.th_team !!!
-        // Decrementing task_team->tt.tt_unfinished_threads can allow the master
-        // thread to pass through the barrier, where it might reset each thread's
-        // th.th_team field for the next parallel region.
-        // If we can steal more work, we know that this has not happened yet.
-        if (flag != NULL && flag->done_check()) {
-            KA_TRACE(15, ("__kmp_execute_tasks_template(exit #2): T#%d spin condition satisfied\n", gtid) );
-            return TRUE;
-        }
-    }
-
-    if (thread->th.th_task_team == NULL) return FALSE;
-#if OMP_41_ENABLED
-    // check if there are other threads to steal from, otherwise go back
-    if ( nthreads  == 1 )
-        goto start;
-#endif
+            if ((task == NULL) && (nthreads > 1)) { // Steal a task
+                int asleep = 1;
+                use_own_tasks = 0;
+                // Try to steal from the last place I stole from successfully.
+                if (victim == -2) { // haven't stolen anything yet
+                    victim = threads_data[tid].td.td_deque_last_stolen;
+                    if (victim != -1) // if we have a last stolen from victim, get the thread
+                        other_thread = threads_data[victim].td.td_thr;
+                }
+                if (victim != -1) { // found last victim
+                    asleep = 0;
+                }
+                else if (!new_victim) { // no recent steals and we haven't already used a new victim; select a random thread
+                    do { // Find a different thread to steal work from.
+                        // Pick a random thread. Initial plan was to cycle through all the threads, and only return if
+                        // we tried to steal from every thread, and failed.  Arch says that's not such a great idea.
+                        victim = __kmp_get_random(thread) % (nthreads - 1);
+                        if (victim >= tid) {
+                            ++victim;  // Adjusts random distribution to exclude self
+                        }
+                        // Found a potential victim
+                        other_thread = threads_data[victim].td.td_thr;
+                        // There is a slight chance that __kmp_enable_tasking() did not wake up all threads
+                        // waiting at the barrier.  If victim is sleeping, then wake it up.  Since we were going to
+                        // pay the cache miss penalty for referencing another thread's kmp_info_t struct anyway,
+                        // the check shouldn't cost too much performance at this point. In extra barrier mode, tasks
+                        // do not sleep at the separate tasking barrier, so this isn't a problem.
+                        asleep = 0;
+                        if ( ( __kmp_tasking_mode == tskm_task_teams ) &&
+                             (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) &&
+                             (TCR_PTR(other_thread->th.th_sleep_loc) != NULL)) {
+                            asleep = 1;
+                            __kmp_null_resume_wrapper(__kmp_gtid_from_thread(other_thread), other_thread->th.th_sleep_loc);
+                            // A sleeping thread should not have any tasks on its queue. There is a slight
+                            // possibility that it resumes, steals a task from another thread, which spawns more
+                            // tasks, all in the time that it takes this thread to check => don't write an assertion
+                            // that the victim's queue is empty.  Try stealing from a different thread.
+                        }
+                    } while (asleep);
+                }
 
-    // Try to steal from the last place I stole from successfully.
-    tid = thread -> th.th_info.ds.ds_tid;//__kmp_tid_from_gtid( gtid );
-    last_stolen = threads_data[ tid ].td.td_deque_last_stolen;
+                if (!asleep) {
+                    // We have a victim to try to steal from
+                    task = __kmp_steal_task(other_thread, gtid, task_team, unfinished_threads, thread_finished, is_constrained);
+                }
+                if (task != NULL) { // set last stolen to victim
+                    if (threads_data[tid].td.td_deque_last_stolen != victim) {
+                        threads_data[tid].td.td_deque_last_stolen = victim;
+                        // The pre-refactored code did not try more than 1 successful new victim,
+                        // unless the last one generated more local tasks; new_victim keeps track of this
+                        new_victim = 1;
+                    }
+                }
+                else { // No tasks found; unset last_stolen
+                    KMP_CHECK_UPDATE(threads_data[tid].td.td_deque_last_stolen, -1);
+                    victim = -2; // no successful victim found
+                }
+            }
 
-    if (last_stolen != -1) {
-        kmp_info_t *other_thread = threads_data[last_stolen].td.td_thr;
+            if (task == NULL) // break out of tasking loop
+                break;
 
-        while ((task = __kmp_steal_task( other_thread, gtid, task_team, unfinished_threads,
-                                         thread_finished, is_constrained )) != NULL)
-        {
+            // Found a task; execute it
 #if USE_ITT_BUILD && USE_ITT_NOTIFY
             if ( __itt_sync_create_ptr || KMP_ITT_DEBUG ) {
-                if ( itt_sync_obj == NULL ) {
-                    // we are at fork barrier where we could not get the object reliably
-                    itt_sync_obj  = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
+                if ( itt_sync_obj == NULL ) { // we are at fork barrier where we could not get the object reliably
+                    itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
                 }
                 __kmp_itt_task_starting( itt_sync_obj );
             }
 #endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
             __kmp_invoke_task( gtid, task, current_task );
 #if USE_ITT_BUILD
-            if ( itt_sync_obj != NULL )
-                __kmp_itt_task_finished( itt_sync_obj );
+            if ( itt_sync_obj != NULL ) __kmp_itt_task_finished( itt_sync_obj );
 #endif /* USE_ITT_BUILD */
-
-            // Check to see if this thread can proceed.
+            // If this thread is only partway through the barrier and the condition is met, then return now,
+            // so that the barrier gather/release pattern can proceed. If this thread is in the last spin loop
+            // in the barrier, waiting to be released, we know that the termination condition will not be
+            // satisfied, so don't waste any cycles checking it.
             if (flag == NULL || (!final_spin && flag->done_check())) {
-                KA_TRACE(15, ("__kmp_execute_tasks_template(exit #3): T#%d spin condition satisfied\n",
-                              gtid) );
+                KA_TRACE(15, ("__kmp_execute_tasks_template: T#%d spin condition satisfied\n", gtid) );
                 return TRUE;
             }
-
-            if (thread->th.th_task_team == NULL) break;
+            if (thread->th.th_task_team == NULL) {
+                break;
+            }
             KMP_YIELD( __kmp_library == library_throughput );   // Yield before executing next task
-            // If the execution of the stolen task resulted in more tasks being
-            // placed on our run queue, then restart the whole process.
-            if (TCR_4(threads_data[ tid ].td.td_deque_ntasks) != 0) {
-                KA_TRACE(20, ("__kmp_execute_tasks_template: T#%d stolen task spawned other tasks, restart\n",
-                              gtid) );
-                goto start;
+            // If execution of a stolen task results in more tasks being placed on our run queue, reset use_own_tasks
+            if (!use_own_tasks && TCR_4(threads_data[tid].td.td_deque_ntasks) != 0) {
+                KA_TRACE(20, ("__kmp_execute_tasks_template: T#%d stolen task spawned other tasks, restart\n", gtid));
+                use_own_tasks = 1;
+                new_victim = 0;
             }
         }
 
-        // Don't give priority to stealing from this thread anymore.
-        threads_data[ tid ].td.td_deque_last_stolen = -1;
-
-        // The victims's work queue is empty.  If we are in the final spin loop
-        // of the barrier, check and see if the termination condition is satisfied.
+        // The task source has been exhausted. If in final spin loop of barrier, check if termination condition is satisfied.
 #if OMP_41_ENABLED
         // The work queue may be empty but there might be proxy tasks still executing
-        if (final_spin && TCR_4(current_task -> td_incomplete_child_tasks) == 0)
+        if (final_spin && TCR_4(current_task->td_incomplete_child_tasks) == 0)
 #else
         if (final_spin)
 #endif
         {
-            // First, decrement the #unfinished threads, if that has not already
-            // been done.  This decrement might be to the spin location, and
-            // result in the termination condition being satisfied.
+            // First, decrement the #unfinished threads, if that has not already been done.  This decrement
+            // might be to the spin location, and result in the termination condition being satisfied.
             if (! *thread_finished) {
                 kmp_uint32 count;
 
                 count = KMP_TEST_THEN_DEC32( (kmp_int32 *)unfinished_threads ) - 1;
-                KA_TRACE(20, ("__kmp_execute_tasks_template(dec #2): T#%d dec unfinished_threads to %d "
-                              "task_team=%p\n", gtid, count, task_team) );
+                KA_TRACE(20, ("__kmp_execute_tasks_template: T#%d dec unfinished_threads to %d task_team=%p\n",
                              gtid, count, task_team) );
                 *thread_finished = TRUE;
             }
 
-            // If __kmp_tasking_mode != tskm_immediate_exec
-            // then it is now unsafe to reference thread->th.th_team !!!
-            // Decrementing task_team->tt.tt_unfinished_threads can allow the master
-            // thread to pass through the barrier, where it might reset each thread's
-            // th.th_team field for the next parallel region.
+            // It is now unsafe to reference thread->th.th_team !!!
+            // Decrementing task_team->tt.tt_unfinished_threads can allow the master thread to pass through
+            // the barrier, where it might reset each thread's th.th_team field for the next parallel region.
             // If we can steal more work, we know that this has not happened yet.
             if (flag != NULL && flag->done_check()) {
-                KA_TRACE(15, ("__kmp_execute_tasks_template(exit #4): T#%d spin condition satisfied\n",
-                              gtid) );
+                KA_TRACE(15, ("__kmp_execute_tasks_template: T#%d spin condition satisfied\n", gtid) );
                 return TRUE;
             }
         }
 
-        if (thread->th.th_task_team == NULL) return FALSE;
-    }
-
-    // Find a different thread to steal work from.  Pick a random thread.
-    // My initial plan was to cycle through all the threads, and only return
-    // if we tried to steal from every thread, and failed.  Arch says that's
-    // not such a great idea.
-    // GEH - need yield code in this loop for throughput library mode?
-    new_victim:
-    k = __kmp_get_random( thread ) % (nthreads - 1);
-    if ( k >= thread -> th.th_info.ds.ds_tid ) {
-        ++k;               // Adjusts random distribution to exclude self
-    }
-    {
-        kmp_info_t *other_thread = threads_data[k].td.td_thr;
-        int first;
-
-        // There is a slight chance that __kmp_enable_tasking() did not wake up
-        // all threads waiting at the barrier.  If this thread is sleeping, then
-        // wake it up.  Since we were going to pay the cache miss penalty
-        // for referencing another thread's kmp_info_t struct anyway, the check
-        // shouldn't cost too much performance at this point.
-        // In extra barrier mode, tasks do not sleep at the separate tasking
-        // barrier, so this isn't a problem.
-        if ( ( __kmp_tasking_mode == tskm_task_teams ) &&
-             (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) &&
-             (TCR_PTR(other_thread->th.th_sleep_loc) != NULL))
-        {
-            __kmp_null_resume_wrapper(__kmp_gtid_from_thread(other_thread), other_thread->th.th_sleep_loc);
-            // A sleeping thread should not have any tasks on it's queue.
-            // There is a slight possibility that it resumes, steals a task from
-            // another thread, which spawns more tasks, all in the time that it takes
-            // this thread to check => don't write an assertion that the victim's
-            // queue is empty.  Try stealing from a different thread.
-            goto new_victim;
-        }
-
-        // Now try to steal work from the selected thread
-        first = TRUE;
-        while ((task = __kmp_steal_task( other_thread, gtid, task_team, unfinished_threads,
-                                         thread_finished, is_constrained )) != NULL)
-        {
-#if USE_ITT_BUILD && USE_ITT_NOTIFY
-            if ( __itt_sync_create_ptr || KMP_ITT_DEBUG ) {
-                if ( itt_sync_obj == NULL ) {
-                    // we are at fork barrier where we could not get the object reliably
-                    itt_sync_obj  = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
-                }
-                __kmp_itt_task_starting( itt_sync_obj );
-            }
-#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
-            __kmp_invoke_task( gtid, task, current_task );
-#if USE_ITT_BUILD
-            if ( itt_sync_obj != NULL )
-                __kmp_itt_task_finished( itt_sync_obj );
-#endif /* USE_ITT_BUILD */
-
-            // Try stealing from this victim again, in the future.
-            if (first) {
-                threads_data[ tid ].td.td_deque_last_stolen = k;
-                first = FALSE;
-            }
-
-            // Check to see if this thread can proceed.
-            if (flag == NULL || (!final_spin && flag->done_check())) {
-                KA_TRACE(15, ("__kmp_execute_tasks_template(exit #5): T#%d spin condition satisfied\n",
-                              gtid) );
-                return TRUE;
-            }
-            if (thread->th.th_task_team == NULL) break;
-            KMP_YIELD( __kmp_library == library_throughput );   // Yield before executing next task
-            // If the execution of the stolen task resulted in more tasks being
-            // placed on our run queue, then restart the whole process.
-            if (TCR_4(threads_data[ tid ].td.td_deque_ntasks) != 0) {
-                KA_TRACE(20, ("__kmp_execute_tasks_template: T#%d stolen task spawned other tasks, restart\n",
-                              gtid) );
-                goto start;
-            }
+        // If this thread's task team is NULL, master has recognized that there are no more tasks; bail out
+        if (thread->th.th_task_team == NULL) {
+            KA_TRACE(15, ("__kmp_execute_tasks_template: T#%d no more tasks\n", gtid) );
+            return FALSE;
         }
 
-        // The victims's work queue is empty.  If we are in the final spin loop
-        // of the barrier, check and see if the termination condition is satisfied.
-        // Going on and finding a new victim to steal from is expensive, as it
-        // involves a lot of cache misses, so we definitely want to re-check the
-        // termination condition before doing that.
 #if OMP_41_ENABLED
-        // The work queue may be empty but there might be proxy tasks still executing
-        if (final_spin && TCR_4(current_task -> td_incomplete_child_tasks) == 0)
-#else
-        if (final_spin)
+        // We could be getting tasks from target constructs; if this is the only thread, keep trying to execute
+        // tasks from own queue
+        if (nthreads == 1)
+            use_own_tasks = 1;
+        else
 #endif
         {
-            // First, decrement the #unfinished threads, if that has not already
-            // been done.  This decrement might be to the spin location, and
-            // result in the termination condition being satisfied.
-            if (! *thread_finished) {
-                kmp_uint32 count;
-
-                count = KMP_TEST_THEN_DEC32( (kmp_int32 *)unfinished_threads ) - 1;
-                KA_TRACE(20, ("__kmp_execute_tasks_template(dec #3): T#%d dec unfinished_threads to %d; "
-                              "task_team=%p\n",
-                              gtid, count, task_team) );
-                *thread_finished = TRUE;
-            }
-
-            // If __kmp_tasking_mode != tskm_immediate_exec,
-            // then it is now unsafe to reference thread->th.th_team !!!
-            // Decrementing task_team->tt.tt_unfinished_threads can allow the master
-            // thread to pass through the barrier, where it might reset each thread's
-            // th.th_team field for the next parallel region.
-            // If we can steal more work, we know that this has not happened yet.
-            if (flag != NULL && flag->done_check()) {
-                KA_TRACE(15, ("__kmp_execute_tasks_template(exit #6): T#%d spin condition satisfied\n", gtid) );
-                return TRUE;
-            }
+            KA_TRACE(15, ("__kmp_execute_tasks_template: T#%d can't find work\n", gtid) );
+            return FALSE;
         }
-        if (thread->th.th_task_team == NULL) return FALSE;
     }
-
-    KA_TRACE(15, ("__kmp_execute_tasks_template(exit #7): T#%d can't find work\n", gtid) );
-    return FALSE;
 }
 
 int __kmp_execute_tasks_32(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_32 *flag, int final_spin,
diff --git a/openmp/runtime/test/lit.cfg b/openmp/runtime/test/lit.cfg
index 7f18da4b1b7..64e3b618d2b 100644
--- a/openmp/runtime/test/lit.cfg
+++ b/openmp/runtime/test/lit.cfg
@@ -26,6 +26,9 @@ def append_dynamic_library_path(path):
     else:
         config.environment[name] = path
 
+for name,value in os.environ.items():
+    config.environment[name] = value
+
 # name: The name of this test suite.
 config.name = 'libomp'
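
The heart of this change is structural: the old implementation chained three phases (drain the own deque, retry the last successful victim, probe random victims) with start:/new_victim: labels and gotos, while the new code expresses the same search as two nested while(1) loops driven by the state variables use_own_tasks, victim, and new_victim. The following is a compilable skeleton of that control flow, a minimal sketch only: the helpers pop_own_task, steal_task, pick_random_victim, barrier_done, and own_deque_size are hypothetical stubs standing in for the real __kmp_remove_my_task, __kmp_steal_task, __kmp_get_random machinery and the flag->done_check() logic, and the final_spin bookkeeping is compressed into barrier_done().

#include <stdbool.h>
#include <stddef.h>

typedef struct task task_t;

static task_t *pop_own_task(int tid) { (void)tid; return NULL; }     /* stub */
static task_t *steal_task(int victim) { (void)victim; return NULL; } /* stub */
static int pick_random_victim(int tid, int nthreads);
static void run_task(task_t *t) { (void)t; }                         /* stub */
static bool barrier_done(void) { return true; }                      /* stub */
static int own_deque_size(int tid) { (void)tid; return 0; }          /* stub */

static bool execute_tasks(int tid, int nthreads, int *last_stolen) {
    int victim = -2;        /* -2: no candidate victim chosen yet            */
    int use_own_tasks = 1;  /* own deque first, stealing only as fallback    */
    int new_victim = 0;     /* a fresh random victim already succeeded once  */

    while (1) {             /* outer: single thread may see new target tasks */
        while (1) {         /* inner: find one task and execute it           */
            task_t *task = use_own_tasks ? pop_own_task(tid) : NULL;
            if (task == NULL && nthreads > 1) {
                use_own_tasks = 0;
                if (victim == -2)                 /* prefer last good victim */
                    victim = *last_stolen;
                if (victim == -1 && !new_victim)  /* else pick at random     */
                    victim = pick_random_victim(tid, nthreads);
                if (victim >= 0)
                    task = steal_task(victim);
                if (task != NULL) {
                    *last_stolen = victim;        /* steal here again later  */
                    new_victim = 1;
                } else {
                    *last_stolen = -1;            /* demote failed victim    */
                    victim = -2;
                }
            }
            if (task == NULL)
                break;                            /* every source is empty   */
            run_task(task);
            if (barrier_done())
                return true;
            if (!use_own_tasks && own_deque_size(tid) > 0) {
                use_own_tasks = 1;                /* stolen task spawned     */
                new_victim = 0;                   /* local work; restart     */
            }
        }
        if (nthreads > 1)
            return false;   /* can't find work anywhere                      */
        /* nthreads == 1: proxy/target tasks may still appear; spin again    */
    }
}

static int pick_random_victim(int tid, int nthreads) {
    return (tid + 1) % nthreads;  /* placeholder; see the demo below */
}

The shape makes the two restart conditions explicit where the old gotos hid them: a stolen task that repopulates the local deque flips use_own_tasks back on, and a lone thread loops in the outer while rather than jumping back to start.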
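One detail worth calling out is the victim-selection idiom the patch carries over from the old code: victim = __kmp_get_random(thread) % (nthreads - 1); if (victim >= tid) ++victim;. Drawing from nthreads - 1 values and shifting every index at or above the caller's own tid up by one gives a uniform distribution over the other threads while never selecting self. A small standalone demo of the distribution (hypothetical program, using rand() in place of __kmp_get_random):

#include <stdio.h>
#include <stdlib.h>

int main(void) {
    const int nthreads = 4, tid = 2, trials = 400000;
    int counts[4] = {0};
    srand(12345);
    for (int i = 0; i < trials; ++i) {
        int victim = rand() % (nthreads - 1);  /* 0 .. nthreads-2        */
        if (victim >= tid)
            ++victim;                          /* skip self: 0, 1, 3 only */
        counts[victim]++;
    }
    for (int t = 0; t < nthreads; ++t)
        printf("thread %d chosen %d times\n", t, counts[t]);
    /* Expect roughly trials/3 for threads 0, 1, 3 and exactly 0 for thread 2. */
    return 0;
}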
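The final_spin bookkeeping is unchanged in substance: each thread must decrement the shared unfinished-threads counter exactly once, guarded by its per-thread *thread_finished flag, because the decrement may itself be what satisfies the barrier's termination condition. A minimal sketch of that pattern, restated with C11 atomics rather than the runtime's KMP_TEST_THEN_DEC32 (the names mirror the patch, but this is not the runtime's code):

#include <stdatomic.h>
#include <stdbool.h>

atomic_int unfinished_threads;  /* shared spin location; initialized to nthreads */

/* Called when a thread finds all task sources empty during the final spin. */
static void mark_finished_once(bool *thread_finished) {
    if (!*thread_finished) {
        /* fetch_sub returns the old value, so the new count is old - 1,
         * matching "KMP_TEST_THEN_DEC32(...) - 1" in the patch. */
        int count = atomic_fetch_sub(&unfinished_threads, 1) - 1;
        (void)count;            /* the runtime only traces this value */
        *thread_finished = true;
    }
    /* The thread may later steal and run more tasks; the thread-local flag
     * guarantees the shared counter is never decremented twice by it. */
}

As the patch's comments note, once this decrement lands the master may leave the barrier and reset th.th_team, which is why the code re-checks thread->th.th_task_team for NULL after every potential decrement.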

