-rw-r--r--  openmp/runtime/src/kmp_tasking.c | 333
-rw-r--r--  openmp/runtime/test/lit.cfg      |   3
2 files changed, 108 insertions(+), 228 deletions(-)
diff --git a/openmp/runtime/src/kmp_tasking.c b/openmp/runtime/src/kmp_tasking.c
index 361fce93bc5..888386a6548 100644
--- a/openmp/runtime/src/kmp_tasking.c
+++ b/openmp/runtime/src/kmp_tasking.c
@@ -1806,17 +1806,17 @@ static inline int __kmp_execute_tasks_template(kmp_info_t *thread, kmp_int32 gti
int *thread_finished
USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained)
{
- kmp_task_team_t * task_team;
+ kmp_task_team_t * task_team = thread->th.th_task_team;
kmp_thread_data_t * threads_data;
kmp_task_t * task;
+ kmp_info_t * other_thread;
kmp_taskdata_t * current_task = thread -> th.th_current_task;
volatile kmp_uint32 * unfinished_threads;
- kmp_int32 nthreads, last_stolen, k, tid;
+ kmp_int32 nthreads, victim=-2, use_own_tasks=1, new_victim=0, tid=thread->th.th_info.ds.ds_tid;
KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
KMP_DEBUG_ASSERT( thread == __kmp_threads[ gtid ] );
- task_team = thread -> th.th_task_team;
if (task_team == NULL) return FALSE;
KA_TRACE(15, ("__kmp_execute_tasks_template(enter): T#%d final_spin=%d *thread_finished=%d\n",
@@ -1834,277 +1834,154 @@ static inline int __kmp_execute_tasks_template(kmp_info_t *thread, kmp_int32 gti
#endif
KMP_DEBUG_ASSERT( (int)(TCR_4(*unfinished_threads)) >= 0 );
- // Choose tasks from our own work queue.
- start:
- while (( task = __kmp_remove_my_task( thread, gtid, task_team, is_constrained )) != NULL ) {
-#if USE_ITT_BUILD && USE_ITT_NOTIFY
- if ( __itt_sync_create_ptr || KMP_ITT_DEBUG ) {
- if ( itt_sync_obj == NULL ) {
- // we are at fork barrier where we could not get the object reliably
- itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
+ while (1) { // Outer loop keeps trying to find tasks in case of single thread getting tasks from target constructs
+ while (1) { // Inner loop to find a task and execute it
+ task = NULL;
+ if (use_own_tasks) { // check on own queue first
+ task = __kmp_remove_my_task( thread, gtid, task_team, is_constrained );
}
- __kmp_itt_task_starting( itt_sync_obj );
- }
-#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
- __kmp_invoke_task( gtid, task, current_task );
-#if USE_ITT_BUILD
- if ( itt_sync_obj != NULL )
- __kmp_itt_task_finished( itt_sync_obj );
-#endif /* USE_ITT_BUILD */
-
- // If this thread is only partway through the barrier and the condition
- // is met, then return now, so that the barrier gather/release pattern can proceed.
- // If this thread is in the last spin loop in the barrier, waiting to be
- // released, we know that the termination condition will not be satisified,
- // so don't waste any cycles checking it.
- if (flag == NULL || (!final_spin && flag->done_check())) {
- KA_TRACE(15, ("__kmp_execute_tasks_template(exit #1): T#%d spin condition satisfied\n", gtid) );
- return TRUE;
- }
- if (thread->th.th_task_team == NULL) break;
- KMP_YIELD( __kmp_library == library_throughput ); // Yield before executing next task
- }
-
- // This thread's work queue is empty. If we are in the final spin loop
- // of the barrier, check and see if the termination condition is satisfied.
-#if OMP_41_ENABLED
- // The work queue may be empty but there might be proxy tasks still executing
- if (final_spin && TCR_4(current_task -> td_incomplete_child_tasks) == 0)
-#else
- if (final_spin)
-#endif
- {
- // First, decrement the #unfinished threads, if that has not already
- // been done. This decrement might be to the spin location, and
- // result in the termination condition being satisfied.
- if (! *thread_finished) {
- kmp_uint32 count;
-
- count = KMP_TEST_THEN_DEC32( (kmp_int32 *)unfinished_threads ) - 1;
- KA_TRACE(20, ("__kmp_execute_tasks_template(dec #1): T#%d dec unfinished_threads to %d task_team=%p\n",
- gtid, count, task_team) );
- *thread_finished = TRUE;
- }
-
- // It is now unsafe to reference thread->th.th_team !!!
- // Decrementing task_team->tt.tt_unfinished_threads can allow the master
- // thread to pass through the barrier, where it might reset each thread's
- // th.th_team field for the next parallel region.
- // If we can steal more work, we know that this has not happened yet.
- if (flag != NULL && flag->done_check()) {
- KA_TRACE(15, ("__kmp_execute_tasks_template(exit #2): T#%d spin condition satisfied\n", gtid) );
- return TRUE;
- }
- }
-
- if (thread->th.th_task_team == NULL) return FALSE;
-#if OMP_41_ENABLED
- // check if there are other threads to steal from, otherwise go back
- if ( nthreads == 1 )
- goto start;
-#endif
+ if ((task == NULL) && (nthreads > 1)) { // Steal a task
+ int asleep = 1;
+ use_own_tasks = 0;
+ // Try to steal from the last place I stole from successfully.
+ if (victim == -2) { // haven't stolen anything yet
+ victim = threads_data[tid].td.td_deque_last_stolen;
+ if (victim != -1) // if we have a last stolen from victim, get the thread
+ other_thread = threads_data[victim].td.td_thr;
+ }
+ if (victim != -1) { // found last victim
+ asleep = 0;
+ }
+ else if (!new_victim) { // no recent steals and we haven't already used a new victim; select a random thread
+ do { // Find a different thread to steal work from.
+ // Pick a random thread. Initial plan was to cycle through all the threads, and only return if
+ // we tried to steal from every thread, and failed. Arch says that's not such a great idea.
+ victim = __kmp_get_random(thread) % (nthreads - 1);
+ if (victim >= tid) {
+ ++victim; // Adjusts random distribution to exclude self
+ }
+ // Found a potential victim
+ other_thread = threads_data[victim].td.td_thr;
+ // There is a slight chance that __kmp_enable_tasking() did not wake up all threads
+ // waiting at the barrier. If victim is sleeping, then wake it up. Since we were going to
+ // pay the cache miss penalty for referencing another thread's kmp_info_t struct anyway,
+ // the check shouldn't cost too much performance at this point. In extra barrier mode, tasks
+ // do not sleep at the separate tasking barrier, so this isn't a problem.
+ asleep = 0;
+ if ( ( __kmp_tasking_mode == tskm_task_teams ) &&
+ (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) &&
+ (TCR_PTR(other_thread->th.th_sleep_loc) != NULL)) {
+ asleep = 1;
+ __kmp_null_resume_wrapper(__kmp_gtid_from_thread(other_thread), other_thread->th.th_sleep_loc);
+ // A sleeping thread should not have any tasks on its queue. There is a slight
+ // possibility that it resumes, steals a task from another thread, which spawns more
+ // tasks, all in the time that it takes this thread to check => don't write an assertion
+ // that the victim's queue is empty. Try stealing from a different thread.
+ }
+ } while (asleep);
+ }
- // Try to steal from the last place I stole from successfully.
- tid = thread -> th.th_info.ds.ds_tid;//__kmp_tid_from_gtid( gtid );
- last_stolen = threads_data[ tid ].td.td_deque_last_stolen;
+ if (!asleep) {
+ // We have a victim to try to steal from
+ task = __kmp_steal_task(other_thread, gtid, task_team, unfinished_threads, thread_finished, is_constrained);
+ }
+ if (task != NULL) { // set last stolen to victim
+ if (threads_data[tid].td.td_deque_last_stolen != victim) {
+ threads_data[tid].td.td_deque_last_stolen = victim;
+ // The pre-refactored code did not try more than 1 successful new victim,
+ // unless the last one generated more local tasks; new_victim keeps track of this
+ new_victim = 1;
+ }
+ }
+ else { // No tasks found; unset last_stolen
+ KMP_CHECK_UPDATE(threads_data[tid].td.td_deque_last_stolen, -1);
+ victim = -2; // no successful victim found
+ }
+ }
- if (last_stolen != -1) {
- kmp_info_t *other_thread = threads_data[last_stolen].td.td_thr;
+ if (task == NULL) // break out of tasking loop
+ break;
- while ((task = __kmp_steal_task( other_thread, gtid, task_team, unfinished_threads,
- thread_finished, is_constrained )) != NULL)
- {
+ // Found a task; execute it
#if USE_ITT_BUILD && USE_ITT_NOTIFY
if ( __itt_sync_create_ptr || KMP_ITT_DEBUG ) {
- if ( itt_sync_obj == NULL ) {
- // we are at fork barrier where we could not get the object reliably
- itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
+ if ( itt_sync_obj == NULL ) { // we are at fork barrier where we could not get the object reliably
+ itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
}
__kmp_itt_task_starting( itt_sync_obj );
}
#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
__kmp_invoke_task( gtid, task, current_task );
#if USE_ITT_BUILD
- if ( itt_sync_obj != NULL )
- __kmp_itt_task_finished( itt_sync_obj );
+ if ( itt_sync_obj != NULL ) __kmp_itt_task_finished( itt_sync_obj );
#endif /* USE_ITT_BUILD */
-
- // Check to see if this thread can proceed.
+ // If this thread is only partway through the barrier and the condition is met, then return now,
+ // so that the barrier gather/release pattern can proceed. If this thread is in the last spin loop
+ // in the barrier, waiting to be released, we know that the termination condition will not be
+ // satisfied, so don't waste any cycles checking it.
if (flag == NULL || (!final_spin && flag->done_check())) {
- KA_TRACE(15, ("__kmp_execute_tasks_template(exit #3): T#%d spin condition satisfied\n",
- gtid) );
+ KA_TRACE(15, ("__kmp_execute_tasks_template: T#%d spin condition satisfied\n", gtid) );
return TRUE;
}
-
- if (thread->th.th_task_team == NULL) break;
+ if (thread->th.th_task_team == NULL) {
+ break;
+ }
KMP_YIELD( __kmp_library == library_throughput ); // Yield before executing next task
- // If the execution of the stolen task resulted in more tasks being
- // placed on our run queue, then restart the whole process.
- if (TCR_4(threads_data[ tid ].td.td_deque_ntasks) != 0) {
- KA_TRACE(20, ("__kmp_execute_tasks_template: T#%d stolen task spawned other tasks, restart\n",
- gtid) );
- goto start;
+ // If execution of a stolen task results in more tasks being placed on our run queue, reset use_own_tasks
+ if (!use_own_tasks && TCR_4(threads_data[tid].td.td_deque_ntasks) != 0) {
+ KA_TRACE(20, ("__kmp_execute_tasks_template: T#%d stolen task spawned other tasks, restart\n", gtid));
+ use_own_tasks = 1;
+ new_victim = 0;
}
}
- // Don't give priority to stealing from this thread anymore.
- threads_data[ tid ].td.td_deque_last_stolen = -1;
-
- // The victims's work queue is empty. If we are in the final spin loop
- // of the barrier, check and see if the termination condition is satisfied.
+ // The task source has been exhausted. If in final spin loop of barrier, check if termination condition is satisfied.
#if OMP_41_ENABLED
// The work queue may be empty but there might be proxy tasks still executing
- if (final_spin && TCR_4(current_task -> td_incomplete_child_tasks) == 0)
+ if (final_spin && TCR_4(current_task->td_incomplete_child_tasks) == 0)
#else
if (final_spin)
#endif
{
- // First, decrement the #unfinished threads, if that has not already
- // been done. This decrement might be to the spin location, and
- // result in the termination condition being satisfied.
+ // First, decrement the #unfinished threads, if that has not already been done. This decrement
+ // might be to the spin location, and result in the termination condition being satisfied.
if (! *thread_finished) {
kmp_uint32 count;
count = KMP_TEST_THEN_DEC32( (kmp_int32 *)unfinished_threads ) - 1;
- KA_TRACE(20, ("__kmp_execute_tasks_template(dec #2): T#%d dec unfinished_threads to %d "
- "task_team=%p\n", gtid, count, task_team) );
+ KA_TRACE(20, ("__kmp_execute_tasks_template: T#%d dec unfinished_threads to %d task_team=%p\n",
+ gtid, count, task_team) );
*thread_finished = TRUE;
}
- // If __kmp_tasking_mode != tskm_immediate_exec
- // then it is now unsafe to reference thread->th.th_team !!!
- // Decrementing task_team->tt.tt_unfinished_threads can allow the master
- // thread to pass through the barrier, where it might reset each thread's
- // th.th_team field for the next parallel region.
+ // It is now unsafe to reference thread->th.th_team !!!
+ // Decrementing task_team->tt.tt_unfinished_threads can allow the master thread to pass through
+ // the barrier, where it might reset each thread's th.th_team field for the next parallel region.
// If we can steal more work, we know that this has not happened yet.
if (flag != NULL && flag->done_check()) {
- KA_TRACE(15, ("__kmp_execute_tasks_template(exit #4): T#%d spin condition satisfied\n",
- gtid) );
+ KA_TRACE(15, ("__kmp_execute_tasks_template: T#%d spin condition satisfied\n", gtid) );
return TRUE;
}
}
- if (thread->th.th_task_team == NULL) return FALSE;
- }
-
- // Find a different thread to steal work from. Pick a random thread.
- // My initial plan was to cycle through all the threads, and only return
- // if we tried to steal from every thread, and failed. Arch says that's
- // not such a great idea.
- // GEH - need yield code in this loop for throughput library mode?
- new_victim:
- k = __kmp_get_random( thread ) % (nthreads - 1);
- if ( k >= thread -> th.th_info.ds.ds_tid ) {
- ++k; // Adjusts random distribution to exclude self
- }
- {
- kmp_info_t *other_thread = threads_data[k].td.td_thr;
- int first;
-
- // There is a slight chance that __kmp_enable_tasking() did not wake up
- // all threads waiting at the barrier. If this thread is sleeping, then
- // wake it up. Since we were going to pay the cache miss penalty
- // for referencing another thread's kmp_info_t struct anyway, the check
- // shouldn't cost too much performance at this point.
- // In extra barrier mode, tasks do not sleep at the separate tasking
- // barrier, so this isn't a problem.
- if ( ( __kmp_tasking_mode == tskm_task_teams ) &&
- (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) &&
- (TCR_PTR(other_thread->th.th_sleep_loc) != NULL))
- {
- __kmp_null_resume_wrapper(__kmp_gtid_from_thread(other_thread), other_thread->th.th_sleep_loc);
- // A sleeping thread should not have any tasks on it's queue.
- // There is a slight possibility that it resumes, steals a task from
- // another thread, which spawns more tasks, all in the time that it takes
- // this thread to check => don't write an assertion that the victim's
- // queue is empty. Try stealing from a different thread.
- goto new_victim;
- }
-
- // Now try to steal work from the selected thread
- first = TRUE;
- while ((task = __kmp_steal_task( other_thread, gtid, task_team, unfinished_threads,
- thread_finished, is_constrained )) != NULL)
- {
-#if USE_ITT_BUILD && USE_ITT_NOTIFY
- if ( __itt_sync_create_ptr || KMP_ITT_DEBUG ) {
- if ( itt_sync_obj == NULL ) {
- // we are at fork barrier where we could not get the object reliably
- itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
- }
- __kmp_itt_task_starting( itt_sync_obj );
- }
-#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
- __kmp_invoke_task( gtid, task, current_task );
-#if USE_ITT_BUILD
- if ( itt_sync_obj != NULL )
- __kmp_itt_task_finished( itt_sync_obj );
-#endif /* USE_ITT_BUILD */
-
- // Try stealing from this victim again, in the future.
- if (first) {
- threads_data[ tid ].td.td_deque_last_stolen = k;
- first = FALSE;
- }
-
- // Check to see if this thread can proceed.
- if (flag == NULL || (!final_spin && flag->done_check())) {
- KA_TRACE(15, ("__kmp_execute_tasks_template(exit #5): T#%d spin condition satisfied\n",
- gtid) );
- return TRUE;
- }
- if (thread->th.th_task_team == NULL) break;
- KMP_YIELD( __kmp_library == library_throughput ); // Yield before executing next task
- // If the execution of the stolen task resulted in more tasks being
- // placed on our run queue, then restart the whole process.
- if (TCR_4(threads_data[ tid ].td.td_deque_ntasks) != 0) {
- KA_TRACE(20, ("__kmp_execute_tasks_template: T#%d stolen task spawned other tasks, restart\n",
- gtid) );
- goto start;
- }
+ // If this thread's task team is NULL, master has recognized that there are no more tasks; bail out
+ if (thread->th.th_task_team == NULL) {
+ KA_TRACE(15, ("__kmp_execute_tasks_template: T#%d no more tasks\n", gtid) );
+ return FALSE;
}
- // The victims's work queue is empty. If we are in the final spin loop
- // of the barrier, check and see if the termination condition is satisfied.
- // Going on and finding a new victim to steal from is expensive, as it
- // involves a lot of cache misses, so we definitely want to re-check the
- // termination condition before doing that.
#if OMP_41_ENABLED
- // The work queue may be empty but there might be proxy tasks still executing
- if (final_spin && TCR_4(current_task -> td_incomplete_child_tasks) == 0)
-#else
- if (final_spin)
+ // We could be getting tasks from target constructs; if this is the only thread, keep trying to execute
+ // tasks from own queue
+ if (nthreads == 1)
+ use_own_tasks = 1;
+ else
#endif
{
- // First, decrement the #unfinished threads, if that has not already
- // been done. This decrement might be to the spin location, and
- // result in the termination condition being satisfied.
- if (! *thread_finished) {
- kmp_uint32 count;
-
- count = KMP_TEST_THEN_DEC32( (kmp_int32 *)unfinished_threads ) - 1;
- KA_TRACE(20, ("__kmp_execute_tasks_template(dec #3): T#%d dec unfinished_threads to %d; "
- "task_team=%p\n",
- gtid, count, task_team) );
- *thread_finished = TRUE;
- }
-
- // If __kmp_tasking_mode != tskm_immediate_exec,
- // then it is now unsafe to reference thread->th.th_team !!!
- // Decrementing task_team->tt.tt_unfinished_threads can allow the master
- // thread to pass through the barrier, where it might reset each thread's
- // th.th_team field for the next parallel region.
- // If we can steal more work, we know that this has not happened yet.
- if (flag != NULL && flag->done_check()) {
- KA_TRACE(15, ("__kmp_execute_tasks_template(exit #6): T#%d spin condition satisfied\n", gtid) );
- return TRUE;
- }
+ KA_TRACE(15, ("__kmp_execute_tasks_template: T#%d can't find work\n", gtid) );
+ return FALSE;
}
- if (thread->th.th_task_team == NULL) return FALSE;
}
-
- KA_TRACE(15, ("__kmp_execute_tasks_template(exit #7): T#%d can't find work\n", gtid) );
- return FALSE;
}
int __kmp_execute_tasks_32(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_32 *flag, int final_spin,
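
The net effect of the kmp_tasking.c hunk above is that the old goto start / goto new_victim restarts are replaced by two nested while(1) loops steered by the use_own_tasks, new_victim and victim flags. The toy program below is a rough, standalone sketch of that control flow only; every name in it (pop_task, deque, NTHREADS) is invented for illustration and is not part of the runtime's real API, and it omits the last-stolen caching and the sleeping-victim wakeup handled in the patch.

#include <stdio.h>
#include <stdlib.h>

#define NTHREADS 4

static int deque[NTHREADS];                    /* toy per-thread task counts */

static int pop_task(int *q) { if (*q > 0) { --*q; return 1; } return 0; }

static int execute_tasks(int tid) {
    int use_own_tasks = 1, new_victim = 0, victim = -2, executed = 0;
    while (1) {                                /* outer loop: keep looking for new work */
        while (1) {                            /* inner loop: find one task and run it */
            int task = 0;
            if (use_own_tasks)                 /* check the thread's own queue first */
                task = pop_task(&deque[tid]);
            if (!task && NTHREADS > 1) {       /* otherwise try to steal from a victim */
                use_own_tasks = 0;
                if (victim < 0 && !new_victim) {
                    victim = rand() % (NTHREADS - 1);
                    if (victim >= tid)
                        ++victim;              /* adjust the distribution to exclude self */
                }
                if (victim >= 0) {
                    task = pop_task(&deque[victim]);
                    if (task)
                        new_victim = 1;        /* remember one successful new victim */
                    else
                        victim = -2;           /* victim is empty; forget it */
                }
            }
            if (!task)
                break;                         /* nothing to run: leave the inner loop */
            ++executed;                        /* stand-in for __kmp_invoke_task() */
            if (!use_own_tasks && deque[tid] != 0) {
                use_own_tasks = 1;             /* a stolen task spawned local work */
                new_victim = 0;
            }
        }
        /* The real routine re-checks the barrier termination condition here and, for a
         * single thread that may still receive target tasks, loops again; the toy stops. */
        break;
    }
    return executed;
}

int main(void) {
    for (int t = 0; t < NTHREADS; ++t)
        deque[t] = t + 1;                      /* seed some fake tasks */
    printf("executed %d tasks\n", execute_tasks(0));
    return 0;
}

With the seed queues above, thread 0 drains its own deque first, then steals from a single randomly chosen victim until that victim is empty, mirroring the one-successful-new-victim policy described in the patch's comments.
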
diff --git a/openmp/runtime/test/lit.cfg b/openmp/runtime/test/lit.cfg
index 7f18da4b1b7..64e3b618d2b 100644
--- a/openmp/runtime/test/lit.cfg
+++ b/openmp/runtime/test/lit.cfg
@@ -26,6 +26,9 @@ def append_dynamic_library_path(path):
else:
config.environment[name] = path
+for name,value in os.environ.items():
+ config.environment[name] = value
+
# name: The name of this test suite.
config.name = 'libomp'
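
The lit.cfg hunk above copies every variable from the invoking shell's environment into config.environment, so the test processes inherit the full host environment rather than only the entries the config sets up itself. As a minimal sketch of the same pass-the-whole-environment-through idea, here is a C analogue using the POSIX environ array (not part of the patch; it only prints the inherited entries):

#include <stdio.h>

extern char **environ;        /* POSIX: NULL-terminated array of "NAME=value" strings */

int main(void) {
    /* Walk the inherited environment and hand each entry on unchanged, the way
     * the lit.cfg loop copies os.environ items into config.environment. */
    for (char **e = environ; *e != NULL; ++e)
        printf("%s\n", *e);
    return 0;
}
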