| author | Jonathan Peyton <jonathan.l.peyton@intel.com> | 2017-07-18 18:50:13 +0000 |
|---|---|---|
| committer | Jonathan Peyton <jonathan.l.peyton@intel.com> | 2017-07-18 18:50:13 +0000 |
| commit | 93e17cfe6c2e8e20f99d45241804732a78737baf (patch) | |
| tree | 674194c7c383f1f20131be6cfe0224849d67a03c /openmp/runtime/src | |
| parent | d01e956d38b60508cef4851a6668838261e9b700 (diff) | |
Add recursive task scheduling strategy to taskloop implementation
Summary:
The taskloop implementation is extended with a recursive task scheduling
strategy. The environment variable KMP_TASKLOOP_MIN_TASKS is added as a manual
threshold at which the runtime switches from recursive to linear task
scheduling.
Details:
* The loop parameter calculations are moved from __kmp_taskloop_linear up one
  level;
* The initial calculation is done in __kmpc_taskloop; further range splitting
  is done in __kmp_taskloop_recur;
* A threshold is added to switch from recursive to linear task scheduling;
* One half of the split range is scheduled as an internal task that simply
  passes the sub-range parameters to the stealing thread, which continues
  recursive scheduling (while the number of tasks is still large enough); the
  other half is processed recursively (see the sketch after the commit
  message);
* The internal task duplication routine is fixed to assign the parent task;
  this was not needed when all tasks were scheduled by the same thread, but is
  needed now.
Patch by Andrey Churbanov
Differential Revision: https://reviews.llvm.org/D35273
llvm-svn: 308338
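In short, the scheduler halves the remaining task count, packages one half as a stealable internal task, and recurses on the other half until the threshold is reached. Below is a minimal illustrative sketch of that control flow; the names are hypothetical, and the real implementation (in kmp_tasking.cpp below) additionally carries the sub-range bounds, grainsize, and extra iterations with each half:

```cpp
#include <cstdint>
#include <cstdio>

// Illustrative-only stand-in for linear task creation.
static void schedule_linearly(uint64_t num_tasks) {
  printf("create %llu tasks one by one\n", (unsigned long long)num_tasks);
}

// Illustrative-only sketch of the recursive strategy: below the threshold,
// fall back to linear creation; above it, enqueue one half as an internal
// task (whichever thread steals it resumes this same splitting logic) and
// recurse on the other half.
static void schedule_taskloop(uint64_t num_tasks, uint64_t min_tasks) {
  if (num_tasks <= min_tasks) {
    schedule_linearly(num_tasks);
    return;
  }
  uint64_t half = num_tasks / 2;
  printf("enqueue internal task covering %llu tasks\n",
         (unsigned long long)(num_tasks - half));
  schedule_taskloop(half, min_tasks); // current thread keeps splitting
}

int main() {
  schedule_taskloop(100, 12); // e.g. 100 tasks with a threshold of 12
  return 0;
}
```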
Diffstat (limited to 'openmp/runtime/src')
| -rw-r--r-- | openmp/runtime/src/kmp.h | 6 |
| -rw-r--r-- | openmp/runtime/src/kmp_global.cpp | 1 |
| -rw-r--r-- | openmp/runtime/src/kmp_settings.cpp | 16 |
| -rw-r--r-- | openmp/runtime/src/kmp_tasking.cpp | 380 |
4 files changed, 308 insertions, 95 deletions
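For context, here is a minimal user-side taskloop this scheduling path applies to, assuming an OpenMP 4.5 compiler (the program and compile line are illustrative, not part of the patch):

```cpp
// Compile with, e.g., clang++ -fopenmp taskloop_demo.cpp
// Run with KMP_TASKLOOP_MIN_TASKS=64 ./a.out to stop recursive splitting
// once a sub-range holds 64 tasks or fewer; leaving the variable unset
// (or 0) selects the runtime's default heuristic.
#include <cstdio>

int main() {
  static int a[100000];
  // grainsize(100) over 100000 iterations => the runtime forms 1000 tasks
#pragma omp parallel
#pragma omp single
#pragma omp taskloop grainsize(100)
  for (int i = 0; i < 100000; ++i)
    a[i] = 2 * i;
  printf("a[99999] = %d\n", a[99999]);
  return 0;
}
```

With the default KMP_TASKLOOP_MIN_TASKS=0, the patch picks min(nproc * 10, INITIAL_TASK_DEQUE_SIZE) as the threshold, as seen in __kmpc_taskloop below.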
diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h
index 4bb9da92ec1..e7ccc3031fb 100644
--- a/openmp/runtime/src/kmp.h
+++ b/openmp/runtime/src/kmp.h
@@ -2050,8 +2050,10 @@ extern kmp_int32 __kmp_default_device; // Set via OMP_DEFAULT_DEVICE if
 // specified, defaults to 0 otherwise
 #endif
 #if OMP_45_ENABLED
-extern kmp_int32 __kmp_max_task_priority; // Set via OMP_MAX_TASK_PRIORITY if
-// specified, defaults to 0 otherwise
+// Set via OMP_MAX_TASK_PRIORITY if specified, defaults to 0 otherwise
+extern kmp_int32 __kmp_max_task_priority;
+// Set via KMP_TASKLOOP_MIN_TASKS if specified, defaults to 0 otherwise
+extern kmp_uint64 __kmp_taskloop_min_tasks;
 #endif
 
 /* NOTE: kmp_taskdata_t and kmp_task_t structures allocated in single block with
diff --git a/openmp/runtime/src/kmp_global.cpp b/openmp/runtime/src/kmp_global.cpp
index 93a7f65d506..55e0acb9065 100644
--- a/openmp/runtime/src/kmp_global.cpp
+++ b/openmp/runtime/src/kmp_global.cpp
@@ -301,6 +301,7 @@ kmp_int32 __kmp_default_device = 0;
 kmp_tasking_mode_t __kmp_tasking_mode = tskm_task_teams;
 #if OMP_45_ENABLED
 kmp_int32 __kmp_max_task_priority = 0;
+kmp_uint64 __kmp_taskloop_min_tasks = 0;
 #endif
 
 /* This check ensures that the compiler is passing the correct data type for the
diff --git a/openmp/runtime/src/kmp_settings.cpp b/openmp/runtime/src/kmp_settings.cpp
index 2360ed86186..00d9ba51eb4 100644
--- a/openmp/runtime/src/kmp_settings.cpp
+++ b/openmp/runtime/src/kmp_settings.cpp
@@ -1167,6 +1167,20 @@ static void __kmp_stg_print_max_task_priority(kmp_str_buf_t *buffer,
                                               char const *name, void *data) {
   __kmp_stg_print_int(buffer, name, __kmp_max_task_priority);
 } // __kmp_stg_print_max_task_priority
+
+// KMP_TASKLOOP_MIN_TASKS
+// taskloop threshold to switch from recursive to linear tasks creation
+static void __kmp_stg_parse_taskloop_min_tasks(char const *name,
+                                               char const *value, void *data) {
+  int tmp;
+  __kmp_stg_parse_int(name, value, 0, INT_MAX, &tmp);
+  __kmp_taskloop_min_tasks = tmp;
+} // __kmp_stg_parse_taskloop_min_tasks
+
+static void __kmp_stg_print_taskloop_min_tasks(kmp_str_buf_t *buffer,
+                                               char const *name, void *data) {
+  __kmp_stg_print_int(buffer, name, __kmp_taskloop_min_tasks);
+} // __kmp_stg_print_taskloop_min_tasks
 #endif // OMP_45_ENABLED
 
 // -----------------------------------------------------------------------------
@@ -4370,6 +4384,8 @@ static kmp_setting_t __kmp_stg_table[] = {
 #if OMP_45_ENABLED
     {"OMP_MAX_TASK_PRIORITY", __kmp_stg_parse_max_task_priority,
      __kmp_stg_print_max_task_priority, NULL, 0, 0},
+    {"KMP_TASKLOOP_MIN_TASKS", __kmp_stg_parse_taskloop_min_tasks,
+     __kmp_stg_print_taskloop_min_tasks, NULL, 0, 0},
 #endif
     {"OMP_THREAD_LIMIT", __kmp_stg_parse_all_threads,
      __kmp_stg_print_all_threads, NULL, 0, 0},
diff --git a/openmp/runtime/src/kmp_tasking.cpp b/openmp/runtime/src/kmp_tasking.cpp
index 8f1eb02499c..27207b35c37 100644
--- a/openmp/runtime/src/kmp_tasking.cpp
+++ b/openmp/runtime/src/kmp_tasking.cpp
@@ -3237,6 +3237,7 @@ kmp_task_t *__kmp_task_dup_alloc(kmp_info_t *thread, kmp_task_t *task_src) {
                 0);
   }
   taskdata->td_alloc_thread = thread;
+  taskdata->td_parent = parent_task;
   taskdata->td_taskgroup =
       parent_task
           ->td_taskgroup; // task inherits the taskgroup from the parent task
@@ -3263,32 +3264,37 @@ kmp_task_t *__kmp_task_dup_alloc(kmp_info_t *thread, kmp_task_t *task_src) {
   return task;
 }
 
-// Routine optionally generated by th ecompiler for setting the lastprivate flag
+// Routine optionally generated by the compiler for setting the lastprivate flag
 // and calling needed constructors for private/firstprivate objects
 // (used to form taskloop tasks from pattern task)
+// Parameters: dest task, src task, lastprivate flag.
 typedef void (*p_task_dup_t)(kmp_task_t *, kmp_task_t *, kmp_int32);
 
 // __kmp_taskloop_linear: Start tasks of the taskloop linearly
 //
 // loc        Source location information
 // gtid       Global thread ID
-// task       Task with whole loop iteration range
-// lb         Pointer to loop lower bound
-// ub         Pointer to loop upper bound
+// task       Pattern task, exposes the loop iteration range
+// lb         Pointer to loop lower bound in task structure
+// ub         Pointer to loop upper bound in task structure
 // st         Loop stride
-// sched      Schedule specified 0/1/2 for none/grainsize/num_tasks
-// grainsize  Schedule value if specified
+// ub_glob    Global upper bound (used for lastprivate check)
+// num_tasks  Number of tasks to execute
+// grainsize  Number of loop iterations per task
+// extras     Number of chunks with grainsize+1 iterations
+// tc         Iterations count
 // task_dup   Tasks duplication routine
 void __kmp_taskloop_linear(ident_t *loc, int gtid, kmp_task_t *task,
                            kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
-                           int sched, kmp_uint64 grainsize, void *task_dup) {
+                           kmp_uint64 ub_glob, kmp_uint64 num_tasks,
+                           kmp_uint64 grainsize, kmp_uint64 extras,
+                           kmp_uint64 tc, void *task_dup) {
   KMP_COUNT_BLOCK(OMP_TASKLOOP);
   KMP_TIME_PARTITIONED_BLOCK(OMP_taskloop_scheduling);
   p_task_dup_t ptask_dup = (p_task_dup_t)task_dup;
-  kmp_uint64 tc;
   kmp_uint64 lower = *lb; // compiler provides global bounds here
   kmp_uint64 upper = *ub;
-  kmp_uint64 i, num_tasks = 0, extras = 0;
+  kmp_uint64 i;
   kmp_info_t *thread = __kmp_threads[gtid];
   kmp_taskdata_t *current_task = thread->th.th_current_task;
   kmp_task_t *next_task;
@@ -3298,6 +3304,254 @@ void __kmp_taskloop_linear(ident_t *loc, int gtid, kmp_task_t *task,
   size_t upper_offset =
       (char *)ub - (char *)task; // remember offset of ub in the task structure
 
+  KMP_DEBUG_ASSERT(tc == num_tasks * grainsize + extras);
+  KMP_DEBUG_ASSERT(num_tasks > extras);
+  KMP_DEBUG_ASSERT(num_tasks > 0);
+  KA_TRACE(20, ("__kmp_taskloop_linear: T#%d: %lld tasks, grainsize %lld, "
+                "extras %lld, i=%lld,%lld(%d)%lld, dup %p\n", gtid, num_tasks,
+                grainsize, extras, lower, upper, ub_glob, st, task_dup));
+
+  // Launch num_tasks tasks, assign grainsize iterations each task
+  for (i = 0; i < num_tasks; ++i) {
+    kmp_uint64 chunk_minus_1;
+    if (extras == 0) {
+      chunk_minus_1 = grainsize - 1;
+    } else {
+      chunk_minus_1 = grainsize;
+      --extras; // first extras iterations get bigger chunk (grainsize+1)
+    }
+    upper = lower + st * chunk_minus_1;
+    if (i == num_tasks - 1) {
+      // schedule the last task, set lastprivate flag if needed
+      if (st == 1) { // most common case
+        KMP_DEBUG_ASSERT(upper == *ub);
+        if (upper == ub_glob)
+          lastpriv = 1;
+      } else if (st > 0) { // positive loop stride
+        KMP_DEBUG_ASSERT((kmp_uint64)st > *ub - upper);
+        if ((kmp_uint64)st > ub_glob - upper)
+          lastpriv = 1;
+      } else { // negative loop stride
+        KMP_DEBUG_ASSERT(upper + st < *ub);
+        if (upper - ub_glob < (kmp_uint64)(-st))
+          lastpriv = 1;
+      }
+    }
+    next_task = __kmp_task_dup_alloc(thread, task); // allocate new task
+    // adjust task-specific bounds
+    *(kmp_uint64 *)((char *)next_task + lower_offset) = lower;
+    *(kmp_uint64 *)((char *)next_task + upper_offset) = upper;
+    if (ptask_dup != NULL) // set lastprivate flag, construct firstprivates, etc.
+      ptask_dup(next_task, task, lastpriv);
+    KA_TRACE(40, ("__kmp_taskloop_linear: T#%d; task %p: lower %lld, "
+                  "upper %lld (offsets %p %p)\n",
+                  gtid, next_task, lower, upper, lower_offset, upper_offset));
+    __kmp_omp_task(gtid, next_task, true); // schedule new task
+    lower = upper + st; // adjust lower bound for the next iteration
+  }
+  // free the pattern task and exit
+  __kmp_task_start(gtid, task, current_task); // make internal bookkeeping
+  // do not execute the pattern task, just do internal bookkeeping
+  __kmp_task_finish(gtid, task, current_task);
+}
+
+// Structure to keep taskloop parameters for auxiliary task
+// kept in the shareds of the task structure.
+typedef struct __taskloop_params {
+  kmp_task_t *task;
+  kmp_uint64 *lb;
+  kmp_uint64 *ub;
+  void *task_dup;
+  kmp_int64 st;
+  kmp_uint64 ub_glob;
+  kmp_uint64 num_tasks;
+  kmp_uint64 grainsize;
+  kmp_uint64 extras;
+  kmp_uint64 tc;
+  kmp_uint64 num_t_min;
+} __taskloop_params_t;
+
+void __kmp_taskloop_recur(ident_t *, int, kmp_task_t *, kmp_uint64 *,
+                          kmp_uint64 *, kmp_int64, kmp_uint64, kmp_uint64,
+                          kmp_uint64, kmp_uint64, kmp_uint64, kmp_uint64,
+                          void *);
+
+// Execute part of the taskloop submitted as a task.
+int __kmp_taskloop_task(int gtid, void *ptask) {
+  __taskloop_params_t *p =
+      (__taskloop_params_t *)((kmp_task_t *)ptask)->shareds;
+  kmp_task_t *task = p->task;
+  kmp_uint64 *lb = p->lb;
+  kmp_uint64 *ub = p->ub;
+  void *task_dup = p->task_dup;
+  // p_task_dup_t ptask_dup = (p_task_dup_t)task_dup;
+  kmp_int64 st = p->st;
+  kmp_uint64 ub_glob = p->ub_glob;
+  kmp_uint64 num_tasks = p->num_tasks;
+  kmp_uint64 grainsize = p->grainsize;
+  kmp_uint64 extras = p->extras;
+  kmp_uint64 tc = p->tc;
+  kmp_uint64 num_t_min = p->num_t_min;
+#if KMP_DEBUG
+  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
+  KMP_DEBUG_ASSERT(task != NULL);
+  KA_TRACE(20, ("__kmp_taskloop_task: T#%d, task %p: %lld tasks, grainsize"
+                " %lld, extras %lld, i=%lld,%lld(%d), dup %p\n", gtid, taskdata,
+                num_tasks, grainsize, extras, *lb, *ub, st, task_dup));
+#endif
+  KMP_DEBUG_ASSERT(num_tasks * 2 + 1 > num_t_min);
+  if (num_tasks > num_t_min)
+    __kmp_taskloop_recur(NULL, gtid, task, lb, ub, st, ub_glob, num_tasks,
+                         grainsize, extras, tc, num_t_min, task_dup);
+  else
+    __kmp_taskloop_linear(NULL, gtid, task, lb, ub, st, ub_glob, num_tasks,
+                          grainsize, extras, tc, task_dup);
+
+  KA_TRACE(40, ("__kmp_taskloop_task(exit): T#%d\n", gtid));
+  return 0;
+}
+
+// Schedule part of the taskloop as a task,
+// execute the rest of the taskloop.
+//
+// loc        Source location information
+// gtid       Global thread ID
+// task       Pattern task, exposes the loop iteration range
+// lb         Pointer to loop lower bound in task structure
+// ub         Pointer to loop upper bound in task structure
+// st         Loop stride
+// ub_glob    Global upper bound (used for lastprivate check)
+// num_tasks  Number of tasks to execute
+// grainsize  Number of loop iterations per task
+// extras     Number of chunks with grainsize+1 iterations
+// tc         Iterations count
+// num_t_min  Threshold to launch tasks recursively
+// task_dup   Tasks duplication routine
+void __kmp_taskloop_recur(ident_t *loc, int gtid, kmp_task_t *task,
+                          kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
+                          kmp_uint64 ub_glob, kmp_uint64 num_tasks,
+                          kmp_uint64 grainsize, kmp_uint64 extras,
+                          kmp_uint64 tc, kmp_uint64 num_t_min, void *task_dup) {
+#if KMP_DEBUG
+  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
+  KMP_DEBUG_ASSERT(task != NULL);
+  KMP_DEBUG_ASSERT(num_tasks > num_t_min);
+  KA_TRACE(20, ("__kmp_taskloop_recur: T#%d, task %p: %lld tasks, grainsize"
+                " %lld, extras %lld, i=%lld,%lld(%d), dup %p\n", gtid, taskdata,
+                num_tasks, grainsize, extras, *lb, *ub, st, task_dup));
+#endif
+  p_task_dup_t ptask_dup = (p_task_dup_t)task_dup;
+  kmp_uint64 lower = *lb;
+  kmp_uint64 upper = *ub;
+  kmp_info_t *thread = __kmp_threads[gtid];
+  // kmp_taskdata_t *current_task = thread->th.th_current_task;
+  kmp_task_t *next_task;
+  kmp_int32 lastpriv = 0;
+  size_t lower_offset =
+      (char *)lb - (char *)task; // remember offset of lb in the task structure
+  size_t upper_offset =
+      (char *)ub - (char *)task; // remember offset of ub in the task structure
+
+  KMP_DEBUG_ASSERT(tc == num_tasks * grainsize + extras);
+  KMP_DEBUG_ASSERT(num_tasks > extras);
+  KMP_DEBUG_ASSERT(num_tasks > 0);
+
+  // split the loop in two halves
+  kmp_uint64 lb1, ub0, tc0, tc1, ext0, ext1;
+  kmp_uint64 gr_size0 = grainsize;
+  kmp_uint64 n_tsk0 = num_tasks >> 1; // num_tasks/2 to execute
+  kmp_uint64 n_tsk1 = num_tasks - n_tsk0; // to schedule as a task
+  if (n_tsk0 <= extras) {
+    gr_size0++; // integrate extras into grainsize
+    ext0 = 0; // no extra iters in 1st half
+    ext1 = extras - n_tsk0; // remaining extras
+    tc0 = gr_size0 * n_tsk0;
+    tc1 = tc - tc0;
+  } else { // n_tsk0 > extras
+    ext1 = 0; // no extra iters in 2nd half
+    ext0 = extras;
+    tc1 = grainsize * n_tsk1;
+    tc0 = tc - tc1;
+  }
+  ub0 = lower + st * (tc0 - 1);
+  lb1 = ub0 + st;
+
+  // create pattern task for 2nd half of the loop
+  next_task = __kmp_task_dup_alloc(thread, task); // duplicate the task
+  // adjust lower bound (upper bound is not changed) for the 2nd half
+  *(kmp_uint64 *)((char *)next_task + lower_offset) = lb1;
+  if (ptask_dup != NULL) // construct firstprivates, etc.
+    ptask_dup(next_task, task, 0);
+  *ub = ub0; // adjust upper bound for the 1st half
+
+  // create auxiliary task for 2nd half of the loop
+  kmp_task_t *new_task =
+      __kmpc_omp_task_alloc(loc, gtid, 1, 3 * sizeof(void *),
+                            sizeof(__taskloop_params_t), &__kmp_taskloop_task);
+  __taskloop_params_t *p = (__taskloop_params_t *)new_task->shareds;
+  p->task = next_task;
+  p->lb = (kmp_uint64 *)((char *)next_task + lower_offset);
+  p->ub = (kmp_uint64 *)((char *)next_task + upper_offset);
+  p->task_dup = task_dup;
+  p->st = st;
+  p->ub_glob = ub_glob;
+  p->num_tasks = n_tsk1;
+  p->grainsize = grainsize;
+  p->extras = ext1;
+  p->tc = tc1;
+  p->num_t_min = num_t_min;
+  __kmp_omp_task(gtid, new_task, true); // schedule new task
+
+  // execute the 1st half of current subrange
+  if (n_tsk0 > num_t_min)
+    __kmp_taskloop_recur(loc, gtid, task, lb, ub, st, ub_glob, n_tsk0,
+                         gr_size0, ext0, tc0, num_t_min, task_dup);
+  else
+    __kmp_taskloop_linear(loc, gtid, task, lb, ub, st, ub_glob, n_tsk0,
+                          gr_size0, ext0, tc0, task_dup);
+
+  KA_TRACE(40, ("__kmpc_taskloop_recur(exit): T#%d\n", gtid));
+}
+
+/*!
+@ingroup TASKING
+@param loc       Source location information
+@param gtid      Global thread ID
+@param task      Task structure
+@param if_val    Value of the if clause
+@param lb        Pointer to loop lower bound in task structure
+@param ub        Pointer to loop upper bound in task structure
+@param st        Loop stride
+@param nogroup   Flag, 1 if nogroup clause specified, 0 otherwise
+@param sched     Schedule specified 0/1/2 for none/grainsize/num_tasks
+@param grainsize Schedule value if specified
+@param task_dup  Tasks duplication routine
+
+Execute the taskloop construct.
+*/
+void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
+                     kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup,
+                     int sched, kmp_uint64 grainsize, void *task_dup) {
+  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
+  KMP_DEBUG_ASSERT(task != NULL);
+
+  KA_TRACE(20, ("__kmpc_taskloop: T#%d, task %p, lb %lld, ub %lld, st %lld, "
+                "grain %llu(%d), dup %p\n", gtid, taskdata, *lb, *ub, st,
+                grainsize, sched, task_dup));
+
+  if (nogroup == 0)
+    __kmpc_taskgroup(loc, gtid);
+
+  // =========================================================================
+  // calculate loop parameters
+  kmp_uint64 tc;
+  kmp_uint64 lower = *lb; // compiler provides global bounds here
+  kmp_uint64 upper = *ub;
+  kmp_uint64 ub_glob = upper; // global upper used to calc lastprivate flag
+  kmp_uint64 num_tasks = 0, extras = 0;
+  kmp_uint64 num_tasks_min = __kmp_taskloop_min_tasks;
+  kmp_info_t *thread = __kmp_threads[gtid];
+  kmp_taskdata_t *current_task = thread->th.th_current_task;
+  // compute trip count
   if (st == 1) { // most common case
     tc = upper - lower + 1;
@@ -3314,6 +3568,10 @@ void __kmp_taskloop_linear(ident_t *loc, int gtid, kmp_task_t *task,
     __kmp_task_finish(gtid, task, current_task);
     return;
   }
+  if (num_tasks_min == 0)
+    // TODO: can we choose better default heuristic?
+    num_tasks_min = KMP_MIN(thread->th.th_team_nproc * 10,
+                            INITIAL_TASK_DEQUE_SIZE);
 
   // compute num_tasks/grainsize based on the input provided
   switch (sched) {
@@ -3338,9 +3596,8 @@ void __kmp_taskloop_linear(ident_t *loc, int gtid, kmp_task_t *task,
       extras = 0;
     } else {
       num_tasks = tc / grainsize;
-      grainsize =
-          tc /
-          num_tasks; // adjust grainsize for balanced distribution of iterations
+      // adjust grainsize for balanced distribution of iterations
+      grainsize = tc / num_tasks;
       extras = tc % num_tasks;
     }
     break;
@@ -3350,95 +3607,32 @@
   KMP_DEBUG_ASSERT(tc == num_tasks * grainsize + extras);
   KMP_DEBUG_ASSERT(num_tasks > extras);
   KMP_DEBUG_ASSERT(num_tasks > 0);
-  KA_TRACE(20, ("__kmpc_taskloop: T#%d will launch: num_tasks %lld, grainsize "
-                "%lld, extras %lld\n",
-                gtid, num_tasks, grainsize, extras));
-
-  // Main loop, launch num_tasks tasks, assign grainsize iterations each task
-  for (i = 0; i < num_tasks; ++i) {
-    kmp_uint64 chunk_minus_1;
-    if (extras == 0) {
-      chunk_minus_1 = grainsize - 1;
-    } else {
-      chunk_minus_1 = grainsize;
-      --extras; // first extras iterations get bigger chunk (grainsize+1)
-    }
-    upper = lower + st * chunk_minus_1;
-    if (i == num_tasks - 1) {
-      // schedule the last task, set lastprivate flag
-      lastpriv = 1;
-#if KMP_DEBUG
-      if (st == 1)
-        KMP_DEBUG_ASSERT(upper == *ub);
-      else if (st > 0)
-        KMP_DEBUG_ASSERT(upper + st > *ub);
-      else
-        KMP_DEBUG_ASSERT(upper + st < *ub);
-#endif
-    }
-    next_task = __kmp_task_dup_alloc(thread, task); // allocate new task
-    *(kmp_uint64 *)((char *)next_task + lower_offset) =
-        lower; // adjust task-specific bounds
-    *(kmp_uint64 *)((char *)next_task + upper_offset) = upper;
-    if (ptask_dup != NULL)
-      ptask_dup(next_task, task,
-                lastpriv); // set lastprivate flag, construct fistprivates, etc.
-    KA_TRACE(20, ("__kmpc_taskloop: T#%d schedule task %p: lower %lld, upper "
-                  "%lld (offsets %p %p)\n",
-                  gtid, next_task, lower, upper, lower_offset, upper_offset));
-    __kmp_omp_task(gtid, next_task, true); // schedule new task
-    lower = upper + st; // adjust lower bound for the next iteration
-  }
-  // free the pattern task and exit
-  __kmp_task_start(gtid, task, current_task);
-  // do not execute the pattern task, just do bookkeeping
-  __kmp_task_finish(gtid, task, current_task);
-}
-
-/*!
-@ingroup TASKING
-@param loc Source location information
-@param gtid Global thread ID
-@param task Task structure
-@param if_val Value of the if clause
-@param lb Pointer to loop lower bound
-@param ub Pointer to loop upper bound
-@param st Loop stride
-@param nogroup Flag, 1 if nogroup clause specified, 0 otherwise
-@param sched Schedule specified 0/1/2 for none/grainsize/num_tasks
-@param grainsize Schedule value if specified
-@param task_dup Tasks duplication routine
-
-Execute the taskloop construct.
-*/
-void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
-                     kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup,
-                     int sched, kmp_uint64 grainsize, void *task_dup) {
-  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
-  KMP_DEBUG_ASSERT(task != NULL);
-
-  KA_TRACE(10, ("__kmpc_taskloop(enter): T#%d, pattern task %p, lb %lld ub "
-                "%lld st %lld, grain %llu(%d)\n",
-                gtid, taskdata, *lb, *ub, st, grainsize, sched));
+  // =========================================================================
 
   // check if clause value first
   if (if_val == 0) { // if(0) specified, mark task as serial
     taskdata->td_flags.task_serial = 1;
     taskdata->td_flags.tiedness = TASK_TIED; // AC: serial task cannot be untied
-  }
-  if (nogroup == 0) {
-    __kmpc_taskgroup(loc, gtid);
-  }
-
-  if (1 /* AC: use some heuristic here to choose task scheduling method */) {
-    __kmp_taskloop_linear(loc, gtid, task, lb, ub, st, sched, grainsize,
-                          task_dup);
+    // always start serial tasks linearly
+    __kmp_taskloop_linear(loc, gtid, task, lb, ub, st, ub_glob, num_tasks,
+                          grainsize, extras, tc, task_dup);
+  } else if (num_tasks > num_tasks_min) {
+    KA_TRACE(20, ("__kmpc_taskloop: T#%d, go recursive: tc %llu, #tasks %llu"
+                  "(%lld), grain %llu, extras %llu\n", gtid, tc, num_tasks,
+                  num_tasks_min, grainsize, extras));
+    __kmp_taskloop_recur(loc, gtid, task, lb, ub, st, ub_glob, num_tasks,
+                         grainsize, extras, tc, num_tasks_min, task_dup);
+  } else {
+    KA_TRACE(20, ("__kmpc_taskloop: T#%d, go linear: tc %llu, #tasks %llu"
+                  "(%lld), grain %llu, extras %llu\n", gtid, tc, num_tasks,
+                  num_tasks_min, grainsize, extras));
+    __kmp_taskloop_linear(loc, gtid, task, lb, ub, st, ub_glob, num_tasks,
+                          grainsize, extras, tc, task_dup);
   }
 
-  if (nogroup == 0) {
+  if (nogroup == 0)
     __kmpc_end_taskgroup(loc, gtid);
-  }
-  KA_TRACE(10, ("__kmpc_taskloop(exit): T#%d\n", gtid));
+  KA_TRACE(20, ("__kmpc_taskloop(exit): T#%d\n", gtid));
 }
 
 #endif
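To make the splitting arithmetic in __kmp_taskloop_recur above concrete, here is a standalone re-run of the tc0/tc1/ext0/ext1 computation with made-up values (7 tasks of grainsize 10 plus 3 extra iterations):

```cpp
#include <cassert>
#include <cstdint>
#include <cstdio>

// Standalone re-run of the split arithmetic from __kmp_taskloop_recur: the
// first `extras` tasks cover grainsize+1 iterations, and the split must keep
// that accounting consistent on both halves.
int main() {
  uint64_t num_tasks = 7, grainsize = 10, extras = 3;
  uint64_t tc = num_tasks * grainsize + extras; // 73 iterations in total
  uint64_t n_tsk0 = num_tasks >> 1;             // 3 tasks: executed directly
  uint64_t n_tsk1 = num_tasks - n_tsk0;         // 4 tasks: given to the aux task
  uint64_t gr_size0 = grainsize, tc0, tc1, ext0, ext1;
  if (n_tsk0 <= extras) {     // every 1st-half task absorbs one extra iteration
    gr_size0++;               // 11 iterations per 1st-half task
    ext0 = 0;
    ext1 = extras - n_tsk0;   // extras left for the 2nd half (here: 0)
    tc0 = gr_size0 * n_tsk0;  // 33 iterations in the 1st half
    tc1 = tc - tc0;           // 40 iterations in the 2nd half
  } else {                    // extras fit entirely in the 1st half
    ext1 = 0;
    ext0 = extras;
    tc1 = grainsize * n_tsk1;
    tc0 = tc - tc1;
  }
  assert(tc0 + tc1 == tc);
  assert(tc0 == n_tsk0 * gr_size0 + ext0);
  assert(tc1 == n_tsk1 * grainsize + ext1);
  printf("1st half: %llu tasks / %llu iters, 2nd half: %llu tasks / %llu iters\n",
         (unsigned long long)n_tsk0, (unsigned long long)tc0,
         (unsigned long long)n_tsk1, (unsigned long long)tc1);
  return 0;
}
```

Both branches preserve the invariant checked by the KMP_DEBUG_ASSERTs in the patch: each half's trip count equals its task count times its grainsize plus its extras.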