Diffstat (limited to 'openmp/runtime/src/kmp_tasking.cpp')
-rw-r--r--  openmp/runtime/src/kmp_tasking.cpp  |  156
1 file changed, 138 insertions(+), 18 deletions(-)
diff --git a/openmp/runtime/src/kmp_tasking.cpp b/openmp/runtime/src/kmp_tasking.cpp
index 0eac1130d31..5e1f5529edc 100644
--- a/openmp/runtime/src/kmp_tasking.cpp
+++ b/openmp/runtime/src/kmp_tasking.cpp
@@ -3559,6 +3559,112 @@ kmp_task_t *__kmp_task_dup_alloc(kmp_info_t *thread, kmp_task_t *task_src) {
// Parameters: dest task, src task, lastprivate flag.
typedef void (*p_task_dup_t)(kmp_task_t *, kmp_task_t *, kmp_int32);
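+// GOMP passes taskloop bounds as C longs, so only 4- and 8-byte longs are
+// supported: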
+KMP_BUILD_ASSERT(sizeof(long) == 4 || sizeof(long) == 8);
+
+// Class to encapsulate manipulating loop bounds in a taskloop task.
+// This abstracts away the Intel vs GOMP taskloop interface for setting/getting
+// the loop bound variables.
+class kmp_taskloop_bounds_t {
+ kmp_task_t *task;
+ const kmp_taskdata_t *taskdata;
+ size_t lower_offset;
+ size_t upper_offset;
+
+public:
+ kmp_taskloop_bounds_t(kmp_task_t *_task, kmp_uint64 *lb, kmp_uint64 *ub)
+ : task(_task), taskdata(KMP_TASK_TO_TASKDATA(task)),
+ lower_offset((char *)lb - (char *)task),
+ upper_offset((char *)ub - (char *)task) {
+ KMP_DEBUG_ASSERT((char *)lb > (char *)_task);
+ KMP_DEBUG_ASSERT((char *)ub > (char *)_task);
+ }
+ kmp_taskloop_bounds_t(kmp_task_t *_task, const kmp_taskloop_bounds_t &bounds)
+ : task(_task), taskdata(KMP_TASK_TO_TASKDATA(_task)),
+ lower_offset(bounds.lower_offset), upper_offset(bounds.upper_offset) {}
+ size_t get_lower_offset() const { return lower_offset; }
+ size_t get_upper_offset() const { return upper_offset; }
+ kmp_uint64 get_lb() const {
+ kmp_int64 retval;
+#if defined(KMP_GOMP_COMPAT)
+ // Intel task just returns the lower bound normally
+ if (!taskdata->td_flags.native) {
+ retval = *(kmp_int64 *)((char *)task + lower_offset);
+ } else {
+ // GOMP task has to take into account the sizeof(long)
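+      // (GOMP stores the bounds at the start of shareds: lb first, then ub)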
+ if (taskdata->td_size_loop_bounds == 4) {
+ kmp_int32 *lb = RCAST(kmp_int32 *, task->shareds);
+ retval = (kmp_int64)*lb;
+ } else {
+ kmp_int64 *lb = RCAST(kmp_int64 *, task->shareds);
+ retval = (kmp_int64)*lb;
+ }
+ }
+#else
+ retval = *(kmp_int64 *)((char *)task + lower_offset);
+#endif // defined(KMP_GOMP_COMPAT)
+ return retval;
+ }
+ kmp_uint64 get_ub() const {
+ kmp_int64 retval;
+#if defined(KMP_GOMP_COMPAT)
+ // Intel task just returns the upper bound normally
+ if (!taskdata->td_flags.native) {
+ retval = *(kmp_int64 *)((char *)task + upper_offset);
+ } else {
+ // GOMP task has to take into account the sizeof(long)
+ if (taskdata->td_size_loop_bounds == 4) {
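+        // the '+1' skips the lb slot to reach ub in the shareds block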
+ kmp_int32 *ub = RCAST(kmp_int32 *, task->shareds) + 1;
+ retval = (kmp_int64)*ub;
+ } else {
+ kmp_int64 *ub = RCAST(kmp_int64 *, task->shareds) + 1;
+ retval = (kmp_int64)*ub;
+ }
+ }
+#else
+ retval = *(kmp_int64 *)((char *)task + upper_offset);
+#endif // defined(KMP_GOMP_COMPAT)
+ return retval;
+ }
+ void set_lb(kmp_uint64 lb) {
+#if defined(KMP_GOMP_COMPAT)
+ // Intel task just sets the lower bound normally
+ if (!taskdata->td_flags.native) {
+ *(kmp_uint64 *)((char *)task + lower_offset) = lb;
+ } else {
+ // GOMP task has to take into account the sizeof(long)
+ if (taskdata->td_size_loop_bounds == 4) {
+ kmp_uint32 *lower = RCAST(kmp_uint32 *, task->shareds);
+ *lower = (kmp_uint32)lb;
+ } else {
+ kmp_uint64 *lower = RCAST(kmp_uint64 *, task->shareds);
+ *lower = (kmp_uint64)lb;
+ }
+ }
+#else
+ *(kmp_uint64 *)((char *)task + lower_offset) = lb;
+#endif // defined(KMP_GOMP_COMPAT)
+ }
+ void set_ub(kmp_uint64 ub) {
+#if defined(KMP_GOMP_COMPAT)
+ // Intel task just sets the upper bound normally
+ if (!taskdata->td_flags.native) {
+ *(kmp_uint64 *)((char *)task + upper_offset) = ub;
+ } else {
+ // GOMP task has to take into account the sizeof(long)
+ if (taskdata->td_size_loop_bounds == 4) {
+ kmp_uint32 *upper = RCAST(kmp_uint32 *, task->shareds) + 1;
+ *upper = (kmp_uint32)ub;
+ } else {
+ kmp_uint64 *upper = RCAST(kmp_uint64 *, task->shareds) + 1;
+ *upper = (kmp_uint64)ub;
+ }
+ }
+#else
+ *(kmp_uint64 *)((char *)task + upper_offset) = ub;
+#endif // defined(KMP_GOMP_COMPAT)
+ }
+};
+
// __kmp_taskloop_linear: Start tasks of the taskloop linearly
//
// loc Source location information
@@ -3581,17 +3687,15 @@ void __kmp_taskloop_linear(ident_t *loc, int gtid, kmp_task_t *task,
KMP_COUNT_BLOCK(OMP_TASKLOOP);
KMP_TIME_PARTITIONED_BLOCK(OMP_taskloop_scheduling);
p_task_dup_t ptask_dup = (p_task_dup_t)task_dup;
- kmp_uint64 lower = *lb; // compiler provides global bounds here
- kmp_uint64 upper = *ub;
+ // compiler provides global bounds here
+ kmp_taskloop_bounds_t task_bounds(task, lb, ub);
+ kmp_uint64 lower = task_bounds.get_lb();
+ kmp_uint64 upper = task_bounds.get_ub();
kmp_uint64 i;
kmp_info_t *thread = __kmp_threads[gtid];
kmp_taskdata_t *current_task = thread->th.th_current_task;
kmp_task_t *next_task;
kmp_int32 lastpriv = 0;
- size_t lower_offset =
- (char *)lb - (char *)task; // remember offset of lb in the task structure
- size_t upper_offset =
- (char *)ub - (char *)task; // remember offset of ub in the task structure
KMP_DEBUG_ASSERT(tc == num_tasks * grainsize + extras);
KMP_DEBUG_ASSERT(num_tasks > extras);
@@ -3628,14 +3732,25 @@ void __kmp_taskloop_linear(ident_t *loc, int gtid, kmp_task_t *task,
}
}
next_task = __kmp_task_dup_alloc(thread, task); // allocate new task
+ kmp_taskdata_t *next_taskdata = KMP_TASK_TO_TASKDATA(next_task);
+ kmp_taskloop_bounds_t next_task_bounds =
+ kmp_taskloop_bounds_t(next_task, task_bounds);
+
// adjust task-specific bounds
- *(kmp_uint64 *)((char *)next_task + lower_offset) = lower;
- *(kmp_uint64 *)((char *)next_task + upper_offset) = upper;
+ next_task_bounds.set_lb(lower);
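+  // GOMP expects a half-open range, so convert the runtime's inclusive upper
+  // bound back by one step: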
+ if (next_taskdata->td_flags.native) {
+ next_task_bounds.set_ub(upper + (st > 0 ? 1 : -1));
+ } else {
+ next_task_bounds.set_ub(upper);
+ }
 if (ptask_dup != NULL) // set lastprivate flag, construct firstprivates, etc.
ptask_dup(next_task, task, lastpriv);
- KA_TRACE(40, ("__kmp_taskloop_linear: T#%d; task %p: lower %lld, "
- "upper %lld (offsets %p %p)\n",
- gtid, next_task, lower, upper, lower_offset, upper_offset));
+ KA_TRACE(40,
+ ("__kmp_taskloop_linear: T#%d; task #%llu: task %p: lower %lld, "
+ "upper %lld stride %lld, (offsets %p %p)\n",
+ gtid, i, next_task, lower, upper, st,
+ next_task_bounds.get_lower_offset(),
+ next_task_bounds.get_upper_offset()));
__kmp_omp_task(gtid, next_task, true); // schedule new task
lower = upper + st; // adjust lower bound for the next iteration
}
@@ -3827,10 +3942,6 @@ void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
KMP_DEBUG_ASSERT(task != NULL);
- KA_TRACE(20, ("__kmpc_taskloop: T#%d, task %p, lb %lld, ub %lld, st %lld, "
- "grain %llu(%d), dup %p\n",
- gtid, taskdata, *lb, *ub, st, grainsize, sched, task_dup));
-
#if OMPT_SUPPORT && OMPT_OPTIONAL
ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
@@ -3850,15 +3961,21 @@ void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
// =========================================================================
// calculate loop parameters
+ kmp_taskloop_bounds_t task_bounds(task, lb, ub);
kmp_uint64 tc;
- kmp_uint64 lower = *lb; // compiler provides global bounds here
- kmp_uint64 upper = *ub;
+ // compiler provides global bounds here
+ kmp_uint64 lower = task_bounds.get_lb();
+ kmp_uint64 upper = task_bounds.get_ub();
kmp_uint64 ub_glob = upper; // global upper used to calc lastprivate flag
kmp_uint64 num_tasks = 0, extras = 0;
kmp_uint64 num_tasks_min = __kmp_taskloop_min_tasks;
kmp_info_t *thread = __kmp_threads[gtid];
kmp_taskdata_t *current_task = thread->th.th_current_task;
+ KA_TRACE(20, ("__kmpc_taskloop: T#%d, task %p, lb %lld, ub %lld, st %lld, "
+ "grain %llu(%d), dup %p\n",
+ gtid, taskdata, lower, upper, st, grainsize, sched, task_dup));
+
// compute trip count
if (st == 1) { // most common case
tc = upper - lower + 1;
@@ -3917,6 +4034,7 @@ void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
// =========================================================================
 // check if-clause value first
+  // Also force GOMP_taskloop (taskdata->td_flags.native) down the linear path
if (if_val == 0) { // if(0) specified, mark task as serial
taskdata->td_flags.task_serial = 1;
taskdata->td_flags.tiedness = TASK_TIED; // AC: serial task cannot be untied
@@ -3926,7 +4044,9 @@ void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
// always start serial tasks linearly
__kmp_taskloop_linear(loc, gtid, task, lb, ub, st, ub_glob, num_tasks,
grainsize, extras, tc, task_dup);
- } else if (num_tasks > num_tasks_min) {
+  // the !taskdata->td_flags.native check currently forces linear spawning of
+  // tasks for GOMP_taskloop
+ } else if (num_tasks > num_tasks_min && !taskdata->td_flags.native) {
KA_TRACE(20, ("__kmpc_taskloop: T#%d, go recursive: tc %llu, #tasks %llu"
"(%lld), grain %llu, extras %llu\n",
gtid, tc, num_tasks, num_tasks_min, grainsize, extras));
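
To make the dual bound layout above concrete, here is a minimal standalone C++
sketch, using toy stand-ins rather than the runtime's real kmp_task_t and
kmp_taskdata_t, of how a GOMP-style read of the lower bound depends on
sizeof(long): the bounds sit at the front of the shareds block, and the value
playing the role of td_size_loop_bounds selects a 32- or 64-bit access. The
names toy_task, gomp_get_lb, and size_loop_bounds are hypothetical, and memcpy
is used instead of the runtime's RCAST pointer casts to keep the sketch
strictly portable.

#include <cstdint>
#include <cstdio>
#include <cstring>

// Toy stand-in for kmp_task_t: GOMP-style bounds live in shareds.
struct toy_task {
  void *shareds;
};

// Hypothetical analogue of kmp_taskloop_bounds_t::get_lb() for a native
// (GOMP) task; size_loop_bounds plays the role of td_size_loop_bounds.
static uint64_t gomp_get_lb(const toy_task *t, size_t size_loop_bounds) {
  if (size_loop_bounds == 4) {         // 32-bit long target
    int32_t lb32;
    std::memcpy(&lb32, t->shareds, sizeof(lb32));
    return (uint64_t)(int64_t)lb32;    // widen to 64 bits as the runtime does
  }
  int64_t lb64;                        // 64-bit long target
  std::memcpy(&lb64, t->shareds, sizeof(lb64));
  return (uint64_t)lb64;
}

int main() {
  long bounds[2] = {10, 19};           // shareds layout: [lb, ub]
  toy_task t{bounds};
  std::printf("lb = %llu\n",
              (unsigned long long)gomp_get_lb(&t, sizeof(long)));
  return 0;
}

Separately, the invariant the patch asserts twice (tc == num_tasks * grainsize
+ extras with num_tasks > extras) falls out of plain integer division; the
following sketch decomposes a hypothetical 103-iteration loop with grainsize
10 (all values here are made up for illustration):

#include <cstdint>
#include <cstdio>

int main() {
  uint64_t tc = 103, grainsize = 10;    // hypothetical trip count and grain
  uint64_t num_tasks = tc / grainsize;  // 10 tasks of grainsize iterations
  uint64_t extras = tc % grainsize;     // 3 iterations left to distribute
  // Mirrors KMP_DEBUG_ASSERT(tc == num_tasks * grainsize + extras)
  // and KMP_DEBUG_ASSERT(num_tasks > extras) from the patch.
  std::printf("tc=%llu = %llu*%llu + %llu\n", (unsigned long long)tc,
              (unsigned long long)num_tasks, (unsigned long long)grainsize,
              (unsigned long long)extras);
  return 0;
}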