diff options
Diffstat (limited to 'openmp/runtime/src/kmp_gsupport.cpp')
| -rw-r--r-- | openmp/runtime/src/kmp_gsupport.cpp | 392 |
1 files changed, 240 insertions, 152 deletions
diff --git a/openmp/runtime/src/kmp_gsupport.cpp b/openmp/runtime/src/kmp_gsupport.cpp index b7045cfc83e..66582591069 100644 --- a/openmp/runtime/src/kmp_gsupport.cpp +++ b/openmp/runtime/src/kmp_gsupport.cpp @@ -31,14 +31,20 @@ void xexpand(KMP_API_NAME_GOMP_BARRIER)(void) { int gtid = __kmp_entry_gtid(); MKLOC(loc, "GOMP_barrier"); KA_TRACE(20, ("GOMP_barrier: T#%d\n", gtid)); -#if OMPT_SUPPORT && OMPT_TRACE +#if OMPT_SUPPORT && OMPT_OPTIONAL ompt_frame_t *ompt_frame; - if (ompt_enabled) { - ompt_frame = __ompt_get_task_frame_internal(0); - ompt_frame->reenter_runtime_frame = __builtin_frame_address(1); + if (ompt_enabled.enabled) { + __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); + ompt_frame->reenter_runtime_frame = OMPT_GET_FRAME_ADDRESS(1); + OMPT_STORE_RETURN_ADDRESS(gtid); } #endif __kmpc_barrier(&loc, gtid); +#if OMPT_SUPPORT && OMPT_OPTIONAL + if (ompt_enabled.enabled) { + ompt_frame->reenter_runtime_frame = NULL; + } +#endif } // Mutual exclusion @@ -56,6 +62,9 @@ void xexpand(KMP_API_NAME_GOMP_CRITICAL_START)(void) { int gtid = __kmp_entry_gtid(); MKLOC(loc, "GOMP_critical_start"); KA_TRACE(20, ("GOMP_critical_start: T#%d\n", gtid)); +#if OMPT_SUPPORT && OMPT_OPTIONAL + OMPT_STORE_RETURN_ADDRESS(gtid); +#endif __kmpc_critical(&loc, gtid, __kmp_unnamed_critical_addr); } @@ -63,6 +72,9 @@ void xexpand(KMP_API_NAME_GOMP_CRITICAL_END)(void) { int gtid = __kmp_get_gtid(); MKLOC(loc, "GOMP_critical_end"); KA_TRACE(20, ("GOMP_critical_end: T#%d\n", gtid)); +#if OMPT_SUPPORT && OMPT_OPTIONAL + OMPT_STORE_RETURN_ADDRESS(gtid); +#endif __kmpc_end_critical(&loc, gtid, __kmp_unnamed_critical_addr); } @@ -111,7 +123,40 @@ int xexpand(KMP_API_NAME_GOMP_SINGLE_START)(void) { // 3rd parameter == FALSE prevents kmp_enter_single from pushing a // workshare when USE_CHECKS is defined. We need to avoid the push, // as there is no corresponding GOMP_single_end() call. - return __kmp_enter_single(gtid, &loc, FALSE); + kmp_int32 rc = __kmp_enter_single(gtid, &loc, FALSE); + +#if OMPT_SUPPORT && OMPT_OPTIONAL + kmp_info_t *this_thr = __kmp_threads[gtid]; + kmp_team_t *team = this_thr->th.th_team; + int tid = __kmp_tid_from_gtid(gtid); + + if (ompt_enabled.enabled) { + if (rc) { + if (ompt_enabled.ompt_callback_work) { + ompt_callbacks.ompt_callback(ompt_callback_work)( + ompt_work_single_executor, ompt_scope_begin, + &(team->t.ompt_team_info.parallel_data), + &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), + 1, OMPT_GET_RETURN_ADDRESS(0)); + } + } else { + if (ompt_enabled.ompt_callback_work) { + ompt_callbacks.ompt_callback(ompt_callback_work)( + ompt_work_single_other, ompt_scope_begin, + &(team->t.ompt_team_info.parallel_data), + &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), + 1, OMPT_GET_RETURN_ADDRESS(0)); + ompt_callbacks.ompt_callback(ompt_callback_work)( + ompt_work_single_other, ompt_scope_end, + &(team->t.ompt_team_info.parallel_data), + &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), + 1, OMPT_GET_RETURN_ADDRESS(0)); + } + } + } +#endif + + return rc; } void *xexpand(KMP_API_NAME_GOMP_SINGLE_COPY_START)(void) { @@ -129,14 +174,33 @@ void *xexpand(KMP_API_NAME_GOMP_SINGLE_COPY_START)(void) { if (__kmp_enter_single(gtid, &loc, FALSE)) return NULL; - // Wait for the first thread to set the copyprivate data pointer, - // and for all other threads to reach this point. +// Wait for the first thread to set the copyprivate data pointer, +// and for all other threads to reach this point. + +#if OMPT_SUPPORT && OMPT_OPTIONAL + ompt_frame_t *ompt_frame; + if (ompt_enabled.enabled) { + __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); + ompt_frame->reenter_runtime_frame = OMPT_GET_FRAME_ADDRESS(1); + OMPT_STORE_RETURN_ADDRESS(gtid); + } +#endif __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); // Retrieve the value of the copyprivate data point, and wait for all // threads to do likewise, then return. retval = __kmp_team_from_gtid(gtid)->t.t_copypriv_data; +#if OMPT_SUPPORT && OMPT_OPTIONAL + if (ompt_enabled.enabled) { + OMPT_STORE_RETURN_ADDRESS(gtid); + } +#endif __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); +#if OMPT_SUPPORT && OMPT_OPTIONAL + if (ompt_enabled.enabled) { + ompt_frame->reenter_runtime_frame = NULL; + } +#endif return retval; } @@ -149,14 +213,35 @@ void xexpand(KMP_API_NAME_GOMP_SINGLE_COPY_END)(void *data) { // continuing, so that the know that the copyprivate data pointer has been // propagated to all threads before trying to reuse the t_copypriv_data field. __kmp_team_from_gtid(gtid)->t.t_copypriv_data = data; +#if OMPT_SUPPORT && OMPT_OPTIONAL + ompt_frame_t *ompt_frame; + if (ompt_enabled.enabled) { + __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); + ompt_frame->reenter_runtime_frame = OMPT_GET_FRAME_ADDRESS(1); + OMPT_STORE_RETURN_ADDRESS(gtid); + } +#endif __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); +#if OMPT_SUPPORT && OMPT_OPTIONAL + if (ompt_enabled.enabled) { + OMPT_STORE_RETURN_ADDRESS(gtid); + } +#endif __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); +#if OMPT_SUPPORT && OMPT_OPTIONAL + if (ompt_enabled.enabled) { + ompt_frame->reenter_runtime_frame = NULL; + } +#endif } void xexpand(KMP_API_NAME_GOMP_ORDERED_START)(void) { int gtid = __kmp_entry_gtid(); MKLOC(loc, "GOMP_ordered_start"); KA_TRACE(20, ("GOMP_ordered_start: T#%d\n", gtid)); +#if OMPT_SUPPORT && OMPT_OPTIONAL + OMPT_STORE_RETURN_ADDRESS(gtid); +#endif __kmpc_ordered(&loc, gtid); } @@ -164,6 +249,9 @@ void xexpand(KMP_API_NAME_GOMP_ORDERED_END)(void) { int gtid = __kmp_get_gtid(); MKLOC(loc, "GOMP_ordered_end"); KA_TRACE(20, ("GOMP_ordered_start: T#%d\n", gtid)); +#if OMPT_SUPPORT && OMPT_OPTIONAL + OMPT_STORE_RETURN_ADDRESS(gtid); +#endif __kmpc_end_ordered(&loc, gtid); } @@ -197,26 +285,26 @@ static #if OMPT_SUPPORT kmp_info_t *thr; ompt_frame_t *ompt_frame; - ompt_state_t enclosing_state; + omp_state_t enclosing_state; - if (ompt_enabled) { + if (ompt_enabled.enabled) { // get pointer to thread data structure thr = __kmp_threads[*gtid]; // save enclosing task state; set current state for task enclosing_state = thr->th.ompt_thread_info.state; - thr->th.ompt_thread_info.state = ompt_state_work_parallel; + thr->th.ompt_thread_info.state = omp_state_work_parallel; // set task frame - ompt_frame = __ompt_get_task_frame_internal(0); - ompt_frame->exit_runtime_frame = __builtin_frame_address(0); + __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); + ompt_frame->exit_runtime_frame = OMPT_GET_FRAME_ADDRESS(0); } #endif task(data); #if OMPT_SUPPORT - if (ompt_enabled) { + if (ompt_enabled.enabled) { // clear task frame ompt_frame->exit_runtime_frame = NULL; @@ -236,24 +324,29 @@ static enum sched_type schedule, long start, long end, long incr, long chunk_size) { - // Intialize the loop worksharing construct. +// Intialize the loop worksharing construct. + +#if OMPT_SUPPORT + if (ompt_enabled.enabled) + OMPT_STORE_RETURN_ADDRESS(*gtid); +#endif KMP_DISPATCH_INIT(loc, *gtid, schedule, start, end, incr, chunk_size, schedule != kmp_sch_static); #if OMPT_SUPPORT kmp_info_t *thr; ompt_frame_t *ompt_frame; - ompt_state_t enclosing_state; + omp_state_t enclosing_state; - if (ompt_enabled) { + if (ompt_enabled.enabled) { thr = __kmp_threads[*gtid]; // save enclosing task state; set current state for task enclosing_state = thr->th.ompt_thread_info.state; - thr->th.ompt_thread_info.state = ompt_state_work_parallel; + thr->th.ompt_thread_info.state = omp_state_work_parallel; // set task frame - ompt_frame = __ompt_get_task_frame_internal(0); - ompt_frame->exit_runtime_frame = __builtin_frame_address(0); + __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); + ompt_frame->exit_runtime_frame = OMPT_GET_FRAME_ADDRESS(0); } #endif @@ -261,7 +354,7 @@ static task(data); #if OMPT_SUPPORT - if (ompt_enabled) { + if (ompt_enabled.enabled) { // clear task frame ompt_frame->exit_runtime_frame = NULL; @@ -285,11 +378,8 @@ static va_list ap; va_start(ap, argc); - rc = __kmp_fork_call(loc, gtid, fork_context_gnu, argc, -#if OMPT_SUPPORT - VOLATILE_CAST(void *) unwrapped_task, -#endif - wrapper, __kmp_invoke_task_func, + rc = __kmp_fork_call(loc, gtid, fork_context_gnu, argc, wrapper, + __kmp_invoke_task_func, #if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX &ap #else @@ -304,18 +394,19 @@ static } #if OMPT_SUPPORT - if (ompt_enabled) { -#if OMPT_TRACE + int ompt_team_size; + if (ompt_enabled.enabled) { ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL); - ompt_task_info_t *task_info = __ompt_get_taskinfo(0); + ompt_task_info_t *task_info = __ompt_get_task_info_object(0); // implicit task callback - if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) { - ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)( - team_info->parallel_id, task_info->task_id); + if (ompt_enabled.ompt_callback_implicit_task) { + ompt_team_size = __kmp_team_from_gtid(gtid)->t.t_nproc; + ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( + ompt_scope_begin, &(team_info->parallel_data), + &(task_info->task_data), ompt_team_size, __kmp_tid_from_gtid(gtid)); } -#endif - thr->th.ompt_thread_info.state = ompt_state_work_parallel; + thr->th.ompt_thread_info.state = omp_state_work_parallel; } #endif } @@ -323,47 +414,9 @@ static static void __kmp_GOMP_serialized_parallel(ident_t *loc, kmp_int32 gtid, void (*task)(void *)) { #if OMPT_SUPPORT - ompt_parallel_id_t ompt_parallel_id; - if (ompt_enabled) { - ompt_task_info_t *task_info = __ompt_get_taskinfo(0); - - ompt_parallel_id = __ompt_parallel_id_new(gtid); - - // parallel region callback - if (ompt_callbacks.ompt_callback(ompt_event_parallel_begin)) { - int team_size = 1; - ompt_callbacks.ompt_callback(ompt_event_parallel_begin)( - task_info->task_id, &task_info->frame, ompt_parallel_id, team_size, - (void *)task, OMPT_INVOKER(fork_context_gnu)); - } - } + OMPT_STORE_RETURN_ADDRESS(gtid); #endif - __kmp_serialized_parallel(loc, gtid); - -#if OMPT_SUPPORT - if (ompt_enabled) { - kmp_info_t *thr = __kmp_threads[gtid]; - - ompt_task_id_t my_ompt_task_id = __ompt_task_id_new(gtid); - - // set up lightweight task - ompt_lw_taskteam_t *lwt = - (ompt_lw_taskteam_t *)__kmp_allocate(sizeof(ompt_lw_taskteam_t)); - __ompt_lw_taskteam_init(lwt, thr, gtid, (void *)task, ompt_parallel_id); - lwt->ompt_task_info.task_id = my_ompt_task_id; - __ompt_lw_taskteam_link(lwt, thr); - -#if OMPT_TRACE - // implicit task callback - if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) { - ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)( - ompt_parallel_id, my_ompt_task_id); - } - thr->th.ompt_thread_info.state = ompt_state_work_parallel; -#endif - } -#endif } void xexpand(KMP_API_NAME_GOMP_PARALLEL_START)(void (*task)(void *), void *data, @@ -373,10 +426,11 @@ void xexpand(KMP_API_NAME_GOMP_PARALLEL_START)(void (*task)(void *), void *data, #if OMPT_SUPPORT ompt_frame_t *parent_frame, *frame; - if (ompt_enabled) { - parent_frame = __ompt_get_task_frame_internal(0); - parent_frame->reenter_runtime_frame = __builtin_frame_address(1); + if (ompt_enabled.enabled) { + __ompt_get_task_info_internal(0, NULL, NULL, &parent_frame, NULL, NULL); + parent_frame->reenter_runtime_frame = OMPT_GET_FRAME_ADDRESS(1); } + OMPT_STORE_RETURN_ADDRESS(gtid); #endif MKLOC(loc, "GOMP_parallel_start"); @@ -394,9 +448,9 @@ void xexpand(KMP_API_NAME_GOMP_PARALLEL_START)(void (*task)(void *), void *data, } #if OMPT_SUPPORT - if (ompt_enabled) { - frame = __ompt_get_task_frame_internal(0); - frame->exit_runtime_frame = __builtin_frame_address(1); + if (ompt_enabled.enabled) { + __ompt_get_task_info_internal(0, NULL, NULL, &frame, NULL, NULL); + frame->exit_runtime_frame = OMPT_GET_FRAME_ADDRESS(1); } #endif } @@ -404,44 +458,23 @@ void xexpand(KMP_API_NAME_GOMP_PARALLEL_START)(void (*task)(void *), void *data, void xexpand(KMP_API_NAME_GOMP_PARALLEL_END)(void) { int gtid = __kmp_get_gtid(); kmp_info_t *thr; + int ompt_team_size = __kmp_team_from_gtid(gtid)->t.t_nproc; thr = __kmp_threads[gtid]; MKLOC(loc, "GOMP_parallel_end"); KA_TRACE(20, ("GOMP_parallel_end: T#%d\n", gtid)); -#if OMPT_SUPPORT - ompt_parallel_id_t parallel_id; - ompt_task_id_t serialized_task_id; - ompt_frame_t *ompt_frame = NULL; - - if (ompt_enabled) { - ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL); - parallel_id = team_info->parallel_id; - - ompt_task_info_t *task_info = __ompt_get_taskinfo(0); - serialized_task_id = task_info->task_id; - - // unlink if necessary. no-op if there is not a lightweight task. - ompt_lw_taskteam_t *lwt = __ompt_lw_taskteam_unlink(thr); - // GOMP allocates/frees lwt since it can't be kept on the stack - if (lwt) { - __kmp_free(lwt); - } - } -#endif - if (!thr->th.th_team->t.t_serialized) { __kmp_run_after_invoked_task(gtid, __kmp_tid_from_gtid(gtid), thr, thr->th.th_team); #if OMPT_SUPPORT - if (ompt_enabled) { + if (ompt_enabled.enabled) { // Implicit task is finished here, in the barrier we might schedule // deferred tasks, // these don't see the implicit task on the stack - ompt_frame = __ompt_get_task_frame_internal(0); - ompt_frame->exit_runtime_frame = NULL; + OMPT_CUR_TASK_INFO(thr)->frame.exit_runtime_frame = NULL; } #endif @@ -452,35 +485,7 @@ void xexpand(KMP_API_NAME_GOMP_PARALLEL_END)(void) { #endif ); } else { -#if OMPT_SUPPORT && OMPT_TRACE - if (ompt_enabled && - ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) { - ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)( - parallel_id, serialized_task_id); - } -#endif - __kmpc_end_serialized_parallel(&loc, gtid); - -#if OMPT_SUPPORT - if (ompt_enabled) { - // Record that we re-entered the runtime system in the frame that - // created the parallel region. - ompt_task_info_t *parent_task_info = __ompt_get_taskinfo(0); - - if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) { - ompt_callbacks.ompt_callback(ompt_event_parallel_end)( - parallel_id, parent_task_info->task_id, - OMPT_INVOKER(fork_context_gnu)); - } - - parent_task_info->frame.reenter_runtime_frame = NULL; - - thr->th.ompt_thread_info.state = - (((thr->th.th_team)->t.t_serialized) ? ompt_state_work_serial - : ompt_state_work_parallel); - } -#endif } } @@ -508,6 +513,12 @@ void xexpand(KMP_API_NAME_GOMP_PARALLEL_END)(void) { // num and calculate the iteration space using the result. It doesn't do this // with ordered static loop, so they can be checked. +#if OMPT_SUPPORT +#define IF_OMPT_SUPPORT(code) code +#else +#define IF_OMPT_SUPPORT(code) +#endif + #define LOOP_START(func, schedule) \ int func(long lb, long ub, long str, long chunk_sz, long *p_lb, \ long *p_ub) { \ @@ -520,9 +531,11 @@ void xexpand(KMP_API_NAME_GOMP_PARALLEL_END)(void) { gtid, lb, ub, str, chunk_sz)); \ \ if ((str > 0) ? (lb < ub) : (lb > ub)) { \ + IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid);) \ KMP_DISPATCH_INIT(&loc, gtid, (schedule), lb, \ (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz, \ (schedule) != kmp_sch_static); \ + IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid);) \ status = KMP_DISPATCH_NEXT(&loc, gtid, NULL, (kmp_int *)p_lb, \ (kmp_int *)p_ub, (kmp_int *)&stride); \ if (status) { \ @@ -551,8 +564,10 @@ void xexpand(KMP_API_NAME_GOMP_PARALLEL_END)(void) { gtid, lb, ub, str, chunk_sz)); \ \ if ((str > 0) ? (lb < ub) : (lb > ub)) { \ + IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid);) \ KMP_DISPATCH_INIT(&loc, gtid, (schedule), lb, \ (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz, TRUE); \ + IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid);) \ status = KMP_DISPATCH_NEXT(&loc, gtid, NULL, (kmp_int *)p_lb, \ (kmp_int *)p_ub, (kmp_int *)&stride); \ if (status) { \ @@ -577,6 +592,7 @@ void xexpand(KMP_API_NAME_GOMP_PARALLEL_END)(void) { MKLOC(loc, #func); \ KA_TRACE(20, (#func ": T#%d\n", gtid)); \ \ + IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid);) \ fini_code status = KMP_DISPATCH_NEXT(&loc, gtid, NULL, (kmp_int *)p_lb, \ (kmp_int *)p_ub, (kmp_int *)&stride); \ if (status) { \ @@ -621,7 +637,20 @@ void xexpand(KMP_API_NAME_GOMP_LOOP_END)(void) { int gtid = __kmp_get_gtid(); KA_TRACE(20, ("GOMP_loop_end: T#%d\n", gtid)) +#if OMPT_SUPPORT && OMPT_OPTIONAL + ompt_frame_t *ompt_frame; + if (ompt_enabled.enabled) { + __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); + ompt_frame->reenter_runtime_frame = OMPT_GET_FRAME_ADDRESS(1); + OMPT_STORE_RETURN_ADDRESS(gtid); + } +#endif __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); +#if OMPT_SUPPORT && OMPT_OPTIONAL + if (ompt_enabled.enabled) { + ompt_frame->reenter_runtime_frame = NULL; + } +#endif KA_TRACE(20, ("GOMP_loop_end exit: T#%d\n", gtid)) } @@ -796,17 +825,18 @@ LOOP_NEXT_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_RUNTIME_NEXT), KA_TRACE(20, (#func " exit: T#%d\n", gtid)); \ } -#if OMPT_SUPPORT +#if OMPT_SUPPORT && OMPT_OPTIONAL #define OMPT_LOOP_PRE() \ ompt_frame_t *parent_frame; \ - if (ompt_enabled) { \ - parent_frame = __ompt_get_task_frame_internal(0); \ - parent_frame->reenter_runtime_frame = __builtin_frame_address(1); \ - } + if (ompt_enabled.enabled) { \ + __ompt_get_task_info_internal(0, NULL, NULL, &parent_frame, NULL, NULL); \ + parent_frame->reenter_runtime_frame = OMPT_GET_FRAME_ADDRESS(1); \ + } \ + OMPT_STORE_RETURN_ADDRESS(gtid); #define OMPT_LOOP_POST() \ - if (ompt_enabled) { \ + if (ompt_enabled.enabled) { \ parent_frame->reenter_runtime_frame = NULL; \ } @@ -878,6 +908,16 @@ void xexpand(KMP_API_NAME_GOMP_TASK)(void (*func)(void *), void *data, } } +#if OMPT_SUPPORT + kmp_taskdata_t *current_task; + if (ompt_enabled.enabled) { + OMPT_STORE_RETURN_ADDRESS(gtid); + current_task = __kmp_threads[gtid]->th.th_current_task; + current_task->ompt_task_info.frame.reenter_runtime_frame = + OMPT_GET_FRAME_ADDRESS(1); + } +#endif + if (if_cond) { #if OMP_40_ENABLED if (gomp_flags & 8) { @@ -893,23 +933,26 @@ void xexpand(KMP_API_NAME_GOMP_TASK)(void (*func)(void *), void *data, dep_list[i].flags.out = (i < nout); } __kmpc_omp_task_with_deps(&loc, gtid, task, ndeps, dep_list, 0, NULL); - } else + } else { #endif __kmpc_omp_task(&loc, gtid, task); + } } else { #if OMPT_SUPPORT ompt_thread_info_t oldInfo; kmp_info_t *thread; kmp_taskdata_t *taskdata; - if (ompt_enabled) { + kmp_taskdata_t *current_task; + if (ompt_enabled.enabled) { // Store the threads states and restore them after the task thread = __kmp_threads[gtid]; taskdata = KMP_TASK_TO_TASKDATA(task); oldInfo = thread->th.ompt_thread_info; thread->th.ompt_thread_info.wait_id = 0; - thread->th.ompt_thread_info.state = ompt_state_work_parallel; + thread->th.ompt_thread_info.state = omp_state_work_parallel; taskdata->ompt_task_info.frame.exit_runtime_frame = - __builtin_frame_address(0); + OMPT_GET_FRAME_ADDRESS(0); + OMPT_STORE_RETURN_ADDRESS(gtid); } #endif @@ -918,12 +961,17 @@ void xexpand(KMP_API_NAME_GOMP_TASK)(void (*func)(void *), void *data, __kmpc_omp_task_complete_if0(&loc, gtid, task); #if OMPT_SUPPORT - if (ompt_enabled) { + if (ompt_enabled.enabled) { thread->th.ompt_thread_info = oldInfo; taskdata->ompt_task_info.frame.exit_runtime_frame = NULL; } #endif } +#if OMPT_SUPPORT + if (ompt_enabled.enabled) { + current_task->ompt_task_info.frame.reenter_runtime_frame = NULL; + } +#endif KA_TRACE(20, ("GOMP_task exit: T#%d\n", gtid)); } @@ -932,6 +980,11 @@ void xexpand(KMP_API_NAME_GOMP_TASKWAIT)(void) { MKLOC(loc, "GOMP_taskwait"); int gtid = __kmp_entry_gtid(); +#if OMPT_SUPPORT + if (ompt_enabled.enabled) + OMPT_STORE_RETURN_ADDRESS(gtid); +#endif + KA_TRACE(20, ("GOMP_taskwait: T#%d\n", gtid)); __kmpc_omp_taskwait(&loc, gtid); @@ -1001,10 +1054,11 @@ void xexpand(KMP_API_NAME_GOMP_PARALLEL_SECTIONS_START)(void (*task)(void *), #if OMPT_SUPPORT ompt_frame_t *parent_frame; - if (ompt_enabled) { - parent_frame = __ompt_get_task_frame_internal(0); - parent_frame->reenter_runtime_frame = __builtin_frame_address(1); + if (ompt_enabled.enabled) { + __ompt_get_task_info_internal(0, NULL, NULL, &parent_frame, NULL, NULL); + parent_frame->reenter_runtime_frame = OMPT_GET_FRAME_ADDRESS(1); } + OMPT_STORE_RETURN_ADDRESS(gtid); #endif MKLOC(loc, "GOMP_parallel_sections_start"); @@ -1023,7 +1077,7 @@ void xexpand(KMP_API_NAME_GOMP_PARALLEL_SECTIONS_START)(void (*task)(void *), } #if OMPT_SUPPORT - if (ompt_enabled) { + if (ompt_enabled.enabled) { parent_frame->reenter_runtime_frame = NULL; } #endif @@ -1037,7 +1091,20 @@ void xexpand(KMP_API_NAME_GOMP_SECTIONS_END)(void) { int gtid = __kmp_get_gtid(); KA_TRACE(20, ("GOMP_sections_end: T#%d\n", gtid)) +#if OMPT_SUPPORT + ompt_frame_t *ompt_frame; + if (ompt_enabled.enabled) { + __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); + ompt_frame->reenter_runtime_frame = OMPT_GET_FRAME_ADDRESS(1); + OMPT_STORE_RETURN_ADDRESS(gtid); + } +#endif __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); +#if OMPT_SUPPORT + if (ompt_enabled.enabled) { + ompt_frame->reenter_runtime_frame = NULL; + } +#endif KA_TRACE(20, ("GOMP_sections_end exit: T#%d\n", gtid)) } @@ -1063,10 +1130,11 @@ void xexpand(KMP_API_NAME_GOMP_PARALLEL)(void (*task)(void *), void *data, #if OMPT_SUPPORT ompt_task_info_t *parent_task_info, *task_info; - if (ompt_enabled) { - parent_task_info = __ompt_get_taskinfo(0); - parent_task_info->frame.reenter_runtime_frame = __builtin_frame_address(1); + if (ompt_enabled.enabled) { + parent_task_info = __ompt_get_task_info_object(0); + parent_task_info->frame.reenter_runtime_frame = OMPT_GET_FRAME_ADDRESS(1); } + OMPT_STORE_RETURN_ADDRESS(gtid); #endif if (__kmpc_ok_to_fork(&loc) && (num_threads != 1)) { if (num_threads != 0) { @@ -1082,15 +1150,20 @@ void xexpand(KMP_API_NAME_GOMP_PARALLEL)(void (*task)(void *), void *data, __kmp_GOMP_serialized_parallel(&loc, gtid, task); } #if OMPT_SUPPORT - if (ompt_enabled) { - task_info = __ompt_get_taskinfo(0); - task_info->frame.exit_runtime_frame = __builtin_frame_address(0); + if (ompt_enabled.enabled) { + task_info = __ompt_get_task_info_object(0); + task_info->frame.exit_runtime_frame = OMPT_GET_FRAME_ADDRESS(0); } #endif task(data); +#if OMPT_SUPPORT + if (ompt_enabled.enabled) { + OMPT_STORE_RETURN_ADDRESS(gtid); + } +#endif xexpand(KMP_API_NAME_GOMP_PARALLEL_END)(); #if OMPT_SUPPORT - if (ompt_enabled) { + if (ompt_enabled.enabled) { task_info->frame.exit_runtime_frame = NULL; parent_task_info->frame.reenter_runtime_frame = NULL; } @@ -1106,6 +1179,10 @@ void xexpand(KMP_API_NAME_GOMP_PARALLEL_SECTIONS)(void (*task)(void *), MKLOC(loc, "GOMP_parallel_sections"); KA_TRACE(20, ("GOMP_parallel_sections: T#%d\n", gtid)); +#if OMPT_SUPPORT + OMPT_STORE_RETURN_ADDRESS(gtid); +#endif + if (__kmpc_ok_to_fork(&loc) && (num_threads != 1)) { if (num_threads != 0) { __kmp_push_num_threads(&loc, gtid, num_threads); @@ -1153,6 +1230,7 @@ void xexpand(KMP_API_NAME_GOMP_PARALLEL_SECTIONS)(void (*task)(void *), __kmp_GOMP_serialized_parallel(&loc, gtid, task); \ } \ \ + IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid);) \ KMP_DISPATCH_INIT(&loc, gtid, (schedule), lb, \ (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz, \ (schedule) != kmp_sch_static); \ @@ -1177,6 +1255,11 @@ void xexpand(KMP_API_NAME_GOMP_TASKGROUP_START)(void) { MKLOC(loc, "GOMP_taskgroup_start"); KA_TRACE(20, ("GOMP_taskgroup_start: T#%d\n", gtid)); +#if OMPT_SUPPORT + if (ompt_enabled.enabled) + OMPT_STORE_RETURN_ADDRESS(gtid); +#endif + __kmpc_taskgroup(&loc, gtid); return; @@ -1187,6 +1270,11 @@ void xexpand(KMP_API_NAME_GOMP_TASKGROUP_END)(void) { MKLOC(loc, "GOMP_taskgroup_end"); KA_TRACE(20, ("GOMP_taskgroup_end: T#%d\n", gtid)); +#if OMPT_SUPPORT + if (ompt_enabled.enabled) + OMPT_STORE_RETURN_ADDRESS(gtid); +#endif + __kmpc_end_taskgroup(&loc, gtid); return; |

