diff options
| author | Jonathan Peyton <jonathan.l.peyton@intel.com> | 2018-09-26 20:24:39 +0000 |
|---|---|---|
| committer | Jonathan Peyton <jonathan.l.peyton@intel.com> | 2018-09-26 20:24:39 +0000 |
| commit | cf27e31bdd424b3f13537a09418d539272e7bb36 (patch) | |
| tree | 951e148b7417802940b639aecbf6e9781f881b67 /openmp/runtime/src | |
| parent | 60eec6fecb74eb225c76645a9a5a96b3bd2c9684 (diff) | |
| download | bcm5719-llvm-cf27e31bdd424b3f13537a09418d539272e7bb36.tar.gz bcm5719-llvm-cf27e31bdd424b3f13537a09418d539272e7bb36.zip | |
[OpenMP] Fix performance issue from 376.kdtree
This change improves the performance of 376.kdtree by giving the compiler an
opportunity to do inlining and other optimizations for the call path,
__kmpc_omp_task_complete_if0()->__kmp_task_finish(), which is one of the hot
paths in the program; to achieve this, some functions in kmp_taskdeps.cpp were
moved to the new header file, kmp_taskdeps.h, as static inline functions.
Patch by Hansang Bae
Differential Revision: https://reviews.llvm.org/D51889
llvm-svn: 343138
Diffstat (limited to 'openmp/runtime/src')
| -rw-r--r-- | openmp/runtime/src/kmp.h | 3 | ||||
| -rw-r--r-- | openmp/runtime/src/kmp_taskdeps.cpp | 122 | ||||
| -rw-r--r-- | openmp/runtime/src/kmp_taskdeps.h | 145 | ||||
| -rw-r--r-- | openmp/runtime/src/kmp_tasking.cpp | 14 |
4 files changed, 153 insertions, 131 deletions
diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h index 97908e1aadb..34c722ac7ac 100644 --- a/openmp/runtime/src/kmp.h +++ b/openmp/runtime/src/kmp.h @@ -3748,9 +3748,6 @@ KMP_EXPORT void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32 gtid, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list); -extern void __kmp_release_deps(kmp_int32 gtid, kmp_taskdata_t *task); -extern void __kmp_dephash_free_entries(kmp_info_t *thread, kmp_dephash_t *h); -extern void __kmp_dephash_free(kmp_info_t *thread, kmp_dephash_t *h); extern kmp_int32 __kmp_omp_task(kmp_int32 gtid, kmp_task_t *new_task, bool serialize_immediate); diff --git a/openmp/runtime/src/kmp_taskdeps.cpp b/openmp/runtime/src/kmp_taskdeps.cpp index 34ea3793d1d..b5d53f1ae68 100644 --- a/openmp/runtime/src/kmp_taskdeps.cpp +++ b/openmp/runtime/src/kmp_taskdeps.cpp @@ -16,6 +16,7 @@ #include "kmp.h" #include "kmp_io.h" #include "kmp_wait_release.h" +#include "kmp_taskdeps.h" #if OMPT_SUPPORT #include "ompt-specific.h" #endif @@ -52,26 +53,6 @@ static inline kmp_depnode_t *__kmp_node_ref(kmp_depnode_t *node) { return node; } -static inline void __kmp_node_deref(kmp_info_t *thread, kmp_depnode_t *node) { - if (!node) - return; - - kmp_int32 n = KMP_ATOMIC_DEC(&node->dn.nrefs) - 1; - if (n == 0) { - KMP_ASSERT(node->dn.nrefs == 0); -#if USE_FAST_MEMORY - __kmp_fast_free(thread, node); -#else - __kmp_thread_free(thread, node); -#endif - } -} - -#define KMP_ACQUIRE_DEPNODE(gtid, n) __kmp_acquire_lock(&(n)->dn.lock, (gtid)) -#define KMP_RELEASE_DEPNODE(gtid, n) __kmp_release_lock(&(n)->dn.lock, (gtid)) - -static void __kmp_depnode_list_free(kmp_info_t *thread, kmp_depnode_list *list); - enum { KMP_DEPHASH_OTHER_SIZE = 97, KMP_DEPHASH_MASTER_SIZE = 997 }; static inline kmp_int32 __kmp_dephash_hash(kmp_intptr_t addr, size_t hsize) { @@ -113,34 +94,6 @@ static kmp_dephash_t *__kmp_dephash_create(kmp_info_t *thread, return h; } -void __kmp_dephash_free_entries(kmp_info_t 
*thread, kmp_dephash_t *h) { - for (size_t i = 0; i < h->size; i++) { - if (h->buckets[i]) { - kmp_dephash_entry_t *next; - for (kmp_dephash_entry_t *entry = h->buckets[i]; entry; entry = next) { - next = entry->next_in_bucket; - __kmp_depnode_list_free(thread, entry->last_ins); - __kmp_node_deref(thread, entry->last_out); -#if USE_FAST_MEMORY - __kmp_fast_free(thread, entry); -#else - __kmp_thread_free(thread, entry); -#endif - } - h->buckets[i] = 0; - } - } -} - -void __kmp_dephash_free(kmp_info_t *thread, kmp_dephash_t *h) { - __kmp_dephash_free_entries(thread, h); -#if USE_FAST_MEMORY - __kmp_fast_free(thread, h); -#else - __kmp_thread_free(thread, h); -#endif -} - static kmp_dephash_entry * __kmp_dephash_find(kmp_info_t *thread, kmp_dephash_t *h, kmp_intptr_t addr) { kmp_int32 bucket = __kmp_dephash_hash(addr, h->size); @@ -192,22 +145,6 @@ static kmp_depnode_list_t *__kmp_add_node(kmp_info_t *thread, return new_head; } -static void __kmp_depnode_list_free(kmp_info_t *thread, - kmp_depnode_list *list) { - kmp_depnode_list *next; - - for (; list; list = next) { - next = list->next; - - __kmp_node_deref(thread, list->node); -#if USE_FAST_MEMORY - __kmp_fast_free(thread, list); -#else - __kmp_thread_free(thread, list); -#endif - } -} - static inline void __kmp_track_dependence(kmp_depnode_t *source, kmp_depnode_t *sink, kmp_task_t *sink_task) { @@ -386,63 +323,6 @@ static bool __kmp_check_deps(kmp_int32 gtid, kmp_depnode_t *node, return npredecessors > 0 ? 
true : false; } -void __kmp_release_deps(kmp_int32 gtid, kmp_taskdata_t *task) { - kmp_info_t *thread = __kmp_threads[gtid]; - kmp_depnode_t *node = task->td_depnode; - - if (task->td_dephash) { - KA_TRACE( - 40, ("__kmp_release_deps: T#%d freeing dependencies hash of task %p.\n", - gtid, task)); - __kmp_dephash_free(thread, task->td_dephash); - task->td_dephash = NULL; - } - - if (!node) - return; - - KA_TRACE(20, ("__kmp_release_deps: T#%d notifying successors of task %p.\n", - gtid, task)); - - KMP_ACQUIRE_DEPNODE(gtid, node); - node->dn.task = - NULL; // mark this task as finished, so no new dependencies are generated - KMP_RELEASE_DEPNODE(gtid, node); - - kmp_depnode_list_t *next; - for (kmp_depnode_list_t *p = node->dn.successors; p; p = next) { - kmp_depnode_t *successor = p->node; - kmp_int32 npredecessors = KMP_ATOMIC_DEC(&successor->dn.npredecessors) - 1; - - // successor task can be NULL for wait_depends or because deps are still - // being processed - if (npredecessors == 0) { - KMP_MB(); - if (successor->dn.task) { - KA_TRACE(20, ("__kmp_release_deps: T#%d successor %p of %p scheduled " - "for execution.\n", - gtid, successor->dn.task, task)); - __kmp_omp_task(gtid, successor->dn.task, false); - } - } - - next = p->next; - __kmp_node_deref(thread, p->node); -#if USE_FAST_MEMORY - __kmp_fast_free(thread, p); -#else - __kmp_thread_free(thread, p); -#endif - } - - __kmp_node_deref(thread, node); - - KA_TRACE( - 20, - ("__kmp_release_deps: T#%d all successors of %p notified of completion\n", - gtid, task)); -} - /*! 
@ingroup TASKING @param loc_ref location of the original task directive diff --git a/openmp/runtime/src/kmp_taskdeps.h b/openmp/runtime/src/kmp_taskdeps.h new file mode 100644 index 00000000000..2e79b1cafc9 --- /dev/null +++ b/openmp/runtime/src/kmp_taskdeps.h @@ -0,0 +1,145 @@ +/* + * kmp_taskdeps.h + */ + + +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.txt for details. +// +//===----------------------------------------------------------------------===// + + +#ifndef KMP_TASKDEPS_H +#define KMP_TASKDEPS_H + +#include "kmp.h" + +#if OMP_40_ENABLED + +#define KMP_ACQUIRE_DEPNODE(gtid, n) __kmp_acquire_lock(&(n)->dn.lock, (gtid)) +#define KMP_RELEASE_DEPNODE(gtid, n) __kmp_release_lock(&(n)->dn.lock, (gtid)) + +static inline void __kmp_node_deref(kmp_info_t *thread, kmp_depnode_t *node) { + if (!node) + return; + + kmp_int32 n = KMP_ATOMIC_DEC(&node->dn.nrefs) - 1; + if (n == 0) { + KMP_ASSERT(node->dn.nrefs == 0); +#if USE_FAST_MEMORY + __kmp_fast_free(thread, node); +#else + __kmp_thread_free(thread, node); +#endif + } +} + +static inline void __kmp_depnode_list_free(kmp_info_t *thread, + kmp_depnode_list *list) { + kmp_depnode_list *next; + + for (; list; list = next) { + next = list->next; + + __kmp_node_deref(thread, list->node); +#if USE_FAST_MEMORY + __kmp_fast_free(thread, list); +#else + __kmp_thread_free(thread, list); +#endif + } +} + +static inline void __kmp_dephash_free_entries(kmp_info_t *thread, + kmp_dephash_t *h) { + for (size_t i = 0; i < h->size; i++) { + if (h->buckets[i]) { + kmp_dephash_entry_t *next; + for (kmp_dephash_entry_t *entry = h->buckets[i]; entry; entry = next) { + next = entry->next_in_bucket; + __kmp_depnode_list_free(thread, entry->last_ins); + __kmp_node_deref(thread, entry->last_out); +#if USE_FAST_MEMORY + __kmp_fast_free(thread, 
entry); +#else + __kmp_thread_free(thread, entry); +#endif + } + h->buckets[i] = 0; + } + } +} + +static inline void __kmp_dephash_free(kmp_info_t *thread, kmp_dephash_t *h) { + __kmp_dephash_free_entries(thread, h); +#if USE_FAST_MEMORY + __kmp_fast_free(thread, h); +#else + __kmp_thread_free(thread, h); +#endif +} + +static inline void __kmp_release_deps(kmp_int32 gtid, kmp_taskdata_t *task) { + kmp_info_t *thread = __kmp_threads[gtid]; + kmp_depnode_t *node = task->td_depnode; + + if (task->td_dephash) { + KA_TRACE( + 40, ("__kmp_release_deps: T#%d freeing dependencies hash of task %p.\n", + gtid, task)); + __kmp_dephash_free(thread, task->td_dephash); + task->td_dephash = NULL; + } + + if (!node) + return; + + KA_TRACE(20, ("__kmp_release_deps: T#%d notifying successors of task %p.\n", + gtid, task)); + + KMP_ACQUIRE_DEPNODE(gtid, node); + node->dn.task = + NULL; // mark this task as finished, so no new dependencies are generated + KMP_RELEASE_DEPNODE(gtid, node); + + kmp_depnode_list_t *next; + for (kmp_depnode_list_t *p = node->dn.successors; p; p = next) { + kmp_depnode_t *successor = p->node; + kmp_int32 npredecessors = KMP_ATOMIC_DEC(&successor->dn.npredecessors) - 1; + + // successor task can be NULL for wait_depends or because deps are still + // being processed + if (npredecessors == 0) { + KMP_MB(); + if (successor->dn.task) { + KA_TRACE(20, ("__kmp_release_deps: T#%d successor %p of %p scheduled " + "for execution.\n", + gtid, successor->dn.task, task)); + __kmp_omp_task(gtid, successor->dn.task, false); + } + } + + next = p->next; + __kmp_node_deref(thread, p->node); +#if USE_FAST_MEMORY + __kmp_fast_free(thread, p); +#else + __kmp_thread_free(thread, p); +#endif + } + + __kmp_node_deref(thread, node); + + KA_TRACE( + 20, + ("__kmp_release_deps: T#%d all successors of %p notified of completion\n", + gtid, task)); +} + +#endif // OMP_40_ENABLED + +#endif // KMP_TASKDEPS_H diff --git a/openmp/runtime/src/kmp_tasking.cpp 
b/openmp/runtime/src/kmp_tasking.cpp index 9028686bea1..7179d2c3149 100644 --- a/openmp/runtime/src/kmp_tasking.cpp +++ b/openmp/runtime/src/kmp_tasking.cpp @@ -16,6 +16,7 @@ #include "kmp_itt.h" #include "kmp_stats.h" #include "kmp_wait_release.h" +#include "kmp_taskdeps.h" #if OMPT_SUPPORT #include "ompt-specific.h" @@ -764,15 +765,14 @@ static void __kmp_task_finish(kmp_int32 gtid, kmp_task_t *task, #if OMP_40_ENABLED if (taskdata->td_taskgroup) KMP_ATOMIC_DEC(&taskdata->td_taskgroup->count); + __kmp_release_deps(gtid, taskdata); #if OMP_45_ENABLED - } - // if we found proxy tasks there could exist a dependency chain - // with the proxy task as origin - if (!(taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser) || - (task_team && task_team->tt.tt_found_proxy_tasks)) { -#endif + } else if (task_team && task_team->tt.tt_found_proxy_tasks) { + // if we found proxy tasks there could exist a dependency chain + // with the proxy task as origin __kmp_release_deps(gtid, taskdata); -#endif +#endif // OMP_45_ENABLED +#endif // OMP_40_ENABLED } // td_flags.executing must be marked as 0 after __kmp_release_deps has been |

