summary | refs | log | tree | commit | diff | stats
path: root/openmp/runtime/src
diff options
context:
space:
mode:
author: Jonathan Peyton <jonathan.l.peyton@intel.com> 2018-09-26 20:24:39 +0000
committer: Jonathan Peyton <jonathan.l.peyton@intel.com> 2018-09-26 20:24:39 +0000
commit: cf27e31bdd424b3f13537a09418d539272e7bb36 (patch)
tree: 951e148b7417802940b639aecbf6e9781f881b67 /openmp/runtime/src
parent: 60eec6fecb74eb225c76645a9a5a96b3bd2c9684 (diff)
download: bcm5719-llvm-cf27e31bdd424b3f13537a09418d539272e7bb36.tar.gz
download: bcm5719-llvm-cf27e31bdd424b3f13537a09418d539272e7bb36.zip
[OpenMP] Fix performance issue from 376.kdtree
This change improves the performance of 376.kdtree by giving the compiler an opportunity to do inlining and other optimizations for the call path __kmpc_omp_task_complete_if0() -> __kmp_task_finish(), which is one of the hot paths in the program. To achieve this, some functions in kmp_taskdeps.cpp were moved to the new header file, kmp_taskdeps.h.

Patch by Hansang Bae

Differential Revision: https://reviews.llvm.org/D51889

llvm-svn: 343138
Diffstat (limited to 'openmp/runtime/src')
-rw-r--r-- openmp/runtime/src/kmp.h 3
-rw-r--r-- openmp/runtime/src/kmp_taskdeps.cpp 122
-rw-r--r-- openmp/runtime/src/kmp_taskdeps.h 145
-rw-r--r-- openmp/runtime/src/kmp_tasking.cpp 14
4 files changed, 153 insertions, 131 deletions
diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h
index 97908e1aadb..34c722ac7ac 100644
--- a/openmp/runtime/src/kmp.h
+++ b/openmp/runtime/src/kmp.h
@@ -3748,9 +3748,6 @@ KMP_EXPORT void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32 gtid,
kmp_depend_info_t *dep_list,
kmp_int32 ndeps_noalias,
kmp_depend_info_t *noalias_dep_list);
-extern void __kmp_release_deps(kmp_int32 gtid, kmp_taskdata_t *task);
-extern void __kmp_dephash_free_entries(kmp_info_t *thread, kmp_dephash_t *h);
-extern void __kmp_dephash_free(kmp_info_t *thread, kmp_dephash_t *h);
extern kmp_int32 __kmp_omp_task(kmp_int32 gtid, kmp_task_t *new_task,
bool serialize_immediate);
diff --git a/openmp/runtime/src/kmp_taskdeps.cpp b/openmp/runtime/src/kmp_taskdeps.cpp
index 34ea3793d1d..b5d53f1ae68 100644
--- a/openmp/runtime/src/kmp_taskdeps.cpp
+++ b/openmp/runtime/src/kmp_taskdeps.cpp
@@ -16,6 +16,7 @@
#include "kmp.h"
#include "kmp_io.h"
#include "kmp_wait_release.h"
+#include "kmp_taskdeps.h"
#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif
@@ -52,26 +53,6 @@ static inline kmp_depnode_t *__kmp_node_ref(kmp_depnode_t *node) {
return node;
}
-static inline void __kmp_node_deref(kmp_info_t *thread, kmp_depnode_t *node) {
- if (!node)
- return;
-
- kmp_int32 n = KMP_ATOMIC_DEC(&node->dn.nrefs) - 1;
- if (n == 0) {
- KMP_ASSERT(node->dn.nrefs == 0);
-#if USE_FAST_MEMORY
- __kmp_fast_free(thread, node);
-#else
- __kmp_thread_free(thread, node);
-#endif
- }
-}
-
-#define KMP_ACQUIRE_DEPNODE(gtid, n) __kmp_acquire_lock(&(n)->dn.lock, (gtid))
-#define KMP_RELEASE_DEPNODE(gtid, n) __kmp_release_lock(&(n)->dn.lock, (gtid))
-
-static void __kmp_depnode_list_free(kmp_info_t *thread, kmp_depnode_list *list);
-
enum { KMP_DEPHASH_OTHER_SIZE = 97, KMP_DEPHASH_MASTER_SIZE = 997 };
static inline kmp_int32 __kmp_dephash_hash(kmp_intptr_t addr, size_t hsize) {
@@ -113,34 +94,6 @@ static kmp_dephash_t *__kmp_dephash_create(kmp_info_t *thread,
return h;
}
-void __kmp_dephash_free_entries(kmp_info_t *thread, kmp_dephash_t *h) {
- for (size_t i = 0; i < h->size; i++) {
- if (h->buckets[i]) {
- kmp_dephash_entry_t *next;
- for (kmp_dephash_entry_t *entry = h->buckets[i]; entry; entry = next) {
- next = entry->next_in_bucket;
- __kmp_depnode_list_free(thread, entry->last_ins);
- __kmp_node_deref(thread, entry->last_out);
-#if USE_FAST_MEMORY
- __kmp_fast_free(thread, entry);
-#else
- __kmp_thread_free(thread, entry);
-#endif
- }
- h->buckets[i] = 0;
- }
- }
-}
-
-void __kmp_dephash_free(kmp_info_t *thread, kmp_dephash_t *h) {
- __kmp_dephash_free_entries(thread, h);
-#if USE_FAST_MEMORY
- __kmp_fast_free(thread, h);
-#else
- __kmp_thread_free(thread, h);
-#endif
-}
-
static kmp_dephash_entry *
__kmp_dephash_find(kmp_info_t *thread, kmp_dephash_t *h, kmp_intptr_t addr) {
kmp_int32 bucket = __kmp_dephash_hash(addr, h->size);
@@ -192,22 +145,6 @@ static kmp_depnode_list_t *__kmp_add_node(kmp_info_t *thread,
return new_head;
}
-static void __kmp_depnode_list_free(kmp_info_t *thread,
- kmp_depnode_list *list) {
- kmp_depnode_list *next;
-
- for (; list; list = next) {
- next = list->next;
-
- __kmp_node_deref(thread, list->node);
-#if USE_FAST_MEMORY
- __kmp_fast_free(thread, list);
-#else
- __kmp_thread_free(thread, list);
-#endif
- }
-}
-
static inline void __kmp_track_dependence(kmp_depnode_t *source,
kmp_depnode_t *sink,
kmp_task_t *sink_task) {
@@ -386,63 +323,6 @@ static bool __kmp_check_deps(kmp_int32 gtid, kmp_depnode_t *node,
return npredecessors > 0 ? true : false;
}
-void __kmp_release_deps(kmp_int32 gtid, kmp_taskdata_t *task) {
- kmp_info_t *thread = __kmp_threads[gtid];
- kmp_depnode_t *node = task->td_depnode;
-
- if (task->td_dephash) {
- KA_TRACE(
- 40, ("__kmp_release_deps: T#%d freeing dependencies hash of task %p.\n",
- gtid, task));
- __kmp_dephash_free(thread, task->td_dephash);
- task->td_dephash = NULL;
- }
-
- if (!node)
- return;
-
- KA_TRACE(20, ("__kmp_release_deps: T#%d notifying successors of task %p.\n",
- gtid, task));
-
- KMP_ACQUIRE_DEPNODE(gtid, node);
- node->dn.task =
- NULL; // mark this task as finished, so no new dependencies are generated
- KMP_RELEASE_DEPNODE(gtid, node);
-
- kmp_depnode_list_t *next;
- for (kmp_depnode_list_t *p = node->dn.successors; p; p = next) {
- kmp_depnode_t *successor = p->node;
- kmp_int32 npredecessors = KMP_ATOMIC_DEC(&successor->dn.npredecessors) - 1;
-
- // successor task can be NULL for wait_depends or because deps are still
- // being processed
- if (npredecessors == 0) {
- KMP_MB();
- if (successor->dn.task) {
- KA_TRACE(20, ("__kmp_release_deps: T#%d successor %p of %p scheduled "
- "for execution.\n",
- gtid, successor->dn.task, task));
- __kmp_omp_task(gtid, successor->dn.task, false);
- }
- }
-
- next = p->next;
- __kmp_node_deref(thread, p->node);
-#if USE_FAST_MEMORY
- __kmp_fast_free(thread, p);
-#else
- __kmp_thread_free(thread, p);
-#endif
- }
-
- __kmp_node_deref(thread, node);
-
- KA_TRACE(
- 20,
- ("__kmp_release_deps: T#%d all successors of %p notified of completion\n",
- gtid, task));
-}
-
/*!
@ingroup TASKING
@param loc_ref location of the original task directive
diff --git a/openmp/runtime/src/kmp_taskdeps.h b/openmp/runtime/src/kmp_taskdeps.h
new file mode 100644
index 00000000000..2e79b1cafc9
--- /dev/null
+++ b/openmp/runtime/src/kmp_taskdeps.h
@@ -0,0 +1,145 @@
+/*
+ * kmp_taskdeps.h
+ */
+
+
+//===----------------------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#ifndef KMP_TASKDEPS_H
+#define KMP_TASKDEPS_H
+
+#include "kmp.h"
+
+#if OMP_40_ENABLED
+
+#define KMP_ACQUIRE_DEPNODE(gtid, n) __kmp_acquire_lock(&(n)->dn.lock, (gtid))
+#define KMP_RELEASE_DEPNODE(gtid, n) __kmp_release_lock(&(n)->dn.lock, (gtid))
+
+static inline void __kmp_node_deref(kmp_info_t *thread, kmp_depnode_t *node) {
+ if (!node)
+ return;
+
+ kmp_int32 n = KMP_ATOMIC_DEC(&node->dn.nrefs) - 1;
+ if (n == 0) {
+ KMP_ASSERT(node->dn.nrefs == 0);
+#if USE_FAST_MEMORY
+ __kmp_fast_free(thread, node);
+#else
+ __kmp_thread_free(thread, node);
+#endif
+ }
+}
+
+static inline void __kmp_depnode_list_free(kmp_info_t *thread,
+ kmp_depnode_list *list) {
+ kmp_depnode_list *next;
+
+ for (; list; list = next) {
+ next = list->next;
+
+ __kmp_node_deref(thread, list->node);
+#if USE_FAST_MEMORY
+ __kmp_fast_free(thread, list);
+#else
+ __kmp_thread_free(thread, list);
+#endif
+ }
+}
+
+static inline void __kmp_dephash_free_entries(kmp_info_t *thread,
+ kmp_dephash_t *h) {
+ for (size_t i = 0; i < h->size; i++) {
+ if (h->buckets[i]) {
+ kmp_dephash_entry_t *next;
+ for (kmp_dephash_entry_t *entry = h->buckets[i]; entry; entry = next) {
+ next = entry->next_in_bucket;
+ __kmp_depnode_list_free(thread, entry->last_ins);
+ __kmp_node_deref(thread, entry->last_out);
+#if USE_FAST_MEMORY
+ __kmp_fast_free(thread, entry);
+#else
+ __kmp_thread_free(thread, entry);
+#endif
+ }
+ h->buckets[i] = 0;
+ }
+ }
+}
+
+static inline void __kmp_dephash_free(kmp_info_t *thread, kmp_dephash_t *h) {
+ __kmp_dephash_free_entries(thread, h);
+#if USE_FAST_MEMORY
+ __kmp_fast_free(thread, h);
+#else
+ __kmp_thread_free(thread, h);
+#endif
+}
+
+static inline void __kmp_release_deps(kmp_int32 gtid, kmp_taskdata_t *task) {
+ kmp_info_t *thread = __kmp_threads[gtid];
+ kmp_depnode_t *node = task->td_depnode;
+
+ if (task->td_dephash) {
+ KA_TRACE(
+ 40, ("__kmp_release_deps: T#%d freeing dependencies hash of task %p.\n",
+ gtid, task));
+ __kmp_dephash_free(thread, task->td_dephash);
+ task->td_dephash = NULL;
+ }
+
+ if (!node)
+ return;
+
+ KA_TRACE(20, ("__kmp_release_deps: T#%d notifying successors of task %p.\n",
+ gtid, task));
+
+ KMP_ACQUIRE_DEPNODE(gtid, node);
+ node->dn.task =
+ NULL; // mark this task as finished, so no new dependencies are generated
+ KMP_RELEASE_DEPNODE(gtid, node);
+
+ kmp_depnode_list_t *next;
+ for (kmp_depnode_list_t *p = node->dn.successors; p; p = next) {
+ kmp_depnode_t *successor = p->node;
+ kmp_int32 npredecessors = KMP_ATOMIC_DEC(&successor->dn.npredecessors) - 1;
+
+ // successor task can be NULL for wait_depends or because deps are still
+ // being processed
+ if (npredecessors == 0) {
+ KMP_MB();
+ if (successor->dn.task) {
+ KA_TRACE(20, ("__kmp_release_deps: T#%d successor %p of %p scheduled "
+ "for execution.\n",
+ gtid, successor->dn.task, task));
+ __kmp_omp_task(gtid, successor->dn.task, false);
+ }
+ }
+
+ next = p->next;
+ __kmp_node_deref(thread, p->node);
+#if USE_FAST_MEMORY
+ __kmp_fast_free(thread, p);
+#else
+ __kmp_thread_free(thread, p);
+#endif
+ }
+
+ __kmp_node_deref(thread, node);
+
+ KA_TRACE(
+ 20,
+ ("__kmp_release_deps: T#%d all successors of %p notified of completion\n",
+ gtid, task));
+}
+
+#endif // OMP_40_ENABLED
+
+#endif // KMP_TASKDEPS_H
diff --git a/openmp/runtime/src/kmp_tasking.cpp b/openmp/runtime/src/kmp_tasking.cpp
index 9028686bea1..7179d2c3149 100644
--- a/openmp/runtime/src/kmp_tasking.cpp
+++ b/openmp/runtime/src/kmp_tasking.cpp
@@ -16,6 +16,7 @@
#include "kmp_itt.h"
#include "kmp_stats.h"
#include "kmp_wait_release.h"
+#include "kmp_taskdeps.h"
#if OMPT_SUPPORT
#include "ompt-specific.h"
@@ -764,15 +765,14 @@ static void __kmp_task_finish(kmp_int32 gtid, kmp_task_t *task,
#if OMP_40_ENABLED
if (taskdata->td_taskgroup)
KMP_ATOMIC_DEC(&taskdata->td_taskgroup->count);
+ __kmp_release_deps(gtid, taskdata);
#if OMP_45_ENABLED
- }
- // if we found proxy tasks there could exist a dependency chain
- // with the proxy task as origin
- if (!(taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser) ||
- (task_team && task_team->tt.tt_found_proxy_tasks)) {
-#endif
+ } else if (task_team && task_team->tt.tt_found_proxy_tasks) {
+ // if we found proxy tasks there could exist a dependency chain
+ // with the proxy task as origin
__kmp_release_deps(gtid, taskdata);
-#endif
+#endif // OMP_45_ENABLED
+#endif // OMP_40_ENABLED
}
// td_flags.executing must be marked as 0 after __kmp_release_deps has been
OpenPOWER on IntegriCloud