summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
authorJonathan Peyton <jonathan.l.peyton@intel.com>2017-01-27 17:54:31 +0000
committerJonathan Peyton <jonathan.l.peyton@intel.com>2017-01-27 17:54:31 +0000
commit2208a851012a2d0bc09a375972cbcc8248487766 (patch)
treec983670bc0e8157ba92bc5ee600490da22a0075d
parent3b34a9b5d9fb08130124f17cb969c2c6bc0419f9 (diff)
downloadbcm5719-llvm-2208a851012a2d0bc09a375972cbcc8248487766.tar.gz
bcm5719-llvm-2208a851012a2d0bc09a375972cbcc8248487766.zip
Fix performance issue incurred by removing monitor thread.
When the monitor thread is used, most threads in the team directly go to sleep if the copy of bt_intervals/bt_set is not available in the cache, and this happens at least once per thread in the wait function, making the overall performance slightly better. This change tries to mimic this behavior by using the bt_intervals cache, which simply keeps the blocktime interval in terms of the platform-dependent ticks or nanoseconds. Patch by Hansang Bae. Differential Revision: https://reviews.llvm.org/D28906 — llvm-svn: 293312
-rw-r--r--  openmp/runtime/src/kmp.h            | 18
-rw-r--r--  openmp/runtime/src/kmp_barrier.cpp  | 12
-rw-r--r--  openmp/runtime/src/kmp_wait_release.h | 18
3 files changed, 27 insertions(+), 21 deletions(-)
diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h
index 07fc8642836..5859c51f365 100644
--- a/openmp/runtime/src/kmp.h
+++ b/openmp/runtime/src/kmp.h
@@ -889,6 +889,20 @@ extern int __kmp_place_num_threads_per_core;
#define KMP_INTERVALS_FROM_BLOCKTIME(blocktime, monitor_wakeups) \
( ( (blocktime) + (KMP_BLOCKTIME_MULTIPLIER / (monitor_wakeups)) - 1 ) / \
(KMP_BLOCKTIME_MULTIPLIER / (monitor_wakeups)) )
+#else
+# if KMP_OS_UNIX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
+ // HW TSC is used to reduce overhead (clock tick instead of nanosecond).
+ extern double __kmp_ticks_per_nsec;
+# define KMP_NOW() __kmp_hardware_timestamp()
+# define KMP_BLOCKTIME_INTERVAL() (__kmp_dflt_blocktime * KMP_USEC_PER_SEC * __kmp_ticks_per_nsec)
+# define KMP_BLOCKING(goal, count) ((goal) > KMP_NOW())
+# else
+ // System time is retrieved sporadically while blocking.
+ extern kmp_uint64 __kmp_now_nsec();
+# define KMP_NOW() __kmp_now_nsec()
+# define KMP_BLOCKTIME_INTERVAL() (__kmp_dflt_blocktime * KMP_USEC_PER_SEC)
+# define KMP_BLOCKING(goal, count) ((count) % 1000 != 0 || (goal) > KMP_NOW())
+# endif
#endif // KMP_USE_MONITOR
#define KMP_MIN_STATSCOLS 40
@@ -2220,8 +2234,10 @@ typedef struct KMP_ALIGN_CACHE kmp_base_info {
/* to exist (from the POV of worker threads). */
#if KMP_USE_MONITOR
int th_team_bt_intervals;
-#endif
int th_team_bt_set;
+#else
+ kmp_uint64 th_team_bt_intervals;
+#endif
#if KMP_AFFINITY_SUPPORTED
diff --git a/openmp/runtime/src/kmp_barrier.cpp b/openmp/runtime/src/kmp_barrier.cpp
index 5e776146cd1..41062453f92 100644
--- a/openmp/runtime/src/kmp_barrier.cpp
+++ b/openmp/runtime/src/kmp_barrier.cpp
@@ -1130,8 +1130,10 @@ __kmp_barrier(enum barrier_type bt, int gtid, int is_split, size_t reduce_size,
if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
#if KMP_USE_MONITOR
this_thr->th.th_team_bt_intervals = team->t.t_implicit_task_taskdata[tid].td_icvs.bt_intervals;
-#endif
this_thr->th.th_team_bt_set = team->t.t_implicit_task_taskdata[tid].td_icvs.bt_set;
+#else
+ this_thr->th.th_team_bt_intervals = KMP_BLOCKTIME_INTERVAL();
+#endif
}
#if USE_ITT_BUILD
@@ -1453,8 +1455,10 @@ __kmp_join_barrier(int gtid)
if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
#if KMP_USE_MONITOR
this_thr->th.th_team_bt_intervals = team->t.t_implicit_task_taskdata[tid].td_icvs.bt_intervals;
-#endif
this_thr->th.th_team_bt_set = team->t.t_implicit_task_taskdata[tid].td_icvs.bt_set;
+#else
+ this_thr->th.th_team_bt_intervals = KMP_BLOCKTIME_INTERVAL();
+#endif
}
#if USE_ITT_BUILD
@@ -1644,8 +1648,10 @@ __kmp_fork_barrier(int gtid, int tid)
if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
#if KMP_USE_MONITOR
this_thr->th.th_team_bt_intervals = team->t.t_implicit_task_taskdata[tid].td_icvs.bt_intervals;
-#endif
this_thr->th.th_team_bt_set = team->t.t_implicit_task_taskdata[tid].td_icvs.bt_set;
+#else
+ this_thr->th.th_team_bt_intervals = KMP_BLOCKTIME_INTERVAL();
+#endif
}
} // master
diff --git a/openmp/runtime/src/kmp_wait_release.h b/openmp/runtime/src/kmp_wait_release.h
index 22ff8e84224..4d177897d6e 100644
--- a/openmp/runtime/src/kmp_wait_release.h
+++ b/openmp/runtime/src/kmp_wait_release.h
@@ -84,22 +84,6 @@ class kmp_flag {
*/
};
-#if ! KMP_USE_MONITOR
-# if KMP_OS_UNIX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
- // HW TSC is used to reduce overhead (clock tick instead of nanosecond).
- extern double __kmp_ticks_per_nsec;
-# define KMP_NOW() __kmp_hardware_timestamp()
-# define KMP_BLOCKTIME_INTERVAL() (__kmp_dflt_blocktime * KMP_USEC_PER_SEC * __kmp_ticks_per_nsec)
-# define KMP_BLOCKING(goal, count) ((goal) > KMP_NOW())
-# else
- // System time is retrieved sporadically while blocking.
- extern kmp_uint64 __kmp_now_nsec();
-# define KMP_NOW() __kmp_now_nsec()
-# define KMP_BLOCKTIME_INTERVAL() (__kmp_dflt_blocktime * KMP_USEC_PER_SEC)
-# define KMP_BLOCKING(goal, count) ((count) % 1000 != 0 || (goal) > KMP_NOW())
-# endif
-#endif
-
/* Spin wait loop that first does pause, then yield, then sleep. A thread that calls __kmp_wait_*
must make certain that another thread calls __kmp_release to wake it back up to prevent deadlocks! */
template <class C>
@@ -187,7 +171,7 @@ __kmp_wait_template(kmp_info_t *this_thr, C *flag, int final_spin
th_gtid, __kmp_global.g.g_time.dt.t_value, hibernate,
hibernate - __kmp_global.g.g_time.dt.t_value));
#else
- hibernate_goal = KMP_NOW() + KMP_BLOCKTIME_INTERVAL();
+ hibernate_goal = KMP_NOW() + this_thr->th.th_team_bt_intervals;
poll_count = 0;
#endif // KMP_USE_MONITOR
}
OpenPOWER on IntegriCloud