summaryrefslogtreecommitdiffstats
path: root/openmp/runtime/src/kmp_lock.cpp
diff options
context:
space:
mode:
authorJonathan Peyton <jonathan.l.peyton@intel.com>2016-04-14 16:00:37 +0000
committerJonathan Peyton <jonathan.l.peyton@intel.com>2016-04-14 16:00:37 +0000
commit377aa40d84fefff341525efb2b60a805dbf96c1e (patch)
tree92881a2b4378d0e5d48c51608e10c484e21a451e /openmp/runtime/src/kmp_lock.cpp
parentc1c14a227e1391a9ee59000643d4bb96986a135e (diff)
downloadbcm5719-llvm-377aa40d84fefff341525efb2b60a805dbf96c1e.tar.gz
bcm5719-llvm-377aa40d84fefff341525efb2b60a805dbf96c1e.zip
Exponential back off logic for test-and-set lock
This change adds back off logic in the test and set lock for better contended lock performance. It uses a simple truncated binary exponential back off function. The default back off parameters are tuned for x86. The main back off logic has a two loop structure where each is controlled by a user-level parameter: max_backoff - limits the outer loop number of iterations. This parameter should be a power of 2. min_ticks - the inner spin wait loop number of "ticks" which is system dependent and should be tuned for your system if you so choose. The "ticks" on x86 correspond to the time stamp counter, but on other architectures ticks is a timestamp derived from gettimeofday(). The user can modify these via the environment variable: KMP_SPIN_BACKOFF_PARAMS=max_backoff[,min_ticks] Currently, since the default user lock is a queuing lock, one would have to also specify KMP_LOCK_KIND=tas to use the test-and-set locks. Differential Revision: http://reviews.llvm.org/D19020 llvm-svn: 266329
Diffstat (limited to 'openmp/runtime/src/kmp_lock.cpp')
-rw-r--r--openmp/runtime/src/kmp_lock.cpp46
1 files changed, 43 insertions, 3 deletions
diff --git a/openmp/runtime/src/kmp_lock.cpp b/openmp/runtime/src/kmp_lock.cpp
index 3f9382f0d8b..53f4d9847c2 100644
--- a/openmp/runtime/src/kmp_lock.cpp
+++ b/openmp/runtime/src/kmp_lock.cpp
@@ -113,11 +113,11 @@ __kmp_acquire_tas_lock_timed_template( kmp_tas_lock_t *lck, kmp_int32 gtid )
KMP_YIELD_SPIN( spins );
}
+ kmp_backoff_t backoff = __kmp_spin_backoff_params;
while ( ( lck->lk.poll != KMP_LOCK_FREE(tas) ) ||
( ! KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), KMP_LOCK_FREE(tas), KMP_LOCK_BUSY(gtid+1, tas) ) ) ) {
- //
- // FIXME - use exponential backoff here
- //
+
+ __kmp_spin_backoff(&backoff);
if ( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc :
__kmp_xproc ) ) {
KMP_YIELD( TRUE );
@@ -3008,6 +3008,46 @@ __kmp_set_drdpa_lock_flags( kmp_drdpa_lock_t *lck, kmp_lock_flags_t flags )
lck->lk.flags = flags;
}
+// Time stamp counter
+#if KMP_ARCH_X86 || KMP_ARCH_X86_64
+# define __kmp_tsc() __kmp_hardware_timestamp()
+// Runtime's default backoff parameters
+kmp_backoff_t __kmp_spin_backoff_params = { 1, 4096, 100 };
+#else
+// Use nanoseconds for other platforms
+extern kmp_uint64 __kmp_now_nsec();
+kmp_backoff_t __kmp_spin_backoff_params = { 1, 256, 100 };
+# define __kmp_tsc() __kmp_now_nsec()
+#endif
+
+// A useful predicate for dealing with timestamps that may wrap.
+// Is a before b?
+// Since the timestamps may wrap, this is asking whether it's
+// shorter to go clockwise from a to b around the clock-face, or anti-clockwise.
+// Times where going clockwise is less distance than going anti-clockwise
+// are in the future, others are in the past.
+// e.g.) a = MAX-1, b = MAX+1 (=0), then a > b (true) does not mean a reached b
+// whereas signed(a) = -2, signed(b) = 0 captures the actual difference
+static inline bool before(kmp_uint64 a, kmp_uint64 b)
+{
+ return ((kmp_int64)b - (kmp_int64)a) > 0;
+}
+
+// Truncated binary exponential backoff function
+void
+__kmp_spin_backoff(kmp_backoff_t *boff)
+{
+ // We could flatten this loop, but making it a nested loop gives better result.
+ kmp_uint32 i;
+ for (i = boff->step; i > 0; i--) {
+ kmp_uint64 goal = __kmp_tsc() + boff->min_tick;
+ do {
+ KMP_CPU_PAUSE();
+ } while (before(__kmp_tsc(), goal));
+ }
+ boff->step = (boff->step<<1 | 1) & (boff->max_backoff-1);
+}
+
#if KMP_USE_DYNAMIC_LOCK
// Direct lock initializers. It simply writes a tag to the low 8 bits of the lock word.
OpenPOWER on IntegriCloud