| author | Jonathan Peyton <jonathan.l.peyton@intel.com> | 2019-02-28 19:11:29 +0000 |
|---|---|---|
| committer | Jonathan Peyton <jonathan.l.peyton@intel.com> | 2019-02-28 19:11:29 +0000 |
| commit | e47d32f165c01e6cb3dcf71f6a9963ce66ea0972 (patch) | |
| tree | 347e0a18aaba4342d3388b03ce5421e25a40edf0 /openmp/runtime/src/kmp_lock.cpp | |
| parent | 23452e1c85ce6add4657584f4c6b3fa264ff8028 (diff) | |
| download | bcm5719-llvm-e47d32f165c01e6cb3dcf71f6a9963ce66ea0972.tar.gz bcm5719-llvm-e47d32f165c01e6cb3dcf71f6a9963ce66ea0972.zip | |
[OpenMP] Make use of sched_yield optional in runtime
This patch cleans up the yielding code and makes it optional. An
environment variable, KMP_USE_YIELD, was added. Yielding is still
on by default (KMP_USE_YIELD=1), but can be turned off completely
(KMP_USE_YIELD=0), or turned on only when oversubscription is detected
(KMP_USE_YIELD=2). Note that oversubscription cannot always be detected
by the runtime; for example, when the runtime is initialized and the
process then forks, oversubscription across the resulting multiple
runtime instances currently goes undetected.
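As a rough illustration of the three KMP_USE_YIELD settings described above, the decision they control could be modeled like this. This is a minimal sketch only; the names yield_mode, oversubscribed, and maybe_yield are invented for the example and are not part of the runtime:

```cpp
// Illustrative model of the KMP_USE_YIELD policy described above.
// All names below are invented for this sketch; they are not OpenMP runtime APIs.
#include <sched.h>
#include <thread>

enum class yield_mode { never = 0, always = 1, oversub_only = 2 }; // KMP_USE_YIELD=0/1/2

// Oversubscription here simply means more runtime threads than logical CPUs.
static bool oversubscribed(int active_threads) {
  return active_threads > static_cast<int>(std::thread::hardware_concurrency());
}

static void maybe_yield(yield_mode mode, int active_threads) {
  if (mode == yield_mode::always ||
      (mode == yield_mode::oversub_only && oversubscribed(active_threads))) {
    sched_yield(); // the primitive the patch title refers to
  }
}
```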
Because yielding can now be controlled by the user, the library mode
settings (from KMP_LIBRARY) for throughput and turnaround have been
adjusted by altering blocktime, unless blocktime was also explicitly set.
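That interplay can be pictured as deriving a blocktime default from the library mode only when the user has not set one explicitly. The sketch below is purely illustrative: the enum, the function name, and the concrete values are assumptions, not the runtime's actual defaults:

```cpp
// Rough sketch of "derive blocktime from the library mode unless the user set
// it explicitly". Names and values are placeholders for illustration only.
#include <climits>

enum class library_mode { turnaround, throughput };

static int effective_blocktime(library_mode mode, bool user_set_blocktime,
                               int user_blocktime_ms) {
  if (user_set_blocktime)
    return user_blocktime_ms; // an explicit KMP_BLOCKTIME is left alone
  // Otherwise the mode picks a default: keep spinning for turnaround,
  // give the CPU back sooner for throughput (values are illustrative).
  return mode == library_mode::turnaround ? INT_MAX : 200;
}
```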
In the original code, there were a number of places where a double yield
might have occurred under oversubscription. This version checks for
oversubscription first, and only if that check does not yield does it
fall through to the spin check.
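The resulting pattern at the reworked call sites (visible in the diff below as KMP_YIELD_OVERSUB_ELSE_SPIN) is roughly: yield immediately when oversubscribed, otherwise pause and only yield once a spin budget runs out. A minimal sketch, with an invented oversub_else_spin helper standing in for the runtime's macro:

```cpp
// Minimal sketch of "yield under oversubscription, else do the bounded spin
// check", mirroring how the rewritten TAS acquire path in the diff below uses
// KMP_YIELD_OVERSUB_ELSE_SPIN. Helper names and the spin budget are invented.
#include <atomic>
#include <sched.h>
#include <thread>

static void cpu_relax() {
#if defined(__x86_64__) || defined(__i386__)
  __builtin_ia32_pause(); // cheap busy-wait hint on x86
#endif
}

static void oversub_else_spin(int active_threads, unsigned &spin_budget) {
  const bool oversub =
      active_threads > static_cast<int>(std::thread::hardware_concurrency());
  if (oversub) {
    sched_yield();      // oversubscribed: give the core away right now
  } else if (--spin_budget == 0) {
    sched_yield();      // spun long enough: yield once, then refill the budget
    spin_budget = 4096; // arbitrary refill value for the sketch
  } else {
    cpu_relax();        // still within budget: just pause and keep spinning
  }
}

// Usage in a simplified test-and-set acquire loop:
static void acquire(std::atomic<int> &lock, int active_threads) {
  unsigned spins = 4096;
  for (int expected = 0;
       !lock.compare_exchange_weak(expected, 1, std::memory_order_acquire);
       expected = 0) {
    oversub_else_spin(active_threads, spins);
  }
}
```

This differs slightly from the rewritten TAS acquire in the diff, which uses a do/while with an exponential backoff before each re-check; the sketch keeps only the oversubscription-else-spin decision.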
Patch by Terry Wilmarth
Differential Revision: https://reviews.llvm.org/D58148
llvm-svn: 355120
Diffstat (limited to 'openmp/runtime/src/kmp_lock.cpp')
| -rw-r--r-- | openmp/runtime/src/kmp_lock.cpp | 62 |
1 file changed, 20 insertions, 42 deletions
diff --git a/openmp/runtime/src/kmp_lock.cpp b/openmp/runtime/src/kmp_lock.cpp
index af916449cf9..78d63c67852 100644
--- a/openmp/runtime/src/kmp_lock.cpp
+++ b/openmp/runtime/src/kmp_lock.cpp
@@ -100,23 +100,12 @@ __kmp_acquire_tas_lock_timed_template(kmp_tas_lock_t *lck, kmp_int32 gtid) {
   kmp_uint32 spins;
   KMP_FSYNC_PREPARE(lck);
   KMP_INIT_YIELD(spins);
-  if (TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {
-    KMP_YIELD(TRUE);
-  } else {
-    KMP_YIELD_SPIN(spins);
-  }
-
   kmp_backoff_t backoff = __kmp_spin_backoff_params;
-  while (KMP_ATOMIC_LD_RLX(&lck->lk.poll) != tas_free ||
-         !__kmp_atomic_compare_store_acq(&lck->lk.poll, tas_free, tas_busy)) {
+  do {
     __kmp_spin_backoff(&backoff);
-    if (TCR_4(__kmp_nth) >
-        (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {
-      KMP_YIELD(TRUE);
-    } else {
-      KMP_YIELD_SPIN(spins);
-    }
-  }
+    KMP_YIELD_OVERSUB_ELSE_SPIN(spins);
+  } while (KMP_ATOMIC_LD_RLX(&lck->lk.poll) != tas_free ||
+           !__kmp_atomic_compare_store_acq(&lck->lk.poll, tas_free, tas_busy));
   KMP_FSYNC_ACQUIRED(lck);
   return KMP_LOCK_ACQUIRED_FIRST;
 }
@@ -169,8 +158,7 @@ int __kmp_release_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {
   KMP_ATOMIC_ST_REL(&lck->lk.poll, KMP_LOCK_FREE(tas));
   KMP_MB(); /* Flush all pending memory write invalidates. */
 
-  KMP_YIELD(TCR_4(__kmp_nth) >
-            (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc));
+  KMP_YIELD_OVERSUB();
   return KMP_LOCK_RELEASED;
 }
 
@@ -474,8 +462,7 @@ int __kmp_release_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
   KA_TRACE(1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d exiting\n",
                   lck, lck->lk.poll, gtid));
 
-  KMP_YIELD(TCR_4(__kmp_nth) >
-            (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc));
+  KMP_YIELD_OVERSUB();
   return KMP_LOCK_RELEASED;
 }
 
@@ -651,7 +638,7 @@ __kmp_acquire_ticket_lock_timed_template(kmp_ticket_lock_t *lck,
                                 std::memory_order_acquire) == my_ticket) {
     return KMP_LOCK_ACQUIRED_FIRST;
   }
-  KMP_WAIT_YIELD_PTR(&lck->lk.now_serving, my_ticket, __kmp_bakery_check, lck);
+  KMP_WAIT_PTR(&lck->lk.now_serving, my_ticket, __kmp_bakery_check, lck);
   return KMP_LOCK_ACQUIRED_FIRST;
 }
 
@@ -1249,10 +1236,9 @@ __kmp_acquire_queuing_lock_timed_template(kmp_queuing_lock_t *lck,
              ("__kmp_acquire_queuing_lock: lck:%p, T#%d waiting for lock\n",
               lck, gtid));
 
-    /* ToDo: May want to consider using __kmp_wait_sleep or something that
-       sleeps for throughput only here. */
     KMP_MB();
-    KMP_WAIT_YIELD(spin_here_p, FALSE, KMP_EQ, lck);
+    // ToDo: Use __kmp_wait_sleep or similar when blocktime != inf
+    KMP_WAIT(spin_here_p, FALSE, KMP_EQ, lck);
 
 #ifdef DEBUG_QUEUING_LOCKS
     TRACE_LOCK(gtid + 1, "acq spin");
@@ -1282,8 +1268,8 @@ __kmp_acquire_queuing_lock_timed_template(kmp_queuing_lock_t *lck,
       /* Yield if number of threads > number of logical processors */
       /* ToDo: Not sure why this should only be in oversubscription case,
          maybe should be traditional YIELD_INIT/YIELD_WHEN loop */
-      KMP_YIELD(TCR_4(__kmp_nth) >
-                (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc));
+      KMP_YIELD_OVERSUB();
+
 #ifdef DEBUG_QUEUING_LOCKS
       TRACE_LOCK(gtid + 1, "acq retry");
 #endif
@@ -1462,8 +1448,8 @@ int __kmp_release_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
         KMP_MB();
         /* make sure enqueuing thread has time to update next waiting thread
          * field */
-        *head_id_p = KMP_WAIT_YIELD((volatile kmp_uint32 *)waiting_id_p, 0,
-                                    KMP_NEQ, NULL);
+        *head_id_p =
+            KMP_WAIT((volatile kmp_uint32 *)waiting_id_p, 0, KMP_NEQ, NULL);
 #ifdef DEBUG_QUEUING_LOCKS
         TRACE_LOCK(gtid + 1, "rel deq: (h,t)->(h',t)");
 #endif
@@ -2131,7 +2117,7 @@ static void __kmp_acquire_adaptive_lock(kmp_adaptive_lock_t *lck,
     // lock from now on.
     while (!__kmp_is_unlocked_queuing_lock(GET_QLK_PTR(lck))) {
       KMP_INC_STAT(lck, lemmingYields);
-      __kmp_yield(TRUE);
+      KMP_YIELD(TRUE);
     }
 
     if (__kmp_test_adaptive_lock_only(lck, gtid))
@@ -2259,23 +2245,14 @@ __kmp_acquire_drdpa_lock_timed_template(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {
   // polling area has been reconfigured. Unless it is reconfigured, the
   // reloads stay in L1 cache and are cheap.
   //
-  // Keep this code in sync with KMP_WAIT_YIELD, in kmp_dispatch.cpp !!!
-  //
-  // The current implementation of KMP_WAIT_YIELD doesn't allow for mask
+  // Keep this code in sync with KMP_WAIT, in kmp_dispatch.cpp !!!
+  // The current implementation of KMP_WAIT doesn't allow for mask
   // and poll to be re-read every spin iteration.
   kmp_uint32 spins;
-
   KMP_FSYNC_PREPARE(lck);
   KMP_INIT_YIELD(spins);
   while (polls[ticket & mask] < ticket) { // atomic load
-    // If we are oversubscribed,
-    // or have waited a bit (and KMP_LIBRARY=turnaround), then yield.
-    // CPU Pause is in the macros for yield.
-    //
-    KMP_YIELD(TCR_4(__kmp_nth) >
-              (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc));
-    KMP_YIELD_SPIN(spins);
-
+    KMP_YIELD_OVERSUB_ELSE_SPIN(spins);
     // Re-read the mask and the poll pointer from the lock structure.
     //
     // Make certain that "mask" is read before "polls" !!!
@@ -2807,8 +2784,9 @@ static void __kmp_acquire_rtm_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
     }
     if ((status & _XABORT_EXPLICIT) && _XABORT_CODE(status) == 0xff) {
       // Wait until lock becomes free
-      while (!__kmp_is_unlocked_queuing_lock(lck))
-        __kmp_yield(TRUE);
+      while (!__kmp_is_unlocked_queuing_lock(lck)) {
+        KMP_YIELD(TRUE);
+      }
     } else if (!(status & _XABORT_RETRY))
       break;
   } while (retries--);

