path: root/openmp/runtime/src/kmp_lock.cpp
author     Jonathan Peyton <jonathan.l.peyton@intel.com>   2019-02-28 19:11:29 +0000
committer  Jonathan Peyton <jonathan.l.peyton@intel.com>   2019-02-28 19:11:29 +0000
commit     e47d32f165c01e6cb3dcf71f6a9963ce66ea0972 (patch)
tree       347e0a18aaba4342d3388b03ce5421e25a40edf0 /openmp/runtime/src/kmp_lock.cpp
parent     23452e1c85ce6add4657584f4c6b3fa264ff8028 (diff)
[OpenMP] Make use of sched_yield optional in runtime
This patch cleans up the yielding code and makes it optional. An environment variable, KMP_USE_YIELD, was added. Yielding is still on by default (KMP_USE_YIELD=1), but it can be turned off completely (KMP_USE_YIELD=0) or turned on only when oversubscription is detected (KMP_USE_YIELD=2). Note that oversubscription cannot always be detected by the runtime; for example, when the runtime is initialized and the process then forks, oversubscription currently cannot be detected across the multiple instances of the runtime.

Because yielding can now be controlled by the user, the library mode settings for throughput and turnaround (from KMP_LIBRARY) have been adjusted by altering blocktime, unless blocktime was also explicitly set.

In the original code, there were a number of places where a double yield might have been done under oversubscription. This version checks for oversubscription first and, if that check does not lead to a yield, falls back to the spin check.

Patch by Terry Wilmarth

Differential Revision: https://reviews.llvm.org/D58148

llvm-svn: 355120
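As a rough illustration of how the new knob could be exercised: KMP_USE_YIELD and its 0/1/2 values come from this patch, but the test program below (the file name yield_test.cpp, the thread count, the iteration count, and the build line) is an illustrative assumption, not part of the patch. A contended-lock micro-test might look like this:

// yield_test.cpp -- illustrative contended-lock micro-test; not part of this patch.
// Assumed build:  clang++ -fopenmp yield_test.cpp -o yield_test
// Assumed runs:   KMP_USE_YIELD=0 ./yield_test   (never yield while spinning)
//                 KMP_USE_YIELD=1 ./yield_test   (default: yield as before)
//                 KMP_USE_YIELD=2 ./yield_test   (yield only when oversubscribed)
#include <cstdio>
#include <omp.h>

int main() {
  omp_lock_t lock;
  omp_init_lock(&lock);
  long counter = 0;                    // shared; protected by the lock below
  double t0 = omp_get_wtime();
#pragma omp parallel num_threads(64)   // deliberately oversubscribe most machines
  {
    for (int i = 0; i < 100000; ++i) {
      omp_set_lock(&lock);             // acquire path runs the runtime's spin/yield code
      ++counter;
      omp_unset_lock(&lock);
    }
  }
  double t1 = omp_get_wtime();
  omp_destroy_lock(&lock);
  std::printf("counter=%ld  elapsed=%.3fs\n", counter, t1 - t0);
  return 0;
}

Comparing the elapsed times across the three settings is one way to see whether yielding helps or hurts on a given machine at a given level of oversubscription.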
Diffstat (limited to 'openmp/runtime/src/kmp_lock.cpp')
-rw-r--r--  openmp/runtime/src/kmp_lock.cpp | 62
1 file changed, 20 insertions(+), 42 deletions(-)
diff --git a/openmp/runtime/src/kmp_lock.cpp b/openmp/runtime/src/kmp_lock.cpp
index af916449cf9..78d63c67852 100644
--- a/openmp/runtime/src/kmp_lock.cpp
+++ b/openmp/runtime/src/kmp_lock.cpp
@@ -100,23 +100,12 @@ __kmp_acquire_tas_lock_timed_template(kmp_tas_lock_t *lck, kmp_int32 gtid) {
kmp_uint32 spins;
KMP_FSYNC_PREPARE(lck);
KMP_INIT_YIELD(spins);
- if (TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {
- KMP_YIELD(TRUE);
- } else {
- KMP_YIELD_SPIN(spins);
- }
-
kmp_backoff_t backoff = __kmp_spin_backoff_params;
- while (KMP_ATOMIC_LD_RLX(&lck->lk.poll) != tas_free ||
- !__kmp_atomic_compare_store_acq(&lck->lk.poll, tas_free, tas_busy)) {
+ do {
__kmp_spin_backoff(&backoff);
- if (TCR_4(__kmp_nth) >
- (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {
- KMP_YIELD(TRUE);
- } else {
- KMP_YIELD_SPIN(spins);
- }
- }
+ KMP_YIELD_OVERSUB_ELSE_SPIN(spins);
+ } while (KMP_ATOMIC_LD_RLX(&lck->lk.poll) != tas_free ||
+ !__kmp_atomic_compare_store_acq(&lck->lk.poll, tas_free, tas_busy));
KMP_FSYNC_ACQUIRED(lck);
return KMP_LOCK_ACQUIRED_FIRST;
}
@@ -169,8 +158,7 @@ int __kmp_release_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {
KMP_ATOMIC_ST_REL(&lck->lk.poll, KMP_LOCK_FREE(tas));
KMP_MB(); /* Flush all pending memory write invalidates. */
- KMP_YIELD(TCR_4(__kmp_nth) >
- (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc));
+ KMP_YIELD_OVERSUB();
return KMP_LOCK_RELEASED;
}
@@ -474,8 +462,7 @@ int __kmp_release_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
KA_TRACE(1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d exiting\n", lck,
lck->lk.poll, gtid));
- KMP_YIELD(TCR_4(__kmp_nth) >
- (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc));
+ KMP_YIELD_OVERSUB();
return KMP_LOCK_RELEASED;
}
@@ -651,7 +638,7 @@ __kmp_acquire_ticket_lock_timed_template(kmp_ticket_lock_t *lck,
std::memory_order_acquire) == my_ticket) {
return KMP_LOCK_ACQUIRED_FIRST;
}
- KMP_WAIT_YIELD_PTR(&lck->lk.now_serving, my_ticket, __kmp_bakery_check, lck);
+ KMP_WAIT_PTR(&lck->lk.now_serving, my_ticket, __kmp_bakery_check, lck);
return KMP_LOCK_ACQUIRED_FIRST;
}
@@ -1249,10 +1236,9 @@ __kmp_acquire_queuing_lock_timed_template(kmp_queuing_lock_t *lck,
("__kmp_acquire_queuing_lock: lck:%p, T#%d waiting for lock\n",
lck, gtid));
- /* ToDo: May want to consider using __kmp_wait_sleep or something that
- sleeps for throughput only here. */
KMP_MB();
- KMP_WAIT_YIELD(spin_here_p, FALSE, KMP_EQ, lck);
+ // ToDo: Use __kmp_wait_sleep or similar when blocktime != inf
+ KMP_WAIT(spin_here_p, FALSE, KMP_EQ, lck);
#ifdef DEBUG_QUEUING_LOCKS
TRACE_LOCK(gtid + 1, "acq spin");
@@ -1282,8 +1268,8 @@ __kmp_acquire_queuing_lock_timed_template(kmp_queuing_lock_t *lck,
/* Yield if number of threads > number of logical processors */
/* ToDo: Not sure why this should only be in oversubscription case,
maybe should be traditional YIELD_INIT/YIELD_WHEN loop */
- KMP_YIELD(TCR_4(__kmp_nth) >
- (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc));
+ KMP_YIELD_OVERSUB();
+
#ifdef DEBUG_QUEUING_LOCKS
TRACE_LOCK(gtid + 1, "acq retry");
#endif
@@ -1462,8 +1448,8 @@ int __kmp_release_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
KMP_MB();
/* make sure enqueuing thread has time to update next waiting thread
* field */
- *head_id_p = KMP_WAIT_YIELD((volatile kmp_uint32 *)waiting_id_p, 0,
- KMP_NEQ, NULL);
+ *head_id_p =
+ KMP_WAIT((volatile kmp_uint32 *)waiting_id_p, 0, KMP_NEQ, NULL);
#ifdef DEBUG_QUEUING_LOCKS
TRACE_LOCK(gtid + 1, "rel deq: (h,t)->(h',t)");
#endif
@@ -2131,7 +2117,7 @@ static void __kmp_acquire_adaptive_lock(kmp_adaptive_lock_t *lck,
// lock from now on.
while (!__kmp_is_unlocked_queuing_lock(GET_QLK_PTR(lck))) {
KMP_INC_STAT(lck, lemmingYields);
- __kmp_yield(TRUE);
+ KMP_YIELD(TRUE);
}
if (__kmp_test_adaptive_lock_only(lck, gtid))
@@ -2259,23 +2245,14 @@ __kmp_acquire_drdpa_lock_timed_template(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {
// polling area has been reconfigured. Unless it is reconfigured, the
// reloads stay in L1 cache and are cheap.
//
- // Keep this code in sync with KMP_WAIT_YIELD, in kmp_dispatch.cpp !!!
- //
- // The current implementation of KMP_WAIT_YIELD doesn't allow for mask
+ // Keep this code in sync with KMP_WAIT, in kmp_dispatch.cpp !!!
+ // The current implementation of KMP_WAIT doesn't allow for mask
// and poll to be re-read every spin iteration.
kmp_uint32 spins;
-
KMP_FSYNC_PREPARE(lck);
KMP_INIT_YIELD(spins);
while (polls[ticket & mask] < ticket) { // atomic load
- // If we are oversubscribed,
- // or have waited a bit (and KMP_LIBRARY=turnaround), then yield.
- // CPU Pause is in the macros for yield.
- //
- KMP_YIELD(TCR_4(__kmp_nth) >
- (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc));
- KMP_YIELD_SPIN(spins);
-
+ KMP_YIELD_OVERSUB_ELSE_SPIN(spins);
// Re-read the mask and the poll pointer from the lock structure.
//
// Make certain that "mask" is read before "polls" !!!
@@ -2807,8 +2784,9 @@ static void __kmp_acquire_rtm_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
}
if ((status & _XABORT_EXPLICIT) && _XABORT_CODE(status) == 0xff) {
// Wait until lock becomes free
- while (!__kmp_is_unlocked_queuing_lock(lck))
- __kmp_yield(TRUE);
+ while (!__kmp_is_unlocked_queuing_lock(lck)) {
+ KMP_YIELD(TRUE);
+ }
} else if (!(status & _XABORT_RETRY))
break;
} while (retries--);