Diffstat (limited to 'kernel/locking')

-rw-r--r--   kernel/locking/lock_events_list.h |   1
-rw-r--r--   kernel/locking/rwsem.c            | 173

2 files changed, 144 insertions(+), 30 deletions(-)
diff --git a/kernel/locking/lock_events_list.h b/kernel/locking/lock_events_list.h
index ca954e4e00e4..baa998401052 100644
--- a/kernel/locking/lock_events_list.h
+++ b/kernel/locking/lock_events_list.h
@@ -59,6 +59,7 @@ LOCK_EVENT(rwsem_wake_writer)	/* # of writer wakeups		*/
 LOCK_EVENT(rwsem_opt_rlock)	/* # of read locks opt-spin acquired	*/
 LOCK_EVENT(rwsem_opt_wlock)	/* # of write locks opt-spin acquired	*/
 LOCK_EVENT(rwsem_opt_fail)	/* # of failed opt-spinnings		*/
+LOCK_EVENT(rwsem_opt_nospin)	/* # of disabled reader opt-spinnings	*/
 LOCK_EVENT(rwsem_rlock)		/* # of read locks acquired		*/
 LOCK_EVENT(rwsem_rlock_fast)	/* # of fast read locks acquired	*/
 LOCK_EVENT(rwsem_rlock_fail)	/* # of failed read lock acquisitions	*/
diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c
index fae557be8334..2d7cabcfca50 100644
--- a/kernel/locking/rwsem.c
+++ b/kernel/locking/rwsem.c
@@ -23,6 +23,7 @@
 #include <linux/sched/debug.h>
 #include <linux/sched/wake_q.h>
 #include <linux/sched/signal.h>
+#include <linux/sched/clock.h>
 #include <linux/export.h>
 #include <linux/rwsem.h>
 #include <linux/atomic.h>
@@ -31,24 +32,28 @@
 #include "lock_events.h"
 
 /*
- * The least significant 2 bits of the owner value has the following
+ * The least significant 3 bits of the owner value has the following
  * meanings when set.
  *  - Bit 0: RWSEM_READER_OWNED - The rwsem is owned by readers
- *  - Bit 1: RWSEM_NONSPINNABLE - Waiters cannot spin on the rwsem
- *    The rwsem is anonymously owned, i.e. the owner(s) cannot be
- *    readily determined. It can be reader owned or the owning writer
- *    is indeterminate.
+ *  - Bit 1: RWSEM_RD_NONSPINNABLE - Readers cannot spin on this lock.
+ *  - Bit 2: RWSEM_WR_NONSPINNABLE - Writers cannot spin on this lock.
  *
+ * When the rwsem is either owned by an anonymous writer, or it is
+ * reader-owned, but a spinning writer has timed out, both nonspinnable
+ * bits will be set to disable optimistic spinning by readers and writers.
+ * In the later case, the last unlocking reader should then check the
+ * writer nonspinnable bit and clear it only to give writers preference
+ * to acquire the lock via optimistic spinning, but not readers. Similar
+ * action is also done in the reader slowpath.
+ *
  * When a writer acquires a rwsem, it puts its task_struct pointer
  * into the owner field. It is cleared after an unlock.
  *
  * When a reader acquires a rwsem, it will also puts its task_struct
- * pointer into the owner field with both the RWSEM_READER_OWNED and
- * RWSEM_NONSPINNABLE bits set. On unlock, the owner field will
- * largely be left untouched. So for a free or reader-owned rwsem,
- * the owner value may contain information about the last reader that
- * acquires the rwsem. The anonymous bit is set because that particular
- * reader may or may not still own the lock.
+ * pointer into the owner field with the RWSEM_READER_OWNED bit set.
+ * On unlock, the owner field will largely be left untouched. So
+ * for a free or reader-owned rwsem, the owner value may contain
+ * information about the last reader that acquires the rwsem.
  *
  * That information may be helpful in debugging cases where the system
  * seems to hang on a reader owned rwsem especially if only one reader
@@ -56,7 +61,9 @@
  * a rwsem, but the overhead is simply too big.
  */
 #define RWSEM_READER_OWNED	(1UL << 0)
-#define RWSEM_NONSPINNABLE	(1UL << 1)
+#define RWSEM_RD_NONSPINNABLE	(1UL << 1)
+#define RWSEM_WR_NONSPINNABLE	(1UL << 2)
+#define RWSEM_NONSPINNABLE	(RWSEM_RD_NONSPINNABLE | RWSEM_WR_NONSPINNABLE)
 #define RWSEM_OWNER_FLAGS_MASK	(RWSEM_READER_OWNED | RWSEM_NONSPINNABLE)
 
 #ifdef CONFIG_DEBUG_RWSEMS
@@ -141,7 +148,7 @@ static inline bool rwsem_test_oflags(struct rw_semaphore *sem, long flags)
 static inline void __rwsem_set_reader_owned(struct rw_semaphore *sem,
 					    struct task_struct *owner)
 {
-	unsigned long val = (unsigned long)owner | RWSEM_READER_OWNED | RWSEM_NONSPINNABLE;
+	unsigned long val = (unsigned long)owner | RWSEM_READER_OWNED;
 
 	atomic_long_set(&sem->owner, val);
 }
@@ -192,6 +199,23 @@ static inline void rwsem_clear_reader_owned(struct rw_semaphore *sem)
 #endif
 
 /*
+ * Set the RWSEM_NONSPINNABLE bits if the RWSEM_READER_OWNED flag
+ * remains set. Otherwise, the operation will be aborted.
+ */
+static inline void rwsem_set_nonspinnable(struct rw_semaphore *sem)
+{
+	unsigned long owner = atomic_long_read(&sem->owner);
+
+	do {
+		if (!(owner & RWSEM_READER_OWNED))
+			break;
+		if (owner & RWSEM_NONSPINNABLE)
+			break;
+	} while (!atomic_long_try_cmpxchg(&sem->owner, &owner,
+					  owner | RWSEM_NONSPINNABLE));
+}
+
+/*
  * Return just the real task structure pointer of the owner
  */
 static inline struct task_struct *rwsem_owner(struct rw_semaphore *sem)
@@ -546,7 +570,8 @@ static inline bool owner_on_cpu(struct task_struct *owner)
 	return owner->on_cpu && !vcpu_is_preempted(task_cpu(owner));
 }
 
-static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem)
+static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem,
+					   unsigned long nonspinnable)
 {
 	struct task_struct *owner;
 	unsigned long flags;
@@ -562,7 +587,7 @@ static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem)
 	preempt_disable();
 	rcu_read_lock();
 	owner = rwsem_owner_flags(sem, &flags);
-	if ((flags & RWSEM_NONSPINNABLE) || (owner && !owner_on_cpu(owner)))
+	if ((flags & nonspinnable) || (owner && !owner_on_cpu(owner)))
 		ret = false;
 	rcu_read_unlock();
 	preempt_enable();
@@ -588,12 +613,12 @@ enum owner_state {
 	OWNER_READER  = 1 << 2,
 	OWNER_NONSPINNABLE = 1 << 3,
 };
-#define OWNER_SPINNABLE		(OWNER_NULL | OWNER_WRITER)
+#define OWNER_SPINNABLE		(OWNER_NULL | OWNER_WRITER | OWNER_READER)
 
 static inline enum owner_state
-rwsem_owner_state(struct task_struct *owner, unsigned long flags)
+rwsem_owner_state(struct task_struct *owner, unsigned long flags, unsigned long nonspinnable)
 {
-	if (flags & RWSEM_NONSPINNABLE)
+	if (flags & nonspinnable)
 		return OWNER_NONSPINNABLE;
 
 	if (flags & RWSEM_READER_OWNED)
@@ -602,14 +627,15 @@ rwsem_owner_state(struct task_struct *owner, unsigned long flags)
 	return owner ? OWNER_WRITER : OWNER_NULL;
 }
 
-static noinline enum owner_state rwsem_spin_on_owner(struct rw_semaphore *sem)
+static noinline enum owner_state
+rwsem_spin_on_owner(struct rw_semaphore *sem, unsigned long nonspinnable)
 {
 	struct task_struct *new, *owner;
 	unsigned long flags, new_flags;
 	enum owner_state state;
 
 	owner = rwsem_owner_flags(sem, &flags);
-	state = rwsem_owner_state(owner, flags);
+	state = rwsem_owner_state(owner, flags, nonspinnable);
 	if (state != OWNER_WRITER)
 		return state;
 
@@ -622,7 +648,7 @@ static noinline enum owner_state rwsem_spin_on_owner(struct rw_semaphore *sem)
 
 		new = rwsem_owner_flags(sem, &new_flags);
 		if ((new != owner) || (new_flags != flags)) {
-			state = rwsem_owner_state(new, new_flags);
+			state = rwsem_owner_state(new, new_flags, nonspinnable);
 			break;
 		}
 
@@ -646,10 +672,39 @@ static noinline enum owner_state rwsem_spin_on_owner(struct rw_semaphore *sem)
 	return state;
 }
 
+/*
+ * Calculate reader-owned rwsem spinning threshold for writer
+ *
+ * The more readers own the rwsem, the longer it will take for them to
+ * wind down and free the rwsem. So the empirical formula used to
+ * determine the actual spinning time limit here is:
+ *
+ *   Spinning threshold = (10 + nr_readers/2)us
+ *
+ * The limit is capped to a maximum of 25us (30 readers). This is just
+ * a heuristic and is subjected to change in the future.
+ */
+static inline u64 rwsem_rspin_threshold(struct rw_semaphore *sem)
+{
+	long count = atomic_long_read(&sem->count);
+	int readers = count >> RWSEM_READER_SHIFT;
+	u64 delta;
+
+	if (readers > 30)
+		readers = 30;
+	delta = (20 + readers) * NSEC_PER_USEC / 2;
+
+	return sched_clock() + delta;
+}
+
 static bool rwsem_optimistic_spin(struct rw_semaphore *sem, bool wlock)
 {
 	bool taken = false;
 	int prev_owner_state = OWNER_NULL;
+	int loop = 0;
+	u64 rspin_threshold = 0;
+	unsigned long nonspinnable = wlock ? RWSEM_WR_NONSPINNABLE
+					   : RWSEM_RD_NONSPINNABLE;
 
 	preempt_disable();
 
@@ -661,12 +716,12 @@ static bool rwsem_optimistic_spin(struct rw_semaphore *sem, bool wlock)
 	 * Optimistically spin on the owner field and attempt to acquire the
 	 * lock whenever the owner changes. Spinning will be stopped when:
 	 *  1) the owning writer isn't running; or
-	 *  2) readers own the lock as we can't determine if they are
-	 *     actively running or not.
+	 *  2) readers own the lock and spinning time has exceeded limit.
 	 */
 	for (;;) {
-		enum owner_state owner_state = rwsem_spin_on_owner(sem);
+		enum owner_state owner_state;
 
+		owner_state = rwsem_spin_on_owner(sem, nonspinnable);
 		if (!(owner_state & OWNER_SPINNABLE))
 			break;
 
@@ -680,6 +735,38 @@ static bool rwsem_optimistic_spin(struct rw_semaphore *sem, bool wlock)
 			break;
 
 		/*
+		 * Time-based reader-owned rwsem optimistic spinning
+		 */
+		if (wlock && (owner_state == OWNER_READER)) {
+			/*
+			 * Re-initialize rspin_threshold every time when
+			 * the owner state changes from non-reader to reader.
+			 * This allows a writer to steal the lock in between
+			 * 2 reader phases and have the threshold reset at
+			 * the beginning of the 2nd reader phase.
+			 */
+			if (prev_owner_state != OWNER_READER) {
+				if (rwsem_test_oflags(sem, nonspinnable))
+					break;
+				rspin_threshold = rwsem_rspin_threshold(sem);
+				loop = 0;
+			}
+
+			/*
+			 * Check time threshold once every 16 iterations to
+			 * avoid calling sched_clock() too frequently so
+			 * as to reduce the average latency between the times
+			 * when the lock becomes free and when the spinner
+			 * is ready to do a trylock.
+			 */
+			else if (!(++loop & 0xf) && (sched_clock() > rspin_threshold)) {
+				rwsem_set_nonspinnable(sem);
+				lockevent_inc(rwsem_opt_nospin);
+				break;
+			}
+		}
+
+		/*
 		 * An RT task cannot do optimistic spinning if it cannot
 		 * be sure the lock holder is running or live-lock may
 		 * happen if the current task and the lock holder happen
@@ -733,8 +820,25 @@ done:
 	lockevent_cond_inc(rwsem_opt_fail, !taken);
 	return taken;
 }
+
+/*
+ * Clear the owner's RWSEM_WR_NONSPINNABLE bit if it is set. This should
+ * only be called when the reader count reaches 0.
+ *
+ * This give writers better chance to acquire the rwsem first before
+ * readers when the rwsem was being held by readers for a relatively long
+ * period of time. Race can happen that an optimistic spinner may have
+ * just stolen the rwsem and set the owner, but just clearing the
+ * RWSEM_WR_NONSPINNABLE bit will do no harm anyway.
+ */
+static inline void clear_wr_nonspinnable(struct rw_semaphore *sem)
+{
+	if (rwsem_test_oflags(sem, RWSEM_WR_NONSPINNABLE))
+		atomic_long_andnot(RWSEM_WR_NONSPINNABLE, &sem->owner);
+}
 #else
-static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem)
+static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem,
+					   unsigned long nonspinnable)
 {
 	return false;
 }
@@ -743,6 +847,8 @@ static inline bool rwsem_optimistic_spin(struct rw_semaphore *sem, bool wlock)
 {
 	return false;
 }
+
+static inline void clear_wr_nonspinnable(struct rw_semaphore *sem) { }
 #endif
 
 /*
@@ -752,10 +858,11 @@ static struct rw_semaphore __sched *
 rwsem_down_read_slowpath(struct rw_semaphore *sem, int state)
 {
 	long count, adjustment = -RWSEM_READER_BIAS;
+	bool wake = false;
 	struct rwsem_waiter waiter;
 	DEFINE_WAKE_Q(wake_q);
 
-	if (!rwsem_can_spin_on_owner(sem))
+	if (!rwsem_can_spin_on_owner(sem, RWSEM_RD_NONSPINNABLE))
 		goto queue;
 
 	/*
@@ -815,8 +922,12 @@ queue:
 	 * If there are no writers and we are first in the queue,
 	 * wake our own waiter to join the existing active readers !
 	 */
-	if (!(count & RWSEM_LOCK_MASK) ||
-	    (!(count & RWSEM_WRITER_MASK) && (adjustment & RWSEM_FLAG_WAITERS)))
+	if (!(count & RWSEM_LOCK_MASK)) {
+		clear_wr_nonspinnable(sem);
+		wake = true;
+	}
+	if (wake || (!(count & RWSEM_WRITER_MASK) &&
+		    (adjustment & RWSEM_FLAG_WAITERS)))
 		rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);
 
 	raw_spin_unlock_irq(&sem->wait_lock);
@@ -866,7 +977,7 @@ rwsem_down_write_slowpath(struct rw_semaphore *sem, int state)
 	DEFINE_WAKE_Q(wake_q);
 
 	/* do optimistic spinning and steal lock if possible */
-	if (rwsem_can_spin_on_owner(sem) &&
+	if (rwsem_can_spin_on_owner(sem, RWSEM_WR_NONSPINNABLE) &&
 	    rwsem_optimistic_spin(sem, true))
 		return sem;
 
@@ -1124,8 +1235,10 @@ inline void __up_read(struct rw_semaphore *sem)
 	rwsem_clear_reader_owned(sem);
 	tmp = atomic_long_add_return_release(-RWSEM_READER_BIAS, &sem->count);
 	if (unlikely((tmp & (RWSEM_LOCK_MASK|RWSEM_FLAG_WAITERS)) ==
-		      RWSEM_FLAG_WAITERS))
+		      RWSEM_FLAG_WAITERS)) {
+		clear_wr_nonspinnable(sem);
 		rwsem_wake(sem, tmp);
+	}
 }
 
 /*
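For readers skimming the patch, the sketch below illustrates the two mechanics at its core: the writer's spin deadline grows with the reader count (Spinning threshold = (10 + nr_readers/2)us, capped at 25us at 30 readers), and the clock is consulted only once every 16 spin iterations. This is a standalone userspace illustration, not kernel code: now_ns() stands in for the kernel's sched_clock(), the lock itself is hypothetical, and only the arithmetic mirrors rwsem_rspin_threshold().

	#include <stdint.h>
	#include <stdio.h>
	#include <time.h>

	#define NSEC_PER_USEC 1000ULL

	/* Userspace stand-in for the kernel's sched_clock(): monotonic ns. */
	static uint64_t now_ns(void)
	{
		struct timespec ts;

		clock_gettime(CLOCK_MONOTONIC, &ts);
		return (uint64_t)ts.tv_sec * 1000000000ULL + ts.tv_nsec;
	}

	/*
	 * Mirror of the patch's formula: deadline = now + (10 + readers/2)us,
	 * capped at 25us (30 readers). Computing (20 + readers) * NSEC_PER_USEC / 2
	 * preserves the half-microsecond-per-reader term under integer division.
	 */
	static uint64_t rspin_threshold(int readers)
	{
		if (readers > 30)
			readers = 30;
		return now_ns() + (20 + readers) * NSEC_PER_USEC / 2;
	}

	int main(void)
	{
		uint64_t deadline = rspin_threshold(8);	/* 8 readers -> 14us budget */
		int loop = 0;

		for (;;) {
			/* ... a trylock on the hypothetical rwsem would go here ... */

			/* Check the clock only once every 16 iterations. */
			if (!(++loop & 0xf) && now_ns() > deadline) {
				/* The kernel would set the nonspinnable bits here. */
				printf("spin budget exhausted after %d iterations\n", loop);
				break;
			}
		}
		return 0;
	}

The 16-iteration stride is the same trade-off the patch's comment describes: sampling the clock on every pass would add latency between the moment the lock frees and the moment the spinner notices, so the deadline check is amortized across iterations.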