diff options
Diffstat (limited to 'kernel/locking')
-rw-r--r-- | kernel/locking/lockdep.c | 174 | ||||
-rw-r--r-- | kernel/locking/lockdep_internals.h | 9 | ||||
-rw-r--r-- | kernel/locking/lockdep_proc.c | 11 | ||||
-rw-r--r-- | kernel/locking/mutex.c | 37 | ||||
-rw-r--r-- | kernel/locking/rtmutex.c | 6 | ||||
-rw-r--r-- | kernel/locking/rwsem.c | 82 |
6 files changed, 248 insertions, 71 deletions
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 341f52117f88..233459c03b5a 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -448,34 +448,102 @@ static void print_lockdep_off(const char *bug_msg) unsigned long nr_stack_trace_entries; -#if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING) +#ifdef CONFIG_PROVE_LOCKING +/** + * struct lock_trace - single stack backtrace + * @hash_entry: Entry in a stack_trace_hash[] list. + * @hash: jhash() of @entries. + * @nr_entries: Number of entries in @entries. + * @entries: Actual stack backtrace. + */ +struct lock_trace { + struct hlist_node hash_entry; + u32 hash; + u32 nr_entries; + unsigned long entries[0] __aligned(sizeof(unsigned long)); +}; +#define LOCK_TRACE_SIZE_IN_LONGS \ + (sizeof(struct lock_trace) / sizeof(unsigned long)) /* - * Stack-trace: tightly packed array of stack backtrace - * addresses. Protected by the graph_lock. + * Stack-trace: sequence of lock_trace structures. Protected by the graph_lock. */ static unsigned long stack_trace[MAX_STACK_TRACE_ENTRIES]; +static struct hlist_head stack_trace_hash[STACK_TRACE_HASH_SIZE]; + +static bool traces_identical(struct lock_trace *t1, struct lock_trace *t2) +{ + return t1->hash == t2->hash && t1->nr_entries == t2->nr_entries && + memcmp(t1->entries, t2->entries, + t1->nr_entries * sizeof(t1->entries[0])) == 0; +} -static int save_trace(struct lock_trace *trace) +static struct lock_trace *save_trace(void) { - unsigned long *entries = stack_trace + nr_stack_trace_entries; + struct lock_trace *trace, *t2; + struct hlist_head *hash_head; + u32 hash; unsigned int max_entries; - trace->offset = nr_stack_trace_entries; - max_entries = MAX_STACK_TRACE_ENTRIES - nr_stack_trace_entries; - trace->nr_entries = stack_trace_save(entries, max_entries, 3); - nr_stack_trace_entries += trace->nr_entries; + BUILD_BUG_ON_NOT_POWER_OF_2(STACK_TRACE_HASH_SIZE); + BUILD_BUG_ON(LOCK_TRACE_SIZE_IN_LONGS >= MAX_STACK_TRACE_ENTRIES); - if (nr_stack_trace_entries >= MAX_STACK_TRACE_ENTRIES-1) { + trace = (struct lock_trace *)(stack_trace + nr_stack_trace_entries); + max_entries = MAX_STACK_TRACE_ENTRIES - nr_stack_trace_entries - + LOCK_TRACE_SIZE_IN_LONGS; + trace->nr_entries = stack_trace_save(trace->entries, max_entries, 3); + + if (nr_stack_trace_entries >= MAX_STACK_TRACE_ENTRIES - + LOCK_TRACE_SIZE_IN_LONGS - 1) { if (!debug_locks_off_graph_unlock()) - return 0; + return NULL; print_lockdep_off("BUG: MAX_STACK_TRACE_ENTRIES too low!"); dump_stack(); - return 0; + return NULL; } - return 1; + hash = jhash(trace->entries, trace->nr_entries * + sizeof(trace->entries[0]), 0); + trace->hash = hash; + hash_head = stack_trace_hash + (hash & (STACK_TRACE_HASH_SIZE - 1)); + hlist_for_each_entry(t2, hash_head, hash_entry) { + if (traces_identical(trace, t2)) + return t2; + } + nr_stack_trace_entries += LOCK_TRACE_SIZE_IN_LONGS + trace->nr_entries; + hlist_add_head(&trace->hash_entry, hash_head); + + return trace; +} + +/* Return the number of stack traces in the stack_trace[] array. */ +u64 lockdep_stack_trace_count(void) +{ + struct lock_trace *trace; + u64 c = 0; + int i; + + for (i = 0; i < ARRAY_SIZE(stack_trace_hash); i++) { + hlist_for_each_entry(trace, &stack_trace_hash[i], hash_entry) { + c++; + } + } + + return c; +} + +/* Return the number of stack hash chains that have at least one stack trace. */ +u64 lockdep_stack_hash_count(void) +{ + u64 c = 0; + int i; + + for (i = 0; i < ARRAY_SIZE(stack_trace_hash); i++) + if (!hlist_empty(&stack_trace_hash[i])) + c++; + + return c; } #endif @@ -491,7 +559,7 @@ unsigned int max_lockdep_depth; DEFINE_PER_CPU(struct lockdep_stats, lockdep_stats); #endif -#if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING) +#ifdef CONFIG_PROVE_LOCKING /* * Locking printouts: */ @@ -511,7 +579,7 @@ static const char *usage_str[] = }; #endif -const char * __get_key_name(struct lockdep_subclass_key *key, char *str) +const char *__get_key_name(const struct lockdep_subclass_key *key, char *str) { return kallsyms_lookup((unsigned long)key, NULL, NULL, NULL, str); } @@ -620,7 +688,7 @@ static void print_lock(struct held_lock *hlock) return; } - printk(KERN_CONT "%p", hlock->instance); + printk(KERN_CONT "%px", hlock->instance); print_lock_name(lock); printk(KERN_CONT ", at: %pS\n", (void *)hlock->acquire_ip); } @@ -1235,7 +1303,7 @@ static struct lock_list *alloc_list_entry(void) static int add_lock_to_list(struct lock_class *this, struct lock_class *links_to, struct list_head *head, unsigned long ip, int distance, - struct lock_trace *trace) + const struct lock_trace *trace) { struct lock_list *entry; /* @@ -1249,7 +1317,7 @@ static int add_lock_to_list(struct lock_class *this, entry->class = this; entry->links_to = links_to; entry->distance = distance; - entry->trace = *trace; + entry->trace = trace; /* * Both allocation and removal are done under the graph lock; but * iteration is under RCU-sched; see look_up_lock_class() and @@ -1470,11 +1538,10 @@ static inline int __bfs_backwards(struct lock_list *src_entry, } -static void print_lock_trace(struct lock_trace *trace, unsigned int spaces) +static void print_lock_trace(const struct lock_trace *trace, + unsigned int spaces) { - unsigned long *entries = stack_trace + trace->offset; - - stack_trace_print(entries, trace->nr_entries, spaces); + stack_trace_print(trace->entries, trace->nr_entries, spaces); } /* @@ -1489,7 +1556,7 @@ print_circular_bug_entry(struct lock_list *target, int depth) printk("\n-> #%u", depth); print_lock_name(target->class); printk(KERN_CONT ":\n"); - print_lock_trace(&target->trace, 6); + print_lock_trace(target->trace, 6); } static void @@ -1592,7 +1659,8 @@ static noinline void print_circular_bug(struct lock_list *this, if (!debug_locks_off_graph_unlock() || debug_locks_silent) return; - if (!save_trace(&this->trace)) + this->trace = save_trace(); + if (!this->trace) return; depth = get_lock_depth(target); @@ -1715,7 +1783,7 @@ check_path(struct lock_class *target, struct lock_list *src_entry, */ static noinline int check_noncircular(struct held_lock *src, struct held_lock *target, - struct lock_trace *trace) + struct lock_trace **const trace) { int ret; struct lock_list *uninitialized_var(target_entry); @@ -1729,13 +1797,13 @@ check_noncircular(struct held_lock *src, struct held_lock *target, ret = check_path(hlock_class(target), &src_entry, &target_entry); if (unlikely(!ret)) { - if (!trace->nr_entries) { + if (!*trace) { /* * If save_trace fails here, the printing might * trigger a WARN but because of the !nr_entries it * should not do bad things. */ - save_trace(trace); + *trace = save_trace(); } print_circular_bug(&src_entry, target_entry, src, target); @@ -1859,7 +1927,7 @@ static void print_lock_class_header(struct lock_class *class, int depth) len += printk("%*s %s", depth, "", usage_str[bit]); len += printk(KERN_CONT " at:\n"); - print_lock_trace(class->usage_traces + bit, len); + print_lock_trace(class->usage_traces[bit], len); } } printk("%*s }\n", depth, ""); @@ -1884,7 +1952,7 @@ print_shortest_lock_dependencies(struct lock_list *leaf, do { print_lock_class_header(entry->class, depth); printk("%*s ... acquired at:\n", depth, ""); - print_lock_trace(&entry->trace, 2); + print_lock_trace(entry->trace, 2); printk("\n"); if (depth == 0 && (entry != root)) { @@ -1995,14 +2063,14 @@ print_bad_irq_dependency(struct task_struct *curr, print_lock_name(backwards_entry->class); pr_warn("\n... which became %s-irq-safe at:\n", irqclass); - print_lock_trace(backwards_entry->class->usage_traces + bit1, 1); + print_lock_trace(backwards_entry->class->usage_traces[bit1], 1); pr_warn("\nto a %s-irq-unsafe lock:\n", irqclass); print_lock_name(forwards_entry->class); pr_warn("\n... which became %s-irq-unsafe at:\n", irqclass); pr_warn("..."); - print_lock_trace(forwards_entry->class->usage_traces + bit2, 1); + print_lock_trace(forwards_entry->class->usage_traces[bit2], 1); pr_warn("\nother info that might help us debug this:\n\n"); print_irq_lock_scenario(backwards_entry, forwards_entry, @@ -2011,13 +2079,15 @@ print_bad_irq_dependency(struct task_struct *curr, lockdep_print_held_locks(curr); pr_warn("\nthe dependencies between %s-irq-safe lock and the holding lock:\n", irqclass); - if (!save_trace(&prev_root->trace)) + prev_root->trace = save_trace(); + if (!prev_root->trace) return; print_shortest_lock_dependencies(backwards_entry, prev_root); pr_warn("\nthe dependencies between the lock to be acquired"); pr_warn(" and %s-irq-unsafe lock:\n", irqclass); - if (!save_trace(&next_root->trace)) + next_root->trace = save_trace(); + if (!next_root->trace) return; print_shortest_lock_dependencies(forwards_entry, next_root); @@ -2369,7 +2439,8 @@ check_deadlock(struct task_struct *curr, struct held_lock *next) */ static int check_prev_add(struct task_struct *curr, struct held_lock *prev, - struct held_lock *next, int distance, struct lock_trace *trace) + struct held_lock *next, int distance, + struct lock_trace **const trace) { struct lock_list *entry; int ret; @@ -2444,8 +2515,11 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev, return ret; #endif - if (!trace->nr_entries && !save_trace(trace)) - return 0; + if (!*trace) { + *trace = save_trace(); + if (!*trace) + return 0; + } /* * Ok, all validations passed, add the new lock @@ -2453,14 +2527,14 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev, */ ret = add_lock_to_list(hlock_class(next), hlock_class(prev), &hlock_class(prev)->locks_after, - next->acquire_ip, distance, trace); + next->acquire_ip, distance, *trace); if (!ret) return 0; ret = add_lock_to_list(hlock_class(prev), hlock_class(next), &hlock_class(next)->locks_before, - next->acquire_ip, distance, trace); + next->acquire_ip, distance, *trace); if (!ret) return 0; @@ -2476,7 +2550,7 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev, static int check_prevs_add(struct task_struct *curr, struct held_lock *next) { - struct lock_trace trace = { .nr_entries = 0 }; + struct lock_trace *trace = NULL; int depth = curr->lockdep_depth; struct held_lock *hlock; @@ -2969,7 +3043,7 @@ static void check_chain_key(struct task_struct *curr) #endif } -#if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING) +#ifdef CONFIG_PROVE_LOCKING static int mark_lock(struct task_struct *curr, struct held_lock *this, enum lock_usage_bit new_bit); @@ -3015,7 +3089,7 @@ print_usage_bug(struct task_struct *curr, struct held_lock *this, print_lock(this); pr_warn("{%s} state was registered at:\n", usage_str[prev_bit]); - print_lock_trace(hlock_class(this)->usage_traces + prev_bit, 1); + print_lock_trace(hlock_class(this)->usage_traces[prev_bit], 1); print_irqtrace_events(curr); pr_warn("\nother info that might help us debug this:\n"); @@ -3096,7 +3170,8 @@ print_irq_inversion_bug(struct task_struct *curr, lockdep_print_held_locks(curr); pr_warn("\nthe shortest dependencies between 2nd lock and 1st lock:\n"); - if (!save_trace(&root->trace)) + root->trace = save_trace(); + if (!root->trace) return; print_shortest_lock_dependencies(other, root); @@ -3580,7 +3655,7 @@ static int mark_lock(struct task_struct *curr, struct held_lock *this, hlock_class(this)->usage_mask |= new_mask; - if (!save_trace(hlock_class(this)->usage_traces + new_bit)) + if (!(hlock_class(this)->usage_traces[new_bit] = save_trace())) return 0; switch (new_bit) { @@ -3608,7 +3683,7 @@ static int mark_lock(struct task_struct *curr, struct held_lock *this, return ret; } -#else /* defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING) */ +#else /* CONFIG_PROVE_LOCKING */ static inline int mark_usage(struct task_struct *curr, struct held_lock *hlock, int check) @@ -3627,7 +3702,7 @@ static inline int separate_irq_context(struct task_struct *curr, return 0; } -#endif /* defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING) */ +#endif /* CONFIG_PROVE_LOCKING */ /* * Initialize a lock instance's lock-class mapping info: @@ -4321,8 +4396,7 @@ static void __lock_unpin_lock(struct lockdep_map *lock, struct pin_cookie cookie */ static void check_flags(unsigned long flags) { -#if defined(CONFIG_PROVE_LOCKING) && defined(CONFIG_DEBUG_LOCKDEP) && \ - defined(CONFIG_TRACE_IRQFLAGS) +#if defined(CONFIG_PROVE_LOCKING) && defined(CONFIG_DEBUG_LOCKDEP) if (!debug_locks) return; @@ -5158,6 +5232,12 @@ void __init lockdep_init(void) ) / 1024 ); +#if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING) + printk(" memory used for stack traces: %zu kB\n", + (sizeof(stack_trace) + sizeof(stack_trace_hash)) / 1024 + ); +#endif + printk(" per task-struct memory footprint: %zu bytes\n", sizeof(((struct task_struct *)NULL)->held_locks)); } diff --git a/kernel/locking/lockdep_internals.h b/kernel/locking/lockdep_internals.h index cc83568d5012..18d85aebbb57 100644 --- a/kernel/locking/lockdep_internals.h +++ b/kernel/locking/lockdep_internals.h @@ -92,6 +92,7 @@ static const unsigned long LOCKF_USED_IN_IRQ_READ = #define MAX_LOCKDEP_ENTRIES 16384UL #define MAX_LOCKDEP_CHAINS_BITS 15 #define MAX_STACK_TRACE_ENTRIES 262144UL +#define STACK_TRACE_HASH_SIZE 8192 #else #define MAX_LOCKDEP_ENTRIES 32768UL @@ -102,6 +103,7 @@ static const unsigned long LOCKF_USED_IN_IRQ_READ = * addresses. Protected by the hash_lock. */ #define MAX_STACK_TRACE_ENTRIES 524288UL +#define STACK_TRACE_HASH_SIZE 16384 #endif #define MAX_LOCKDEP_CHAINS (1UL << MAX_LOCKDEP_CHAINS_BITS) @@ -116,7 +118,8 @@ extern struct lock_chain lock_chains[]; extern void get_usage_chars(struct lock_class *class, char usage[LOCK_USAGE_CHARS]); -extern const char * __get_key_name(struct lockdep_subclass_key *key, char *str); +extern const char *__get_key_name(const struct lockdep_subclass_key *key, + char *str); struct lock_class *lock_chain_get_class(struct lock_chain *chain, int i); @@ -137,6 +140,10 @@ extern unsigned int max_bfs_queue_depth; #ifdef CONFIG_PROVE_LOCKING extern unsigned long lockdep_count_forward_deps(struct lock_class *); extern unsigned long lockdep_count_backward_deps(struct lock_class *); +#ifdef CONFIG_TRACE_IRQFLAGS +u64 lockdep_stack_trace_count(void); +u64 lockdep_stack_hash_count(void); +#endif #else static inline unsigned long lockdep_count_forward_deps(struct lock_class *class) diff --git a/kernel/locking/lockdep_proc.c b/kernel/locking/lockdep_proc.c index 65b6a1600c8f..dadb7b7fba37 100644 --- a/kernel/locking/lockdep_proc.c +++ b/kernel/locking/lockdep_proc.c @@ -200,7 +200,6 @@ static void lockdep_stats_debug_show(struct seq_file *m) static int lockdep_stats_show(struct seq_file *m, void *v) { - struct lock_class *class; unsigned long nr_unused = 0, nr_uncategorized = 0, nr_irq_safe = 0, nr_irq_unsafe = 0, nr_softirq_safe = 0, nr_softirq_unsafe = 0, @@ -211,6 +210,8 @@ static int lockdep_stats_show(struct seq_file *m, void *v) sum_forward_deps = 0; #ifdef CONFIG_PROVE_LOCKING + struct lock_class *class; + list_for_each_entry(class, &all_lock_classes, lock_entry) { if (class->usage_mask == 0) @@ -284,6 +285,12 @@ static int lockdep_stats_show(struct seq_file *m, void *v) nr_process_chains); seq_printf(m, " stack-trace entries: %11lu [max: %lu]\n", nr_stack_trace_entries, MAX_STACK_TRACE_ENTRIES); +#if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING) + seq_printf(m, " number of stack traces: %llu\n", + lockdep_stack_trace_count()); + seq_printf(m, " number of stack hash chains: %llu\n", + lockdep_stack_hash_count()); +#endif seq_printf(m, " combined max dependencies: %11u\n", (nr_hardirq_chains + 1) * (nr_softirq_chains + 1) * @@ -398,7 +405,7 @@ static void seq_lock_time(struct seq_file *m, struct lock_time *lt) static void seq_stats(struct seq_file *m, struct lock_stat_data *data) { - struct lockdep_subclass_key *ckey; + const struct lockdep_subclass_key *ckey; struct lock_class_stats *stats; struct lock_class *class; const char *cname; diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c index edd1c082dbf5..468a9b8422e3 100644 --- a/kernel/locking/mutex.c +++ b/kernel/locking/mutex.c @@ -65,11 +65,37 @@ EXPORT_SYMBOL(__mutex_init); #define MUTEX_FLAGS 0x07 +/* + * Internal helper function; C doesn't allow us to hide it :/ + * + * DO NOT USE (outside of mutex code). + */ +static inline struct task_struct *__mutex_owner(struct mutex *lock) +{ + return (struct task_struct *)(atomic_long_read(&lock->owner) & ~MUTEX_FLAGS); +} + static inline struct task_struct *__owner_task(unsigned long owner) { return (struct task_struct *)(owner & ~MUTEX_FLAGS); } +bool mutex_is_locked(struct mutex *lock) +{ + return __mutex_owner(lock) != NULL; +} +EXPORT_SYMBOL(mutex_is_locked); + +__must_check enum mutex_trylock_recursive_enum +mutex_trylock_recursive(struct mutex *lock) +{ + if (unlikely(__mutex_owner(lock) == current)) + return MUTEX_TRYLOCK_RECURSIVE; + + return mutex_trylock(lock); +} +EXPORT_SYMBOL(mutex_trylock_recursive); + static inline unsigned long __owner_flags(unsigned long owner) { return owner & MUTEX_FLAGS; @@ -908,6 +934,10 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, might_sleep(); +#ifdef CONFIG_DEBUG_MUTEXES + DEBUG_LOCKS_WARN_ON(lock->magic != lock); +#endif + ww = container_of(lock, struct ww_mutex, base); if (use_ww_ctx && ww_ctx) { if (unlikely(ww_ctx == READ_ONCE(ww->ctx))) @@ -1379,8 +1409,13 @@ __ww_mutex_lock_interruptible_slowpath(struct ww_mutex *lock, */ int __sched mutex_trylock(struct mutex *lock) { - bool locked = __mutex_trylock(lock); + bool locked; + +#ifdef CONFIG_DEBUG_MUTEXES + DEBUG_LOCKS_WARN_ON(lock->magic != lock); +#endif + locked = __mutex_trylock(lock); if (locked) mutex_acquire(&lock->dep_map, 0, 1, _RET_IP_); diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c index fa83d36e30c6..2874bf556162 100644 --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c @@ -628,8 +628,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task, } /* [10] Grab the next task, i.e. owner of @lock */ - task = rt_mutex_owner(lock); - get_task_struct(task); + task = get_task_struct(rt_mutex_owner(lock)); raw_spin_lock(&task->pi_lock); /* @@ -709,8 +708,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task, } /* [10] Grab the next task, i.e. the owner of @lock */ - task = rt_mutex_owner(lock); - get_task_struct(task); + task = get_task_struct(rt_mutex_owner(lock)); raw_spin_lock(&task->pi_lock); /* [11] requeue the pi waiters if necessary */ diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c index 37524a47f002..eef04551eae7 100644 --- a/kernel/locking/rwsem.c +++ b/kernel/locking/rwsem.c @@ -105,8 +105,9 @@ #ifdef CONFIG_DEBUG_RWSEMS # define DEBUG_RWSEMS_WARN_ON(c, sem) do { \ if (!debug_locks_silent && \ - WARN_ONCE(c, "DEBUG_RWSEMS_WARN_ON(%s): count = 0x%lx, owner = 0x%lx, curr 0x%lx, list %sempty\n",\ + WARN_ONCE(c, "DEBUG_RWSEMS_WARN_ON(%s): count = 0x%lx, magic = 0x%lx, owner = 0x%lx, curr 0x%lx, list %sempty\n",\ #c, atomic_long_read(&(sem)->count), \ + (unsigned long) sem->magic, \ atomic_long_read(&(sem)->owner), (long)current, \ list_empty(&(sem)->wait_list) ? "" : "not ")) \ debug_locks_off(); \ @@ -330,6 +331,9 @@ void __init_rwsem(struct rw_semaphore *sem, const char *name, debug_check_no_locks_freed((void *)sem, sizeof(*sem)); lockdep_init_map(&sem->dep_map, name, key, 0); #endif +#ifdef CONFIG_DEBUG_RWSEMS + sem->magic = sem; +#endif atomic_long_set(&sem->count, RWSEM_UNLOCKED_VALUE); raw_spin_lock_init(&sem->wait_lock); INIT_LIST_HEAD(&sem->wait_list); @@ -666,7 +670,11 @@ static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem, preempt_disable(); rcu_read_lock(); owner = rwsem_owner_flags(sem, &flags); - if ((flags & nonspinnable) || (owner && !owner_on_cpu(owner))) + /* + * Don't check the read-owner as the entry may be stale. + */ + if ((flags & nonspinnable) || + (owner && !(flags & RWSEM_READER_OWNED) && !owner_on_cpu(owner))) ret = false; rcu_read_unlock(); preempt_enable(); @@ -720,11 +728,12 @@ rwsem_spin_on_owner(struct rw_semaphore *sem, unsigned long nonspinnable) rcu_read_lock(); for (;;) { - if (atomic_long_read(&sem->count) & RWSEM_FLAG_HANDOFF) { - state = OWNER_NONSPINNABLE; - break; - } - + /* + * When a waiting writer set the handoff flag, it may spin + * on the owner as well. Once that writer acquires the lock, + * we can spin on it. So we don't need to quit even when the + * handoff bit is set. + */ new = rwsem_owner_flags(sem, &new_flags); if ((new != owner) || (new_flags != flags)) { state = rwsem_owner_state(new, new_flags, nonspinnable); @@ -970,6 +979,13 @@ static inline bool rwsem_reader_phase_trylock(struct rw_semaphore *sem, { return false; } + +static inline int +rwsem_spin_on_owner(struct rw_semaphore *sem, unsigned long nonspinnable) +{ + return 0; +} +#define OWNER_NULL 1 #endif /* @@ -1000,6 +1016,7 @@ rwsem_down_read_slowpath(struct rw_semaphore *sem, int state) atomic_long_add(-RWSEM_READER_BIAS, &sem->count); adjustment = 0; if (rwsem_optimistic_spin(sem, false)) { + /* rwsem_optimistic_spin() implies ACQUIRE on success */ /* * Wake up other readers in the wait list if the front * waiter is a reader. @@ -1014,6 +1031,7 @@ rwsem_down_read_slowpath(struct rw_semaphore *sem, int state) } return sem; } else if (rwsem_reader_phase_trylock(sem, waiter.last_rowner)) { + /* rwsem_reader_phase_trylock() implies ACQUIRE on success */ return sem; } @@ -1032,6 +1050,8 @@ queue: */ if (adjustment && !(atomic_long_read(&sem->count) & (RWSEM_WRITER_MASK | RWSEM_FLAG_HANDOFF))) { + /* Provide lock ACQUIRE */ + smp_acquire__after_ctrl_dep(); raw_spin_unlock_irq(&sem->wait_lock); rwsem_set_reader_owned(sem); lockevent_inc(rwsem_rlock_fast); @@ -1065,15 +1085,18 @@ queue: wake_up_q(&wake_q); /* wait to be given the lock */ - while (true) { + for (;;) { set_current_state(state); - if (!waiter.task) + if (!smp_load_acquire(&waiter.task)) { + /* Matches rwsem_mark_wake()'s smp_store_release(). */ break; + } if (signal_pending_state(state, current)) { raw_spin_lock_irq(&sem->wait_lock); if (waiter.task) goto out_nolock; raw_spin_unlock_irq(&sem->wait_lock); + /* Ordered by sem->wait_lock against rwsem_mark_wake(). */ break; } schedule(); @@ -1083,6 +1106,7 @@ queue: __set_current_state(TASK_RUNNING); lockevent_inc(rwsem_rlock); return sem; + out_nolock: list_del(&waiter.list); if (list_empty(&sem->wait_list)) { @@ -1123,8 +1147,10 @@ rwsem_down_write_slowpath(struct rw_semaphore *sem, int state) /* do optimistic spinning and steal lock if possible */ if (rwsem_can_spin_on_owner(sem, RWSEM_WR_NONSPINNABLE) && - rwsem_optimistic_spin(sem, true)) + rwsem_optimistic_spin(sem, true)) { + /* rwsem_optimistic_spin() implies ACQUIRE on success */ return sem; + } /* * Disable reader optimistic spinning for this rwsem after @@ -1184,12 +1210,26 @@ rwsem_down_write_slowpath(struct rw_semaphore *sem, int state) wait: /* wait until we successfully acquire the lock */ set_current_state(state); - while (true) { - if (rwsem_try_write_lock(sem, wstate)) + for (;;) { + if (rwsem_try_write_lock(sem, wstate)) { + /* rwsem_try_write_lock() implies ACQUIRE on success */ break; + } raw_spin_unlock_irq(&sem->wait_lock); + /* + * After setting the handoff bit and failing to acquire + * the lock, attempt to spin on owner to accelerate lock + * transfer. If the previous owner is a on-cpu writer and it + * has just released the lock, OWNER_NULL will be returned. + * In this case, we attempt to acquire the lock again + * without sleeping. + */ + if ((wstate == WRITER_HANDOFF) && + (rwsem_spin_on_owner(sem, 0) == OWNER_NULL)) + goto trylock_again; + /* Block until there are no active lockers. */ for (;;) { if (signal_pending_state(state, current)) @@ -1224,7 +1264,7 @@ wait: break; } } - +trylock_again: raw_spin_lock_irq(&sem->wait_lock); } __set_current_state(TASK_RUNNING); @@ -1322,11 +1362,14 @@ static inline int __down_read_killable(struct rw_semaphore *sem) static inline int __down_read_trylock(struct rw_semaphore *sem) { + long tmp; + + DEBUG_RWSEMS_WARN_ON(sem->magic != sem, sem); + /* * Optimize for the case when the rwsem is not locked at all. */ - long tmp = RWSEM_UNLOCKED_VALUE; - + tmp = RWSEM_UNLOCKED_VALUE; do { if (atomic_long_try_cmpxchg_acquire(&sem->count, &tmp, tmp + RWSEM_READER_BIAS)) { @@ -1367,8 +1410,11 @@ static inline int __down_write_killable(struct rw_semaphore *sem) static inline int __down_write_trylock(struct rw_semaphore *sem) { - long tmp = RWSEM_UNLOCKED_VALUE; + long tmp; + + DEBUG_RWSEMS_WARN_ON(sem->magic != sem, sem); + tmp = RWSEM_UNLOCKED_VALUE; if (atomic_long_try_cmpxchg_acquire(&sem->count, &tmp, RWSEM_WRITER_LOCKED)) { rwsem_set_owner(sem); @@ -1384,7 +1430,9 @@ inline void __up_read(struct rw_semaphore *sem) { long tmp; + DEBUG_RWSEMS_WARN_ON(sem->magic != sem, sem); DEBUG_RWSEMS_WARN_ON(!is_rwsem_reader_owned(sem), sem); + rwsem_clear_reader_owned(sem); tmp = atomic_long_add_return_release(-RWSEM_READER_BIAS, &sem->count); DEBUG_RWSEMS_WARN_ON(tmp < 0, sem); @@ -1402,12 +1450,14 @@ static inline void __up_write(struct rw_semaphore *sem) { long tmp; + DEBUG_RWSEMS_WARN_ON(sem->magic != sem, sem); /* * sem->owner may differ from current if the ownership is transferred * to an anonymous writer by setting the RWSEM_NONSPINNABLE bits. */ DEBUG_RWSEMS_WARN_ON((rwsem_owner(sem) != current) && !rwsem_test_oflags(sem, RWSEM_NONSPINNABLE), sem); + rwsem_clear_owner(sem); tmp = atomic_long_fetch_add_release(-RWSEM_WRITER_LOCKED, &sem->count); if (unlikely(tmp & RWSEM_FLAG_WAITERS)) |