Diffstat (limited to 'kernel/locking')
-rw-r--r--  kernel/locking/lockdep.c            | 169
-rw-r--r--  kernel/locking/lockdep_internals.h  |   9
-rw-r--r--  kernel/locking/lockdep_proc.c       |  23
-rw-r--r--  kernel/locking/locktorture.c        |   9
-rw-r--r--  kernel/locking/mutex.c              |  30
-rw-r--r--  kernel/locking/osq_lock.c           |  23
-rw-r--r--  kernel/locking/qspinlock.c          |  13
-rw-r--r--  kernel/locking/qspinlock_paravirt.h |   2
-rw-r--r--  kernel/locking/rtmutex.c            |  12
-rw-r--r--  kernel/locking/rwsem.c              |  64
-rw-r--r--  kernel/locking/spinlock_debug.c     |  32
11 files changed, 265 insertions, 121 deletions
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 4861cf8e274b..32406ef0d6a2 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -449,33 +449,100 @@ static void print_lockdep_off(const char *bug_msg) unsigned long nr_stack_trace_entries; #ifdef CONFIG_PROVE_LOCKING +/** + * struct lock_trace - single stack backtrace + * @hash_entry: Entry in a stack_trace_hash[] list. + * @hash: jhash() of @entries. + * @nr_entries: Number of entries in @entries. + * @entries: Actual stack backtrace. + */ +struct lock_trace { + struct hlist_node hash_entry; + u32 hash; + u32 nr_entries; + unsigned long entries[0] __aligned(sizeof(unsigned long)); +}; +#define LOCK_TRACE_SIZE_IN_LONGS \ + (sizeof(struct lock_trace) / sizeof(unsigned long)) /* - * Stack-trace: tightly packed array of stack backtrace - * addresses. Protected by the graph_lock. + * Stack-trace: sequence of lock_trace structures. Protected by the graph_lock. */ static unsigned long stack_trace[MAX_STACK_TRACE_ENTRIES]; +static struct hlist_head stack_trace_hash[STACK_TRACE_HASH_SIZE]; + +static bool traces_identical(struct lock_trace *t1, struct lock_trace *t2) +{ + return t1->hash == t2->hash && t1->nr_entries == t2->nr_entries && + memcmp(t1->entries, t2->entries, + t1->nr_entries * sizeof(t1->entries[0])) == 0; +} -static int save_trace(struct lock_trace *trace) +static struct lock_trace *save_trace(void) { - unsigned long *entries = stack_trace + nr_stack_trace_entries; - unsigned int max_entries; + struct lock_trace *trace, *t2; + struct hlist_head *hash_head; + u32 hash; + int max_entries; + + BUILD_BUG_ON_NOT_POWER_OF_2(STACK_TRACE_HASH_SIZE); + BUILD_BUG_ON(LOCK_TRACE_SIZE_IN_LONGS >= MAX_STACK_TRACE_ENTRIES); - trace->offset = nr_stack_trace_entries; - max_entries = MAX_STACK_TRACE_ENTRIES - nr_stack_trace_entries; - trace->nr_entries = stack_trace_save(entries, max_entries, 3); - nr_stack_trace_entries += trace->nr_entries; + trace = (struct lock_trace *)(stack_trace + nr_stack_trace_entries); + max_entries = MAX_STACK_TRACE_ENTRIES - nr_stack_trace_entries - + LOCK_TRACE_SIZE_IN_LONGS; - if (nr_stack_trace_entries >= MAX_STACK_TRACE_ENTRIES-1) { + if (max_entries <= 0) { if (!debug_locks_off_graph_unlock()) - return 0; + return NULL; print_lockdep_off("BUG: MAX_STACK_TRACE_ENTRIES too low!"); dump_stack(); - return 0; + return NULL; + } + trace->nr_entries = stack_trace_save(trace->entries, max_entries, 3); + + hash = jhash(trace->entries, trace->nr_entries * + sizeof(trace->entries[0]), 0); + trace->hash = hash; + hash_head = stack_trace_hash + (hash & (STACK_TRACE_HASH_SIZE - 1)); + hlist_for_each_entry(t2, hash_head, hash_entry) { + if (traces_identical(trace, t2)) + return t2; } + nr_stack_trace_entries += LOCK_TRACE_SIZE_IN_LONGS + trace->nr_entries; + hlist_add_head(&trace->hash_entry, hash_head); - return 1; + return trace; +} + +/* Return the number of stack traces in the stack_trace[] array. */ +u64 lockdep_stack_trace_count(void) +{ + struct lock_trace *trace; + u64 c = 0; + int i; + + for (i = 0; i < ARRAY_SIZE(stack_trace_hash); i++) { + hlist_for_each_entry(trace, &stack_trace_hash[i], hash_entry) { + c++; + } + } + + return c; +} + +/* Return the number of stack hash chains that have at least one stack trace. 
*/ +u64 lockdep_stack_hash_count(void) +{ + u64 c = 0; + int i; + + for (i = 0; i < ARRAY_SIZE(stack_trace_hash); i++) + if (!hlist_empty(&stack_trace_hash[i])) + c++; + + return c; } #endif @@ -511,7 +578,7 @@ static const char *usage_str[] = }; #endif -const char * __get_key_name(struct lockdep_subclass_key *key, char *str) +const char *__get_key_name(const struct lockdep_subclass_key *key, char *str) { return kallsyms_lookup((unsigned long)key, NULL, NULL, NULL, str); } @@ -620,7 +687,7 @@ static void print_lock(struct held_lock *hlock) return; } - printk(KERN_CONT "%p", hlock->instance); + printk(KERN_CONT "%px", hlock->instance); print_lock_name(lock); printk(KERN_CONT ", at: %pS\n", (void *)hlock->acquire_ip); } @@ -1235,7 +1302,7 @@ static struct lock_list *alloc_list_entry(void) static int add_lock_to_list(struct lock_class *this, struct lock_class *links_to, struct list_head *head, unsigned long ip, int distance, - struct lock_trace *trace) + const struct lock_trace *trace) { struct lock_list *entry; /* @@ -1249,7 +1316,7 @@ static int add_lock_to_list(struct lock_class *this, entry->class = this; entry->links_to = links_to; entry->distance = distance; - entry->trace = *trace; + entry->trace = trace; /* * Both allocation and removal are done under the graph lock; but * iteration is under RCU-sched; see look_up_lock_class() and @@ -1470,11 +1537,10 @@ static inline int __bfs_backwards(struct lock_list *src_entry, } -static void print_lock_trace(struct lock_trace *trace, unsigned int spaces) +static void print_lock_trace(const struct lock_trace *trace, + unsigned int spaces) { - unsigned long *entries = stack_trace + trace->offset; - - stack_trace_print(entries, trace->nr_entries, spaces); + stack_trace_print(trace->entries, trace->nr_entries, spaces); } /* @@ -1489,7 +1555,7 @@ print_circular_bug_entry(struct lock_list *target, int depth) printk("\n-> #%u", depth); print_lock_name(target->class); printk(KERN_CONT ":\n"); - print_lock_trace(&target->trace, 6); + print_lock_trace(target->trace, 6); } static void @@ -1592,7 +1658,8 @@ static noinline void print_circular_bug(struct lock_list *this, if (!debug_locks_off_graph_unlock() || debug_locks_silent) return; - if (!save_trace(&this->trace)) + this->trace = save_trace(); + if (!this->trace) return; depth = get_lock_depth(target); @@ -1715,7 +1782,7 @@ check_path(struct lock_class *target, struct lock_list *src_entry, */ static noinline int check_noncircular(struct held_lock *src, struct held_lock *target, - struct lock_trace *trace) + struct lock_trace **const trace) { int ret; struct lock_list *uninitialized_var(target_entry); @@ -1729,13 +1796,13 @@ check_noncircular(struct held_lock *src, struct held_lock *target, ret = check_path(hlock_class(target), &src_entry, &target_entry); if (unlikely(!ret)) { - if (!trace->nr_entries) { + if (!*trace) { /* * If save_trace fails here, the printing might * trigger a WARN but because of the !nr_entries it * should not do bad things. 
*/ - save_trace(trace); + *trace = save_trace(); } print_circular_bug(&src_entry, target_entry, src, target); @@ -1859,7 +1926,7 @@ static void print_lock_class_header(struct lock_class *class, int depth) len += printk("%*s %s", depth, "", usage_str[bit]); len += printk(KERN_CONT " at:\n"); - print_lock_trace(class->usage_traces + bit, len); + print_lock_trace(class->usage_traces[bit], len); } } printk("%*s }\n", depth, ""); @@ -1884,7 +1951,7 @@ print_shortest_lock_dependencies(struct lock_list *leaf, do { print_lock_class_header(entry->class, depth); printk("%*s ... acquired at:\n", depth, ""); - print_lock_trace(&entry->trace, 2); + print_lock_trace(entry->trace, 2); printk("\n"); if (depth == 0 && (entry != root)) { @@ -1995,14 +2062,14 @@ print_bad_irq_dependency(struct task_struct *curr, print_lock_name(backwards_entry->class); pr_warn("\n... which became %s-irq-safe at:\n", irqclass); - print_lock_trace(backwards_entry->class->usage_traces + bit1, 1); + print_lock_trace(backwards_entry->class->usage_traces[bit1], 1); pr_warn("\nto a %s-irq-unsafe lock:\n", irqclass); print_lock_name(forwards_entry->class); pr_warn("\n... which became %s-irq-unsafe at:\n", irqclass); pr_warn("..."); - print_lock_trace(forwards_entry->class->usage_traces + bit2, 1); + print_lock_trace(forwards_entry->class->usage_traces[bit2], 1); pr_warn("\nother info that might help us debug this:\n\n"); print_irq_lock_scenario(backwards_entry, forwards_entry, @@ -2011,13 +2078,15 @@ print_bad_irq_dependency(struct task_struct *curr, lockdep_print_held_locks(curr); pr_warn("\nthe dependencies between %s-irq-safe lock and the holding lock:\n", irqclass); - if (!save_trace(&prev_root->trace)) + prev_root->trace = save_trace(); + if (!prev_root->trace) return; print_shortest_lock_dependencies(backwards_entry, prev_root); pr_warn("\nthe dependencies between the lock to be acquired"); pr_warn(" and %s-irq-unsafe lock:\n", irqclass); - if (!save_trace(&next_root->trace)) + next_root->trace = save_trace(); + if (!next_root->trace) return; print_shortest_lock_dependencies(forwards_entry, next_root); @@ -2369,7 +2438,8 @@ check_deadlock(struct task_struct *curr, struct held_lock *next) */ static int check_prev_add(struct task_struct *curr, struct held_lock *prev, - struct held_lock *next, int distance, struct lock_trace *trace) + struct held_lock *next, int distance, + struct lock_trace **const trace) { struct lock_list *entry; int ret; @@ -2444,8 +2514,11 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev, return ret; #endif - if (!trace->nr_entries && !save_trace(trace)) - return 0; + if (!*trace) { + *trace = save_trace(); + if (!*trace) + return 0; + } /* * Ok, all validations passed, add the new lock @@ -2453,14 +2526,14 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev, */ ret = add_lock_to_list(hlock_class(next), hlock_class(prev), &hlock_class(prev)->locks_after, - next->acquire_ip, distance, trace); + next->acquire_ip, distance, *trace); if (!ret) return 0; ret = add_lock_to_list(hlock_class(prev), hlock_class(next), &hlock_class(next)->locks_before, - next->acquire_ip, distance, trace); + next->acquire_ip, distance, *trace); if (!ret) return 0; @@ -2476,7 +2549,7 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev, static int check_prevs_add(struct task_struct *curr, struct held_lock *next) { - struct lock_trace trace = { .nr_entries = 0 }; + struct lock_trace *trace = NULL; int depth = curr->lockdep_depth; struct held_lock *hlock; @@ -3015,7 +3088,7 @@ 
print_usage_bug(struct task_struct *curr, struct held_lock *this, print_lock(this); pr_warn("{%s} state was registered at:\n", usage_str[prev_bit]); - print_lock_trace(hlock_class(this)->usage_traces + prev_bit, 1); + print_lock_trace(hlock_class(this)->usage_traces[prev_bit], 1); print_irqtrace_events(curr); pr_warn("\nother info that might help us debug this:\n"); @@ -3096,7 +3169,8 @@ print_irq_inversion_bug(struct task_struct *curr, lockdep_print_held_locks(curr); pr_warn("\nthe shortest dependencies between 2nd lock and 1st lock:\n"); - if (!save_trace(&root->trace)) + root->trace = save_trace(); + if (!root->trace) return; print_shortest_lock_dependencies(other, root); @@ -3580,7 +3654,7 @@ static int mark_lock(struct task_struct *curr, struct held_lock *this, hlock_class(this)->usage_mask |= new_mask; - if (!save_trace(hlock_class(this)->usage_traces + new_bit)) + if (!(hlock_class(this)->usage_traces[new_bit] = save_trace())) return 0; switch (new_bit) { @@ -4133,11 +4207,9 @@ static int __lock_downgrade(struct lockdep_map *lock, unsigned long ip) } /* - * Remove the lock to the list of currently held locks - this gets + * Remove the lock from the list of currently held locks - this gets * called on mutex_unlock()/spin_unlock*() (or on a failed * mutex_lock_interruptible()). - * - * @nested is an hysterical artifact, needs a tree wide cleanup. */ static int __lock_release(struct lockdep_map *lock, unsigned long ip) @@ -4416,8 +4488,7 @@ void lock_acquire(struct lockdep_map *lock, unsigned int subclass, } EXPORT_SYMBOL_GPL(lock_acquire); -void lock_release(struct lockdep_map *lock, int nested, - unsigned long ip) +void lock_release(struct lockdep_map *lock, unsigned long ip) { unsigned long flags; @@ -5157,6 +5228,12 @@ void __init lockdep_init(void) ) / 1024 ); +#if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING) + printk(" memory used for stack traces: %zu kB\n", + (sizeof(stack_trace) + sizeof(stack_trace_hash)) / 1024 + ); +#endif + printk(" per task-struct memory footprint: %zu bytes\n", sizeof(((struct task_struct *)NULL)->held_locks)); } diff --git a/kernel/locking/lockdep_internals.h b/kernel/locking/lockdep_internals.h index cc83568d5012..18d85aebbb57 100644 --- a/kernel/locking/lockdep_internals.h +++ b/kernel/locking/lockdep_internals.h @@ -92,6 +92,7 @@ static const unsigned long LOCKF_USED_IN_IRQ_READ = #define MAX_LOCKDEP_ENTRIES 16384UL #define MAX_LOCKDEP_CHAINS_BITS 15 #define MAX_STACK_TRACE_ENTRIES 262144UL +#define STACK_TRACE_HASH_SIZE 8192 #else #define MAX_LOCKDEP_ENTRIES 32768UL @@ -102,6 +103,7 @@ static const unsigned long LOCKF_USED_IN_IRQ_READ = * addresses. Protected by the hash_lock. 
*/ #define MAX_STACK_TRACE_ENTRIES 524288UL +#define STACK_TRACE_HASH_SIZE 16384 #endif #define MAX_LOCKDEP_CHAINS (1UL << MAX_LOCKDEP_CHAINS_BITS) @@ -116,7 +118,8 @@ extern struct lock_chain lock_chains[]; extern void get_usage_chars(struct lock_class *class, char usage[LOCK_USAGE_CHARS]); -extern const char * __get_key_name(struct lockdep_subclass_key *key, char *str); +extern const char *__get_key_name(const struct lockdep_subclass_key *key, + char *str); struct lock_class *lock_chain_get_class(struct lock_chain *chain, int i); @@ -137,6 +140,10 @@ extern unsigned int max_bfs_queue_depth; #ifdef CONFIG_PROVE_LOCKING extern unsigned long lockdep_count_forward_deps(struct lock_class *); extern unsigned long lockdep_count_backward_deps(struct lock_class *); +#ifdef CONFIG_TRACE_IRQFLAGS +u64 lockdep_stack_trace_count(void); +u64 lockdep_stack_hash_count(void); +#endif #else static inline unsigned long lockdep_count_forward_deps(struct lock_class *class) diff --git a/kernel/locking/lockdep_proc.c b/kernel/locking/lockdep_proc.c index bda006f8a88b..231684cfc5ae 100644 --- a/kernel/locking/lockdep_proc.c +++ b/kernel/locking/lockdep_proc.c @@ -285,6 +285,12 @@ static int lockdep_stats_show(struct seq_file *m, void *v) nr_process_chains); seq_printf(m, " stack-trace entries: %11lu [max: %lu]\n", nr_stack_trace_entries, MAX_STACK_TRACE_ENTRIES); +#if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING) + seq_printf(m, " number of stack traces: %11llu\n", + lockdep_stack_trace_count()); + seq_printf(m, " number of stack hash chains: %11llu\n", + lockdep_stack_hash_count()); +#endif seq_printf(m, " combined max dependencies: %11u\n", (nr_hardirq_chains + 1) * (nr_softirq_chains + 1) * @@ -399,7 +405,7 @@ static void seq_lock_time(struct seq_file *m, struct lock_time *lt) static void seq_stats(struct seq_file *m, struct lock_stat_data *data) { - struct lockdep_subclass_key *ckey; + const struct lockdep_subclass_key *ckey; struct lock_class_stats *stats; struct lock_class *class; const char *cname; @@ -637,12 +643,12 @@ static int lock_stat_release(struct inode *inode, struct file *file) return seq_release(inode, file); } -static const struct file_operations proc_lock_stat_operations = { - .open = lock_stat_open, - .write = lock_stat_write, - .read = seq_read, - .llseek = seq_lseek, - .release = lock_stat_release, +static const struct proc_ops lock_stat_proc_ops = { + .proc_open = lock_stat_open, + .proc_write = lock_stat_write, + .proc_read = seq_read, + .proc_lseek = seq_lseek, + .proc_release = lock_stat_release, }; #endif /* CONFIG_LOCK_STAT */ @@ -654,8 +660,7 @@ static int __init lockdep_proc_init(void) #endif proc_create_single("lockdep_stats", S_IRUSR, NULL, lockdep_stats_show); #ifdef CONFIG_LOCK_STAT - proc_create("lock_stat", S_IRUSR | S_IWUSR, NULL, - &proc_lock_stat_operations); + proc_create("lock_stat", S_IRUSR | S_IWUSR, NULL, &lock_stat_proc_ops); #endif return 0; diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c index c513031cd7e3..99475a66c94f 100644 --- a/kernel/locking/locktorture.c +++ b/kernel/locking/locktorture.c @@ -16,7 +16,6 @@ #include <linux/kthread.h> #include <linux/sched/rt.h> #include <linux/spinlock.h> -#include <linux/rwlock.h> #include <linux/mutex.h> #include <linux/rwsem.h> #include <linux/smp.h> @@ -889,16 +888,16 @@ static int __init lock_torture_init(void) cxt.nrealwriters_stress = 2 * num_online_cpus(); #ifdef CONFIG_DEBUG_MUTEXES - if (strncmp(torture_type, "mutex", 5) == 0) + if (str_has_prefix(torture_type, "mutex")) 
cxt.debug_lock = true; #endif #ifdef CONFIG_DEBUG_RT_MUTEXES - if (strncmp(torture_type, "rtmutex", 7) == 0) + if (str_has_prefix(torture_type, "rtmutex")) cxt.debug_lock = true; #endif #ifdef CONFIG_DEBUG_SPINLOCK - if ((strncmp(torture_type, "spin", 4) == 0) || - (strncmp(torture_type, "rw_lock", 7) == 0)) + if ((str_has_prefix(torture_type, "spin")) || + (str_has_prefix(torture_type, "rw_lock"))) cxt.debug_lock = true; #endif diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c index 5e069734363c..5352ce50a97e 100644 --- a/kernel/locking/mutex.c +++ b/kernel/locking/mutex.c @@ -65,11 +65,37 @@ EXPORT_SYMBOL(__mutex_init); #define MUTEX_FLAGS 0x07 +/* + * Internal helper function; C doesn't allow us to hide it :/ + * + * DO NOT USE (outside of mutex code). + */ +static inline struct task_struct *__mutex_owner(struct mutex *lock) +{ + return (struct task_struct *)(atomic_long_read(&lock->owner) & ~MUTEX_FLAGS); +} + static inline struct task_struct *__owner_task(unsigned long owner) { return (struct task_struct *)(owner & ~MUTEX_FLAGS); } +bool mutex_is_locked(struct mutex *lock) +{ + return __mutex_owner(lock) != NULL; +} +EXPORT_SYMBOL(mutex_is_locked); + +__must_check enum mutex_trylock_recursive_enum +mutex_trylock_recursive(struct mutex *lock) +{ + if (unlikely(__mutex_owner(lock) == current)) + return MUTEX_TRYLOCK_RECURSIVE; + + return mutex_trylock(lock); +} +EXPORT_SYMBOL(mutex_trylock_recursive); + static inline unsigned long __owner_flags(unsigned long owner) { return owner & MUTEX_FLAGS; @@ -1065,7 +1091,7 @@ err: err_early_kill: spin_unlock(&lock->wait_lock); debug_mutex_free_waiter(&waiter); - mutex_release(&lock->dep_map, 1, ip); + mutex_release(&lock->dep_map, ip); preempt_enable(); return ret; } @@ -1199,7 +1225,7 @@ static noinline void __sched __mutex_unlock_slowpath(struct mutex *lock, unsigne DEFINE_WAKE_Q(wake_q); unsigned long owner; - mutex_release(&lock->dep_map, 1, ip); + mutex_release(&lock->dep_map, ip); /* * Release the lock before (potentially) taking the spinlock such that diff --git a/kernel/locking/osq_lock.c b/kernel/locking/osq_lock.c index 6ef600aa0f47..1f7734949ac8 100644 --- a/kernel/locking/osq_lock.c +++ b/kernel/locking/osq_lock.c @@ -134,20 +134,17 @@ bool osq_lock(struct optimistic_spin_queue *lock) * cmpxchg in an attempt to undo our queueing. */ - while (!READ_ONCE(node->locked)) { - /* - * If we need to reschedule bail... so we can block. - * Use vcpu_is_preempted() to avoid waiting for a preempted - * lock holder: - */ - if (need_resched() || vcpu_is_preempted(node_cpu(node->prev))) - goto unqueue; - - cpu_relax(); - } - return true; + /* + * Wait to acquire the lock or cancelation. Note that need_resched() + * will come with an IPI, which will wake smp_cond_load_relaxed() if it + * is implemented with a monitor-wait. vcpu_is_preempted() relies on + * polling, be careful. + */ + if (smp_cond_load_relaxed(&node->locked, VAL || need_resched() || + vcpu_is_preempted(node_cpu(node->prev)))) + return true; -unqueue: + /* unqueue */ /* * Step - A -- stabilize @prev * diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c index 2473f10c6956..b9515fcc9b29 100644 --- a/kernel/locking/qspinlock.c +++ b/kernel/locking/qspinlock.c @@ -31,14 +31,15 @@ /* * The basic principle of a queue-based spinlock can best be understood * by studying a classic queue-based spinlock implementation called the - * MCS lock. The paper below provides a good description for this kind - * of lock. + * MCS lock. 
A copy of the original MCS lock paper ("Algorithms for Scalable + * Synchronization on Shared-Memory Multiprocessors by Mellor-Crummey and + * Scott") is available at * - * http://www.cise.ufl.edu/tr/DOC/REP-1992-71.pdf + * https://bugzilla.kernel.org/show_bug.cgi?id=206115 * - * This queued spinlock implementation is based on the MCS lock, however to make - * it fit the 4 bytes we assume spinlock_t to be, and preserve its existing - * API, we must modify it somehow. + * This queued spinlock implementation is based on the MCS lock, however to + * make it fit the 4 bytes we assume spinlock_t to be, and preserve its + * existing API, we must modify it somehow. * * In particular; where the traditional MCS lock consists of a tail pointer * (8 bytes) and needs the next pointer (another 8 bytes) of its own node to diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h index 89bab079e7a4..e84d21aa0722 100644 --- a/kernel/locking/qspinlock_paravirt.h +++ b/kernel/locking/qspinlock_paravirt.h @@ -269,7 +269,7 @@ pv_wait_early(struct pv_node *prev, int loop) if ((loop & PV_PREV_CHECK_MASK) != 0) return false; - return READ_ONCE(prev->state) != vcpu_running || vcpu_is_preempted(prev->cpu); + return READ_ONCE(prev->state) != vcpu_running; } /* diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c index fa83d36e30c6..851bbb10819d 100644 --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c @@ -628,8 +628,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task, } /* [10] Grab the next task, i.e. owner of @lock */ - task = rt_mutex_owner(lock); - get_task_struct(task); + task = get_task_struct(rt_mutex_owner(lock)); raw_spin_lock(&task->pi_lock); /* @@ -709,8 +708,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task, } /* [10] Grab the next task, i.e. 
the owner of @lock */ - task = rt_mutex_owner(lock); - get_task_struct(task); + task = get_task_struct(rt_mutex_owner(lock)); raw_spin_lock(&task->pi_lock); /* [11] requeue the pi waiters if necessary */ @@ -1519,7 +1517,7 @@ int __sched rt_mutex_lock_interruptible(struct rt_mutex *lock) mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_); ret = rt_mutex_fastlock(lock, TASK_INTERRUPTIBLE, rt_mutex_slowlock); if (ret) - mutex_release(&lock->dep_map, 1, _RET_IP_); + mutex_release(&lock->dep_map, _RET_IP_); return ret; } @@ -1563,7 +1561,7 @@ rt_mutex_timed_lock(struct rt_mutex *lock, struct hrtimer_sleeper *timeout) RT_MUTEX_MIN_CHAINWALK, rt_mutex_slowlock); if (ret) - mutex_release(&lock->dep_map, 1, _RET_IP_); + mutex_release(&lock->dep_map, _RET_IP_); return ret; } @@ -1602,7 +1600,7 @@ EXPORT_SYMBOL_GPL(rt_mutex_trylock); */ void __sched rt_mutex_unlock(struct rt_mutex *lock) { - mutex_release(&lock->dep_map, 1, _RET_IP_); + mutex_release(&lock->dep_map, _RET_IP_); rt_mutex_fastunlock(lock, rt_mutex_slowunlock); } EXPORT_SYMBOL_GPL(rt_mutex_unlock); diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c index bd0f0d05724c..0d9b6be9ecc8 100644 --- a/kernel/locking/rwsem.c +++ b/kernel/locking/rwsem.c @@ -105,8 +105,9 @@ #ifdef CONFIG_DEBUG_RWSEMS # define DEBUG_RWSEMS_WARN_ON(c, sem) do { \ if (!debug_locks_silent && \ - WARN_ONCE(c, "DEBUG_RWSEMS_WARN_ON(%s): count = 0x%lx, owner = 0x%lx, curr 0x%lx, list %sempty\n",\ + WARN_ONCE(c, "DEBUG_RWSEMS_WARN_ON(%s): count = 0x%lx, magic = 0x%lx, owner = 0x%lx, curr 0x%lx, list %sempty\n",\ #c, atomic_long_read(&(sem)->count), \ + (unsigned long) sem->magic, \ atomic_long_read(&(sem)->owner), (long)current, \ list_empty(&(sem)->wait_list) ? "" : "not ")) \ debug_locks_off(); \ @@ -330,6 +331,9 @@ void __init_rwsem(struct rw_semaphore *sem, const char *name, debug_check_no_locks_freed((void *)sem, sizeof(*sem)); lockdep_init_map(&sem->dep_map, name, key, 0); #endif +#ifdef CONFIG_DEBUG_RWSEMS + sem->magic = sem; +#endif atomic_long_set(&sem->count, RWSEM_UNLOCKED_VALUE); raw_spin_lock_init(&sem->wait_lock); INIT_LIST_HEAD(&sem->wait_list); @@ -724,11 +728,12 @@ rwsem_spin_on_owner(struct rw_semaphore *sem, unsigned long nonspinnable) rcu_read_lock(); for (;;) { - if (atomic_long_read(&sem->count) & RWSEM_FLAG_HANDOFF) { - state = OWNER_NONSPINNABLE; - break; - } - + /* + * When a waiting writer set the handoff flag, it may spin + * on the owner as well. Once that writer acquires the lock, + * we can spin on it. So we don't need to quit even when the + * handoff bit is set. + */ new = rwsem_owner_flags(sem, &new_flags); if ((new != owner) || (new_flags != flags)) { state = rwsem_owner_state(new, new_flags, nonspinnable); @@ -974,6 +979,13 @@ static inline bool rwsem_reader_phase_trylock(struct rw_semaphore *sem, { return false; } + +static inline int +rwsem_spin_on_owner(struct rw_semaphore *sem, unsigned long nonspinnable) +{ + return 0; +} +#define OWNER_NULL 1 #endif /* @@ -1206,6 +1218,18 @@ wait: raw_spin_unlock_irq(&sem->wait_lock); + /* + * After setting the handoff bit and failing to acquire + * the lock, attempt to spin on owner to accelerate lock + * transfer. If the previous owner is a on-cpu writer and it + * has just released the lock, OWNER_NULL will be returned. + * In this case, we attempt to acquire the lock again + * without sleeping. + */ + if (wstate == WRITER_HANDOFF && + rwsem_spin_on_owner(sem, RWSEM_NONSPINNABLE) == OWNER_NULL) + goto trylock_again; + /* Block until there are no active lockers. 
*/ for (;;) { if (signal_pending_state(state, current)) @@ -1240,7 +1264,7 @@ wait: break; } } - +trylock_again: raw_spin_lock_irq(&sem->wait_lock); } __set_current_state(TASK_RUNNING); @@ -1338,11 +1362,14 @@ static inline int __down_read_killable(struct rw_semaphore *sem) static inline int __down_read_trylock(struct rw_semaphore *sem) { + long tmp; + + DEBUG_RWSEMS_WARN_ON(sem->magic != sem, sem); + /* * Optimize for the case when the rwsem is not locked at all. */ - long tmp = RWSEM_UNLOCKED_VALUE; - + tmp = RWSEM_UNLOCKED_VALUE; do { if (atomic_long_try_cmpxchg_acquire(&sem->count, &tmp, tmp + RWSEM_READER_BIAS)) { @@ -1383,8 +1410,11 @@ static inline int __down_write_killable(struct rw_semaphore *sem) static inline int __down_write_trylock(struct rw_semaphore *sem) { - long tmp = RWSEM_UNLOCKED_VALUE; + long tmp; + DEBUG_RWSEMS_WARN_ON(sem->magic != sem, sem); + + tmp = RWSEM_UNLOCKED_VALUE; if (atomic_long_try_cmpxchg_acquire(&sem->count, &tmp, RWSEM_WRITER_LOCKED)) { rwsem_set_owner(sem); @@ -1400,7 +1430,9 @@ inline void __up_read(struct rw_semaphore *sem) { long tmp; + DEBUG_RWSEMS_WARN_ON(sem->magic != sem, sem); DEBUG_RWSEMS_WARN_ON(!is_rwsem_reader_owned(sem), sem); + rwsem_clear_reader_owned(sem); tmp = atomic_long_add_return_release(-RWSEM_READER_BIAS, &sem->count); DEBUG_RWSEMS_WARN_ON(tmp < 0, sem); @@ -1418,12 +1450,14 @@ static inline void __up_write(struct rw_semaphore *sem) { long tmp; + DEBUG_RWSEMS_WARN_ON(sem->magic != sem, sem); /* * sem->owner may differ from current if the ownership is transferred * to an anonymous writer by setting the RWSEM_NONSPINNABLE bits. */ DEBUG_RWSEMS_WARN_ON((rwsem_owner(sem) != current) && !rwsem_test_oflags(sem, RWSEM_NONSPINNABLE), sem); + rwsem_clear_owner(sem); tmp = atomic_long_fetch_add_release(-RWSEM_WRITER_LOCKED, &sem->count); if (unlikely(tmp & RWSEM_FLAG_WAITERS)) @@ -1470,7 +1504,7 @@ int __sched down_read_killable(struct rw_semaphore *sem) rwsem_acquire_read(&sem->dep_map, 0, 0, _RET_IP_); if (LOCK_CONTENDED_RETURN(sem, __down_read_trylock, __down_read_killable)) { - rwsem_release(&sem->dep_map, 1, _RET_IP_); + rwsem_release(&sem->dep_map, _RET_IP_); return -EINTR; } @@ -1512,7 +1546,7 @@ int __sched down_write_killable(struct rw_semaphore *sem) if (LOCK_CONTENDED_RETURN(sem, __down_write_trylock, __down_write_killable)) { - rwsem_release(&sem->dep_map, 1, _RET_IP_); + rwsem_release(&sem->dep_map, _RET_IP_); return -EINTR; } @@ -1539,7 +1573,7 @@ EXPORT_SYMBOL(down_write_trylock); */ void up_read(struct rw_semaphore *sem) { - rwsem_release(&sem->dep_map, 1, _RET_IP_); + rwsem_release(&sem->dep_map, _RET_IP_); __up_read(sem); } EXPORT_SYMBOL(up_read); @@ -1549,7 +1583,7 @@ EXPORT_SYMBOL(up_read); */ void up_write(struct rw_semaphore *sem) { - rwsem_release(&sem->dep_map, 1, _RET_IP_); + rwsem_release(&sem->dep_map, _RET_IP_); __up_write(sem); } EXPORT_SYMBOL(up_write); @@ -1605,7 +1639,7 @@ int __sched down_write_killable_nested(struct rw_semaphore *sem, int subclass) if (LOCK_CONTENDED_RETURN(sem, __down_write_trylock, __down_write_killable)) { - rwsem_release(&sem->dep_map, 1, _RET_IP_); + rwsem_release(&sem->dep_map, _RET_IP_); return -EINTR; } diff --git a/kernel/locking/spinlock_debug.c b/kernel/locking/spinlock_debug.c index 399669f7eba8..472dd462a40c 100644 --- a/kernel/locking/spinlock_debug.c +++ b/kernel/locking/spinlock_debug.c @@ -51,19 +51,19 @@ EXPORT_SYMBOL(__rwlock_init); static void spin_dump(raw_spinlock_t *lock, const char *msg) { - struct task_struct *owner = NULL; + struct task_struct *owner = 
READ_ONCE(lock->owner); - if (lock->owner && lock->owner != SPINLOCK_OWNER_INIT) - owner = lock->owner; + if (owner == SPINLOCK_OWNER_INIT) + owner = NULL; printk(KERN_EMERG "BUG: spinlock %s on CPU#%d, %s/%d\n", msg, raw_smp_processor_id(), current->comm, task_pid_nr(current)); printk(KERN_EMERG " lock: %pS, .magic: %08x, .owner: %s/%d, " ".owner_cpu: %d\n", - lock, lock->magic, + lock, READ_ONCE(lock->magic), owner ? owner->comm : "<none>", owner ? task_pid_nr(owner) : -1, - lock->owner_cpu); + READ_ONCE(lock->owner_cpu)); dump_stack(); } @@ -80,16 +80,16 @@ static void spin_bug(raw_spinlock_t *lock, const char *msg) static inline void debug_spin_lock_before(raw_spinlock_t *lock) { - SPIN_BUG_ON(lock->magic != SPINLOCK_MAGIC, lock, "bad magic"); - SPIN_BUG_ON(lock->owner == current, lock, "recursion"); - SPIN_BUG_ON(lock->owner_cpu == raw_smp_processor_id(), + SPIN_BUG_ON(READ_ONCE(lock->magic) != SPINLOCK_MAGIC, lock, "bad magic"); + SPIN_BUG_ON(READ_ONCE(lock->owner) == current, lock, "recursion"); + SPIN_BUG_ON(READ_ONCE(lock->owner_cpu) == raw_smp_processor_id(), lock, "cpu recursion"); } static inline void debug_spin_lock_after(raw_spinlock_t *lock) { - lock->owner_cpu = raw_smp_processor_id(); - lock->owner = current; + WRITE_ONCE(lock->owner_cpu, raw_smp_processor_id()); + WRITE_ONCE(lock->owner, current); } static inline void debug_spin_unlock(raw_spinlock_t *lock) @@ -99,8 +99,8 @@ static inline void debug_spin_unlock(raw_spinlock_t *lock) SPIN_BUG_ON(lock->owner != current, lock, "wrong owner"); SPIN_BUG_ON(lock->owner_cpu != raw_smp_processor_id(), lock, "wrong CPU"); - lock->owner = SPINLOCK_OWNER_INIT; - lock->owner_cpu = -1; + WRITE_ONCE(lock->owner, SPINLOCK_OWNER_INIT); + WRITE_ONCE(lock->owner_cpu, -1); } /* @@ -187,8 +187,8 @@ static inline void debug_write_lock_before(rwlock_t *lock) static inline void debug_write_lock_after(rwlock_t *lock) { - lock->owner_cpu = raw_smp_processor_id(); - lock->owner = current; + WRITE_ONCE(lock->owner_cpu, raw_smp_processor_id()); + WRITE_ONCE(lock->owner, current); } static inline void debug_write_unlock(rwlock_t *lock) @@ -197,8 +197,8 @@ static inline void debug_write_unlock(rwlock_t *lock) RWLOCK_BUG_ON(lock->owner != current, lock, "wrong owner"); RWLOCK_BUG_ON(lock->owner_cpu != raw_smp_processor_id(), lock, "wrong CPU"); - lock->owner = SPINLOCK_OWNER_INIT; - lock->owner_cpu = -1; + WRITE_ONCE(lock->owner, SPINLOCK_OWNER_INIT); + WRITE_ONCE(lock->owner_cpu, -1); } void do_raw_write_lock(rwlock_t *lock) |
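
The largest hunk above replaces lockdep's tightly packed stack_trace[] array with deduplicated lock_trace records: save_trace() hashes the captured backtrace, looks it up in stack_trace_hash[], and returns an existing identical trace instead of storing a second copy, so repeated backtraces cost one entry rather than one per occurrence. The following user-space sketch shows the same hash-and-compare deduplication idea; it is illustrative only and uses a simple FNV-1a hash and a singly linked chain in place of the kernel's jhash() and hlist API, with made-up pool and bucket sizes.

/* Illustrative only: user-space analogue of save_trace() deduplication. */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define POOL_LONGS 4096
#define HASH_SIZE  64           /* power of two, like STACK_TRACE_HASH_SIZE */

struct trace {
	struct trace *next;     /* hash chain, stands in for hlist_node */
	uint32_t hash;
	uint32_t nr_entries;
	unsigned long entries[];
};

static unsigned long pool[POOL_LONGS];
static unsigned long pool_used;
static struct trace *hash_tbl[HASH_SIZE];

static uint32_t hash_trace(const unsigned long *e, uint32_t n)
{
	/* FNV-1a over the raw bytes; the kernel uses jhash() instead. */
	const unsigned char *p = (const unsigned char *)e;
	uint32_t h = 2166136261u;
	size_t i, len = n * sizeof(*e);

	for (i = 0; i < len; i++)
		h = (h ^ p[i]) * 16777619u;
	return h;
}

/* Return an existing identical trace, or store the new one and return it. */
static struct trace *save_trace(const unsigned long *e, uint32_t n)
{
	uint32_t hash = hash_trace(e, n);
	struct trace *t, *new;
	size_t longs;

	for (t = hash_tbl[hash & (HASH_SIZE - 1)]; t; t = t->next)
		if (t->hash == hash && t->nr_entries == n &&
		    !memcmp(t->entries, e, n * sizeof(*e)))
			return t;               /* deduplicated */

	longs = (sizeof(*new) + n * sizeof(*e) + sizeof(long) - 1) / sizeof(long);
	if (pool_used + longs > POOL_LONGS)
		return NULL;                    /* pool exhausted */

	new = (struct trace *)&pool[pool_used];
	pool_used += longs;
	new->hash = hash;
	new->nr_entries = n;
	memcpy(new->entries, e, n * sizeof(*e));
	new->next = hash_tbl[hash & (HASH_SIZE - 1)];
	hash_tbl[hash & (HASH_SIZE - 1)] = new;
	return new;
}

int main(void)
{
	unsigned long bt[] = { 0x1000, 0x2000, 0x3000 };

	/* The second call finds the first entry instead of storing a copy. */
	printf("%p\n", (void *)save_trace(bt, 3));
	printf("%p\n", (void *)save_trace(bt, 3));
	return 0;
}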
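
The rwsem hunks add a magic field under CONFIG_DEBUG_RWSEMS that __init_rwsem() sets to the semaphore's own address and that the fast paths check with DEBUG_RWSEMS_WARN_ON(sem->magic != sem, sem), so an uninitialized, copied or corrupted semaphore is caught at the point of use. A minimal user-space sketch of that self-pointer pattern, with invented names:

/* Illustrative only: self-pointer "magic" check as used by the rwsem debug code. */
#include <assert.h>
#include <string.h>

struct sem {
	long count;
	struct sem *magic;      /* set to the object's own address at init */
};

static void sem_init(struct sem *s)
{
	s->count = 0;
	s->magic = s;
}

static void sem_check(const struct sem *s)
{
	/* Catches use before init, struct copies, and scribbled memory. */
	assert(s->magic == s);
}

int main(void)
{
	struct sem a, b;

	sem_init(&a);
	sem_check(&a);          /* passes */

	memcpy(&b, &a, sizeof(b));
	sem_check(&b);          /* fails: magic still points at &a */
	return 0;
}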
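
The spinlock_debug.c hunks convert plain accesses to the owner, owner_cpu and magic debug fields into READ_ONCE()/WRITE_ONCE(), because those fields are read by other CPUs (for example from spin_dump()) while a lock operation is updating them. A simplified rendering of the core idea behind those macros, assuming only aligned scalar fields; the real kernel definitions handle more cases than this:

/* Illustrative only: force a single volatile load/store of a scalar. */
#include <stdio.h>

#define READ_ONCE(x)      (*(const volatile __typeof__(x) *)&(x))
#define WRITE_ONCE(x, v)  (*(volatile __typeof__(x) *)&(x) = (v))

struct dbg_lock {
	int owner_cpu;
};

int main(void)
{
	struct dbg_lock l = { .owner_cpu = -1 };

	WRITE_ONCE(l.owner_cpu, 3);             /* one store, not merged or torn */
	printf("%d\n", READ_ONCE(l.owner_cpu)); /* one load, not re-read */
	return 0;
}

The volatile access stops the compiler from merging, re-reading or omitting the access, which is what the debug code wants even though the cross-CPU race on these fields is deliberate and harmless.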