Diffstat (limited to 'lib')
-rw-r--r--  lib/Kconfig.debug          19
-rw-r--r--  lib/Makefile                1
-rw-r--r--  lib/klist.c                26
-rw-r--r--  lib/radix-tree.c          176
-rw-r--r--  lib/semaphore-sleepers.c  177
5 files changed, 309 insertions(+), 90 deletions(-)
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 299f7f3b5b08..3754c9a8f5c8 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -46,6 +46,25 @@ config LOG_BUF_SHIFT
 		     13 =>  8 KB
 		     12 =>  4 KB
 
+config DETECT_SOFTLOCKUP
+	bool "Detect Soft Lockups"
+	depends on DEBUG_KERNEL
+	default y
+	help
+	  Say Y here to enable the kernel to detect "soft lockups",
+	  which are bugs that cause the kernel to loop in kernel
+	  mode for more than 10 seconds, without giving other tasks a
+	  chance to run.
+
+	  When a soft-lockup is detected, the kernel will print the
+	  current stack trace (which you should report), but the
+	  system will stay locked up. This feature has negligible
+	  overhead.
+
+	  (Note that "hard lockups" are a separate type of bug that
+	  can be detected via the NMI-watchdog, on platforms that
+	  support it.)
+
 config SCHEDSTATS
 	bool "Collect scheduler statistics"
 	depends on DEBUG_KERNEL && PROC_FS
diff --git a/lib/Makefile b/lib/Makefile
index 52f83380f704..3e2bd0df23bb 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -18,6 +18,7 @@ endif
 
 lib-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o
 lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o
+lib-$(CONFIG_SEMAPHORE_SLEEPERS) += semaphore-sleepers.o
 lib-$(CONFIG_GENERIC_FIND_NEXT_BIT) += find_next_bit.o
 obj-$(CONFIG_LOCK_KERNEL) += kernel_lock.o
 obj-$(CONFIG_DEBUG_PREEMPT) += smp_processor_id.o
diff --git a/lib/klist.c b/lib/klist.c
index 738ab810160a..bb2f3551d50a 100644
--- a/lib/klist.c
+++ b/lib/klist.c
@@ -42,12 +42,23 @@
 /**
  *	klist_init - Initialize a klist structure.
  *	@k:	The klist we're initializing.
+ *	@get:	The get function for the embedding object (NULL if none)
+ *	@put:	The put function for the embedding object (NULL if none)
+ *
+ * Initialises the klist structure.  If the klist_node structures are
+ * going to be embedded in refcounted objects (necessary for safe
+ * deletion) then the get/put arguments are used to initialise
+ * functions that take and release references on the embedding
+ * objects.
  */
 
-void klist_init(struct klist * k)
+void klist_init(struct klist * k, void (*get)(struct klist_node *),
+		void (*put)(struct klist_node *))
 {
 	INIT_LIST_HEAD(&k->k_list);
 	spin_lock_init(&k->k_lock);
+	k->get = get;
+	k->put = put;
 }
 
 EXPORT_SYMBOL_GPL(klist_init);
@@ -74,16 +85,18 @@ static void klist_node_init(struct klist * k, struct klist_node * n)
 	init_completion(&n->n_removed);
 	kref_init(&n->n_ref);
 	n->n_klist = k;
+	if (k->get)
+		k->get(n);
 }
 
 
 /**
  *	klist_add_head - Initialize a klist_node and add it to front.
- *	@k:	klist it's going on.
  *	@n:	node we're adding.
+ *	@k:	klist it's going on.
  */
 
-void klist_add_head(struct klist * k, struct klist_node * n)
+void klist_add_head(struct klist_node * n, struct klist * k)
 {
 	klist_node_init(k, n);
 	add_head(k, n);
@@ -94,11 +107,11 @@ EXPORT_SYMBOL_GPL(klist_add_head);
 
 /**
  *	klist_add_tail - Initialize a klist_node and add it to back.
- *	@k:	klist it's going on.
  *	@n:	node we're adding.
+ *	@k:	klist it's going on.
 
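The klist change above is an interface change, not just a cleanup: klist_init() now takes get/put callbacks so the object a node is embedded in can be pinned while the node is on the list, and klist_add_head()/klist_add_tail() swap their arguments to take the node first. A minimal caller-side sketch of the new interface follows; struct my_dev and its helpers are hypothetical, invented for illustration, and not part of this patch.

#include <linux/init.h>
#include <linux/klist.h>
#include <linux/kref.h>
#include <linux/slab.h>

struct my_dev {				/* hypothetical embedding object */
	struct kref		ref;
	struct klist_node	knode;
};

static void my_dev_free(struct kref *ref)
{
	kfree(container_of(ref, struct my_dev, ref));
}

/* Translate klist_node references into embedding-object references. */
static void my_dev_get(struct klist_node *n)
{
	kref_get(&container_of(n, struct my_dev, knode)->ref);
}

static void my_dev_put(struct klist_node *n)
{
	kref_put(&container_of(n, struct my_dev, knode)->ref, my_dev_free);
}

static struct klist my_devs;

static int __init my_setup(void)
{
	klist_init(&my_devs, my_dev_get, my_dev_put);
	return 0;
}

static void my_add(struct my_dev *dev)
{
	/* Note the swapped argument order: node first, then list. */
	klist_add_tail(&dev->knode, &my_devs);
}

With this in place, klist_release() drops the embedding object's reference through the put callback once a node's klist refcount reaches zero, which is what makes deletion safe while other threads still hold node references.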
 */
 
-void klist_add_tail(struct klist * k, struct klist_node * n)
+void klist_add_tail(struct klist_node * n, struct klist * k)
 {
 	klist_node_init(k, n);
 	add_tail(k, n);
@@ -110,9 +123,12 @@ EXPORT_SYMBOL_GPL(klist_add_tail);
 
 static void klist_release(struct kref * kref)
 {
 	struct klist_node * n = container_of(kref, struct klist_node, n_ref);
+	void (*put)(struct klist_node *) = n->n_klist->put;
 
 	list_del(&n->n_node);
 	complete(&n->n_removed);
 	n->n_klist = NULL;
+	if (put)
+		put(n);
 }
 
 static int klist_dec_and_del(struct klist_node * n)
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index 10bed1c8c3c3..b972dd29289d 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -1,6 +1,7 @@
 /*
  * Copyright (C) 2001 Momchil Velikov
  * Portions Copyright (C) 2001 Christoph Hellwig
+ * Copyright (C) 2005 SGI, Christoph Lameter <clameter@sgi.com>
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License as
@@ -51,7 +52,7 @@ struct radix_tree_node {
 };
 
 struct radix_tree_path {
-	struct radix_tree_node *node, **slot;
+	struct radix_tree_node *node;
 	int offset;
 };
 
@@ -227,7 +228,7 @@ out:
 int radix_tree_insert(struct radix_tree_root *root,
 			unsigned long index, void *item)
 {
-	struct radix_tree_node *node = NULL, *tmp, **slot;
+	struct radix_tree_node *node = NULL, *slot;
 	unsigned int height, shift;
 	int offset;
 	int error;
@@ -240,38 +241,42 @@ int radix_tree_insert(struct radix_tree_root *root,
 			return error;
 	}
 
-	slot = &root->rnode;
+	slot = root->rnode;
 	height = root->height;
 	shift = (height-1) * RADIX_TREE_MAP_SHIFT;
 
 	offset = 0;			/* uninitialised var warning */
 	while (height > 0) {
-		if (*slot == NULL) {
+		if (slot == NULL) {
 			/* Have to add a child node.  */
-			if (!(tmp = radix_tree_node_alloc(root)))
+			if (!(slot = radix_tree_node_alloc(root)))
 				return -ENOMEM;
-			*slot = tmp;
-			if (node)
+			if (node) {
+				node->slots[offset] = slot;
 				node->count++;
+			} else
+				root->rnode = slot;
 		}
 
 		/* Go a level down */
 		offset = (index >> shift) & RADIX_TREE_MAP_MASK;
-		node = *slot;
-		slot = (struct radix_tree_node **)(node->slots + offset);
+		node = slot;
+		slot = node->slots[offset];
 		shift -= RADIX_TREE_MAP_SHIFT;
 		height--;
 	}
 
-	if (*slot != NULL)
+	if (slot != NULL)
 		return -EEXIST;
+
 	if (node) {
 		node->count++;
+		node->slots[offset] = item;
 		BUG_ON(tag_get(node, 0, offset));
 		BUG_ON(tag_get(node, 1, offset));
-	}
+	} else
+		root->rnode = item;
 
-	*slot = item;
 	return 0;
 }
 EXPORT_SYMBOL(radix_tree_insert);
@@ -286,27 +291,25 @@ EXPORT_SYMBOL(radix_tree_insert);
 void *radix_tree_lookup(struct radix_tree_root *root, unsigned long index)
 {
 	unsigned int height, shift;
-	struct radix_tree_node **slot;
+	struct radix_tree_node *slot;
 
 	height = root->height;
 	if (index > radix_tree_maxindex(height))
 		return NULL;
 
 	shift = (height-1) * RADIX_TREE_MAP_SHIFT;
-	slot = &root->rnode;
+	slot = root->rnode;
 
 	while (height > 0) {
-		if (*slot == NULL)
+		if (slot == NULL)
 			return NULL;
 
-		slot = (struct radix_tree_node **)
-			((*slot)->slots +
-				((index >> shift) & RADIX_TREE_MAP_MASK));
+		slot = slot->slots[(index >> shift) & RADIX_TREE_MAP_MASK];
 		shift -= RADIX_TREE_MAP_SHIFT;
 		height--;
 	}
 
-	return *slot;
+	return slot;
 }
 EXPORT_SYMBOL(radix_tree_lookup);
@@ -326,27 +329,27 @@ void *radix_tree_tag_set(struct radix_tree_root *root,
 			unsigned long index, int tag)
 {
 	unsigned int height, shift;
-	struct radix_tree_node **slot;
+	struct radix_tree_node *slot;
 
 	height = root->height;
 	if (index > radix_tree_maxindex(height))
 		return NULL;
 
 	shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
-	slot = &root->rnode;
+	slot = root->rnode;
 
 	while (height > 0) {
 		int offset;
 
 		offset = (index >> shift) & RADIX_TREE_MAP_MASK;
-		tag_set(*slot, tag, offset);
-		slot = (struct radix_tree_node **)((*slot)->slots + offset);
-		BUG_ON(*slot == NULL);
+		tag_set(slot, tag, offset);
+		slot = slot->slots[offset];
+		BUG_ON(slot == NULL);
 		shift -= RADIX_TREE_MAP_SHIFT;
 		height--;
 	}
 
-	return *slot;
+	return slot;
 }
 EXPORT_SYMBOL(radix_tree_tag_set);
@@ -367,6 +370,7 @@ void *radix_tree_tag_clear(struct radix_tree_root *root,
 			unsigned long index, int tag)
 {
 	struct radix_tree_path path[RADIX_TREE_MAX_PATH], *pathp = path;
+	struct radix_tree_node *slot;
 	unsigned int height, shift;
 	void *ret = NULL;
 
@@ -376,38 +380,37 @@ void *radix_tree_tag_clear(struct radix_tree_root *root,
 
 	shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
 	pathp->node = NULL;
-	pathp->slot = &root->rnode;
+	slot = root->rnode;
 
 	while (height > 0) {
 		int offset;
 
-		if (*pathp->slot == NULL)
+		if (slot == NULL)
 			goto out;
 
 		offset = (index >> shift) & RADIX_TREE_MAP_MASK;
 		pathp[1].offset = offset;
-		pathp[1].node = *pathp[0].slot;
-		pathp[1].slot = (struct radix_tree_node **)
-				(pathp[1].node->slots + offset);
+		pathp[1].node = slot;
+		slot = slot->slots[offset];
 		pathp++;
 		shift -= RADIX_TREE_MAP_SHIFT;
 		height--;
 	}
 
-	ret = *pathp[0].slot;
+	ret = slot;
 	if (ret == NULL)
 		goto out;
 
 	do {
 		int idx;
 
-		tag_clear(pathp[0].node, tag, pathp[0].offset);
+		tag_clear(pathp->node, tag, pathp->offset);
 		for (idx = 0; idx < RADIX_TREE_TAG_LONGS; idx++) {
-			if (pathp[0].node->tags[tag][idx])
+			if (pathp->node->tags[tag][idx])
 				goto out;
 		}
 		pathp--;
-	} while (pathp[0].node);
+	} while (pathp->node);
 out:
 	return ret;
 }
@@ -415,21 +418,22 @@ EXPORT_SYMBOL(radix_tree_tag_clear);
 
 #ifndef __KERNEL__	/* Only the test harness uses this at present */
 /**
- * radix_tree_tag_get - get a tag on a radix tree node
- * @root:		radix tree root
- * @index:		index key
- * @tag:		tag index
+ *	radix_tree_tag_get - get a tag on a radix tree node
+ *	@root:		radix tree root
+ *	@index:		index key
+ *	@tag:		tag index
 *
- * Return the search tag corresponging to @index in the radix tree.
+ * Return values:
 *
- * Returns zero if the tag is unset, or if there is no corresponding item
- * in the tree.
+ *  0: tag not present
+ *  1: tag present, set
+ * -1: tag present, unset
 */
 int radix_tree_tag_get(struct radix_tree_root *root,
 			unsigned long index, int tag)
 {
 	unsigned int height, shift;
-	struct radix_tree_node **slot;
+	struct radix_tree_node *slot;
 	int saw_unset_tag = 0;
 
 	height = root->height;
@@ -437,12 +441,12 @@ int radix_tree_tag_get(struct radix_tree_root *root,
 		return 0;
 
 	shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
-	slot = &root->rnode;
+	slot = root->rnode;
 
 	for ( ; ; ) {
 		int offset;
 
-		if (*slot == NULL)
+		if (slot == NULL)
 			return 0;
 
 		offset = (index >> shift) & RADIX_TREE_MAP_MASK;
@@ -451,15 +455,15 @@ int radix_tree_tag_get(struct radix_tree_root *root,
 		 * This is just a debug check.  Later, we can bale as soon as
 		 * we see an unset tag.
 		 */
-		if (!tag_get(*slot, tag, offset))
+		if (!tag_get(slot, tag, offset))
 			saw_unset_tag = 1;
 		if (height == 1) {
-			int ret = tag_get(*slot, tag, offset);
+			int ret = tag_get(slot, tag, offset);
 
 			BUG_ON(ret && saw_unset_tag);
-			return ret;
+			return ret ? 1 : -1;
 		}
-		slot = (struct radix_tree_node **)((*slot)->slots + offset);
+		slot = slot->slots[offset];
 		shift -= RADIX_TREE_MAP_SHIFT;
 		height--;
 	}
@@ -472,17 +476,21 @@ __lookup(struct radix_tree_root *root, void **results, unsigned long index,
 	unsigned int max_items, unsigned long *next_index)
 {
 	unsigned int nr_found = 0;
-	unsigned int shift;
-	unsigned int height = root->height;
+	unsigned int shift, height;
 	struct radix_tree_node *slot;
+	unsigned long i;
+
+	height = root->height;
+	if (height == 0)
+		goto out;
 
 	shift = (height-1) * RADIX_TREE_MAP_SHIFT;
 	slot = root->rnode;
 
-	while (height > 0) {
-		unsigned long i = (index >> shift) & RADIX_TREE_MAP_MASK;
+	for ( ; height > 1; height--) {
 
-		for ( ; i < RADIX_TREE_MAP_SIZE; i++) {
+		for (i = (index >> shift) & RADIX_TREE_MAP_MASK ;
+				i < RADIX_TREE_MAP_SIZE; i++) {
 			if (slot->slots[i] != NULL)
 				break;
 			index &= ~((1UL << shift) - 1);
@@ -492,22 +500,20 @@ __lookup(struct radix_tree_root *root, void **results, unsigned long index,
 		}
 		if (i == RADIX_TREE_MAP_SIZE)
 			goto out;
-		height--;
-		if (height == 0) {	/* Bottom level: grab some items */
-			unsigned long j = index & RADIX_TREE_MAP_MASK;
-			for ( ; j < RADIX_TREE_MAP_SIZE; j++) {
-				index++;
-				if (slot->slots[j]) {
-					results[nr_found++] = slot->slots[j];
-					if (nr_found == max_items)
-						goto out;
-				}
-			}
-		}
+
 		shift -= RADIX_TREE_MAP_SHIFT;
 		slot = slot->slots[i];
 	}
+
+	/* Bottom level: grab some items */
+	for (i = index & RADIX_TREE_MAP_MASK; i < RADIX_TREE_MAP_SIZE; i++) {
+		index++;
+		if (slot->slots[i]) {
+			results[nr_found++] = slot->slots[i];
+			if (nr_found == max_items)
+				goto out;
+		}
+	}
 out:
 	*next_index = index;
 	return nr_found;
@@ -655,6 +661,7 @@ void *radix_tree_delete(struct radix_tree_root *root, unsigned long index)
 {
 	struct radix_tree_path path[RADIX_TREE_MAX_PATH], *pathp = path;
 	struct radix_tree_path *orig_pathp;
+	struct radix_tree_node *slot;
 	unsigned int height, shift;
 	void *ret = NULL;
 	char tags[RADIX_TREE_TAGS];
@@ -666,25 +673,23 @@ void *radix_tree_delete(struct radix_tree_root *root, unsigned long index)
 
 	shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
 	pathp->node = NULL;
-	pathp->slot = &root->rnode;
+	slot = root->rnode;
 
-	while (height > 0) {
+	for ( ; height > 0; height--) {
 		int offset;
 
-		if (*pathp->slot == NULL)
+		if (slot == NULL)
 			goto out;
 
 		offset = (index >> shift) & RADIX_TREE_MAP_MASK;
 		pathp[1].offset = offset;
-		pathp[1].node = *pathp[0].slot;
-		pathp[1].slot = (struct radix_tree_node **)
-			(pathp[1].node->slots + offset);
+		pathp[1].node = slot;
+		slot = slot->slots[offset];
 		pathp++;
 		shift -= RADIX_TREE_MAP_SHIFT;
-		height--;
 	}
 
-	ret = *pathp[0].slot;
+	ret = slot;
 	if (ret == NULL)
 		goto out;
 
@@ -704,10 +709,10 @@ void *radix_tree_delete(struct radix_tree_root *root, unsigned long index)
 			if (tags[tag])
 				continue;
 
-			tag_clear(pathp[0].node, tag, pathp[0].offset);
+			tag_clear(pathp->node, tag, pathp->offset);
 
 			for (idx = 0; idx < RADIX_TREE_TAG_LONGS; idx++) {
-				if (pathp[0].node->tags[tag][idx]) {
+				if (pathp->node->tags[tag][idx]) {
 					tags[tag] = 1;
 					nr_cleared_tags--;
 					break;
@@ -715,18 +720,19 @@ void *radix_tree_delete(struct radix_tree_root *root, unsigned long index)
 			}
 		}
 		pathp--;
-	} while (pathp[0].node && nr_cleared_tags);
+	} while (pathp->node && nr_cleared_tags);
 
-	pathp = orig_pathp;
-	*pathp[0].slot = NULL;
-	while (pathp[0].node && --pathp[0].node->count == 0) {
-		pathp--;
-		BUG_ON(*pathp[0].slot == NULL);
-		*pathp[0].slot = NULL;
-		radix_tree_node_free(pathp[1].node);
+	/* Now free the nodes we do not need anymore */
+	for (pathp = orig_pathp; pathp->node; pathp--) {
+		pathp->node->slots[pathp->offset] = NULL;
+		if (--pathp->node->count)
+			goto out;
+
+		/* Node with zero slots in use so free it */
+		radix_tree_node_free(pathp->node);
 	}
-	if (root->rnode == NULL)
-		root->height = 0;
+	root->rnode = NULL;
+	root->height = 0;
 out:
	return ret;
 }
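The radix-tree rewrite is one mechanical transformation applied throughout: the old code walked pointers-to-slots (struct radix_tree_node **slot, with a cast at every level) so it could write through the indirection, while the new code walks direct node pointers and updates node->slots[offset] or root->rnode explicitly at the point of change. radix_tree_tag_get() also changes its contract, as the new kerneldoc states: 0 for no tag state recorded, 1 for present and set, -1 for present but unset. Stripped of bookkeeping, the descent now looks like the sketch below; it paraphrases radix_tree_lookup() from the hunk above, assumes the node layout of lib/radix-tree.c, and is not additional patch content.

/* Descend one level per iteration, using direct node pointers. */
static void *descend_sketch(struct radix_tree_root *root, unsigned long index)
{
	struct radix_tree_node *slot = root->rnode;	/* was: &root->rnode */
	unsigned int height = root->height;
	unsigned int shift = (height - 1) * RADIX_TREE_MAP_SHIFT;

	while (height > 0) {
		if (slot == NULL)
			return NULL;
		/* This level's index bits select the child slot. */
		slot = slot->slots[(index >> shift) & RADIX_TREE_MAP_MASK];
		shift -= RADIX_TREE_MAP_SHIFT;
		height--;
	}
	return slot;	/* at height 0 the slot holds the item itself */
}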
diff --git a/lib/semaphore-sleepers.c b/lib/semaphore-sleepers.c
new file mode 100644
index 000000000000..4d5f18889fa5
--- /dev/null
+++ b/lib/semaphore-sleepers.c
@@ -0,0 +1,177 @@
+/*
+ * i386 and x86-64 semaphore implementation.
+ *
+ * (C) Copyright 1999 Linus Torvalds
+ *
+ * Portions Copyright 1999 Red Hat, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * rw semaphores implemented November 1999 by Benjamin LaHaise <bcrl@kvack.org>
+ */
+#include <linux/config.h>
+#include <linux/sched.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <asm/semaphore.h>
+
+/*
+ * Semaphores are implemented using a two-way counter:
+ * The "count" variable is decremented for each process
+ * that tries to acquire the semaphore, while the "sleeping"
+ * variable is a count of such acquires.
+ *
+ * Notably, the inline "up()" and "down()" functions can
+ * efficiently test if they need to do any extra work (up
+ * needs to do something only if count was negative before
+ * the increment operation).
+ *
+ * "sleeping" and the contention routine ordering is protected
+ * by the spinlock in the semaphore's waitqueue head.
+ *
+ * Note that these functions are only called when there is
+ * contention on the lock, and as such all this is the
+ * "non-critical" part of the whole semaphore business. The
+ * critical part is the inline stuff in <asm/semaphore.h>
+ * where we want to avoid any extra jumps and calls.
+ */
+
+/*
+ * Logic:
+ *  - only on a boundary condition do we need to care. When we go
+ *    from a negative count to a non-negative, we wake people up.
+ *  - when we go from a non-negative count to a negative, we
+ *    (a) synchronize with the "sleeper" count and (b) make sure
+ *    that we're on the wakeup list before we synchronize so that
+ *    we cannot lose wakeup events.
+ */
+
+fastcall void __up(struct semaphore *sem)
+{
+	wake_up(&sem->wait);
+}
+
+fastcall void __sched __down(struct semaphore * sem)
+{
+	struct task_struct *tsk = current;
+	DECLARE_WAITQUEUE(wait, tsk);
+	unsigned long flags;
+
+	tsk->state = TASK_UNINTERRUPTIBLE;
+	spin_lock_irqsave(&sem->wait.lock, flags);
+	add_wait_queue_exclusive_locked(&sem->wait, &wait);
+
+	sem->sleepers++;
+	for (;;) {
+		int sleepers = sem->sleepers;
+
+		/*
+		 * Add "everybody else" into it. They aren't
+		 * playing, because we own the spinlock in
+		 * the wait_queue_head.
+		 */
+		if (!atomic_add_negative(sleepers - 1, &sem->count)) {
+			sem->sleepers = 0;
+			break;
+		}
+		sem->sleepers = 1;	/* us - see -1 above */
+		spin_unlock_irqrestore(&sem->wait.lock, flags);
+
+		schedule();
+
+		spin_lock_irqsave(&sem->wait.lock, flags);
+		tsk->state = TASK_UNINTERRUPTIBLE;
+	}
+	remove_wait_queue_locked(&sem->wait, &wait);
+	wake_up_locked(&sem->wait);
+	spin_unlock_irqrestore(&sem->wait.lock, flags);
+	tsk->state = TASK_RUNNING;
+}
+
+fastcall int __sched __down_interruptible(struct semaphore * sem)
+{
+	int retval = 0;
+	struct task_struct *tsk = current;
+	DECLARE_WAITQUEUE(wait, tsk);
+	unsigned long flags;
+
+	tsk->state = TASK_INTERRUPTIBLE;
+	spin_lock_irqsave(&sem->wait.lock, flags);
+	add_wait_queue_exclusive_locked(&sem->wait, &wait);
+
+	sem->sleepers++;
+	for (;;) {
+		int sleepers = sem->sleepers;
+
+		/*
+		 * With signals pending, this turns into
+		 * the trylock failure case - we won't be
+		 * sleeping, and we can't get the lock as
+		 * it has contention. Just correct the count
+		 * and exit.
+		 */
+		if (signal_pending(current)) {
+			retval = -EINTR;
+			sem->sleepers = 0;
+			atomic_add(sleepers, &sem->count);
+			break;
+		}
+
+		/*
+		 * Add "everybody else" into it. They aren't
+		 * playing, because we own the spinlock in
+		 * wait_queue_head. The "-1" is because we're
+		 * still hoping to get the semaphore.
+		 */
+		if (!atomic_add_negative(sleepers - 1, &sem->count)) {
+			sem->sleepers = 0;
+			break;
+		}
+		sem->sleepers = 1;	/* us - see -1 above */
+		spin_unlock_irqrestore(&sem->wait.lock, flags);
+
+		schedule();
+
+		spin_lock_irqsave(&sem->wait.lock, flags);
+		tsk->state = TASK_INTERRUPTIBLE;
+	}
+	remove_wait_queue_locked(&sem->wait, &wait);
+	wake_up_locked(&sem->wait);
+	spin_unlock_irqrestore(&sem->wait.lock, flags);
+
+	tsk->state = TASK_RUNNING;
+	return retval;
+}
+
+/*
+ * Trylock failed - make sure we correct for
+ * having decremented the count.
+ *
+ * We could have done the trylock with a
+ * single "cmpxchg" without failure cases,
+ * but then it wouldn't work on a 386.
+ */
+fastcall int __down_trylock(struct semaphore * sem)
+{
+	int sleepers;
+	unsigned long flags;
+
+	spin_lock_irqsave(&sem->wait.lock, flags);
+	sleepers = sem->sleepers + 1;
+	sem->sleepers = 0;
+
+	/*
+	 * Add "everybody else" and us into it. They aren't
+	 * playing, because we own the spinlock in the
+	 * wait_queue_head.
+	 */
+	if (!atomic_add_negative(sleepers, &sem->count)) {
+		wake_up_locked(&sem->wait);
+	}
+
+	spin_unlock_irqrestore(&sem->wait.lock, flags);
+	return 1;
+}
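semaphore-sleepers.c moves the old i386 semaphore contention code into lib/ so both i386 and x86-64 can share it; the new CONFIG_SEMAPHORE_SLEEPERS hook in lib/Makefile is what builds it. The sleeper arithmetic in __down() is easiest to follow on a concrete schedule. The trace below is a worked example, not part of the patch, for a semaphore initialised to 1 with two contending tasks A and B:

  initial                 count = 1, sleepers = 0
  A: down()               count = 0;  fast path succeeds, A holds the lock
  B: down()               count = -1; fast path fails, B enters __down()
  B: sem->sleepers++      sleepers = 1
  B: atomic_add_negative(sleepers - 1 = 0, &sem->count)
                          count = -1, still negative, so B sets
                          sleepers = 1 ("us - see -1 above") and sleeps
  A: up()                 count = 0; it was negative, so __up() wakes B
  B: atomic_add_negative(sleepers - 1 = 0, &sem->count)
                          count = 0, non-negative, so B sets
                          sleepers = 0 and owns the semaphore

The invariant is that "count" carries a single -1 on behalf of the whole pool of waiters, while "sleepers" records how many tasks that -1 stands for. __down_trylock() restores the invariant in one step by adding sleepers + 1 (everybody else, plus its own failed decrement) back into count before giving up.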