diff options
Diffstat (limited to 'include')
72 files changed, 1880 insertions, 502 deletions
diff --git a/include/asm-alpha/rwsem.h b/include/asm-alpha/rwsem.h index fafdd4f7010a..1570c0b54336 100644 --- a/include/asm-alpha/rwsem.h +++ b/include/asm-alpha/rwsem.h @@ -36,20 +36,11 @@ struct rw_semaphore { #define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS) spinlock_t wait_lock; struct list_head wait_list; -#if RWSEM_DEBUG - int debug; -#endif }; -#if RWSEM_DEBUG -#define __RWSEM_DEBUG_INIT , 0 -#else -#define __RWSEM_DEBUG_INIT /* */ -#endif - #define __RWSEM_INITIALIZER(name) \ { RWSEM_UNLOCKED_VALUE, SPIN_LOCK_UNLOCKED, \ - LIST_HEAD_INIT((name).wait_list) __RWSEM_DEBUG_INIT } + LIST_HEAD_INIT((name).wait_list) } #define DECLARE_RWSEM(name) \ struct rw_semaphore name = __RWSEM_INITIALIZER(name) @@ -59,9 +50,6 @@ static inline void init_rwsem(struct rw_semaphore *sem) sem->count = RWSEM_UNLOCKED_VALUE; spin_lock_init(&sem->wait_lock); INIT_LIST_HEAD(&sem->wait_list); -#if RWSEM_DEBUG - sem->debug = 0; -#endif } static inline void __down_read(struct rw_semaphore *sem) diff --git a/include/asm-generic/mutex-null.h b/include/asm-generic/mutex-null.h index 5cf8b7ce0c45..254a126ede5c 100644 --- a/include/asm-generic/mutex-null.h +++ b/include/asm-generic/mutex-null.h @@ -10,15 +10,10 @@ #ifndef _ASM_GENERIC_MUTEX_NULL_H #define _ASM_GENERIC_MUTEX_NULL_H -/* extra parameter only needed for mutex debugging: */ -#ifndef __IP__ -# define __IP__ -#endif - -#define __mutex_fastpath_lock(count, fail_fn) fail_fn(count __RET_IP__) -#define __mutex_fastpath_lock_retval(count, fail_fn) fail_fn(count __RET_IP__) -#define __mutex_fastpath_unlock(count, fail_fn) fail_fn(count __RET_IP__) -#define __mutex_fastpath_trylock(count, fail_fn) fail_fn(count) -#define __mutex_slowpath_needs_to_unlock() 1 +#define __mutex_fastpath_lock(count, fail_fn) fail_fn(count) +#define __mutex_fastpath_lock_retval(count, fail_fn) fail_fn(count) +#define __mutex_fastpath_unlock(count, fail_fn) fail_fn(count) +#define __mutex_fastpath_trylock(count, fail_fn) fail_fn(count) +#define __mutex_slowpath_needs_to_unlock() 1 #endif diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h index c74521157461..e160e04290fb 100644 --- a/include/asm-generic/percpu.h +++ b/include/asm-generic/percpu.h @@ -7,6 +7,8 @@ extern unsigned long __per_cpu_offset[NR_CPUS]; +#define per_cpu_offset(x) (__per_cpu_offset[x]) + /* Separate out the type, so (int[3], foo) works. */ #define DEFINE_PER_CPU(type, name) \ __attribute__((__section__(".data.percpu"))) __typeof__(type) per_cpu__##name diff --git a/include/asm-i386/irqflags.h b/include/asm-i386/irqflags.h new file mode 100644 index 000000000000..e1bdb97c07fa --- /dev/null +++ b/include/asm-i386/irqflags.h @@ -0,0 +1,127 @@ +/* + * include/asm-i386/irqflags.h + * + * IRQ flags handling + * + * This file gets included from lowlevel asm headers too, to provide + * wrapped versions of the local_irq_*() APIs, based on the + * raw_local_irq_*() functions from the lowlevel headers. + */ +#ifndef _ASM_IRQFLAGS_H +#define _ASM_IRQFLAGS_H + +#ifndef __ASSEMBLY__ + +static inline unsigned long __raw_local_save_flags(void) +{ + unsigned long flags; + + __asm__ __volatile__( + "pushfl ; popl %0" + : "=g" (flags) + : /* no input */ + ); + + return flags; +} + +#define raw_local_save_flags(flags) \ + do { (flags) = __raw_local_save_flags(); } while (0) + +static inline void raw_local_irq_restore(unsigned long flags) +{ + __asm__ __volatile__( + "pushl %0 ; popfl" + : /* no output */ + :"g" (flags) + :"memory", "cc" + ); +} + +static inline void raw_local_irq_disable(void) +{ + __asm__ __volatile__("cli" : : : "memory"); +} + +static inline void raw_local_irq_enable(void) +{ + __asm__ __volatile__("sti" : : : "memory"); +} + +/* + * Used in the idle loop; sti takes one instruction cycle + * to complete: + */ +static inline void raw_safe_halt(void) +{ + __asm__ __volatile__("sti; hlt" : : : "memory"); +} + +/* + * Used when interrupts are already enabled or to + * shutdown the processor: + */ +static inline void halt(void) +{ + __asm__ __volatile__("hlt": : :"memory"); +} + +static inline int raw_irqs_disabled_flags(unsigned long flags) +{ + return !(flags & (1 << 9)); +} + +static inline int raw_irqs_disabled(void) +{ + unsigned long flags = __raw_local_save_flags(); + + return raw_irqs_disabled_flags(flags); +} + +/* + * For spinlocks, etc: + */ +static inline unsigned long __raw_local_irq_save(void) +{ + unsigned long flags = __raw_local_save_flags(); + + raw_local_irq_disable(); + + return flags; +} + +#define raw_local_irq_save(flags) \ + do { (flags) = __raw_local_irq_save(); } while (0) + +#endif /* __ASSEMBLY__ */ + +/* + * Do the CPU's IRQ-state tracing from assembly code. We call a + * C function, so save all the C-clobbered registers: + */ +#ifdef CONFIG_TRACE_IRQFLAGS + +# define TRACE_IRQS_ON \ + pushl %eax; \ + pushl %ecx; \ + pushl %edx; \ + call trace_hardirqs_on; \ + popl %edx; \ + popl %ecx; \ + popl %eax; + +# define TRACE_IRQS_OFF \ + pushl %eax; \ + pushl %ecx; \ + pushl %edx; \ + call trace_hardirqs_off; \ + popl %edx; \ + popl %ecx; \ + popl %eax; + +#else +# define TRACE_IRQS_ON +# define TRACE_IRQS_OFF +#endif + +#endif diff --git a/include/asm-i386/rwsem.h b/include/asm-i386/rwsem.h index be4ab859238e..2f07601562e7 100644 --- a/include/asm-i386/rwsem.h +++ b/include/asm-i386/rwsem.h @@ -40,6 +40,7 @@ #include <linux/list.h> #include <linux/spinlock.h> +#include <linux/lockdep.h> struct rwsem_waiter; @@ -61,36 +62,34 @@ struct rw_semaphore { #define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS) spinlock_t wait_lock; struct list_head wait_list; -#if RWSEM_DEBUG - int debug; +#ifdef CONFIG_DEBUG_LOCK_ALLOC + struct lockdep_map dep_map; #endif }; -/* - * initialisation - */ -#if RWSEM_DEBUG -#define __RWSEM_DEBUG_INIT , 0 +#ifdef CONFIG_DEBUG_LOCK_ALLOC +# define __RWSEM_DEP_MAP_INIT(lockname) , .dep_map = { .name = #lockname } #else -#define __RWSEM_DEBUG_INIT /* */ +# define __RWSEM_DEP_MAP_INIT(lockname) #endif + #define __RWSEM_INITIALIZER(name) \ { RWSEM_UNLOCKED_VALUE, SPIN_LOCK_UNLOCKED, LIST_HEAD_INIT((name).wait_list) \ - __RWSEM_DEBUG_INIT } + __RWSEM_DEP_MAP_INIT(name) } #define DECLARE_RWSEM(name) \ struct rw_semaphore name = __RWSEM_INITIALIZER(name) -static inline void init_rwsem(struct rw_semaphore *sem) -{ - sem->count = RWSEM_UNLOCKED_VALUE; - spin_lock_init(&sem->wait_lock); - INIT_LIST_HEAD(&sem->wait_list); -#if RWSEM_DEBUG - sem->debug = 0; -#endif -} +extern void __init_rwsem(struct rw_semaphore *sem, const char *name, + struct lock_class_key *key); + +#define init_rwsem(sem) \ +do { \ + static struct lock_class_key __key; \ + \ + __init_rwsem((sem), #sem, &__key); \ +} while (0) /* * lock for reading @@ -143,7 +142,7 @@ LOCK_PREFIX " cmpxchgl %2,%0\n\t" /* * lock for writing */ -static inline void __down_write(struct rw_semaphore *sem) +static inline void __down_write_nested(struct rw_semaphore *sem, int subclass) { int tmp; @@ -167,6 +166,11 @@ LOCK_PREFIX " xadd %%edx,(%%eax)\n\t" /* subtract 0x0000ffff, returns the : "memory", "cc"); } +static inline void __down_write(struct rw_semaphore *sem) +{ + __down_write_nested(sem, 0); +} + /* * trylock for writing -- returns 1 if successful, 0 if contention */ diff --git a/include/asm-i386/spinlock.h b/include/asm-i386/spinlock.h index 04ba30234c48..87c40f830653 100644 --- a/include/asm-i386/spinlock.h +++ b/include/asm-i386/spinlock.h @@ -31,6 +31,11 @@ "jmp 1b\n" \ "3:\n\t" +/* + * NOTE: there's an irqs-on section here, which normally would have to be + * irq-traced, but on CONFIG_TRACE_IRQFLAGS we never use + * __raw_spin_lock_string_flags(). + */ #define __raw_spin_lock_string_flags \ "\n1:\t" \ "lock ; decb %0\n\t" \ @@ -63,6 +68,12 @@ static inline void __raw_spin_lock(raw_spinlock_t *lock) "=m" (lock->slock) : : "memory"); } +/* + * It is easier for the lock validator if interrupts are not re-enabled + * in the middle of a lock-acquire. This is a performance feature anyway + * so we turn it off: + */ +#ifndef CONFIG_PROVE_LOCKING static inline void __raw_spin_lock_flags(raw_spinlock_t *lock, unsigned long flags) { alternative_smp( @@ -70,6 +81,7 @@ static inline void __raw_spin_lock_flags(raw_spinlock_t *lock, unsigned long fla __raw_spin_lock_string_up, "=m" (lock->slock) : "r" (flags) : "memory"); } +#endif static inline int __raw_spin_trylock(raw_spinlock_t *lock) { diff --git a/include/asm-i386/system.h b/include/asm-i386/system.h index cab0180567f9..db398d88b1d9 100644 --- a/include/asm-i386/system.h +++ b/include/asm-i386/system.h @@ -456,25 +456,7 @@ static inline unsigned long long __cmpxchg64(volatile void *ptr, unsigned long l #define set_wmb(var, value) do { var = value; wmb(); } while (0) -/* interrupt control.. */ -#define local_save_flags(x) do { typecheck(unsigned long,x); __asm__ __volatile__("pushfl ; popl %0":"=g" (x): /* no input */); } while (0) -#define local_irq_restore(x) do { typecheck(unsigned long,x); __asm__ __volatile__("pushl %0 ; popfl": /* no output */ :"g" (x):"memory", "cc"); } while (0) -#define local_irq_disable() __asm__ __volatile__("cli": : :"memory") -#define local_irq_enable() __asm__ __volatile__("sti": : :"memory") -/* used in the idle loop; sti takes one instruction cycle to complete */ -#define safe_halt() __asm__ __volatile__("sti; hlt": : :"memory") -/* used when interrupts are already enabled or to shutdown the processor */ -#define halt() __asm__ __volatile__("hlt": : :"memory") - -#define irqs_disabled() \ -({ \ - unsigned long flags; \ - local_save_flags(flags); \ - !(flags & (1<<9)); \ -}) - -/* For spinlocks etc */ -#define local_irq_save(x) __asm__ __volatile__("pushfl ; popl %0 ; cli":"=g" (x): /* no input */ :"memory") +#include <linux/irqflags.h> /* * disable hlt during certain critical i/o operations diff --git a/include/asm-ia64/irq.h b/include/asm-ia64/irq.h index 8acb00190d5a..79479e2c6966 100644 --- a/include/asm-ia64/irq.h +++ b/include/asm-ia64/irq.h @@ -14,8 +14,6 @@ #define NR_IRQS 256 #define NR_IRQ_VECTORS NR_IRQS -#define IRQF_PERCPU 0x02000000 - static __inline__ int irq_canonicalize (int irq) { diff --git a/include/asm-ia64/percpu.h b/include/asm-ia64/percpu.h index 24d898b650c5..fbe5cf3ab8dc 100644 --- a/include/asm-ia64/percpu.h +++ b/include/asm-ia64/percpu.h @@ -36,6 +36,7 @@ #ifdef CONFIG_SMP extern unsigned long __per_cpu_offset[NR_CPUS]; +#define per_cpu_offset(x) (__per_cpu_offset(x)) /* Equal to __per_cpu_offset[smp_processor_id()], but faster to access: */ DECLARE_PER_CPU(unsigned long, local_per_cpu_offset); diff --git a/include/asm-ia64/rwsem.h b/include/asm-ia64/rwsem.h index 1327c91ea39c..2d1640cc240a 100644 --- a/include/asm-ia64/rwsem.h +++ b/include/asm-ia64/rwsem.h @@ -33,9 +33,6 @@ struct rw_semaphore { signed long count; spinlock_t wait_lock; struct list_head wait_list; -#if RWSEM_DEBUG - int debug; -#endif }; #define RWSEM_UNLOCKED_VALUE __IA64_UL_CONST(0x0000000000000000) @@ -45,19 +42,9 @@ struct rw_semaphore { #define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS #define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS) -/* - * initialization - */ -#if RWSEM_DEBUG -#define __RWSEM_DEBUG_INIT , 0 -#else -#define __RWSEM_DEBUG_INIT /* */ -#endif - #define __RWSEM_INITIALIZER(name) \ { RWSEM_UNLOCKED_VALUE, SPIN_LOCK_UNLOCKED, \ - LIST_HEAD_INIT((name).wait_list) \ - __RWSEM_DEBUG_INIT } + LIST_HEAD_INIT((name).wait_list) } #define DECLARE_RWSEM(name) \ struct rw_semaphore name = __RWSEM_INITIALIZER(name) @@ -73,9 +60,6 @@ init_rwsem (struct rw_semaphore *sem) sem->count = RWSEM_UNLOCKED_VALUE; spin_lock_init(&sem->wait_lock); INIT_LIST_HEAD(&sem->wait_list); -#if RWSEM_DEBUG - sem->debug = 0; -#endif } /* diff --git a/include/asm-ia64/thread_info.h b/include/asm-ia64/thread_info.h index 8bc9869e5765..8adcde0934ca 100644 --- a/include/asm-ia64/thread_info.h +++ b/include/asm-ia64/thread_info.h @@ -68,7 +68,7 @@ struct thread_info { #define end_of_stack(p) (unsigned long *)((void *)(p) + IA64_RBS_OFFSET) #define __HAVE_ARCH_TASK_STRUCT_ALLOCATOR -#define alloc_task_struct() ((task_t *)__get_free_pages(GFP_KERNEL | __GFP_COMP, KERNEL_STACK_SIZE_ORDER)) +#define alloc_task_struct() ((struct task_struct *)__get_free_pages(GFP_KERNEL | __GFP_COMP, KERNEL_STACK_SIZE_ORDER)) #define free_task_struct(tsk) free_pages((unsigned long) (tsk), KERNEL_STACK_SIZE_ORDER) #endif /* !__ASSEMBLY */ diff --git a/include/asm-m32r/system.h b/include/asm-m32r/system.h index 66c4742f09e7..311cebf44eff 100644 --- a/include/asm-m32r/system.h +++ b/include/asm-m32r/system.h @@ -18,7 +18,7 @@ * switch_to(prev, next) should switch from task `prev' to `next' * `prev' will never be the same as `next'. * - * `next' and `prev' should be task_t, but it isn't always defined + * `next' and `prev' should be struct task_struct, but it isn't always defined */ #define switch_to(prev, next, last) do { \ diff --git a/include/asm-powerpc/i8259.h b/include/asm-powerpc/i8259.h index 0392159e16e4..c80e113052cd 100644 --- a/include/asm-powerpc/i8259.h +++ b/include/asm-powerpc/i8259.h @@ -4,11 +4,13 @@ #include <linux/irq.h> -extern struct hw_interrupt_type i8259_pic; - +#ifdef CONFIG_PPC_MERGE +extern void i8259_init(struct device_node *node, unsigned long intack_addr); +extern unsigned int i8259_irq(struct pt_regs *regs); +#else extern void i8259_init(unsigned long intack_addr, int offset); extern int i8259_irq(struct pt_regs *regs); -extern int i8259_irq_cascade(struct pt_regs *regs, void *unused); +#endif #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_I8259_H */ diff --git a/include/asm-powerpc/irq.h b/include/asm-powerpc/irq.h index eb5f33e1977a..e05754752028 100644 --- a/include/asm-powerpc/irq.h +++ b/include/asm-powerpc/irq.h @@ -9,26 +9,14 @@ * 2 of the License, or (at your option) any later version. */ +#include <linux/config.h> #include <linux/threads.h> +#include <linux/list.h> +#include <linux/radix-tree.h> #include <asm/types.h> #include <asm/atomic.h> -/* this number is used when no interrupt has been assigned */ -#define NO_IRQ (-1) - -/* - * These constants are used for passing information about interrupt - * signal polarity and level/edge sensing to the low-level PIC chip - * drivers. - */ -#define IRQ_SENSE_MASK 0x1 -#define IRQ_SENSE_LEVEL 0x1 /* interrupt on active level */ -#define IRQ_SENSE_EDGE 0x0 /* interrupt triggered by edge */ - -#define IRQ_POLARITY_MASK 0x2 -#define IRQ_POLARITY_POSITIVE 0x2 /* high level or low->high edge */ -#define IRQ_POLARITY_NEGATIVE 0x0 /* low level or high->low edge */ #define get_irq_desc(irq) (&irq_desc[(irq)]) @@ -36,50 +24,325 @@ #define for_each_irq(i) \ for ((i) = 0; (i) < NR_IRQS; ++(i)) -#ifdef CONFIG_PPC64 +extern atomic_t ppc_n_lost_interrupts; -/* - * Maximum number of interrupt sources that we can handle. +#ifdef CONFIG_PPC_MERGE + +/* This number is used when no interrupt has been assigned */ +#define NO_IRQ (0) + +/* This is a special irq number to return from get_irq() to tell that + * no interrupt happened _and_ ignore it (don't count it as bad). Some + * platforms like iSeries rely on that. */ +#define NO_IRQ_IGNORE ((unsigned int)-1) + +/* Total number of virq in the platform (make it a CONFIG_* option ? */ #define NR_IRQS 512 -/* Interrupt numbers are virtual in case they are sparsely - * distributed by the hardware. +/* Number of irqs reserved for the legacy controller */ +#define NUM_ISA_INTERRUPTS 16 + +/* This type is the placeholder for a hardware interrupt number. It has to + * be big enough to enclose whatever representation is used by a given + * platform. + */ +typedef unsigned long irq_hw_number_t; + +/* Interrupt controller "host" data structure. This could be defined as a + * irq domain controller. That is, it handles the mapping between hardware + * and virtual interrupt numbers for a given interrupt domain. The host + * structure is generally created by the PIC code for a given PIC instance + * (though a host can cover more than one PIC if they have a flat number + * model). It's the host callbacks that are responsible for setting the + * irq_chip on a given irq_desc after it's been mapped. + * + * The host code and data structures are fairly agnostic to the fact that + * we use an open firmware device-tree. We do have references to struct + * device_node in two places: in irq_find_host() to find the host matching + * a given interrupt controller node, and of course as an argument to its + * counterpart host->ops->match() callback. However, those are treated as + * generic pointers by the core and the fact that it's actually a device-node + * pointer is purely a convention between callers and implementation. This + * code could thus be used on other architectures by replacing those two + * by some sort of arch-specific void * "token" used to identify interrupt + * controllers. */ -extern unsigned int virt_irq_to_real_map[NR_IRQS]; +struct irq_host; +struct radix_tree_root; -/* The maximum virtual IRQ number that we support. This - * can be set by the platform and will be reduced by the - * value of __irq_offset_value. It defaults to and is - * capped by (NR_IRQS - 1). +/* Functions below are provided by the host and called whenever a new mapping + * is created or an old mapping is disposed. The host can then proceed to + * whatever internal data structures management is required. It also needs + * to setup the irq_desc when returning from map(). */ -extern unsigned int virt_irq_max; +struct irq_host_ops { + /* Match an interrupt controller device node to a host, returns + * 1 on a match + */ + int (*match)(struct irq_host *h, struct device_node *node); + + /* Create or update a mapping between a virtual irq number and a hw + * irq number. This can be called several times for the same mapping + * but with different flags, though unmap shall always be called + * before the virq->hw mapping is changed. + */ + int (*map)(struct irq_host *h, unsigned int virq, + irq_hw_number_t hw, unsigned int flags); + + /* Dispose of such a mapping */ + void (*unmap)(struct irq_host *h, unsigned int virq); + + /* Translate device-tree interrupt specifier from raw format coming + * from the firmware to a irq_hw_number_t (interrupt line number) and + * trigger flags that can be passed to irq_create_mapping(). + * If no translation is provided, raw format is assumed to be one cell + * for interrupt line and default sense. + */ + int (*xlate)(struct irq_host *h, struct device_node *ctrler, + u32 *intspec, unsigned int intsize, + irq_hw_number_t *out_hwirq, unsigned int *out_flags); +}; + +struct irq_host { + struct list_head link; + + /* type of reverse mapping technique */ + unsigned int revmap_type; +#define IRQ_HOST_MAP_LEGACY 0 /* legacy 8259, gets irqs 1..15 */ +#define IRQ_HOST_MAP_NOMAP 1 /* no fast reverse mapping */ +#define IRQ_HOST_MAP_LINEAR 2 /* linear map of interrupts */ +#define IRQ_HOST_MAP_TREE 3 /* radix tree */ + union { + struct { + unsigned int size; + unsigned int *revmap; + } linear; + struct radix_tree_root tree; + } revmap_data; + struct irq_host_ops *ops; + void *host_data; + irq_hw_number_t inval_irq; +}; + +/* The main irq map itself is an array of NR_IRQ entries containing the + * associate host and irq number. An entry with a host of NULL is free. + * An entry can be allocated if it's free, the allocator always then sets + * hwirq first to the host's invalid irq number and then fills ops. + */ +struct irq_map_entry { + irq_hw_number_t hwirq; + struct irq_host *host; +}; + +extern struct irq_map_entry irq_map[NR_IRQS]; + -/* Create a mapping for a real_irq if it doesn't already exist. - * Return the virtual irq as a convenience. +/*** + * irq_alloc_host - Allocate a new irq_host data structure + * @node: device-tree node of the interrupt controller + * @revmap_type: type of reverse mapping to use + * @revmap_arg: for IRQ_HOST_MAP_LINEAR linear only: size of the map + * @ops: map/unmap host callbacks + * @inval_irq: provide a hw number in that host space that is always invalid + * + * Allocates and initialize and irq_host structure. Note that in the case of + * IRQ_HOST_MAP_LEGACY, the map() callback will be called before this returns + * for all legacy interrupts except 0 (which is always the invalid irq for + * a legacy controller). For a IRQ_HOST_MAP_LINEAR, the map is allocated by + * this call as well. For a IRQ_HOST_MAP_TREE, the radix tree will be allocated + * later during boot automatically (the reverse mapping will use the slow path + * until that happens). + */ +extern struct irq_host *irq_alloc_host(unsigned int revmap_type, + unsigned int revmap_arg, + struct irq_host_ops *ops, + irq_hw_number_t inval_irq); + + +/*** + * irq_find_host - Locates a host for a given device node + * @node: device-tree node of the interrupt controller + */ +extern struct irq_host *irq_find_host(struct device_node *node); + + +/*** + * irq_set_default_host - Set a "default" host + * @host: default host pointer + * + * For convenience, it's possible to set a "default" host that will be used + * whenever NULL is passed to irq_create_mapping(). It makes life easier for + * platforms that want to manipulate a few hard coded interrupt numbers that + * aren't properly represented in the device-tree. + */ +extern void irq_set_default_host(struct irq_host *host); + + +/*** + * irq_set_virq_count - Set the maximum number of virt irqs + * @count: number of linux virtual irqs, capped with NR_IRQS + * + * This is mainly for use by platforms like iSeries who want to program + * the virtual irq number in the controller to avoid the reverse mapping + */ +extern void irq_set_virq_count(unsigned int count); + + +/*** + * irq_create_mapping - Map a hardware interrupt into linux virq space + * @host: host owning this hardware interrupt or NULL for default host + * @hwirq: hardware irq number in that host space + * @flags: flags passed to the controller. contains the trigger type among + * others. Use IRQ_TYPE_* defined in include/linux/irq.h + * + * Only one mapping per hardware interrupt is permitted. Returns a linux + * virq number. The flags can be used to provide sense information to the + * controller (typically extracted from the device-tree). If no information + * is passed, the controller defaults will apply (for example, xics can only + * do edge so flags are irrelevant for some pseries specific irqs). + * + * The device-tree generally contains the trigger info in an encoding that is + * specific to a given type of controller. In that case, you can directly use + * host->ops->trigger_xlate() to translate that. + * + * It is recommended that new PICs that don't have existing OF bindings chose + * to use a representation of triggers identical to linux. + */ +extern unsigned int irq_create_mapping(struct irq_host *host, + irq_hw_number_t hwirq, + unsigned int flags); + + +/*** + * irq_dispose_mapping - Unmap an interrupt + * @virq: linux virq number of the interrupt to unmap + */ +extern void irq_dispose_mapping(unsigned int virq); + +/*** + * irq_find_mapping - Find a linux virq from an hw irq number. + * @host: host owning this hardware interrupt + * @hwirq: hardware irq number in that host space + * + * This is a slow path, for use by generic code. It's expected that an + * irq controller implementation directly calls the appropriate low level + * mapping function. */ -int virt_irq_create_mapping(unsigned int real_irq); -void virt_irq_init(void); +extern unsigned int irq_find_mapping(struct irq_host *host, + irq_hw_number_t hwirq); -static inline unsigned int virt_irq_to_real(unsigned int virt_irq) + +/*** + * irq_radix_revmap - Find a linux virq from a hw irq number. + * @host: host owning this hardware interrupt + * @hwirq: hardware irq number in that host space + * + * This is a fast path, for use by irq controller code that uses radix tree + * revmaps + */ +extern unsigned int irq_radix_revmap(struct irq_host *host, + irq_hw_number_t hwirq); + +/*** + * irq_linear_revmap - Find a linux virq from a hw irq number. + * @host: host owning this hardware interrupt + * @hwirq: hardware irq number in that host space + * + * This is a fast path, for use by irq controller code that uses linear + * revmaps. It does fallback to the slow path if the revmap doesn't exist + * yet and will create the revmap entry with appropriate locking + */ + +extern unsigned int irq_linear_revmap(struct irq_host *host, + irq_hw_number_t hwirq); + + + +/*** + * irq_alloc_virt - Allocate virtual irq numbers + * @host: host owning these new virtual irqs + * @count: number of consecutive numbers to allocate + * @hint: pass a hint number, the allocator will try to use a 1:1 mapping + * + * This is a low level function that is used internally by irq_create_mapping() + * and that can be used by some irq controllers implementations for things + * like allocating ranges of numbers for MSIs. The revmaps are left untouched. + */ +extern unsigned int irq_alloc_virt(struct irq_host *host, + unsigned int count, + unsigned int hint); + +/*** + * irq_free_virt - Free virtual irq numbers + * @virq: virtual irq number of the first interrupt to free + * @count: number of interrupts to free + * + * This function is the opposite of irq_alloc_virt. It will not clear reverse + * maps, this should be done previously by unmap'ing the interrupt. In fact, + * all interrupts covered by the range being freed should have been unmapped + * prior to calling this. + */ +extern void irq_free_virt(unsigned int virq, unsigned int count); + + +/* -- OF helpers -- */ + +/* irq_create_of_mapping - Map a hardware interrupt into linux virq space + * @controller: Device node of the interrupt controller + * @inspec: Interrupt specifier from the device-tree + * @intsize: Size of the interrupt specifier from the device-tree + * + * This function is identical to irq_create_mapping except that it takes + * as input informations straight from the device-tree (typically the results + * of the of_irq_map_*() functions + */ +extern unsigned int irq_create_of_mapping(struct device_node *controller, + u32 *intspec, unsigned int intsize); + + +/* irq_of_parse_and_map - Parse nad Map an interrupt into linux virq space + * @device: Device node of the device whose interrupt is to be mapped + * @index: Index of the interrupt to map + * + * This function is a wrapper that chains of_irq_map_one() and + * irq_create_of_mapping() to make things easier to callers + */ +extern unsigned int irq_of_parse_and_map(struct device_node *dev, int index); + +/* -- End OF helpers -- */ + +/*** + * irq_early_init - Init irq remapping subsystem + */ +extern void irq_early_init(void); + +static __inline__ int irq_canonicalize(int irq) { - return virt_irq_to_real_map[virt_irq]; + return irq; } -extern unsigned int real_irq_to_virt_slowpath(unsigned int real_irq); + +#else /* CONFIG_PPC_MERGE */ + +/* This number is used when no interrupt has been assigned */ +#define NO_IRQ (-1) +#define NO_IRQ_IGNORE (-2) + /* - * List of interrupt controllers. + * These constants are used for passing information about interrupt + * signal polarity and level/edge sensing to the low-level PIC chip + * drivers. */ -#define IC_INVALID 0 -#define IC_OPEN_PIC 1 -#define IC_PPC_XIC 2 -#define IC_CELL_PIC 3 -#define IC_ISERIES 4 +#define IRQ_SENSE_MASK 0x1 +#define IRQ_SENSE_LEVEL 0x1 /* interrupt on active level */ +#define IRQ_SENSE_EDGE 0x0 /* interrupt triggered by edge */ -extern u64 ppc64_interrupt_controller; +#define IRQ_POLARITY_MASK 0x2 +#define IRQ_POLARITY_POSITIVE 0x2 /* high level or low->high edge */ +#define IRQ_POLARITY_NEGATIVE 0x0 /* low level or high->low edge */ -#else /* 32-bit */ #if defined(CONFIG_40x) #include <asm/ibm4xx.h> @@ -512,16 +775,11 @@ extern u64 ppc64_interrupt_controller; #endif /* CONFIG_8260 */ -#endif +#endif /* Whatever way too big #ifdef */ #define NR_MASK_WORDS ((NR_IRQS + 31) / 32) /* pedantic: these are long because they are used with set_bit --RR */ extern unsigned long ppc_cached_irq_mask[NR_MASK_WORDS]; -extern atomic_t ppc_n_lost_interrupts; - -#define virt_irq_create_mapping(x) (x) - -#endif /* * Because many systems have two overlapping names spaces for @@ -560,6 +818,7 @@ static __inline__ int irq_canonicalize(int irq) irq = 9; return irq; } +#endif /* CONFIG_PPC_MERGE */ extern int distribute_irqs; @@ -579,9 +838,8 @@ extern struct thread_info *softirq_ctx[NR_CPUS]; extern void irq_ctx_init(void); extern void call_do_softirq(struct thread_info *tp); -extern int call___do_IRQ(int irq, struct pt_regs *regs, - struct thread_info *tp); - +extern int call_handle_irq(int irq, void *p1, void *p2, + struct thread_info *tp, void *func); #else #define irq_ctx_init() diff --git a/include/asm-powerpc/irqflags.h b/include/asm-powerpc/irqflags.h new file mode 100644 index 000000000000..7970cbaeaa54 --- /dev/null +++ b/include/asm-powerpc/irqflags.h @@ -0,0 +1,31 @@ +/* + * include/asm-powerpc/irqflags.h + * + * IRQ flags handling + * + * This file gets included from lowlevel asm headers too, to provide + * wrapped versions of the local_irq_*() APIs, based on the + * raw_local_irq_*() macros from the lowlevel headers. + */ +#ifndef _ASM_IRQFLAGS_H +#define _ASM_IRQFLAGS_H + +/* + * Get definitions for raw_local_save_flags(x), etc. + */ +#include <asm-powerpc/hw_irq.h> + +/* + * Do the CPU's IRQ-state tracing from assembly code. We call a + * C function, so save all the C-clobbered registers: + */ +#ifdef CONFIG_TRACE_IRQFLAGS + +#error No support on PowerPC yet for CONFIG_TRACE_IRQFLAGS + +#else +# define TRACE_IRQS_ON +# define TRACE_IRQS_OFF +#endif + +#endif diff --git a/include/asm-powerpc/machdep.h b/include/asm-powerpc/machdep.h index eba133d149a7..c17c13742401 100644 --- a/include/asm-powerpc/machdep.h +++ b/include/asm-powerpc/machdep.h @@ -97,7 +97,7 @@ struct machdep_calls { void (*show_percpuinfo)(struct seq_file *m, int i); void (*init_IRQ)(void); - int (*get_irq)(struct pt_regs *); + unsigned int (*get_irq)(struct pt_regs *); #ifdef CONFIG_KEXEC void (*kexec_cpu_down)(int crash_shutdown, int secondary); #endif diff --git a/include/asm-powerpc/mpic.h b/include/asm-powerpc/mpic.h index f0d22ac34b96..eb241c99c457 100644 --- a/include/asm-powerpc/mpic.h +++ b/include/asm-powerpc/mpic.h @@ -114,9 +114,6 @@ #define MPIC_VEC_TIMER_1 248 #define MPIC_VEC_TIMER_0 247 -/* Type definition of the cascade handler */ -typedef int (*mpic_cascade_t)(struct pt_regs *regs, void *data); - #ifdef CONFIG_MPIC_BROKEN_U3 /* Fixup table entry */ struct mpic_irq_fixup @@ -132,10 +129,19 @@ struct mpic_irq_fixup /* The instance data of a given MPIC */ struct mpic { + /* The device node of the interrupt controller */ + struct device_node *of_node; + + /* The remapper for this MPIC */ + struct irq_host *irqhost; + /* The "linux" controller struct */ - hw_irq_controller hc_irq; + struct irq_chip hc_irq; +#ifdef CONFIG_MPIC_BROKEN_U3 + struct irq_chip hc_ht_irq; +#endif #ifdef CONFIG_SMP - hw_irq_controller hc_ipi; + struct irq_chip hc_ipi; #endif const char *name; /* Flags */ @@ -144,20 +150,12 @@ struct mpic unsigned int isu_size; unsigned int isu_shift; unsigned int isu_mask; - /* Offset of irq vector numbers */ - unsigned int irq_offset; unsigned int irq_count; - /* Offset of ipi vector numbers */ - unsigned int ipi_offset; /* Number of sources */ unsigned int num_sources; /* Number of CPUs */ unsigned int num_cpus; - /* cascade handler */ - mpic_cascade_t cascade; - void *cascade_data; - unsigned int cascade_vec; - /* senses array */ + /* default senses array */ unsigned char *senses; unsigned int senses_count; @@ -213,14 +211,11 @@ struct mpic * The values in the array start at the first source of the MPIC, * that is senses[0] correspond to linux irq "irq_offset". */ -extern struct mpic *mpic_alloc(unsigned long phys_addr, +extern struct mpic *mpic_alloc(struct device_node *node, + unsigned long phys_addr, unsigned int flags, unsigned int isu_size, - unsigned int irq_offset, unsigned int irq_count, - unsigned int ipi_offset, - unsigned char *senses, - unsigned int senses_num, const char *name); /* Assign ISUs, to call before mpic_init() @@ -232,22 +227,27 @@ extern struct mpic *mpic_alloc(unsigned long phys_addr, extern void mpic_assign_isu(struct mpic *mpic, unsigned int isu_num, unsigned long phys_addr); +/* Set default sense codes + * + * @mpic: controller + * @senses: array of sense codes + * @count: size of above array + * + * Optionally provide an array (indexed on hardware interrupt numbers + * for this MPIC) of default sense codes for the chip. Those are linux + * sense codes IRQ_TYPE_* + * + * The driver gets ownership of the pointer, don't dispose of it or + * anything like that. __init only. + */ +extern void mpic_set_default_senses(struct mpic *mpic, u8 *senses, int count); + + /* Initialize the controller. After this has been called, none of the above * should be called again for this mpic */ extern void mpic_init(struct mpic *mpic); -/* Setup a cascade. Currently, only one cascade is supported this - * way, though you can always do a normal request_irq() and add - * other cascades this way. You should call this _after_ having - * added all the ISUs - * - * @irq_no: "linux" irq number of the cascade (that is offset'ed vector) - * @handler: cascade handler function - */ -extern void mpic_setup_cascade(unsigned int irq_no, mpic_cascade_t hanlder, - void *data); - /* * All of the following functions must only be used after the * ISUs have been assigned and the controller fully initialized @@ -284,9 +284,9 @@ extern void mpic_send_ipi(unsigned int ipi_no, unsigned int cpu_mask); void smp_mpic_message_pass(int target, int msg); /* Fetch interrupt from a given mpic */ -extern int mpic_get_one_irq(struct mpic *mpic, struct pt_regs *regs); +extern unsigned int mpic_get_one_irq(struct mpic *mpic, struct pt_regs *regs); /* This one gets to the primary mpic */ -extern int mpic_get_irq(struct pt_regs *regs); +extern unsigned int mpic_get_irq(struct pt_regs *regs); /* Set the EPIC clock ratio */ void mpic_set_clk_ratio(struct mpic *mpic, u32 clock_ratio); @@ -294,8 +294,5 @@ void mpic_set_clk_ratio(struct mpic *mpic, u32 clock_ratio); /* Enable/Disable EPIC serial interrupt mode */ void mpic_set_serial_int(struct mpic *mpic, int enable); -/* global mpic for pSeries */ -extern struct mpic *pSeries_mpic; - #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_MPIC_H */ diff --git a/include/asm-powerpc/percpu.h b/include/asm-powerpc/percpu.h index faa1fc703053..2f2e3024fa61 100644 --- a/include/asm-powerpc/percpu.h +++ b/include/asm-powerpc/percpu.h @@ -14,6 +14,7 @@ #define __per_cpu_offset(cpu) (paca[cpu].data_offset) #define __my_cpu_offset() get_paca()->data_offset +#define per_cpu_offset(x) (__per_cpu_offset(x)) /* Separate out the type, so (int[3], foo) works. */ #define DEFINE_PER_CPU(type, name) \ diff --git a/include/asm-powerpc/prom.h b/include/asm-powerpc/prom.h index 010d186d095b..b095a285c84b 100644 --- a/include/asm-powerpc/prom.h +++ b/include/asm-powerpc/prom.h @@ -64,11 +64,6 @@ struct boot_param_header typedef u32 phandle; typedef u32 ihandle; -struct interrupt_info { - int line; - int sense; /* +ve/-ve logic, edge or level, etc. */ -}; - struct property { char *name; int length; @@ -81,8 +76,6 @@ struct device_node { char *type; phandle node; phandle linux_phandle; - int n_intrs; - struct interrupt_info *intrs; char *full_name; struct property *properties; @@ -167,8 +160,8 @@ extern void unflatten_device_tree(void); extern void early_init_devtree(void *); extern int device_is_compatible(struct device_node *device, const char *); extern int machine_is_compatible(const char *compat); -extern unsigned char *get_property(struct device_node *node, const char *name, - int *lenp); +extern void *get_property(struct device_node *node, const char *name, + int *lenp); extern void print_properties(struct device_node *node); extern int prom_n_addr_cells(struct device_node* np); extern int prom_n_size_cells(struct device_node* np); @@ -204,6 +197,15 @@ extern int release_OF_resource(struct device_node* node, int index); */ +/* Helper to read a big number */ +static inline u64 of_read_number(u32 *cell, int size) +{ + u64 r = 0; + while (size--) + r = (r << 32) | *(cell++); + return r; +} + /* Translate an OF address block into a CPU physical address */ #define OF_BAD_ADDR ((u64)-1) @@ -240,5 +242,83 @@ extern void kdump_move_device_tree(void); /* CPU OF node matching */ struct device_node *of_get_cpu_node(int cpu, unsigned int *thread); + +/* + * OF interrupt mapping + */ + +/* This structure is returned when an interrupt is mapped. The controller + * field needs to be put() after use + */ + +#define OF_MAX_IRQ_SPEC 4 /* We handle specifiers of at most 4 cells */ + +struct of_irq { + struct device_node *controller; /* Interrupt controller node */ + u32 size; /* Specifier size */ + u32 specifier[OF_MAX_IRQ_SPEC]; /* Specifier copy */ +}; + +/*** + * of_irq_map_init - Initialize the irq remapper + * @flags: flags defining workarounds to enable + * + * Some machines have bugs in the device-tree which require certain workarounds + * to be applied. Call this before any interrupt mapping attempts to enable + * those workarounds. + */ +#define OF_IMAP_OLDWORLD_MAC 0x00000001 +#define OF_IMAP_NO_PHANDLE 0x00000002 + +extern void of_irq_map_init(unsigned int flags); + +/*** + * of_irq_map_raw - Low level interrupt tree parsing + * @parent: the device interrupt parent + * @intspec: interrupt specifier ("interrupts" property of the device) + * @addr: address specifier (start of "reg" property of the device) + * @out_irq: structure of_irq filled by this function + * + * Returns 0 on success and a negative number on error + * + * This function is a low-level interrupt tree walking function. It + * can be used to do a partial walk with synthetized reg and interrupts + * properties, for example when resolving PCI interrupts when no device + * node exist for the parent. + * + */ + +extern int of_irq_map_raw(struct device_node *parent, u32 *intspec, u32 *addr, + struct of_irq *out_irq); + + +/*** + * of_irq_map_one - Resolve an interrupt for a device + * @device: the device whose interrupt is to be resolved + * @index: index of the interrupt to resolve + * @out_irq: structure of_irq filled by this function + * + * This function resolves an interrupt, walking the tree, for a given + * device-tree node. It's the high level pendant to of_irq_map_raw(). + * It also implements the workarounds for OldWolrd Macs. + */ +extern int of_irq_map_one(struct device_node *device, int index, + struct of_irq *out_irq); + +/*** + * of_irq_map_pci - Resolve the interrupt for a PCI device + * @pdev: the device whose interrupt is to be resolved + * @out_irq: structure of_irq filled by this function + * + * This function resolves the PCI interrupt for a given PCI device. If a + * device-node exists for a given pci_dev, it will use normal OF tree + * walking. If not, it will implement standard swizzling and walk up the + * PCI tree until an device-node is found, at which point it will finish + * resolving using the OF tree walking. + */ +struct pci_dev; +extern int of_irq_map_pci(struct pci_dev *pdev, struct of_irq *out_irq); + + #endif /* __KERNEL__ */ #endif /* _POWERPC_PROM_H */ diff --git a/include/asm-powerpc/rwsem.h b/include/asm-powerpc/rwsem.h index 2c2fe9647595..e929145e1e46 100644 --- a/include/asm-powerpc/rwsem.h +++ b/include/asm-powerpc/rwsem.h @@ -28,24 +28,11 @@ struct rw_semaphore { #define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS) spinlock_t wait_lock; struct list_head wait_list; -#if RWSEM_DEBUG - int debug; -#endif }; -/* - * initialisation - */ -#if RWSEM_DEBUG -#define __RWSEM_DEBUG_INIT , 0 -#else -#define __RWSEM_DEBUG_INIT /* */ -#endif - #define __RWSEM_INITIALIZER(name) \ { RWSEM_UNLOCKED_VALUE, SPIN_LOCK_UNLOCKED, \ - LIST_HEAD_INIT((name).wait_list) \ - __RWSEM_DEBUG_INIT } + LIST_HEAD_INIT((name).wait_list) } #define DECLARE_RWSEM(name) \ struct rw_semaphore name = __RWSEM_INITIALIZER(name) @@ -60,9 +47,6 @@ static inline void init_rwsem(struct rw_semaphore *sem) sem->count = RWSEM_UNLOCKED_VALUE; spin_lock_init(&sem->wait_lock); INIT_LIST_HEAD(&sem->wait_list); -#if RWSEM_DEBUG - sem->debug = 0; -#endif } /* diff --git a/include/asm-powerpc/spu.h b/include/asm-powerpc/spu.h index 9609d3ee8798..c02d105d8294 100644 --- a/include/asm-powerpc/spu.h +++ b/include/asm-powerpc/spu.h @@ -117,6 +117,7 @@ struct spu { struct list_head sched_list; int number; int nid; + unsigned int irqs[3]; u32 isrc; u32 node; u64 flags; diff --git a/include/asm-s390/irqflags.h b/include/asm-s390/irqflags.h new file mode 100644 index 000000000000..65f4db627e7a --- /dev/null +++ b/include/asm-s390/irqflags.h @@ -0,0 +1,50 @@ +/* + * include/asm-s390/irqflags.h + * + * Copyright (C) IBM Corp. 2006 + * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com> + */ + +#ifndef __ASM_IRQFLAGS_H +#define __ASM_IRQFLAGS_H + +#ifdef __KERNEL__ + +/* interrupt control.. */ +#define raw_local_irq_enable() ({ \ + unsigned long __dummy; \ + __asm__ __volatile__ ( \ + "stosm 0(%1),0x03" \ + : "=m" (__dummy) : "a" (&__dummy) : "memory" ); \ + }) + +#define raw_local_irq_disable() ({ \ + unsigned long __flags; \ + __asm__ __volatile__ ( \ + "stnsm 0(%1),0xfc" : "=m" (__flags) : "a" (&__flags) ); \ + __flags; \ + }) + +#define raw_local_save_flags(x) \ + __asm__ __volatile__("stosm 0(%1),0" : "=m" (x) : "a" (&x), "m" (x) ) + +#define raw_local_irq_restore(x) \ + __asm__ __volatile__("ssm 0(%0)" : : "a" (&x), "m" (x) : "memory") + +#define raw_irqs_disabled() \ +({ \ + unsigned long flags; \ + local_save_flags(flags); \ + !((flags >> __FLAG_SHIFT) & 3); \ +}) + +static inline int raw_irqs_disabled_flags(unsigned long flags) +{ + return !((flags >> __FLAG_SHIFT) & 3); +} + +/* For spinlocks etc */ +#define raw_local_irq_save(x) ((x) = raw_local_irq_disable()) + +#endif /* __KERNEL__ */ +#endif /* __ASM_IRQFLAGS_H */ diff --git a/include/asm-s390/percpu.h b/include/asm-s390/percpu.h index d9a8cca9b653..28b3517e787c 100644 --- a/include/asm-s390/percpu.h +++ b/include/asm-s390/percpu.h @@ -42,6 +42,7 @@ extern unsigned long __per_cpu_offset[NR_CPUS]; #define __get_cpu_var(var) __reloc_hide(var,S390_lowcore.percpu_offset) #define __raw_get_cpu_var(var) __reloc_hide(var,S390_lowcore.percpu_offset) #define per_cpu(var,cpu) __reloc_hide(var,__per_cpu_offset[cpu]) +#define per_cpu_offset(x) (__per_cpu_offset[x]) /* A macro to avoid #include hell... */ #define percpu_modcopy(pcpudst, src, size) \ diff --git a/include/asm-s390/rwsem.h b/include/asm-s390/rwsem.h index 0422a085dd56..13ec16965150 100644 --- a/include/asm-s390/rwsem.h +++ b/include/asm-s390/rwsem.h @@ -61,6 +61,9 @@ struct rw_semaphore { signed long count; spinlock_t wait_lock; struct list_head wait_list; +#ifdef CONFIG_DEBUG_LOCK_ALLOC + struct lockdep_map dep_map; +#endif }; #ifndef __s390x__ @@ -80,8 +83,16 @@ struct rw_semaphore { /* * initialisation */ + +#ifdef CONFIG_DEBUG_LOCK_ALLOC +# define __RWSEM_DEP_MAP_INIT(lockname) , .dep_map = { .name = #lockname } +#else +# define __RWSEM_DEP_MAP_INIT(lockname) +#endif + #define __RWSEM_INITIALIZER(name) \ -{ RWSEM_UNLOCKED_VALUE, SPIN_LOCK_UNLOCKED, LIST_HEAD_INIT((name).wait_list) } +{ RWSEM_UNLOCKED_VALUE, SPIN_LOCK_UNLOCKED, LIST_HEAD_INIT((name).wait_list) \ + __RWSEM_DEP_MAP_INIT(name) } #define DECLARE_RWSEM(name) \ struct rw_semaphore name = __RWSEM_INITIALIZER(name) @@ -93,6 +104,17 @@ static inline void init_rwsem(struct rw_semaphore *sem) INIT_LIST_HEAD(&sem->wait_list); } +extern void __init_rwsem(struct rw_semaphore *sem, const char *name, + struct lock_class_key *key); + +#define init_rwsem(sem) \ +do { \ + static struct lock_class_key __key; \ + \ + __init_rwsem((sem), #sem, &__key); \ +} while (0) + + /* * lock for reading */ @@ -155,7 +177,7 @@ static inline int __down_read_trylock(struct rw_semaphore *sem) /* * lock for writing */ -static inline void __down_write(struct rw_semaphore *sem) +static inline void __down_write_nested(struct rw_semaphore *sem, int subclass) { signed long old, new, tmp; @@ -181,6 +203,11 @@ static inline void __down_write(struct rw_semaphore *sem) rwsem_down_write_failed(sem); } +static inline void __down_write(struct rw_semaphore *sem) +{ + __down_write_nested(sem, 0); +} + /* * trylock for writing -- returns 1 if successful, 0 if contention */ diff --git a/include/asm-s390/semaphore.h b/include/asm-s390/semaphore.h index 702cf436698c..32cdc69f39f4 100644 --- a/include/asm-s390/semaphore.h +++ b/include/asm-s390/semaphore.h @@ -37,7 +37,8 @@ struct semaphore { static inline void sema_init (struct semaphore *sem, int val) { - *sem = (struct semaphore) __SEMAPHORE_INITIALIZER((*sem),val); + atomic_set(&sem->count, val); + init_waitqueue_head(&sem->wait); } static inline void init_MUTEX (struct semaphore *sem) diff --git a/include/asm-s390/system.h b/include/asm-s390/system.h index 71a0732cd518..9ab186ffde23 100644 --- a/include/asm-s390/system.h +++ b/include/asm-s390/system.h @@ -301,34 +301,6 @@ __cmpxchg(volatile void *ptr, unsigned long old, unsigned long new, int size) #define set_mb(var, value) do { var = value; mb(); } while (0) #define set_wmb(var, value) do { var = value; wmb(); } while (0) -/* interrupt control.. */ -#define local_irq_enable() ({ \ - unsigned long __dummy; \ - __asm__ __volatile__ ( \ - "stosm 0(%1),0x03" \ - : "=m" (__dummy) : "a" (&__dummy) : "memory" ); \ - }) - -#define local_irq_disable() ({ \ - unsigned long __flags; \ - __asm__ __volatile__ ( \ - "stnsm 0(%1),0xfc" : "=m" (__flags) : "a" (&__flags) ); \ - __flags; \ - }) - -#define local_save_flags(x) \ - __asm__ __volatile__("stosm 0(%1),0" : "=m" (x) : "a" (&x), "m" (x) ) - -#define local_irq_restore(x) \ - __asm__ __volatile__("ssm 0(%0)" : : "a" (&x), "m" (x) : "memory") - -#define irqs_disabled() \ -({ \ - unsigned long flags; \ - local_save_flags(flags); \ - !((flags >> __FLAG_SHIFT) & 3); \ -}) - #ifdef __s390x__ #define __ctl_load(array, low, high) ({ \ @@ -442,8 +414,7 @@ __cmpxchg(volatile void *ptr, unsigned long old, unsigned long new, int size) }) #endif /* __s390x__ */ -/* For spinlocks etc */ -#define local_irq_save(x) ((x) = local_irq_disable()) +#include <linux/irqflags.h> /* * Use to set psw mask except for the first byte which @@ -482,4 +453,3 @@ extern void (*_machine_power_off)(void); #endif /* __KERNEL__ */ #endif - diff --git a/include/asm-sh/rwsem.h b/include/asm-sh/rwsem.h index 0262d3d1e5e0..9d2aea5e8488 100644 --- a/include/asm-sh/rwsem.h +++ b/include/asm-sh/rwsem.h @@ -25,24 +25,11 @@ struct rw_semaphore { #define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS) spinlock_t wait_lock; struct list_head wait_list; -#if RWSEM_DEBUG - int debug; -#endif }; -/* - * initialisation - */ -#if RWSEM_DEBUG -#define __RWSEM_DEBUG_INIT , 0 -#else -#define __RWSEM_DEBUG_INIT /* */ -#endif - #define __RWSEM_INITIALIZER(name) \ { RWSEM_UNLOCKED_VALUE, SPIN_LOCK_UNLOCKED, \ - LIST_HEAD_INIT((name).wait_list) \ - __RWSEM_DEBUG_INIT } + LIST_HEAD_INIT((name).wait_list) } #define DECLARE_RWSEM(name) \ struct rw_semaphore name = __RWSEM_INITIALIZER(name) @@ -57,9 +44,6 @@ static inline void init_rwsem(struct rw_semaphore *sem) sem->count = RWSEM_UNLOCKED_VALUE; spin_lock_init(&sem->wait_lock); INIT_LIST_HEAD(&sem->wait_list); -#if RWSEM_DEBUG - sem->debug = 0; -#endif } /* diff --git a/include/asm-sh/system.h b/include/asm-sh/system.h index b752e5cbb830..ce2e60664a86 100644 --- a/include/asm-sh/system.h +++ b/include/asm-sh/system.h @@ -12,7 +12,7 @@ */ #define switch_to(prev, next, last) do { \ - task_t *__last; \ + struct task_struct *__last; \ register unsigned long *__ts1 __asm__ ("r1") = &prev->thread.sp; \ register unsigned long *__ts2 __asm__ ("r2") = &prev->thread.pc; \ register unsigned long *__ts4 __asm__ ("r4") = (unsigned long *)prev; \ diff --git a/include/asm-sparc64/percpu.h b/include/asm-sparc64/percpu.h index a6ece06b83db..ced8cbde046d 100644 --- a/include/asm-sparc64/percpu.h +++ b/include/asm-sparc64/percpu.h @@ -11,6 +11,7 @@ extern unsigned long __per_cpu_base; extern unsigned long __per_cpu_shift; #define __per_cpu_offset(__cpu) \ (__per_cpu_base + ((unsigned long)(__cpu) << __per_cpu_shift)) +#define per_cpu_offset(x) (__per_cpu_offset(x)) /* Separate out the type, so (int[3], foo) works. */ #define DEFINE_PER_CPU(type, name) \ diff --git a/include/asm-x86_64/irqflags.h b/include/asm-x86_64/irqflags.h new file mode 100644 index 000000000000..cce6937e87c0 --- /dev/null +++ b/include/asm-x86_64/irqflags.h @@ -0,0 +1,141 @@ +/* + * include/asm-x86_64/irqflags.h + * + * IRQ flags handling + * + * This file gets included from lowlevel asm headers too, to provide + * wrapped versions of the local_irq_*() APIs, based on the + * raw_local_irq_*() functions from the lowlevel headers. + */ +#ifndef _ASM_IRQFLAGS_H +#define _ASM_IRQFLAGS_H + +#ifndef __ASSEMBLY__ +/* + * Interrupt control: + */ + +static inline unsigned long __raw_local_save_flags(void) +{ + unsigned long flags; + + __asm__ __volatile__( + "# __raw_save_flags\n\t" + "pushfq ; popq %q0" + : "=g" (flags) + : /* no input */ + : "memory" + ); + + return flags; +} + +#define raw_local_save_flags(flags) \ + do { (flags) = __raw_local_save_flags(); } while (0) + +static inline void raw_local_irq_restore(unsigned long flags) +{ + __asm__ __volatile__( + "pushq %0 ; popfq" + : /* no output */ + :"g" (flags) + :"memory", "cc" + ); +} + +#ifdef CONFIG_X86_VSMP + +/* + * Interrupt control for the VSMP architecture: + */ + +static inline void raw_local_irq_disable(void) +{ + unsigned long flags = __raw_local_save_flags(); + + raw_local_irq_restore((flags & ~(1 << 9)) | (1 << 18)); +} + +static inline void raw_local_irq_enable(void) +{ + unsigned long flags = __raw_local_save_flags(); + + raw_local_irq_restore((flags | (1 << 9)) & ~(1 << 18)); +} + +static inline int raw_irqs_disabled_flags(unsigned long flags) +{ + return !(flags & (1<<9)) || (flags & (1 << 18)); +} + +#else /* CONFIG_X86_VSMP */ + +static inline void raw_local_irq_disable(void) +{ + __asm__ __volatile__("cli" : : : "memory"); +} + +static inline void raw_local_irq_enable(void) +{ + __asm__ __volatile__("sti" : : : "memory"); +} + +static inline int raw_irqs_disabled_flags(unsigned long flags) +{ + return !(flags & (1 << 9)); +} + +#endif + +/* + * For spinlocks, etc.: + */ + +static inline unsigned long __raw_local_irq_save(void) +{ + unsigned long flags = __raw_local_save_flags(); + + raw_local_irq_disable(); + + return flags; +} + +#define raw_local_irq_save(flags) \ + do { (flags) = __raw_local_irq_save(); } while (0) + +static inline int raw_irqs_disabled(void) +{ + unsigned long flags = __raw_local_save_flags(); + + return raw_irqs_disabled_flags(flags); +} + +/* + * Used in the idle loop; sti takes one instruction cycle + * to complete: + */ +static inline void raw_safe_halt(void) +{ + __asm__ __volatile__("sti; hlt" : : : "memory"); +} + +/* + * Used when interrupts are already enabled or to + * shutdown the processor: + */ +static inline void halt(void) +{ + __asm__ __volatile__("hlt": : :"memory"); +} + +#else /* __ASSEMBLY__: */ +# ifdef CONFIG_TRACE_IRQFLAGS +# define TRACE_IRQS_ON call trace_hardirqs_on_thunk +# define TRACE_IRQS_OFF call trace_hardirqs_off_thunk +# else +# define TRACE_IRQS_ON +# define TRACE_IRQS_OFF +# endif +#endif + +#endif diff --git a/include/asm-x86_64/kdebug.h b/include/asm-x86_64/kdebug.h index cd52c7f33bca..2b0c088e2957 100644 --- a/include/asm-x86_64/kdebug.h +++ b/include/asm-x86_64/kdebug.h @@ -49,7 +49,7 @@ static inline int notify_die(enum die_val val, const char *str, return atomic_notifier_call_chain(&die_chain, val, &args); } -extern int printk_address(unsigned long address); +extern void printk_address(unsigned long address); extern void die(const char *,struct pt_regs *,long); extern void __die(const char *,struct pt_regs *,long); extern void show_registers(struct pt_regs *regs); diff --git a/include/asm-x86_64/percpu.h b/include/asm-x86_64/percpu.h index 549eb929b2c0..08dd9f9dda81 100644 --- a/include/asm-x86_64/percpu.h +++ b/include/asm-x86_64/percpu.h @@ -14,6 +14,8 @@ #define __per_cpu_offset(cpu) (cpu_pda(cpu)->data_offset) #define __my_cpu_offset() read_pda(data_offset) +#define per_cpu_offset(x) (__per_cpu_offset(x)) + /* Separate out the type, so (int[3], foo) works. */ #define DEFINE_PER_CPU(type, name) \ __attribute__((__section__(".data.percpu"))) __typeof__(type) per_cpu__##name diff --git a/include/asm-x86_64/system.h b/include/asm-x86_64/system.h index 68e559f3631c..f67f2873a922 100644 --- a/include/asm-x86_64/system.h +++ b/include/asm-x86_64/system.h @@ -244,43 +244,7 @@ static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old, #define warn_if_not_ulong(x) do { unsigned long foo; (void) (&(x) == &foo); } while (0) -/* interrupt control.. */ -#define local_save_flags(x) do { warn_if_not_ulong(x); __asm__ __volatile__("# save_flags \n\t pushfq ; popq %q0":"=g" (x): /* no input */ :"memory"); } while (0) -#define local_irq_restore(x) __asm__ __volatile__("# restore_flags \n\t pushq %0 ; popfq": /* no output */ :"g" (x):"memory", "cc") - -#ifdef CONFIG_X86_VSMP -/* Interrupt control for VSMP architecture */ -#define local_irq_disable() do { unsigned long flags; local_save_flags(flags); local_irq_restore((flags & ~(1 << 9)) | (1 << 18)); } while (0) -#define local_irq_enable() do { unsigned long flags; local_save_flags(flags); local_irq_restore((flags | (1 << 9)) & ~(1 << 18)); } while (0) - -#define irqs_disabled() \ -({ \ - unsigned long flags; \ - local_save_flags(flags); \ - (flags & (1<<18)) || !(flags & (1<<9)); \ -}) - -/* For spinlocks etc */ -#define local_irq_save(x) do { local_save_flags(x); local_irq_restore((x & ~(1 << 9)) | (1 << 18)); } while (0) -#else /* CONFIG_X86_VSMP */ -#define local_irq_disable() __asm__ __volatile__("cli": : :"memory") -#define local_irq_enable() __asm__ __volatile__("sti": : :"memory") - -#define irqs_disabled() \ -({ \ - unsigned long flags; \ - local_save_flags(flags); \ - !(flags & (1<<9)); \ -}) - -/* For spinlocks etc */ -#define local_irq_save(x) do { warn_if_not_ulong(x); __asm__ __volatile__("# local_irq_save \n\t pushfq ; popq %0 ; cli":"=g" (x): /* no input */ :"memory"); } while (0) -#endif - -/* used in the idle loop; sti takes one instruction cycle to complete */ -#define safe_halt() __asm__ __volatile__("sti; hlt": : :"memory") -/* used when interrupts are already enabled or to shutdown the processor */ -#define halt() __asm__ __volatile__("hlt": : :"memory") +#include <linux/irqflags.h> void cpu_idle_wait(void); diff --git a/include/asm-xtensa/rwsem.h b/include/asm-xtensa/rwsem.h index abcd86dc5ab9..0aad3a587551 100644 --- a/include/asm-xtensa/rwsem.h +++ b/include/asm-xtensa/rwsem.h @@ -31,24 +31,11 @@ struct rw_semaphore { #define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS) spinlock_t wait_lock; struct list_head wait_list; -#if RWSEM_DEBUG - int debug; -#endif }; -/* - * initialisation - */ -#if RWSEM_DEBUG -#define __RWSEM_DEBUG_INIT , 0 -#else -#define __RWSEM_DEBUG_INIT /* */ -#endif - #define __RWSEM_INITIALIZER(name) \ { RWSEM_UNLOCKED_VALUE, SPIN_LOCK_UNLOCKED, \ - LIST_HEAD_INIT((name).wait_list) \ - __RWSEM_DEBUG_INIT } + LIST_HEAD_INIT((name).wait_list) } #define DECLARE_RWSEM(name) \ struct rw_semaphore name = __RWSEM_INITIALIZER(name) @@ -63,9 +50,6 @@ static inline void init_rwsem(struct rw_semaphore *sem) sem->count = RWSEM_UNLOCKED_VALUE; spin_lock_init(&sem->wait_lock); INIT_LIST_HEAD(&sem->wait_list); -#if RWSEM_DEBUG - sem->debug = 0; -#endif } /* diff --git a/include/linux/completion.h b/include/linux/completion.h index 90663ad217f9..251c41e3ddd5 100644 --- a/include/linux/completion.h +++ b/include/linux/completion.h @@ -21,6 +21,18 @@ struct completion { #define DECLARE_COMPLETION(work) \ struct completion work = COMPLETION_INITIALIZER(work) +/* + * Lockdep needs to run a non-constant initializer for on-stack + * completions - so we use the _ONSTACK() variant for those that + * are on the kernel stack: + */ +#ifdef CONFIG_LOCKDEP +# define DECLARE_COMPLETION_ONSTACK(work) \ + struct completion work = ({ init_completion(&work); work; }) +#else +# define DECLARE_COMPLETION_ONSTACK(work) DECLARE_COMPLETION(work) +#endif + static inline void init_completion(struct completion *x) { x->done = 0; diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 0dd1610a94a9..471781ffeab1 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -114,6 +114,18 @@ struct dentry { unsigned char d_iname[DNAME_INLINE_LEN_MIN]; /* small names */ }; +/* + * dentry->d_lock spinlock nesting subclasses: + * + * 0: normal + * 1: nested + */ +enum dentry_d_lock_class +{ + DENTRY_D_LOCK_NORMAL, /* implicitly used by plain spin_lock() APIs. */ + DENTRY_D_LOCK_NESTED +}; + struct dentry_operations { int (*d_revalidate)(struct dentry *, struct nameidata *); int (*d_hash) (struct dentry *, struct qstr *); diff --git a/include/linux/debug_locks.h b/include/linux/debug_locks.h new file mode 100644 index 000000000000..6a7047851e48 --- /dev/null +++ b/include/linux/debug_locks.h @@ -0,0 +1,69 @@ +#ifndef __LINUX_DEBUG_LOCKING_H +#define __LINUX_DEBUG_LOCKING_H + +extern int debug_locks; +extern int debug_locks_silent; + +/* + * Generic 'turn off all lock debugging' function: + */ +extern int debug_locks_off(void); + +/* + * In the debug case we carry the caller's instruction pointer into + * other functions, but we dont want the function argument overhead + * in the nondebug case - hence these macros: + */ +#define _RET_IP_ (unsigned long)__builtin_return_address(0) +#define _THIS_IP_ ({ __label__ __here; __here: (unsigned long)&&__here; }) + +#define DEBUG_LOCKS_WARN_ON(c) \ +({ \ + int __ret = 0; \ + \ + if (unlikely(c)) { \ + if (debug_locks_off()) \ + WARN_ON(1); \ + __ret = 1; \ + } \ + __ret; \ +}) + +#ifdef CONFIG_SMP +# define SMP_DEBUG_LOCKS_WARN_ON(c) DEBUG_LOCKS_WARN_ON(c) +#else +# define SMP_DEBUG_LOCKS_WARN_ON(c) do { } while (0) +#endif + +#ifdef CONFIG_DEBUG_LOCKING_API_SELFTESTS + extern void locking_selftest(void); +#else +# define locking_selftest() do { } while (0) +#endif + +#ifdef CONFIG_LOCKDEP +extern void debug_show_all_locks(void); +extern void debug_show_held_locks(struct task_struct *task); +extern void debug_check_no_locks_freed(const void *from, unsigned long len); +extern void debug_check_no_locks_held(struct task_struct *task); +#else +static inline void debug_show_all_locks(void) +{ +} + +static inline void debug_show_held_locks(struct task_struct *task) +{ +} + +static inline void +debug_check_no_locks_freed(const void *from, unsigned long len) +{ +} + +static inline void +debug_check_no_locks_held(struct task_struct *task) +{ +} +#endif + +#endif diff --git a/include/linux/fs.h b/include/linux/fs.h index e04a5cfe874f..134b32068246 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -436,6 +436,21 @@ struct block_device { }; /* + * bdev->bd_mutex nesting subclasses for the lock validator: + * + * 0: normal + * 1: 'whole' + * 2: 'partition' + */ +enum bdev_bd_mutex_lock_class +{ + BD_MUTEX_NORMAL, + BD_MUTEX_WHOLE, + BD_MUTEX_PARTITION +}; + + +/* * Radix-tree tags, for tagging dirty and writeback pages within the pagecache * radix trees */ @@ -543,6 +558,25 @@ struct inode { }; /* + * inode->i_mutex nesting subclasses for the lock validator: + * + * 0: the object of the current VFS operation + * 1: parent + * 2: child/target + * 3: quota file + * + * The locking order between these classes is + * parent -> child -> normal -> quota + */ +enum inode_i_mutex_lock_class +{ + I_MUTEX_NORMAL, + I_MUTEX_PARENT, + I_MUTEX_CHILD, + I_MUTEX_QUOTA +}; + +/* * NOTE: in a 32bit arch with a preemptable kernel and * an UP compile the i_size_read/write must be atomic * with respect to the local cpu (unlike with preempt disabled), @@ -1276,6 +1310,8 @@ struct file_system_type { struct module *owner; struct file_system_type * next; struct list_head fs_supers; + struct lock_class_key s_lock_key; + struct lock_class_key s_umount_key; }; extern int get_sb_bdev(struct file_system_type *fs_type, @@ -1404,6 +1440,7 @@ extern void bd_set_size(struct block_device *, loff_t size); extern void bd_forget(struct inode *inode); extern void bdput(struct block_device *); extern struct block_device *open_by_devnum(dev_t, unsigned); +extern struct block_device *open_partition_by_devnum(dev_t, unsigned); extern const struct file_operations def_blk_fops; extern const struct address_space_operations def_blk_aops; extern const struct file_operations def_chr_fops; @@ -1414,6 +1451,7 @@ extern int blkdev_ioctl(struct inode *, struct file *, unsigned, unsigned long); extern long compat_blkdev_ioctl(struct file *, unsigned, unsigned long); extern int blkdev_get(struct block_device *, mode_t, unsigned); extern int blkdev_put(struct block_device *); +extern int blkdev_put_partition(struct block_device *); extern int bd_claim(struct block_device *, void *); extern void bd_release(struct block_device *); #ifdef CONFIG_SYSFS diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index 114ae583cca9..50d8b5744cf6 100644 --- a/include/linux/hardirq.h +++ b/include/linux/hardirq.h @@ -3,6 +3,7 @@ #include <linux/preempt.h> #include <linux/smp_lock.h> +#include <linux/lockdep.h> #include <asm/hardirq.h> #include <asm/system.h> @@ -86,9 +87,6 @@ extern void synchronize_irq(unsigned int irq); # define synchronize_irq(irq) barrier() #endif -#define nmi_enter() irq_enter() -#define nmi_exit() sub_preempt_count(HARDIRQ_OFFSET) - struct task_struct; #ifndef CONFIG_VIRT_CPU_ACCOUNTING @@ -97,12 +95,35 @@ static inline void account_system_vtime(struct task_struct *tsk) } #endif +/* + * It is safe to do non-atomic ops on ->hardirq_context, + * because NMI handlers may not preempt and the ops are + * always balanced, so the interrupted value of ->hardirq_context + * will always be restored. + */ #define irq_enter() \ do { \ account_system_vtime(current); \ add_preempt_count(HARDIRQ_OFFSET); \ + trace_hardirq_enter(); \ + } while (0) + +/* + * Exit irq context without processing softirqs: + */ +#define __irq_exit() \ + do { \ + trace_hardirq_exit(); \ + account_system_vtime(current); \ + sub_preempt_count(HARDIRQ_OFFSET); \ } while (0) +/* + * Exit irq context and process softirqs if needed: + */ extern void irq_exit(void); +#define nmi_enter() do { lockdep_off(); irq_enter(); } while (0) +#define nmi_exit() do { __irq_exit(); lockdep_on(); } while (0) + #endif /* LINUX_HARDIRQ_H */ diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 07d7305f131e..e4bccbcc2750 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -91,6 +91,7 @@ struct hrtimer_base { ktime_t (*get_softirq_time)(void); struct hrtimer *curr_timer; ktime_t softirq_time; + struct lock_class_key lock_key; }; /* diff --git a/include/linux/ide.h b/include/linux/ide.h index 285316c836b5..dc7abef10965 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -1359,7 +1359,7 @@ extern struct semaphore ide_cfg_sem; * ide_drive_t->hwif: constant, no locking */ -#define local_irq_set(flags) do { local_save_flags((flags)); local_irq_enable(); } while (0) +#define local_irq_set(flags) do { local_save_flags((flags)); local_irq_enable_in_hardirq(); } while (0) extern struct bus_type ide_bus_type; diff --git a/include/linux/idr.h b/include/linux/idr.h index f559a719dbe8..826803449db7 100644 --- a/include/linux/idr.h +++ b/include/linux/idr.h @@ -66,7 +66,7 @@ struct idr { .id_free = NULL, \ .layers = 0, \ .id_free_cnt = 0, \ - .lock = SPIN_LOCK_UNLOCKED, \ + .lock = __SPIN_LOCK_UNLOCKED(name.lock), \ } #define DEFINE_IDR(name) struct idr name = IDR_INIT(name) diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 3a256957fb56..60aac2cea0cf 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -3,6 +3,8 @@ #include <linux/file.h> #include <linux/rcupdate.h> +#include <linux/irqflags.h> +#include <linux/lockdep.h> #define INIT_FDTABLE \ { \ @@ -21,7 +23,7 @@ .count = ATOMIC_INIT(1), \ .fdt = &init_files.fdtab, \ .fdtab = INIT_FDTABLE, \ - .file_lock = SPIN_LOCK_UNLOCKED, \ + .file_lock = __SPIN_LOCK_UNLOCKED(init_task.file_lock), \ .next_fd = 0, \ .close_on_exec_init = { { 0, } }, \ .open_fds_init = { { 0, } }, \ @@ -36,7 +38,7 @@ .user_id = 0, \ .next = NULL, \ .wait = __WAIT_QUEUE_HEAD_INITIALIZER(name.wait), \ - .ctx_lock = SPIN_LOCK_UNLOCKED, \ + .ctx_lock = __SPIN_LOCK_UNLOCKED(name.ctx_lock), \ .reqs_active = 0U, \ .max_reqs = ~0U, \ } @@ -48,7 +50,7 @@ .mm_users = ATOMIC_INIT(2), \ .mm_count = ATOMIC_INIT(1), \ .mmap_sem = __RWSEM_INITIALIZER(name.mmap_sem), \ - .page_table_lock = SPIN_LOCK_UNLOCKED, \ + .page_table_lock = __SPIN_LOCK_UNLOCKED(name.page_table_lock), \ .mmlist = LIST_HEAD_INIT(name.mmlist), \ .cpu_vm_mask = CPU_MASK_ALL, \ } @@ -69,7 +71,7 @@ #define INIT_SIGHAND(sighand) { \ .count = ATOMIC_INIT(1), \ .action = { { { .sa_handler = NULL, } }, }, \ - .siglock = SPIN_LOCK_UNLOCKED, \ + .siglock = __SPIN_LOCK_UNLOCKED(sighand.siglock), \ } extern struct group_info init_groups; @@ -119,12 +121,13 @@ extern struct group_info init_groups; .list = LIST_HEAD_INIT(tsk.pending.list), \ .signal = {{0}}}, \ .blocked = {{0}}, \ - .alloc_lock = SPIN_LOCK_UNLOCKED, \ + .alloc_lock = __SPIN_LOCK_UNLOCKED(tsk.alloc_lock), \ .journal_info = NULL, \ .cpu_timers = INIT_CPU_TIMERS(tsk.cpu_timers), \ .fs_excl = ATOMIC_INIT(0), \ .pi_lock = SPIN_LOCK_UNLOCKED, \ - INIT_RT_MUTEXES(tsk) \ + INIT_TRACE_IRQFLAGS \ + INIT_LOCKDEP \ } diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index da3e0dbe61d4..d5afee95fd43 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -10,6 +10,7 @@ #include <linux/irqreturn.h> #include <linux/hardirq.h> #include <linux/sched.h> +#include <linux/irqflags.h> #include <asm/atomic.h> #include <asm/ptrace.h> #include <asm/system.h> @@ -80,12 +81,64 @@ extern int request_irq(unsigned int, unsigned long, const char *, void *); extern void free_irq(unsigned int, void *); +/* + * On lockdep we dont want to enable hardirqs in hardirq + * context. Use local_irq_enable_in_hardirq() to annotate + * kernel code that has to do this nevertheless (pretty much + * the only valid case is for old/broken hardware that is + * insanely slow). + * + * NOTE: in theory this might break fragile code that relies + * on hardirq delivery - in practice we dont seem to have such + * places left. So the only effect should be slightly increased + * irqs-off latencies. + */ +#ifdef CONFIG_LOCKDEP +# define local_irq_enable_in_hardirq() do { } while (0) +#else +# define local_irq_enable_in_hardirq() local_irq_enable() +#endif #ifdef CONFIG_GENERIC_HARDIRQS extern void disable_irq_nosync(unsigned int irq); extern void disable_irq(unsigned int irq); extern void enable_irq(unsigned int irq); +/* + * Special lockdep variants of irq disabling/enabling. + * These should be used for locking constructs that + * know that a particular irq context which is disabled, + * and which is the only irq-context user of a lock, + * that it's safe to take the lock in the irq-disabled + * section without disabling hardirqs. + * + * On !CONFIG_LOCKDEP they are equivalent to the normal + * irq disable/enable methods. + */ +static inline void disable_irq_nosync_lockdep(unsigned int irq) +{ + disable_irq_nosync(irq); +#ifdef CONFIG_LOCKDEP + local_irq_disable(); +#endif +} + +static inline void disable_irq_lockdep(unsigned int irq) +{ + disable_irq(irq); +#ifdef CONFIG_LOCKDEP + local_irq_disable(); +#endif +} + +static inline void enable_irq_lockdep(unsigned int irq) +{ +#ifdef CONFIG_LOCKDEP + local_irq_enable(); +#endif + enable_irq(irq); +} + /* IRQ wakeup (PM) control: */ extern int set_irq_wake(unsigned int irq, unsigned int on); @@ -99,7 +152,19 @@ static inline int disable_irq_wake(unsigned int irq) return set_irq_wake(irq, 0); } -#endif +#else /* !CONFIG_GENERIC_HARDIRQS */ +/* + * NOTE: non-genirq architectures, if they want to support the lock + * validator need to define the methods below in their asm/irq.h + * files, under an #ifdef CONFIG_LOCKDEP section. + */ +# ifndef CONFIG_LOCKDEP +# define disable_irq_nosync_lockdep(irq) disable_irq_nosync(irq) +# define disable_irq_lockdep(irq) disable_irq(irq) +# define enable_irq_lockdep(irq) enable_irq(irq) +# endif + +#endif /* CONFIG_GENERIC_HARDIRQS */ #ifndef __ARCH_SET_SOFTIRQ_PENDING #define set_softirq_pending(x) (local_softirq_pending() = (x)) @@ -135,13 +200,11 @@ static inline void __deprecated save_and_cli(unsigned long *x) #define save_and_cli(x) save_and_cli(&x) #endif /* CONFIG_SMP */ -/* SoftIRQ primitives. */ -#define local_bh_disable() \ - do { add_preempt_count(SOFTIRQ_OFFSET); barrier(); } while (0) -#define __local_bh_enable() \ - do { barrier(); sub_preempt_count(SOFTIRQ_OFFSET); } while (0) - +extern void local_bh_disable(void); +extern void __local_bh_enable(void); +extern void _local_bh_enable(void); extern void local_bh_enable(void); +extern void local_bh_enable_ip(unsigned long ip); /* PLEASE, avoid to allocate new softirqs, if you need not _really_ high frequency threaded job scheduling. For almost all the purposes diff --git a/include/linux/ioport.h b/include/linux/ioport.h index 87a9fc039b47..5612dfeeae50 100644 --- a/include/linux/ioport.h +++ b/include/linux/ioport.h @@ -55,6 +55,7 @@ struct resource_list { #define IORESOURCE_IRQ_LOWEDGE (1<<1) #define IORESOURCE_IRQ_HIGHLEVEL (1<<2) #define IORESOURCE_IRQ_LOWLEVEL (1<<3) +#define IORESOURCE_IRQ_SHAREABLE (1<<4) /* ISA PnP DMA specific bits (IORESOURCE_BITS) */ #define IORESOURCE_DMA_TYPE_MASK (3<<0) diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h new file mode 100644 index 000000000000..412e025bc5c7 --- /dev/null +++ b/include/linux/irqflags.h @@ -0,0 +1,96 @@ +/* + * include/linux/irqflags.h + * + * IRQ flags tracing: follow the state of the hardirq and softirq flags and + * provide callbacks for transitions between ON and OFF states. + * + * This file gets included from lowlevel asm headers too, to provide + * wrapped versions of the local_irq_*() APIs, based on the + * raw_local_irq_*() macros from the lowlevel headers. + */ +#ifndef _LINUX_TRACE_IRQFLAGS_H +#define _LINUX_TRACE_IRQFLAGS_H + +#ifdef CONFIG_TRACE_IRQFLAGS + extern void trace_hardirqs_on(void); + extern void trace_hardirqs_off(void); + extern void trace_softirqs_on(unsigned long ip); + extern void trace_softirqs_off(unsigned long ip); +# define trace_hardirq_context(p) ((p)->hardirq_context) +# define trace_softirq_context(p) ((p)->softirq_context) +# define trace_hardirqs_enabled(p) ((p)->hardirqs_enabled) +# define trace_softirqs_enabled(p) ((p)->softirqs_enabled) +# define trace_hardirq_enter() do { current->hardirq_context++; } while (0) +# define trace_hardirq_exit() do { current->hardirq_context--; } while (0) +# define trace_softirq_enter() do { current->softirq_context++; } while (0) +# define trace_softirq_exit() do { current->softirq_context--; } while (0) +# define INIT_TRACE_IRQFLAGS .softirqs_enabled = 1, +#else +# define trace_hardirqs_on() do { } while (0) +# define trace_hardirqs_off() do { } while (0) +# define trace_softirqs_on(ip) do { } while (0) +# define trace_softirqs_off(ip) do { } while (0) +# define trace_hardirq_context(p) 0 +# define trace_softirq_context(p) 0 +# define trace_hardirqs_enabled(p) 0 +# define trace_softirqs_enabled(p) 0 +# define trace_hardirq_enter() do { } while (0) +# define trace_hardirq_exit() do { } while (0) +# define trace_softirq_enter() do { } while (0) +# define trace_softirq_exit() do { } while (0) +# define INIT_TRACE_IRQFLAGS +#endif + +#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT + +#include <asm/irqflags.h> + +#define local_irq_enable() \ + do { trace_hardirqs_on(); raw_local_irq_enable(); } while (0) +#define local_irq_disable() \ + do { raw_local_irq_disable(); trace_hardirqs_off(); } while (0) +#define local_irq_save(flags) \ + do { raw_local_irq_save(flags); trace_hardirqs_off(); } while (0) + +#define local_irq_restore(flags) \ + do { \ + if (raw_irqs_disabled_flags(flags)) { \ + raw_local_irq_restore(flags); \ + trace_hardirqs_off(); \ + } else { \ + trace_hardirqs_on(); \ + raw_local_irq_restore(flags); \ + } \ + } while (0) +#else /* !CONFIG_TRACE_IRQFLAGS_SUPPORT */ +/* + * The local_irq_*() APIs are equal to the raw_local_irq*() + * if !TRACE_IRQFLAGS. + */ +# define raw_local_irq_disable() local_irq_disable() +# define raw_local_irq_enable() local_irq_enable() +# define raw_local_irq_save(flags) local_irq_save(flags) +# define raw_local_irq_restore(flags) local_irq_restore(flags) +#endif /* CONFIG_TRACE_IRQFLAGS_SUPPORT */ + +#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT +#define safe_halt() \ + do { \ + trace_hardirqs_on(); \ + raw_safe_halt(); \ + } while (0) + +#define local_save_flags(flags) raw_local_save_flags(flags) + +#define irqs_disabled() \ +({ \ + unsigned long flags; \ + \ + raw_local_save_flags(flags); \ + raw_irqs_disabled_flags(flags); \ +}) + +#define irqs_disabled_flags(flags) raw_irqs_disabled_flags(flags) +#endif /* CONFIG_X86 */ + +#endif diff --git a/include/linux/kallsyms.h b/include/linux/kallsyms.h index 54e2549f96ba..849043ce4ed6 100644 --- a/include/linux/kallsyms.h +++ b/include/linux/kallsyms.h @@ -57,10 +57,25 @@ do { \ #define print_fn_descriptor_symbol(fmt, addr) print_symbol(fmt, addr) #endif -#define print_symbol(fmt, addr) \ -do { \ - __check_printsym_format(fmt, ""); \ - __print_symbol(fmt, addr); \ +static inline void print_symbol(const char *fmt, unsigned long addr) +{ + __check_printsym_format(fmt, ""); + __print_symbol(fmt, (unsigned long) + __builtin_extract_return_addr((void *)addr)); +} + +#ifndef CONFIG_64BIT +#define print_ip_sym(ip) \ +do { \ + printk("[<%08lx>]", ip); \ + print_symbol(" %s\n", ip); \ } while(0) +#else +#define print_ip_sym(ip) \ +do { \ + printk("[<%016lx>]", ip); \ + print_symbol(" %s\n", ip); \ +} while(0) +#endif #endif /*_LINUX_KALLSYMS_H*/ diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h new file mode 100644 index 000000000000..316e0fb8d7b1 --- /dev/null +++ b/include/linux/lockdep.h @@ -0,0 +1,353 @@ +/* + * Runtime locking correctness validator + * + * Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com> + * + * see Documentation/lockdep-design.txt for more details. + */ +#ifndef __LINUX_LOCKDEP_H +#define __LINUX_LOCKDEP_H + +#include <linux/linkage.h> +#include <linux/list.h> +#include <linux/debug_locks.h> +#include <linux/stacktrace.h> + +#ifdef CONFIG_LOCKDEP + +/* + * Lock-class usage-state bits: + */ +enum lock_usage_bit +{ + LOCK_USED = 0, + LOCK_USED_IN_HARDIRQ, + LOCK_USED_IN_SOFTIRQ, + LOCK_ENABLED_SOFTIRQS, + LOCK_ENABLED_HARDIRQS, + LOCK_USED_IN_HARDIRQ_READ, + LOCK_USED_IN_SOFTIRQ_READ, + LOCK_ENABLED_SOFTIRQS_READ, + LOCK_ENABLED_HARDIRQS_READ, + LOCK_USAGE_STATES +}; + +/* + * Usage-state bitmasks: + */ +#define LOCKF_USED (1 << LOCK_USED) +#define LOCKF_USED_IN_HARDIRQ (1 << LOCK_USED_IN_HARDIRQ) +#define LOCKF_USED_IN_SOFTIRQ (1 << LOCK_USED_IN_SOFTIRQ) +#define LOCKF_ENABLED_HARDIRQS (1 << LOCK_ENABLED_HARDIRQS) +#define LOCKF_ENABLED_SOFTIRQS (1 << LOCK_ENABLED_SOFTIRQS) + +#define LOCKF_ENABLED_IRQS (LOCKF_ENABLED_HARDIRQS | LOCKF_ENABLED_SOFTIRQS) +#define LOCKF_USED_IN_IRQ (LOCKF_USED_IN_HARDIRQ | LOCKF_USED_IN_SOFTIRQ) + +#define LOCKF_USED_IN_HARDIRQ_READ (1 << LOCK_USED_IN_HARDIRQ_READ) +#define LOCKF_USED_IN_SOFTIRQ_READ (1 << LOCK_USED_IN_SOFTIRQ_READ) +#define LOCKF_ENABLED_HARDIRQS_READ (1 << LOCK_ENABLED_HARDIRQS_READ) +#define LOCKF_ENABLED_SOFTIRQS_READ (1 << LOCK_ENABLED_SOFTIRQS_READ) + +#define LOCKF_ENABLED_IRQS_READ \ + (LOCKF_ENABLED_HARDIRQS_READ | LOCKF_ENABLED_SOFTIRQS_READ) +#define LOCKF_USED_IN_IRQ_READ \ + (LOCKF_USED_IN_HARDIRQ_READ | LOCKF_USED_IN_SOFTIRQ_READ) + +#define MAX_LOCKDEP_SUBCLASSES 8UL + +/* + * Lock-classes are keyed via unique addresses, by embedding the + * lockclass-key into the kernel (or module) .data section. (For + * static locks we use the lock address itself as the key.) + */ +struct lockdep_subclass_key { + char __one_byte; +} __attribute__ ((__packed__)); + +struct lock_class_key { + struct lockdep_subclass_key subkeys[MAX_LOCKDEP_SUBCLASSES]; +}; + +/* + * The lock-class itself: + */ +struct lock_class { + /* + * class-hash: + */ + struct list_head hash_entry; + + /* + * global list of all lock-classes: + */ + struct list_head lock_entry; + + struct lockdep_subclass_key *key; + unsigned int subclass; + + /* + * IRQ/softirq usage tracking bits: + */ + unsigned long usage_mask; + struct stack_trace usage_traces[LOCK_USAGE_STATES]; + + /* + * These fields represent a directed graph of lock dependencies, + * to every node we attach a list of "forward" and a list of + * "backward" graph nodes. + */ + struct list_head locks_after, locks_before; + + /* + * Generation counter, when doing certain classes of graph walking, + * to ensure that we check one node only once: + */ + unsigned int version; + + /* + * Statistics counter: + */ + unsigned long ops; + + const char *name; + int name_version; +}; + +/* + * Map the lock object (the lock instance) to the lock-class object. + * This is embedded into specific lock instances: + */ +struct lockdep_map { + struct lock_class_key *key; + struct lock_class *class[MAX_LOCKDEP_SUBCLASSES]; + const char *name; +}; + +/* + * Every lock has a list of other locks that were taken after it. + * We only grow the list, never remove from it: + */ +struct lock_list { + struct list_head entry; + struct lock_class *class; + struct stack_trace trace; +}; + +/* + * We record lock dependency chains, so that we can cache them: + */ +struct lock_chain { + struct list_head entry; + u64 chain_key; +}; + +struct held_lock { + /* + * One-way hash of the dependency chain up to this point. We + * hash the hashes step by step as the dependency chain grows. + * + * We use it for dependency-caching and we skip detection + * passes and dependency-updates if there is a cache-hit, so + * it is absolutely critical for 100% coverage of the validator + * to have a unique key value for every unique dependency path + * that can occur in the system, to make a unique hash value + * as likely as possible - hence the 64-bit width. + * + * The task struct holds the current hash value (initialized + * with zero), here we store the previous hash value: + */ + u64 prev_chain_key; + struct lock_class *class; + unsigned long acquire_ip; + struct lockdep_map *instance; + + /* + * The lock-stack is unified in that the lock chains of interrupt + * contexts nest ontop of process context chains, but we 'separate' + * the hashes by starting with 0 if we cross into an interrupt + * context, and we also keep do not add cross-context lock + * dependencies - the lock usage graph walking covers that area + * anyway, and we'd just unnecessarily increase the number of + * dependencies otherwise. [Note: hardirq and softirq contexts + * are separated from each other too.] + * + * The following field is used to detect when we cross into an + * interrupt context: + */ + int irq_context; + int trylock; + int read; + int check; + int hardirqs_off; +}; + +/* + * Initialization, self-test and debugging-output methods: + */ +extern void lockdep_init(void); +extern void lockdep_info(void); +extern void lockdep_reset(void); +extern void lockdep_reset_lock(struct lockdep_map *lock); +extern void lockdep_free_key_range(void *start, unsigned long size); + +extern void lockdep_off(void); +extern void lockdep_on(void); +extern int lockdep_internal(void); + +/* + * These methods are used by specific locking variants (spinlocks, + * rwlocks, mutexes and rwsems) to pass init/acquire/release events + * to lockdep: + */ + +extern void lockdep_init_map(struct lockdep_map *lock, const char *name, + struct lock_class_key *key); + +/* + * Reinitialize a lock key - for cases where there is special locking or + * special initialization of locks so that the validator gets the scope + * of dependencies wrong: they are either too broad (they need a class-split) + * or they are too narrow (they suffer from a false class-split): + */ +#define lockdep_set_class(lock, key) \ + lockdep_init_map(&(lock)->dep_map, #key, key) +#define lockdep_set_class_and_name(lock, key, name) \ + lockdep_init_map(&(lock)->dep_map, name, key) + +/* + * Acquire a lock. + * + * Values for "read": + * + * 0: exclusive (write) acquire + * 1: read-acquire (no recursion allowed) + * 2: read-acquire with same-instance recursion allowed + * + * Values for check: + * + * 0: disabled + * 1: simple checks (freeing, held-at-exit-time, etc.) + * 2: full validation + */ +extern void lock_acquire(struct lockdep_map *lock, unsigned int subclass, + int trylock, int read, int check, unsigned long ip); + +extern void lock_release(struct lockdep_map *lock, int nested, + unsigned long ip); + +# define INIT_LOCKDEP .lockdep_recursion = 0, + +#else /* !LOCKDEP */ + +static inline void lockdep_off(void) +{ +} + +static inline void lockdep_on(void) +{ +} + +static inline int lockdep_internal(void) +{ + return 0; +} + +# define lock_acquire(l, s, t, r, c, i) do { } while (0) +# define lock_release(l, n, i) do { } while (0) +# define lockdep_init() do { } while (0) +# define lockdep_info() do { } while (0) +# define lockdep_init_map(lock, name, key) do { (void)(key); } while (0) +# define lockdep_set_class(lock, key) do { (void)(key); } while (0) +# define lockdep_set_class_and_name(lock, key, name) \ + do { (void)(key); } while (0) +# define INIT_LOCKDEP +# define lockdep_reset() do { debug_locks = 1; } while (0) +# define lockdep_free_key_range(start, size) do { } while (0) +/* + * The class key takes no space if lockdep is disabled: + */ +struct lock_class_key { }; +#endif /* !LOCKDEP */ + +#if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_GENERIC_HARDIRQS) +extern void early_init_irq_lock_class(void); +#else +# define early_init_irq_lock_class() do { } while (0) +#endif + +#ifdef CONFIG_TRACE_IRQFLAGS +extern void early_boot_irqs_off(void); +extern void early_boot_irqs_on(void); +#else +# define early_boot_irqs_off() do { } while (0) +# define early_boot_irqs_on() do { } while (0) +#endif + +/* + * For trivial one-depth nesting of a lock-class, the following + * global define can be used. (Subsystems with multiple levels + * of nesting should define their own lock-nesting subclasses.) + */ +#define SINGLE_DEPTH_NESTING 1 + +/* + * Map the dependency ops to NOP or to real lockdep ops, depending + * on the per lock-class debug mode: + */ + +#ifdef CONFIG_DEBUG_LOCK_ALLOC +# ifdef CONFIG_PROVE_LOCKING +# define spin_acquire(l, s, t, i) lock_acquire(l, s, t, 0, 2, i) +# else +# define spin_acquire(l, s, t, i) lock_acquire(l, s, t, 0, 1, i) +# endif +# define spin_release(l, n, i) lock_release(l, n, i) +#else +# define spin_acquire(l, s, t, i) do { } while (0) +# define spin_release(l, n, i) do { } while (0) +#endif + +#ifdef CONFIG_DEBUG_LOCK_ALLOC +# ifdef CONFIG_PROVE_LOCKING +# define rwlock_acquire(l, s, t, i) lock_acquire(l, s, t, 0, 2, i) +# define rwlock_acquire_read(l, s, t, i) lock_acquire(l, s, t, 2, 2, i) +# else +# define rwlock_acquire(l, s, t, i) lock_acquire(l, s, t, 0, 1, i) +# define rwlock_acquire_read(l, s, t, i) lock_acquire(l, s, t, 2, 1, i) +# endif +# define rwlock_release(l, n, i) lock_release(l, n, i) +#else +# define rwlock_acquire(l, s, t, i) do { } while (0) +# define rwlock_acquire_read(l, s, t, i) do { } while (0) +# define rwlock_release(l, n, i) do { } while (0) +#endif + +#ifdef CONFIG_DEBUG_LOCK_ALLOC +# ifdef CONFIG_PROVE_LOCKING +# define mutex_acquire(l, s, t, i) lock_acquire(l, s, t, 0, 2, i) +# else +# define mutex_acquire(l, s, t, i) lock_acquire(l, s, t, 0, 1, i) +# endif +# define mutex_release(l, n, i) lock_release(l, n, i) +#else +# define mutex_acquire(l, s, t, i) do { } while (0) +# define mutex_release(l, n, i) do { } while (0) +#endif + +#ifdef CONFIG_DEBUG_LOCK_ALLOC +# ifdef CONFIG_PROVE_LOCKING +# define rwsem_acquire(l, s, t, i) lock_acquire(l, s, t, 0, 2, i) +# define rwsem_acquire_read(l, s, t, i) lock_acquire(l, s, t, 1, 2, i) +# else +# define rwsem_acquire(l, s, t, i) lock_acquire(l, s, t, 0, 1, i) +# define rwsem_acquire_read(l, s, t, i) lock_acquire(l, s, t, 1, 1, i) +# endif +# define rwsem_release(l, n, i) lock_release(l, n, i) +#else +# define rwsem_acquire(l, s, t, i) do { } while (0) +# define rwsem_acquire_read(l, s, t, i) do { } while (0) +# define rwsem_release(l, n, i) do { } while (0) +#endif + +#endif /* __LINUX_LOCKDEP_H */ diff --git a/include/linux/mm.h b/include/linux/mm.h index 75179529e399..990957e0929f 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -14,6 +14,7 @@ #include <linux/prio_tree.h> #include <linux/fs.h> #include <linux/mutex.h> +#include <linux/debug_locks.h> struct mempolicy; struct anon_vma; @@ -1034,13 +1035,6 @@ static inline void vm_stat_account(struct mm_struct *mm, } #endif /* CONFIG_PROC_FS */ -static inline void -debug_check_no_locks_freed(const void *from, unsigned long len) -{ - mutex_debug_check_no_locks_freed(from, len); - rt_mutex_debug_check_no_locks_freed(from, len); -} - #ifndef CONFIG_DEBUG_PAGEALLOC static inline void kernel_map_pages(struct page *page, int numpages, int enable) diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 27e748eb72b0..656b588a9f96 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -150,6 +150,10 @@ struct zone { unsigned long lowmem_reserve[MAX_NR_ZONES]; #ifdef CONFIG_NUMA + /* + * zone reclaim becomes active if more unmapped pages exist. + */ + unsigned long min_unmapped_ratio; struct per_cpu_pageset *pageset[NR_CPUS]; #else struct per_cpu_pageset pageset[NR_CPUS]; @@ -414,6 +418,8 @@ int lowmem_reserve_ratio_sysctl_handler(struct ctl_table *, int, struct file *, void __user *, size_t *, loff_t *); int percpu_pagelist_fraction_sysctl_handler(struct ctl_table *, int, struct file *, void __user *, size_t *, loff_t *); +int sysctl_min_unmapped_ratio_sysctl_handler(struct ctl_table *, int, + struct file *, void __user *, size_t *, loff_t *); #include <linux/topology.h> /* Returns the number of the current Node. */ diff --git a/include/linux/module.h b/include/linux/module.h index 9e9dc7c24d95..d06c74fb8c26 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -358,6 +358,7 @@ static inline int module_is_live(struct module *mod) /* Is this address in a module? (second is with no locks, for oops) */ struct module *module_text_address(unsigned long addr); struct module *__module_text_address(unsigned long addr); +int is_module_address(unsigned long addr); /* Returns module and fills in value, defined and namebuf, or NULL if symnum out of range. */ @@ -496,6 +497,11 @@ static inline struct module *__module_text_address(unsigned long addr) return NULL; } +static inline int is_module_address(unsigned long addr) +{ + return 0; +} + /* Get/put a kernel symbol (calls should be symmetric) */ #define symbol_get(x) ({ extern typeof(x) x __attribute__((weak)); &(x); }) #define symbol_put(x) do { } while(0) diff --git a/include/linux/mutex-debug.h b/include/linux/mutex-debug.h index 8b5769f00467..2537285e1064 100644 --- a/include/linux/mutex-debug.h +++ b/include/linux/mutex-debug.h @@ -2,22 +2,22 @@ #define __LINUX_MUTEX_DEBUG_H #include <linux/linkage.h> +#include <linux/lockdep.h> /* * Mutexes - debugging helpers: */ -#define __DEBUG_MUTEX_INITIALIZER(lockname) \ - , .held_list = LIST_HEAD_INIT(lockname.held_list), \ - .name = #lockname , .magic = &lockname +#define __DEBUG_MUTEX_INITIALIZER(lockname) \ + , .magic = &lockname -#define mutex_init(sem) __mutex_init(sem, __FUNCTION__) +#define mutex_init(mutex) \ +do { \ + static struct lock_class_key __key; \ + \ + __mutex_init((mutex), #mutex, &__key); \ +} while (0) extern void FASTCALL(mutex_destroy(struct mutex *lock)); -extern void mutex_debug_show_all_locks(void); -extern void mutex_debug_show_held_locks(struct task_struct *filter); -extern void mutex_debug_check_no_locks_held(struct task_struct *task); -extern void mutex_debug_check_no_locks_freed(const void *from, unsigned long len); - #endif diff --git a/include/linux/mutex.h b/include/linux/mutex.h index f1ac507fa20d..27c48daa3183 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -13,6 +13,7 @@ #include <linux/list.h> #include <linux/spinlock_types.h> #include <linux/linkage.h> +#include <linux/lockdep.h> #include <asm/atomic.h> @@ -50,11 +51,12 @@ struct mutex { struct list_head wait_list; #ifdef CONFIG_DEBUG_MUTEXES struct thread_info *owner; - struct list_head held_list; - unsigned long acquire_ip; const char *name; void *magic; #endif +#ifdef CONFIG_DEBUG_LOCK_ALLOC + struct lockdep_map dep_map; +#endif }; /* @@ -74,24 +76,34 @@ struct mutex_waiter { # include <linux/mutex-debug.h> #else # define __DEBUG_MUTEX_INITIALIZER(lockname) -# define mutex_init(mutex) __mutex_init(mutex, NULL) +# define mutex_init(mutex) \ +do { \ + static struct lock_class_key __key; \ + \ + __mutex_init((mutex), #mutex, &__key); \ +} while (0) # define mutex_destroy(mutex) do { } while (0) -# define mutex_debug_show_all_locks() do { } while (0) -# define mutex_debug_show_held_locks(p) do { } while (0) -# define mutex_debug_check_no_locks_held(task) do { } while (0) -# define mutex_debug_check_no_locks_freed(from, len) do { } while (0) +#endif + +#ifdef CONFIG_DEBUG_LOCK_ALLOC +# define __DEP_MAP_MUTEX_INITIALIZER(lockname) \ + , .dep_map = { .name = #lockname } +#else +# define __DEP_MAP_MUTEX_INITIALIZER(lockname) #endif #define __MUTEX_INITIALIZER(lockname) \ { .count = ATOMIC_INIT(1) \ , .wait_lock = SPIN_LOCK_UNLOCKED \ , .wait_list = LIST_HEAD_INIT(lockname.wait_list) \ - __DEBUG_MUTEX_INITIALIZER(lockname) } + __DEBUG_MUTEX_INITIALIZER(lockname) \ + __DEP_MAP_MUTEX_INITIALIZER(lockname) } #define DEFINE_MUTEX(mutexname) \ struct mutex mutexname = __MUTEX_INITIALIZER(mutexname) -extern void fastcall __mutex_init(struct mutex *lock, const char *name); +extern void __mutex_init(struct mutex *lock, const char *name, + struct lock_class_key *key); /*** * mutex_is_locked - is the mutex locked @@ -110,6 +122,13 @@ static inline int fastcall mutex_is_locked(struct mutex *lock) */ extern void fastcall mutex_lock(struct mutex *lock); extern int fastcall mutex_lock_interruptible(struct mutex *lock); + +#ifdef CONFIG_DEBUG_LOCK_ALLOC +extern void mutex_lock_nested(struct mutex *lock, unsigned int subclass); +#else +# define mutex_lock_nested(lock, subclass) mutex_lock(lock) +#endif + /* * NOTE: mutex_trylock() follows the spin_trylock() convention, * not the down_trylock() convention! diff --git a/include/linux/notifier.h b/include/linux/notifier.h index 51dbab9710c7..7ff386a6ae87 100644 --- a/include/linux/notifier.h +++ b/include/linux/notifier.h @@ -65,7 +65,7 @@ struct raw_notifier_head { } while (0) #define ATOMIC_NOTIFIER_INIT(name) { \ - .lock = SPIN_LOCK_UNLOCKED, \ + .lock = __SPIN_LOCK_UNLOCKED(name.lock), \ .head = NULL } #define BLOCKING_NOTIFIER_INIT(name) { \ .rwsem = __RWSEM_INITIALIZER((name).rwsem), \ diff --git a/include/linux/rtmutex.h b/include/linux/rtmutex.h index fa4a3b82ba70..5d41dee82f80 100644 --- a/include/linux/rtmutex.h +++ b/include/linux/rtmutex.h @@ -29,8 +29,6 @@ struct rt_mutex { struct task_struct *owner; #ifdef CONFIG_DEBUG_RT_MUTEXES int save_state; - struct list_head held_list_entry; - unsigned long acquire_ip; const char *name, *file; int line; void *magic; @@ -98,14 +96,6 @@ extern int rt_mutex_trylock(struct rt_mutex *lock); extern void rt_mutex_unlock(struct rt_mutex *lock); -#ifdef CONFIG_DEBUG_RT_MUTEXES -# define INIT_RT_MUTEX_DEBUG(tsk) \ - .held_list_head = LIST_HEAD_INIT(tsk.held_list_head), \ - .held_list_lock = SPIN_LOCK_UNLOCKED -#else -# define INIT_RT_MUTEX_DEBUG(tsk) -#endif - #ifdef CONFIG_RT_MUTEXES # define INIT_RT_MUTEXES(tsk) \ .pi_waiters = PLIST_HEAD_INIT(tsk.pi_waiters, tsk.pi_lock), \ diff --git a/include/linux/rwsem-spinlock.h b/include/linux/rwsem-spinlock.h index f30f805080ae..ae1fcadd598e 100644 --- a/include/linux/rwsem-spinlock.h +++ b/include/linux/rwsem-spinlock.h @@ -32,30 +32,37 @@ struct rw_semaphore { __s32 activity; spinlock_t wait_lock; struct list_head wait_list; -#if RWSEM_DEBUG - int debug; +#ifdef CONFIG_DEBUG_LOCK_ALLOC + struct lockdep_map dep_map; #endif }; -/* - * initialisation - */ -#if RWSEM_DEBUG -#define __RWSEM_DEBUG_INIT , 0 +#ifdef CONFIG_DEBUG_LOCK_ALLOC +# define __RWSEM_DEP_MAP_INIT(lockname) , .dep_map = { .name = #lockname } #else -#define __RWSEM_DEBUG_INIT /* */ +# define __RWSEM_DEP_MAP_INIT(lockname) #endif #define __RWSEM_INITIALIZER(name) \ -{ 0, SPIN_LOCK_UNLOCKED, LIST_HEAD_INIT((name).wait_list) __RWSEM_DEBUG_INIT } +{ 0, SPIN_LOCK_UNLOCKED, LIST_HEAD_INIT((name).wait_list) __RWSEM_DEP_MAP_INIT(name) } #define DECLARE_RWSEM(name) \ struct rw_semaphore name = __RWSEM_INITIALIZER(name) -extern void FASTCALL(init_rwsem(struct rw_semaphore *sem)); +extern void __init_rwsem(struct rw_semaphore *sem, const char *name, + struct lock_class_key *key); + +#define init_rwsem(sem) \ +do { \ + static struct lock_class_key __key; \ + \ + __init_rwsem((sem), #sem, &__key); \ +} while (0) + extern void FASTCALL(__down_read(struct rw_semaphore *sem)); extern int FASTCALL(__down_read_trylock(struct rw_semaphore *sem)); extern void FASTCALL(__down_write(struct rw_semaphore *sem)); +extern void FASTCALL(__down_write_nested(struct rw_semaphore *sem, int subclass)); extern int FASTCALL(__down_write_trylock(struct rw_semaphore *sem)); extern void FASTCALL(__up_read(struct rw_semaphore *sem)); extern void FASTCALL(__up_write(struct rw_semaphore *sem)); diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h index f99fe90732ab..658afb37c3f5 100644 --- a/include/linux/rwsem.h +++ b/include/linux/rwsem.h @@ -9,8 +9,6 @@ #include <linux/linkage.h> -#define RWSEM_DEBUG 0 - #ifdef __KERNEL__ #include <linux/types.h> @@ -26,89 +24,58 @@ struct rw_semaphore; #include <asm/rwsem.h> /* use an arch-specific implementation */ #endif -#ifndef rwsemtrace -#if RWSEM_DEBUG -extern void FASTCALL(rwsemtrace(struct rw_semaphore *sem, const char *str)); -#else -#define rwsemtrace(SEM,FMT) -#endif -#endif - /* * lock for reading */ -static inline void down_read(struct rw_semaphore *sem) -{ - might_sleep(); - rwsemtrace(sem,"Entering down_read"); - __down_read(sem); - rwsemtrace(sem,"Leaving down_read"); -} +extern void down_read(struct rw_semaphore *sem); /* * trylock for reading -- returns 1 if successful, 0 if contention */ -static inline int down_read_trylock(struct rw_semaphore *sem) -{ - int ret; - rwsemtrace(sem,"Entering down_read_trylock"); - ret = __down_read_trylock(sem); - rwsemtrace(sem,"Leaving down_read_trylock"); - return ret; -} +extern int down_read_trylock(struct rw_semaphore *sem); /* * lock for writing */ -static inline void down_write(struct rw_semaphore *sem) -{ - might_sleep(); - rwsemtrace(sem,"Entering down_write"); - __down_write(sem); - rwsemtrace(sem,"Leaving down_write"); -} +extern void down_write(struct rw_semaphore *sem); /* * trylock for writing -- returns 1 if successful, 0 if contention */ -static inline int down_write_trylock(struct rw_semaphore *sem) -{ - int ret; - rwsemtrace(sem,"Entering down_write_trylock"); - ret = __down_write_trylock(sem); - rwsemtrace(sem,"Leaving down_write_trylock"); - return ret; -} +extern int down_write_trylock(struct rw_semaphore *sem); /* * release a read lock */ -static inline void up_read(struct rw_semaphore *sem) -{ - rwsemtrace(sem,"Entering up_read"); - __up_read(sem); - rwsemtrace(sem,"Leaving up_read"); -} +extern void up_read(struct rw_semaphore *sem); /* * release a write lock */ -static inline void up_write(struct rw_semaphore *sem) -{ - rwsemtrace(sem,"Entering up_write"); - __up_write(sem); - rwsemtrace(sem,"Leaving up_write"); -} +extern void up_write(struct rw_semaphore *sem); /* * downgrade write lock to read lock */ -static inline void downgrade_write(struct rw_semaphore *sem) -{ - rwsemtrace(sem,"Entering downgrade_write"); - __downgrade_write(sem); - rwsemtrace(sem,"Leaving downgrade_write"); -} +extern void downgrade_write(struct rw_semaphore *sem); + +#ifdef CONFIG_DEBUG_LOCK_ALLOC +/* + * nested locking: + */ +extern void down_read_nested(struct rw_semaphore *sem, int subclass); +extern void down_write_nested(struct rw_semaphore *sem, int subclass); +/* + * Take/release a lock when not the owner will release it: + */ +extern void down_read_non_owner(struct rw_semaphore *sem); +extern void up_read_non_owner(struct rw_semaphore *sem); +#else +# define down_read_nested(sem, subclass) down_read(sem) +# define down_write_nested(sem, subclass) down_write(sem) +# define down_read_non_owner(sem) down_read(sem) +# define up_read_non_owner(sem) up_read(sem) +#endif #endif /* __KERNEL__ */ #endif /* _LINUX_RWSEM_H */ diff --git a/include/linux/sched.h b/include/linux/sched.h index aaf723308ed4..1c876e27ff93 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -184,11 +184,11 @@ extern unsigned long weighted_cpuload(const int cpu); extern rwlock_t tasklist_lock; extern spinlock_t mmlist_lock; -typedef struct task_struct task_t; +struct task_struct; extern void sched_init(void); extern void sched_init_smp(void); -extern void init_idle(task_t *idle, int cpu); +extern void init_idle(struct task_struct *idle, int cpu); extern cpumask_t nohz_cpu_mask; @@ -383,7 +383,7 @@ struct signal_struct { wait_queue_head_t wait_chldexit; /* for wait4() */ /* current thread group signal load-balancing target: */ - task_t *curr_target; + struct task_struct *curr_target; /* shared signal handling: */ struct sigpending shared_pending; @@ -534,7 +534,6 @@ extern struct user_struct *find_user(uid_t); extern struct user_struct root_user; #define INIT_USER (&root_user) -typedef struct prio_array prio_array_t; struct backing_dev_info; struct reclaim_state; @@ -699,7 +698,7 @@ extern int groups_search(struct group_info *group_info, gid_t grp); ((gi)->blocks[(i)/NGROUPS_PER_BLOCK][(i)%NGROUPS_PER_BLOCK]) #ifdef ARCH_HAS_PREFETCH_SWITCH_STACK -extern void prefetch_stack(struct task_struct*); +extern void prefetch_stack(struct task_struct *t); #else static inline void prefetch_stack(struct task_struct *t) { } #endif @@ -715,6 +714,8 @@ enum sleep_type { SLEEP_INTERRUPTED, }; +struct prio_array; + struct task_struct { volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */ struct thread_info *thread_info; @@ -732,7 +733,7 @@ struct task_struct { int load_weight; /* for niceness load balancing purposes */ int prio, static_prio, normal_prio; struct list_head run_list; - prio_array_t *array; + struct prio_array *array; unsigned short ioprio; unsigned int btrace_seq; @@ -865,16 +866,34 @@ struct task_struct { struct plist_head pi_waiters; /* Deadlock detection and priority inheritance handling */ struct rt_mutex_waiter *pi_blocked_on; -# ifdef CONFIG_DEBUG_RT_MUTEXES - spinlock_t held_list_lock; - struct list_head held_list_head; -# endif #endif #ifdef CONFIG_DEBUG_MUTEXES /* mutex deadlock detection */ struct mutex_waiter *blocked_on; #endif +#ifdef CONFIG_TRACE_IRQFLAGS + unsigned int irq_events; + int hardirqs_enabled; + unsigned long hardirq_enable_ip; + unsigned int hardirq_enable_event; + unsigned long hardirq_disable_ip; + unsigned int hardirq_disable_event; + int softirqs_enabled; + unsigned long softirq_disable_ip; + unsigned int softirq_disable_event; + unsigned long softirq_enable_ip; + unsigned int softirq_enable_event; + int hardirq_context; + int softirq_context; +#endif +#ifdef CONFIG_LOCKDEP +# define MAX_LOCK_DEPTH 30UL + u64 curr_chain_key; + int lockdep_depth; + struct held_lock held_locks[MAX_LOCK_DEPTH]; + unsigned int lockdep_recursion; +#endif /* journalling filesystem info */ void *journal_info; @@ -1013,9 +1032,9 @@ static inline void put_task_struct(struct task_struct *t) #define used_math() tsk_used_math(current) #ifdef CONFIG_SMP -extern int set_cpus_allowed(task_t *p, cpumask_t new_mask); +extern int set_cpus_allowed(struct task_struct *p, cpumask_t new_mask); #else -static inline int set_cpus_allowed(task_t *p, cpumask_t new_mask) +static inline int set_cpus_allowed(struct task_struct *p, cpumask_t new_mask) { if (!cpu_isset(0, new_mask)) return -EINVAL; @@ -1024,7 +1043,8 @@ static inline int set_cpus_allowed(task_t *p, cpumask_t new_mask) #endif extern unsigned long long sched_clock(void); -extern unsigned long long current_sched_time(const task_t *current_task); +extern unsigned long long +current_sched_time(const struct task_struct *current_task); /* sched_exec is called by processes performing an exec */ #ifdef CONFIG_SMP @@ -1042,27 +1062,27 @@ static inline void idle_task_exit(void) {} extern void sched_idle_next(void); #ifdef CONFIG_RT_MUTEXES -extern int rt_mutex_getprio(task_t *p); -extern void rt_mutex_setprio(task_t *p, int prio); -extern void rt_mutex_adjust_pi(task_t *p); +extern int rt_mutex_getprio(struct task_struct *p); +extern void rt_mutex_setprio(struct task_struct *p, int prio); +extern void rt_mutex_adjust_pi(struct task_struct *p); #else -static inline int rt_mutex_getprio(task_t *p) +static inline int rt_mutex_getprio(struct task_struct *p) { return p->normal_prio; } # define rt_mutex_adjust_pi(p) do { } while (0) #endif -extern void set_user_nice(task_t *p, long nice); -extern int task_prio(const task_t *p); -extern int task_nice(const task_t *p); -extern int can_nice(const task_t *p, const int nice); -extern int task_curr(const task_t *p); +extern void set_user_nice(struct task_struct *p, long nice); +extern int task_prio(const struct task_struct *p); +extern int task_nice(const struct task_struct *p); +extern int can_nice(const struct task_struct *p, const int nice); +extern int task_curr(const struct task_struct *p); extern int idle_cpu(int cpu); extern int sched_setscheduler(struct task_struct *, int, struct sched_param *); -extern task_t *idle_task(int cpu); -extern task_t *curr_task(int cpu); -extern void set_curr_task(int cpu, task_t *p); +extern struct task_struct *idle_task(int cpu); +extern struct task_struct *curr_task(int cpu); +extern void set_curr_task(int cpu, struct task_struct *p); void yield(void); @@ -1119,8 +1139,8 @@ extern void FASTCALL(wake_up_new_task(struct task_struct * tsk, #else static inline void kick_process(struct task_struct *tsk) { } #endif -extern void FASTCALL(sched_fork(task_t * p, int clone_flags)); -extern void FASTCALL(sched_exit(task_t * p)); +extern void FASTCALL(sched_fork(struct task_struct * p, int clone_flags)); +extern void FASTCALL(sched_exit(struct task_struct * p)); extern int in_group_p(gid_t); extern int in_egroup_p(gid_t); @@ -1225,17 +1245,17 @@ extern NORET_TYPE void do_group_exit(int); extern void daemonize(const char *, ...); extern int allow_signal(int); extern int disallow_signal(int); -extern task_t *child_reaper; +extern struct task_struct *child_reaper; extern int do_execve(char *, char __user * __user *, char __user * __user *, struct pt_regs *); extern long do_fork(unsigned long, unsigned long, struct pt_regs *, unsigned long, int __user *, int __user *); -task_t *fork_idle(int); +struct task_struct *fork_idle(int); extern void set_task_comm(struct task_struct *tsk, char *from); extern void get_task_comm(char *to, struct task_struct *tsk); #ifdef CONFIG_SMP -extern void wait_task_inactive(task_t * p); +extern void wait_task_inactive(struct task_struct * p); #else #define wait_task_inactive(p) do { } while (0) #endif @@ -1261,13 +1281,13 @@ extern void wait_task_inactive(task_t * p); /* de_thread depends on thread_group_leader not being a pid based check */ #define thread_group_leader(p) (p == p->group_leader) -static inline task_t *next_thread(const task_t *p) +static inline struct task_struct *next_thread(const struct task_struct *p) { return list_entry(rcu_dereference(p->thread_group.next), - task_t, thread_group); + struct task_struct, thread_group); } -static inline int thread_group_empty(task_t *p) +static inline int thread_group_empty(struct task_struct *p) { return list_empty(&p->thread_group); } diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index 7bc5c7c12b54..46000936f8f1 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -38,9 +38,17 @@ typedef struct { * These macros triggered gcc-3.x compile-time problems. We think these are * OK now. Be cautious. */ -#define SEQLOCK_UNLOCKED { 0, SPIN_LOCK_UNLOCKED } -#define seqlock_init(x) do { *(x) = (seqlock_t) SEQLOCK_UNLOCKED; } while (0) +#define __SEQLOCK_UNLOCKED(lockname) \ + { 0, __SPIN_LOCK_UNLOCKED(lockname) } +#define SEQLOCK_UNLOCKED \ + __SEQLOCK_UNLOCKED(old_style_seqlock_init) + +#define seqlock_init(x) \ + do { *(x) = (seqlock_t) __SEQLOCK_UNLOCKED(x); } while (0) + +#define DEFINE_SEQLOCK(x) \ + seqlock_t x = __SEQLOCK_UNLOCKED(x) /* Lock out other writers and update the count. * Acts like a normal spin_lock/unlock. diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 57d7d4965f9a..3597b4f14389 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -604,9 +604,12 @@ static inline __u32 skb_queue_len(const struct sk_buff_head *list_) return list_->qlen; } +extern struct lock_class_key skb_queue_lock_key; + static inline void skb_queue_head_init(struct sk_buff_head *list) { spin_lock_init(&list->lock); + lockdep_set_class(&list->lock, &skb_queue_lock_key); list->prev = list->next = (struct sk_buff *)list; list->qlen = 0; } diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index ae23beef9cc9..31473db92d3b 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h @@ -82,14 +82,40 @@ extern int __lockfunc generic__raw_read_trylock(raw_rwlock_t *lock); /* * Pull the __raw*() functions/declarations (UP-nondebug doesnt need them): */ -#if defined(CONFIG_SMP) +#ifdef CONFIG_SMP # include <asm/spinlock.h> #else # include <linux/spinlock_up.h> #endif -#define spin_lock_init(lock) do { *(lock) = SPIN_LOCK_UNLOCKED; } while (0) -#define rwlock_init(lock) do { *(lock) = RW_LOCK_UNLOCKED; } while (0) +#ifdef CONFIG_DEBUG_SPINLOCK + extern void __spin_lock_init(spinlock_t *lock, const char *name, + struct lock_class_key *key); +# define spin_lock_init(lock) \ +do { \ + static struct lock_class_key __key; \ + \ + __spin_lock_init((lock), #lock, &__key); \ +} while (0) + +#else +# define spin_lock_init(lock) \ + do { *(lock) = SPIN_LOCK_UNLOCKED; } while (0) +#endif + +#ifdef CONFIG_DEBUG_SPINLOCK + extern void __rwlock_init(rwlock_t *lock, const char *name, + struct lock_class_key *key); +# define rwlock_init(lock) \ +do { \ + static struct lock_class_key __key; \ + \ + __rwlock_init((lock), #lock, &__key); \ +} while (0) +#else +# define rwlock_init(lock) \ + do { *(lock) = RW_LOCK_UNLOCKED; } while (0) +#endif #define spin_is_locked(lock) __raw_spin_is_locked(&(lock)->raw_lock) @@ -113,7 +139,6 @@ extern int __lockfunc generic__raw_read_trylock(raw_rwlock_t *lock); #define _raw_spin_lock_flags(lock, flags) _raw_spin_lock(lock) extern int _raw_spin_trylock(spinlock_t *lock); extern void _raw_spin_unlock(spinlock_t *lock); - extern void _raw_read_lock(rwlock_t *lock); extern int _raw_read_trylock(rwlock_t *lock); extern void _raw_read_unlock(rwlock_t *lock); @@ -121,17 +146,17 @@ extern int __lockfunc generic__raw_read_trylock(raw_rwlock_t *lock); extern int _raw_write_trylock(rwlock_t *lock); extern void _raw_write_unlock(rwlock_t *lock); #else -# define _raw_spin_unlock(lock) __raw_spin_unlock(&(lock)->raw_lock) -# define _raw_spin_trylock(lock) __raw_spin_trylock(&(lock)->raw_lock) # define _raw_spin_lock(lock) __raw_spin_lock(&(lock)->raw_lock) # define _raw_spin_lock_flags(lock, flags) \ __raw_spin_lock_flags(&(lock)->raw_lock, *(flags)) +# define _raw_spin_trylock(lock) __raw_spin_trylock(&(lock)->raw_lock) +# define _raw_spin_unlock(lock) __raw_spin_unlock(&(lock)->raw_lock) # define _raw_read_lock(rwlock) __raw_read_lock(&(rwlock)->raw_lock) -# define _raw_write_lock(rwlock) __raw_write_lock(&(rwlock)->raw_lock) -# define _raw_read_unlock(rwlock) __raw_read_unlock(&(rwlock)->raw_lock) -# define _raw_write_unlock(rwlock) __raw_write_unlock(&(rwlock)->raw_lock) # define _raw_read_trylock(rwlock) __raw_read_trylock(&(rwlock)->raw_lock) +# define _raw_read_unlock(rwlock) __raw_read_unlock(&(rwlock)->raw_lock) +# define _raw_write_lock(rwlock) __raw_write_lock(&(rwlock)->raw_lock) # define _raw_write_trylock(rwlock) __raw_write_trylock(&(rwlock)->raw_lock) +# define _raw_write_unlock(rwlock) __raw_write_unlock(&(rwlock)->raw_lock) #endif #define read_can_lock(rwlock) __raw_read_can_lock(&(rwlock)->raw_lock) @@ -147,6 +172,13 @@ extern int __lockfunc generic__raw_read_trylock(raw_rwlock_t *lock); #define write_trylock(lock) __cond_lock(_write_trylock(lock)) #define spin_lock(lock) _spin_lock(lock) + +#ifdef CONFIG_DEBUG_LOCK_ALLOC +# define spin_lock_nested(lock, subclass) _spin_lock_nested(lock, subclass) +#else +# define spin_lock_nested(lock, subclass) _spin_lock(lock) +#endif + #define write_lock(lock) _write_lock(lock) #define read_lock(lock) _read_lock(lock) @@ -172,21 +204,18 @@ extern int __lockfunc generic__raw_read_trylock(raw_rwlock_t *lock); /* * We inline the unlock functions in the nondebug case: */ -#if defined(CONFIG_DEBUG_SPINLOCK) || defined(CONFIG_PREEMPT) || !defined(CONFIG_SMP) +#if defined(CONFIG_DEBUG_SPINLOCK) || defined(CONFIG_PREEMPT) || \ + !defined(CONFIG_SMP) # define spin_unlock(lock) _spin_unlock(lock) # define read_unlock(lock) _read_unlock(lock) # define write_unlock(lock) _write_unlock(lock) -#else -# define spin_unlock(lock) __raw_spin_unlock(&(lock)->raw_lock) -# define read_unlock(lock) __raw_read_unlock(&(lock)->raw_lock) -# define write_unlock(lock) __raw_write_unlock(&(lock)->raw_lock) -#endif - -#if defined(CONFIG_DEBUG_SPINLOCK) || defined(CONFIG_PREEMPT) || !defined(CONFIG_SMP) # define spin_unlock_irq(lock) _spin_unlock_irq(lock) # define read_unlock_irq(lock) _read_unlock_irq(lock) # define write_unlock_irq(lock) _write_unlock_irq(lock) #else +# define spin_unlock(lock) __raw_spin_unlock(&(lock)->raw_lock) +# define read_unlock(lock) __raw_read_unlock(&(lock)->raw_lock) +# define write_unlock(lock) __raw_write_unlock(&(lock)->raw_lock) # define spin_unlock_irq(lock) \ do { __raw_spin_unlock(&(lock)->raw_lock); local_irq_enable(); } while (0) # define read_unlock_irq(lock) \ diff --git a/include/linux/spinlock_api_smp.h b/include/linux/spinlock_api_smp.h index 78e6989ffb54..b2c4f8299464 100644 --- a/include/linux/spinlock_api_smp.h +++ b/include/linux/spinlock_api_smp.h @@ -20,6 +20,8 @@ int in_lock_functions(unsigned long addr); #define assert_spin_locked(x) BUG_ON(!spin_is_locked(x)) void __lockfunc _spin_lock(spinlock_t *lock) __acquires(spinlock_t); +void __lockfunc _spin_lock_nested(spinlock_t *lock, int subclass) + __acquires(spinlock_t); void __lockfunc _read_lock(rwlock_t *lock) __acquires(rwlock_t); void __lockfunc _write_lock(rwlock_t *lock) __acquires(rwlock_t); void __lockfunc _spin_lock_bh(spinlock_t *lock) __acquires(spinlock_t); diff --git a/include/linux/spinlock_api_up.h b/include/linux/spinlock_api_up.h index cd81cee566f4..67faa044c5f5 100644 --- a/include/linux/spinlock_api_up.h +++ b/include/linux/spinlock_api_up.h @@ -49,6 +49,7 @@ do { local_irq_restore(flags); __UNLOCK(lock); } while (0) #define _spin_lock(lock) __LOCK(lock) +#define _spin_lock_nested(lock, subclass) __LOCK(lock) #define _read_lock(lock) __LOCK(lock) #define _write_lock(lock) __LOCK(lock) #define _spin_lock_bh(lock) __LOCK_BH(lock) diff --git a/include/linux/spinlock_types.h b/include/linux/spinlock_types.h index 9cb51e070390..dc5fb69e4de9 100644 --- a/include/linux/spinlock_types.h +++ b/include/linux/spinlock_types.h @@ -9,6 +9,8 @@ * Released under the General Public License (GPL). */ +#include <linux/lockdep.h> + #if defined(CONFIG_SMP) # include <asm/spinlock_types.h> #else @@ -24,6 +26,9 @@ typedef struct { unsigned int magic, owner_cpu; void *owner; #endif +#ifdef CONFIG_DEBUG_LOCK_ALLOC + struct lockdep_map dep_map; +#endif } spinlock_t; #define SPINLOCK_MAGIC 0xdead4ead @@ -37,31 +42,53 @@ typedef struct { unsigned int magic, owner_cpu; void *owner; #endif +#ifdef CONFIG_DEBUG_LOCK_ALLOC + struct lockdep_map dep_map; +#endif } rwlock_t; #define RWLOCK_MAGIC 0xdeaf1eed #define SPINLOCK_OWNER_INIT ((void *)-1L) +#ifdef CONFIG_DEBUG_LOCK_ALLOC +# define SPIN_DEP_MAP_INIT(lockname) .dep_map = { .name = #lockname } +#else +# define SPIN_DEP_MAP_INIT(lockname) +#endif + +#ifdef CONFIG_DEBUG_LOCK_ALLOC +# define RW_DEP_MAP_INIT(lockname) .dep_map = { .name = #lockname } +#else +# define RW_DEP_MAP_INIT(lockname) +#endif + #ifdef CONFIG_DEBUG_SPINLOCK -# define SPIN_LOCK_UNLOCKED \ +# define __SPIN_LOCK_UNLOCKED(lockname) \ (spinlock_t) { .raw_lock = __RAW_SPIN_LOCK_UNLOCKED, \ .magic = SPINLOCK_MAGIC, \ .owner = SPINLOCK_OWNER_INIT, \ - .owner_cpu = -1 } -#define RW_LOCK_UNLOCKED \ + .owner_cpu = -1, \ + SPIN_DEP_MAP_INIT(lockname) } +#define __RW_LOCK_UNLOCKED(lockname) \ (rwlock_t) { .raw_lock = __RAW_RW_LOCK_UNLOCKED, \ .magic = RWLOCK_MAGIC, \ .owner = SPINLOCK_OWNER_INIT, \ - .owner_cpu = -1 } + .owner_cpu = -1, \ + RW_DEP_MAP_INIT(lockname) } #else -# define SPIN_LOCK_UNLOCKED \ - (spinlock_t) { .raw_lock = __RAW_SPIN_LOCK_UNLOCKED } -#define RW_LOCK_UNLOCKED \ - (rwlock_t) { .raw_lock = __RAW_RW_LOCK_UNLOCKED } +# define __SPIN_LOCK_UNLOCKED(lockname) \ + (spinlock_t) { .raw_lock = __RAW_SPIN_LOCK_UNLOCKED, \ + SPIN_DEP_MAP_INIT(lockname) } +#define __RW_LOCK_UNLOCKED(lockname) \ + (rwlock_t) { .raw_lock = __RAW_RW_LOCK_UNLOCKED, \ + RW_DEP_MAP_INIT(lockname) } #endif -#define DEFINE_SPINLOCK(x) spinlock_t x = SPIN_LOCK_UNLOCKED -#define DEFINE_RWLOCK(x) rwlock_t x = RW_LOCK_UNLOCKED +#define SPIN_LOCK_UNLOCKED __SPIN_LOCK_UNLOCKED(old_style_spin_init) +#define RW_LOCK_UNLOCKED __RW_LOCK_UNLOCKED(old_style_rw_init) + +#define DEFINE_SPINLOCK(x) spinlock_t x = __SPIN_LOCK_UNLOCKED(x) +#define DEFINE_RWLOCK(x) rwlock_t x = __RW_LOCK_UNLOCKED(x) #endif /* __LINUX_SPINLOCK_TYPES_H */ diff --git a/include/linux/spinlock_types_up.h b/include/linux/spinlock_types_up.h index 04135b0e198e..27644af20b7c 100644 --- a/include/linux/spinlock_types_up.h +++ b/include/linux/spinlock_types_up.h @@ -12,10 +12,14 @@ * Released under the General Public License (GPL). */ -#ifdef CONFIG_DEBUG_SPINLOCK +#if defined(CONFIG_DEBUG_SPINLOCK) || \ + defined(CONFIG_DEBUG_LOCK_ALLOC) typedef struct { volatile unsigned int slock; +#ifdef CONFIG_DEBUG_LOCK_ALLOC + struct lockdep_map dep_map; +#endif } raw_spinlock_t; #define __RAW_SPIN_LOCK_UNLOCKED { 1 } @@ -30,6 +34,9 @@ typedef struct { } raw_spinlock_t; typedef struct { /* no debug version on UP */ +#ifdef CONFIG_DEBUG_LOCK_ALLOC + struct lockdep_map dep_map; +#endif } raw_rwlock_t; #define __RAW_RW_LOCK_UNLOCKED { } diff --git a/include/linux/spinlock_up.h b/include/linux/spinlock_up.h index 31accf2f0b13..ea54c4c9a4ec 100644 --- a/include/linux/spinlock_up.h +++ b/include/linux/spinlock_up.h @@ -18,7 +18,6 @@ */ #ifdef CONFIG_DEBUG_SPINLOCK - #define __raw_spin_is_locked(x) ((x)->slock == 0) static inline void __raw_spin_lock(raw_spinlock_t *lock) diff --git a/include/linux/stacktrace.h b/include/linux/stacktrace.h new file mode 100644 index 000000000000..9cc81e572224 --- /dev/null +++ b/include/linux/stacktrace.h @@ -0,0 +1,20 @@ +#ifndef __LINUX_STACKTRACE_H +#define __LINUX_STACKTRACE_H + +#ifdef CONFIG_STACKTRACE +struct stack_trace { + unsigned int nr_entries, max_entries; + unsigned long *entries; +}; + +extern void save_stack_trace(struct stack_trace *trace, + struct task_struct *task, int all_contexts, + unsigned int skip); + +extern void print_stack_trace(struct stack_trace *trace, int spaces); +#else +# define save_stack_trace(trace, task, all, skip) do { } while (0) +# define print_stack_trace(trace) do { } while (0) +#endif + +#endif diff --git a/include/linux/swap.h b/include/linux/swap.h index cf6ca6e377bd..5e59184c9096 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -189,6 +189,7 @@ extern long vm_total_pages; #ifdef CONFIG_NUMA extern int zone_reclaim_mode; +extern int sysctl_min_unmapped_ratio; extern int zone_reclaim(struct zone *, gfp_t, unsigned int); #else #define zone_reclaim_mode 0 diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 46e4d8f2771f..e4b1a4d4dcf3 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -188,7 +188,7 @@ enum VM_DROP_PAGECACHE=29, /* int: nuke lots of pagecache */ VM_PERCPU_PAGELIST_FRACTION=30,/* int: fraction of pages in each percpu_pagelist */ VM_ZONE_RECLAIM_MODE=31, /* reclaim local zone memory before going off node */ - VM_ZONE_RECLAIM_INTERVAL=32, /* time period to wait after reclaim failure */ + VM_MIN_UNMAPPED=32, /* Set min percent of unmapped pages */ VM_PANIC_ON_OOM=33, /* panic at out-of-memory */ VM_VDSO_ENABLED=34, /* map VDSO into new processes? */ }; diff --git a/include/linux/wait.h b/include/linux/wait.h index 544e855c7c02..794be7af58ae 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -68,7 +68,7 @@ struct task_struct; wait_queue_t name = __WAITQUEUE_INITIALIZER(name, tsk) #define __WAIT_QUEUE_HEAD_INITIALIZER(name) { \ - .lock = SPIN_LOCK_UNLOCKED, \ + .lock = __SPIN_LOCK_UNLOCKED(name.lock), \ .task_list = { &(name).task_list, &(name).task_list } } #define DECLARE_WAIT_QUEUE_HEAD(name) \ @@ -77,9 +77,15 @@ struct task_struct; #define __WAIT_BIT_KEY_INITIALIZER(word, bit) \ { .flags = word, .bit_nr = bit, } +/* + * lockdep: we want one lock-class for all waitqueue locks. + */ +extern struct lock_class_key waitqueue_lock_key; + static inline void init_waitqueue_head(wait_queue_head_t *q) { spin_lock_init(&q->lock); + lockdep_set_class(&q->lock, &waitqueue_lock_key); INIT_LIST_HEAD(&q->task_list); } diff --git a/include/net/af_unix.h b/include/net/af_unix.h index 5ba72d95280c..2fec827c8801 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -67,6 +67,9 @@ struct unix_skb_parms { #define unix_state_rlock(s) spin_lock(&unix_sk(s)->lock) #define unix_state_runlock(s) spin_unlock(&unix_sk(s)->lock) #define unix_state_wlock(s) spin_lock(&unix_sk(s)->lock) +#define unix_state_wlock_nested(s) \ + spin_lock_nested(&unix_sk(s)->lock, \ + SINGLE_DEPTH_NESTING) #define unix_state_wunlock(s) spin_unlock(&unix_sk(s)->lock) #ifdef __KERNEL__ diff --git a/include/net/sock.h b/include/net/sock.h index 7b3d6b856946..324b3ea233d6 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -44,6 +44,7 @@ #include <linux/timer.h> #include <linux/cache.h> #include <linux/module.h> +#include <linux/lockdep.h> #include <linux/netdevice.h> #include <linux/skbuff.h> /* struct sk_buff */ #include <linux/security.h> @@ -78,14 +79,17 @@ typedef struct { spinlock_t slock; struct sock_iocb *owner; wait_queue_head_t wq; + /* + * We express the mutex-alike socket_lock semantics + * to the lock validator by explicitly managing + * the slock as a lock variant (in addition to + * the slock itself): + */ +#ifdef CONFIG_DEBUG_LOCK_ALLOC + struct lockdep_map dep_map; +#endif } socket_lock_t; -#define sock_lock_init(__sk) \ -do { spin_lock_init(&((__sk)->sk_lock.slock)); \ - (__sk)->sk_lock.owner = NULL; \ - init_waitqueue_head(&((__sk)->sk_lock.wq)); \ -} while(0) - struct sock; struct proto; @@ -747,6 +751,9 @@ extern void FASTCALL(release_sock(struct sock *sk)); /* BH context may only use the following locking interface. */ #define bh_lock_sock(__sk) spin_lock(&((__sk)->sk_lock.slock)) +#define bh_lock_sock_nested(__sk) \ + spin_lock_nested(&((__sk)->sk_lock.slock), \ + SINGLE_DEPTH_NESTING) #define bh_unlock_sock(__sk) spin_unlock(&((__sk)->sk_lock.slock)) extern struct sock *sk_alloc(int family, |