author | David Woodhouse <dwmw2@infradead.org> | 2006-10-21 16:46:04 +0100
committer | David Woodhouse <dwmw2@infradead.org> | 2006-10-21 16:46:04 +0100
commit | 513b046c96cc2fbce730a3474f6f7ff0c4fdd05c (patch)
tree | e8006368b6f643067486f92405a404757807d6da /kernel
parent | 82810b7b6cc7a74c68881a13b0eb66c7a6370fcc (diff)
parent | c7a3bd177f248d01ee18a01d22048c80e071c331 (diff)
download | blackbird-op-linux-513b046c96cc2fbce730a3474f6f7ff0c4fdd05c.tar.gz, blackbird-op-linux-513b046c96cc2fbce730a3474f6f7ff0c4fdd05c.zip
Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6
Diffstat (limited to 'kernel')
39 files changed, 1038 insertions, 258 deletions
diff --git a/kernel/Makefile b/kernel/Makefile index d948ca12acf0..5e3f3b75563a 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -8,7 +8,7 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o profile.o \ signal.o sys.o kmod.o workqueue.o pid.o \ rcupdate.o extable.o params.o posix-timers.o \ kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \ - hrtimer.o rwsem.o latency.o nsproxy.o + hrtimer.o rwsem.o latency.o nsproxy.o srcu.o obj-$(CONFIG_STACKTRACE) += stacktrace.o obj-y += time/ diff --git a/kernel/audit.c b/kernel/audit.c index f9889ee77825..98106f6078b0 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -340,7 +340,7 @@ static int kauditd_thread(void *dummy) { struct sk_buff *skb; - while (1) { + while (!kthread_should_stop()) { skb = skb_dequeue(&audit_skb_queue); wake_up(&audit_backlog_wait); if (skb) { @@ -369,6 +369,7 @@ static int kauditd_thread(void *dummy) remove_wait_queue(&kauditd_wait, &wait); } } + return 0; } int audit_send_list(void *_dest) diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c index 1a58a81fb09d..4f40d923af8e 100644 --- a/kernel/auditfilter.c +++ b/kernel/auditfilter.c @@ -411,7 +411,6 @@ static struct audit_entry *audit_rule_to_entry(struct audit_rule *rule) case AUDIT_FSGID: case AUDIT_LOGINUID: case AUDIT_PERS: - case AUDIT_ARCH: case AUDIT_MSGTYPE: case AUDIT_PPID: case AUDIT_DEVMAJOR: @@ -423,6 +422,14 @@ static struct audit_entry *audit_rule_to_entry(struct audit_rule *rule) case AUDIT_ARG2: case AUDIT_ARG3: break; + /* arch is only allowed to be = or != */ + case AUDIT_ARCH: + if ((f->op != AUDIT_NOT_EQUAL) && (f->op != AUDIT_EQUAL) + && (f->op != AUDIT_NEGATE) && (f->op)) { + err = -EINVAL; + goto exit_free; + } + break; case AUDIT_PERM: if (f->val & ~15) goto exit_free; diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 105147631753..42f2f1179711 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -278,8 +278,11 @@ static int audit_filter_rules(struct task_struct *tsk, result = audit_comparator(tsk->pid, f->op, f->val); break; case AUDIT_PPID: - if (ctx) + if (ctx) { + if (!ctx->ppid) + ctx->ppid = sys_getppid(); result = audit_comparator(ctx->ppid, f->op, f->val); + } break; case AUDIT_UID: result = audit_comparator(tsk->uid, f->op, f->val); @@ -795,7 +798,8 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts /* tsk == current */ context->pid = tsk->pid; - context->ppid = sys_getppid(); /* sic. tsk == current in all cases */ + if (!context->ppid) + context->ppid = sys_getppid(); context->uid = tsk->uid; context->gid = tsk->gid; context->euid = tsk->euid; @@ -1137,6 +1141,7 @@ void audit_syscall_entry(int arch, int major, context->ctime = CURRENT_TIME; context->in_syscall = 1; context->auditable = !!(state == AUDIT_RECORD_CONTEXT); + context->ppid = 0; } /** @@ -1352,7 +1357,13 @@ void __audit_inode_child(const char *dname, const struct inode *inode, } update_context: - idx = context->name_count++; + idx = context->name_count; + if (context->name_count == AUDIT_NAMES) { + printk(KERN_DEBUG "name_count maxed and losing %s\n", + found_name ?: "(null)"); + return; + } + context->name_count++; #if AUDIT_DEBUG context->ino_count++; #endif @@ -1370,7 +1381,16 @@ update_context: /* A parent was not found in audit_names, so copy the inode data for the * provided parent. 
*/ if (!found_name) { - idx = context->name_count++; + idx = context->name_count; + if (context->name_count == AUDIT_NAMES) { + printk(KERN_DEBUG + "name_count maxed and losing parent inode data: dev=%02x:%02x, inode=%lu", + MAJOR(parent->i_sb->s_dev), + MINOR(parent->i_sb->s_dev), + parent->i_ino); + return; + } + context->name_count++; #if AUDIT_DEBUG context->ino_count++; #endif diff --git a/kernel/cpu.c b/kernel/cpu.c index 32c96628463e..27dd3ee47099 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -19,7 +19,7 @@ static DEFINE_MUTEX(cpu_add_remove_lock); static DEFINE_MUTEX(cpu_bitmask_lock); -static __cpuinitdata BLOCKING_NOTIFIER_HEAD(cpu_chain); +static __cpuinitdata RAW_NOTIFIER_HEAD(cpu_chain); /* If set, cpu_up and cpu_down will return -EBUSY and do nothing. * Should always be manipulated under cpu_add_remove_lock @@ -68,7 +68,11 @@ EXPORT_SYMBOL_GPL(unlock_cpu_hotplug); /* Need to know about CPUs going up/down? */ int __cpuinit register_cpu_notifier(struct notifier_block *nb) { - return blocking_notifier_chain_register(&cpu_chain, nb); + int ret; + mutex_lock(&cpu_add_remove_lock); + ret = raw_notifier_chain_register(&cpu_chain, nb); + mutex_unlock(&cpu_add_remove_lock); + return ret; } #ifdef CONFIG_HOTPLUG_CPU @@ -77,7 +81,9 @@ EXPORT_SYMBOL(register_cpu_notifier); void unregister_cpu_notifier(struct notifier_block *nb) { - blocking_notifier_chain_unregister(&cpu_chain, nb); + mutex_lock(&cpu_add_remove_lock); + raw_notifier_chain_unregister(&cpu_chain, nb); + mutex_unlock(&cpu_add_remove_lock); } EXPORT_SYMBOL(unregister_cpu_notifier); @@ -126,7 +132,7 @@ static int _cpu_down(unsigned int cpu) if (!cpu_online(cpu)) return -EINVAL; - err = blocking_notifier_call_chain(&cpu_chain, CPU_DOWN_PREPARE, + err = raw_notifier_call_chain(&cpu_chain, CPU_DOWN_PREPARE, (void *)(long)cpu); if (err == NOTIFY_BAD) { printk("%s: attempt to take down CPU %u failed\n", @@ -146,7 +152,7 @@ static int _cpu_down(unsigned int cpu) if (IS_ERR(p)) { /* CPU didn't die: tell everyone. Can't complain. */ - if (blocking_notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED, + if (raw_notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED, (void *)(long)cpu) == NOTIFY_BAD) BUG(); @@ -169,7 +175,7 @@ static int _cpu_down(unsigned int cpu) put_cpu(); /* CPU is completely dead: tell everyone. Too late to complain. */ - if (blocking_notifier_call_chain(&cpu_chain, CPU_DEAD, + if (raw_notifier_call_chain(&cpu_chain, CPU_DEAD, (void *)(long)cpu) == NOTIFY_BAD) BUG(); @@ -206,7 +212,7 @@ static int __devinit _cpu_up(unsigned int cpu) if (cpu_online(cpu) || !cpu_present(cpu)) return -EINVAL; - ret = blocking_notifier_call_chain(&cpu_chain, CPU_UP_PREPARE, hcpu); + ret = raw_notifier_call_chain(&cpu_chain, CPU_UP_PREPARE, hcpu); if (ret == NOTIFY_BAD) { printk("%s: attempt to bring up CPU %u failed\n", __FUNCTION__, cpu); @@ -223,11 +229,11 @@ static int __devinit _cpu_up(unsigned int cpu) BUG_ON(!cpu_online(cpu)); /* Now call notifier in preparation. */ - blocking_notifier_call_chain(&cpu_chain, CPU_ONLINE, hcpu); + raw_notifier_call_chain(&cpu_chain, CPU_ONLINE, hcpu); out_notify: if (ret != 0) - blocking_notifier_call_chain(&cpu_chain, + raw_notifier_call_chain(&cpu_chain, CPU_UP_CANCELED, hcpu); return ret; diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 9d850ae13b1b..6313c38c930e 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -2137,7 +2137,7 @@ static int cpuset_handle_cpuhp(struct notifier_block *nb, * See also the previous routine cpuset_handle_cpuhp(). 
*/ -void cpuset_track_online_nodes() +void cpuset_track_online_nodes(void) { common_cpu_mem_hotplug_unplug(); } diff --git a/kernel/fork.c b/kernel/fork.c index 7dc6140baac6..29ebb30850ed 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -984,6 +984,8 @@ static struct task_struct *copy_process(unsigned long clone_flags, if (!p) goto fork_out; + rt_mutex_init_task(p); + #ifdef CONFIG_TRACE_IRQFLAGS DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled); DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled); @@ -1088,8 +1090,6 @@ static struct task_struct *copy_process(unsigned long clone_flags, p->lockdep_recursion = 0; #endif - rt_mutex_init_task(p); - #ifdef CONFIG_DEBUG_MUTEXES p->blocked_on = NULL; /* not blocked yet */ #endif diff --git a/kernel/futex.c b/kernel/futex.c index 4aaf91951a43..b364e0026191 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -1612,10 +1612,10 @@ sys_set_robust_list(struct robust_list_head __user *head, * @len_ptr: pointer to a length field, the kernel fills in the header size */ asmlinkage long -sys_get_robust_list(int pid, struct robust_list_head __user **head_ptr, +sys_get_robust_list(int pid, struct robust_list_head __user * __user *head_ptr, size_t __user *len_ptr) { - struct robust_list_head *head; + struct robust_list_head __user *head; unsigned long ret; if (!pid) @@ -1694,14 +1694,15 @@ retry: * Fetch a robust-list pointer. Bit 0 signals PI futexes: */ static inline int fetch_robust_entry(struct robust_list __user **entry, - struct robust_list __user **head, int *pi) + struct robust_list __user * __user *head, + int *pi) { unsigned long uentry; - if (get_user(uentry, (unsigned long *)head)) + if (get_user(uentry, (unsigned long __user *)head)) return -EFAULT; - *entry = (void *)(uentry & ~1UL); + *entry = (void __user *)(uentry & ~1UL); *pi = uentry & 1; return 0; @@ -1739,7 +1740,7 @@ void exit_robust_list(struct task_struct *curr) return; if (pending) - handle_futex_death((void *)pending + futex_offset, curr, pip); + handle_futex_death((void __user *)pending + futex_offset, curr, pip); while (entry != &head->list) { /* @@ -1747,7 +1748,7 @@ void exit_robust_list(struct task_struct *curr) * don't process it twice: */ if (entry != pending) - if (handle_futex_death((void *)entry + futex_offset, + if (handle_futex_death((void __user *)entry + futex_offset, curr, pi)) return; /* diff --git a/kernel/futex_compat.c b/kernel/futex_compat.c index c5cca3f65cb7..50f24eea6cd0 100644 --- a/kernel/futex_compat.c +++ b/kernel/futex_compat.c @@ -18,7 +18,7 @@ */ static inline int fetch_robust_entry(compat_uptr_t *uentry, struct robust_list __user **entry, - compat_uptr_t *head, int *pi) + compat_uptr_t __user *head, int *pi) { if (get_user(*uentry, head)) return -EFAULT; @@ -62,7 +62,7 @@ void compat_exit_robust_list(struct task_struct *curr) &head->list_op_pending, &pip)) return; if (upending) - handle_futex_death((void *)pending + futex_offset, curr, pip); + handle_futex_death((void __user *)pending + futex_offset, curr, pip); while (compat_ptr(uentry) != &head->list) { /* @@ -70,7 +70,7 @@ void compat_exit_robust_list(struct task_struct *curr) * dont process it twice: */ if (entry != pending) - if (handle_futex_death((void *)entry + futex_offset, + if (handle_futex_death((void __user *)entry + futex_offset, curr, pi)) return; @@ -78,7 +78,7 @@ void compat_exit_robust_list(struct task_struct *curr) * Fetch the next entry in the list: */ if (fetch_robust_entry(&uentry, &entry, - (compat_uptr_t *)&entry->next, &pi)) + (compat_uptr_t __user *)&entry->next, &pi)) return; /* * Avoid 
excessively long or circular lists: @@ -103,10 +103,10 @@ compat_sys_set_robust_list(struct compat_robust_list_head __user *head, } asmlinkage long -compat_sys_get_robust_list(int pid, compat_uptr_t *head_ptr, +compat_sys_get_robust_list(int pid, compat_uptr_t __user *head_ptr, compat_size_t __user *len_ptr) { - struct compat_robust_list_head *head; + struct compat_robust_list_head __user *head; unsigned long ret; if (!pid) diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index 736cb0bd498f..2d0dc3efe813 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -18,6 +18,69 @@ #include "internals.h" /** + * dynamic_irq_init - initialize a dynamically allocated irq + * @irq: irq number to initialize + */ +void dynamic_irq_init(unsigned int irq) +{ + struct irq_desc *desc; + unsigned long flags; + + if (irq >= NR_IRQS) { + printk(KERN_ERR "Trying to initialize invalid IRQ%d\n", irq); + WARN_ON(1); + return; + } + + /* Ensure we don't have left over values from a previous use of this irq */ + desc = irq_desc + irq; + spin_lock_irqsave(&desc->lock, flags); + desc->status = IRQ_DISABLED; + desc->chip = &no_irq_chip; + desc->handle_irq = handle_bad_irq; + desc->depth = 1; + desc->handler_data = NULL; + desc->chip_data = NULL; + desc->action = NULL; + desc->irq_count = 0; + desc->irqs_unhandled = 0; +#ifdef CONFIG_SMP + desc->affinity = CPU_MASK_ALL; +#endif + spin_unlock_irqrestore(&desc->lock, flags); +} + +/** + * dynamic_irq_cleanup - cleanup a dynamically allocated irq + * @irq: irq number to initialize + */ +void dynamic_irq_cleanup(unsigned int irq) +{ + struct irq_desc *desc; + unsigned long flags; + + if (irq >= NR_IRQS) { + printk(KERN_ERR "Trying to cleanup invalid IRQ%d\n", irq); + WARN_ON(1); + return; + } + + desc = irq_desc + irq; + spin_lock_irqsave(&desc->lock, flags); + if (desc->action) { + spin_unlock_irqrestore(&desc->lock, flags); + printk(KERN_ERR "Destroying IRQ%d without calling free_irq\n", + irq); + WARN_ON(1); + return; + } + desc->handle_irq = handle_bad_irq; + desc->chip = &no_irq_chip; + spin_unlock_irqrestore(&desc->lock, flags); +} + + +/** * set_irq_chip - set the irq chip for an irq * @irq: irq number * @chip: pointer to irq chip description structure @@ -186,7 +249,6 @@ static inline void mask_ack_irq(struct irq_desc *desc, int irq) * handle_simple_irq - Simple and software-decoded IRQs. * @irq: the interrupt number * @desc: the interrupt description structure for this irq - * @regs: pointer to a register structure * * Simple interrupts are either sent from a demultiplexing interrupt * handler or come from hardware, where no interrupt hardware control @@ -196,7 +258,7 @@ static inline void mask_ack_irq(struct irq_desc *desc, int irq) * unmask issues if necessary. 
*/ void fastcall -handle_simple_irq(unsigned int irq, struct irq_desc *desc, struct pt_regs *regs) +handle_simple_irq(unsigned int irq, struct irq_desc *desc) { struct irqaction *action; irqreturn_t action_ret; @@ -216,9 +278,9 @@ handle_simple_irq(unsigned int irq, struct irq_desc *desc, struct pt_regs *regs) desc->status |= IRQ_INPROGRESS; spin_unlock(&desc->lock); - action_ret = handle_IRQ_event(irq, regs, action); + action_ret = handle_IRQ_event(irq, action); if (!noirqdebug) - note_interrupt(irq, desc, action_ret, regs); + note_interrupt(irq, desc, action_ret); spin_lock(&desc->lock); desc->status &= ~IRQ_INPROGRESS; @@ -230,7 +292,6 @@ out_unlock: * handle_level_irq - Level type irq handler * @irq: the interrupt number * @desc: the interrupt description structure for this irq - * @regs: pointer to a register structure * * Level type interrupts are active as long as the hardware line has * the active level. This may require to mask the interrupt and unmask @@ -238,7 +299,7 @@ out_unlock: * interrupt line is back to inactive. */ void fastcall -handle_level_irq(unsigned int irq, struct irq_desc *desc, struct pt_regs *regs) +handle_level_irq(unsigned int irq, struct irq_desc *desc) { unsigned int cpu = smp_processor_id(); struct irqaction *action; @@ -266,9 +327,9 @@ handle_level_irq(unsigned int irq, struct irq_desc *desc, struct pt_regs *regs) desc->status &= ~IRQ_PENDING; spin_unlock(&desc->lock); - action_ret = handle_IRQ_event(irq, regs, action); + action_ret = handle_IRQ_event(irq, action); if (!noirqdebug) - note_interrupt(irq, desc, action_ret, regs); + note_interrupt(irq, desc, action_ret); spin_lock(&desc->lock); desc->status &= ~IRQ_INPROGRESS; @@ -282,7 +343,6 @@ out_unlock: * handle_fasteoi_irq - irq handler for transparent controllers * @irq: the interrupt number * @desc: the interrupt description structure for this irq - * @regs: pointer to a register structure * * Only a single callback will be issued to the chip: an ->eoi() * call when the interrupt has been serviced. This enables support @@ -290,8 +350,7 @@ out_unlock: * details in hardware, transparently. */ void fastcall -handle_fasteoi_irq(unsigned int irq, struct irq_desc *desc, - struct pt_regs *regs) +handle_fasteoi_irq(unsigned int irq, struct irq_desc *desc) { unsigned int cpu = smp_processor_id(); struct irqaction *action; @@ -319,9 +378,9 @@ handle_fasteoi_irq(unsigned int irq, struct irq_desc *desc, desc->status &= ~IRQ_PENDING; spin_unlock(&desc->lock); - action_ret = handle_IRQ_event(irq, regs, action); + action_ret = handle_IRQ_event(irq, action); if (!noirqdebug) - note_interrupt(irq, desc, action_ret, regs); + note_interrupt(irq, desc, action_ret); spin_lock(&desc->lock); desc->status &= ~IRQ_INPROGRESS; @@ -335,7 +394,6 @@ out: * handle_edge_irq - edge type IRQ handler * @irq: the interrupt number * @desc: the interrupt description structure for this irq - * @regs: pointer to a register structure * * Interrupt occures on the falling and/or rising edge of a hardware * signal. The occurence is latched into the irq controller hardware @@ -349,7 +407,7 @@ out: * loop is left. 
*/ void fastcall -handle_edge_irq(unsigned int irq, struct irq_desc *desc, struct pt_regs *regs) +handle_edge_irq(unsigned int irq, struct irq_desc *desc) { const unsigned int cpu = smp_processor_id(); @@ -400,9 +458,9 @@ handle_edge_irq(unsigned int irq, struct irq_desc *desc, struct pt_regs *regs) desc->status &= ~IRQ_PENDING; spin_unlock(&desc->lock); - action_ret = handle_IRQ_event(irq, regs, action); + action_ret = handle_IRQ_event(irq, action); if (!noirqdebug) - note_interrupt(irq, desc, action_ret, regs); + note_interrupt(irq, desc, action_ret); spin_lock(&desc->lock); } while ((desc->status & (IRQ_PENDING | IRQ_DISABLED)) == IRQ_PENDING); @@ -417,12 +475,11 @@ out_unlock: * handle_percpu_IRQ - Per CPU local irq handler * @irq: the interrupt number * @desc: the interrupt description structure for this irq - * @regs: pointer to a register structure * * Per CPU interrupts on SMP machines without locking requirements */ void fastcall -handle_percpu_irq(unsigned int irq, struct irq_desc *desc, struct pt_regs *regs) +handle_percpu_irq(unsigned int irq, struct irq_desc *desc) { irqreturn_t action_ret; @@ -431,9 +488,9 @@ handle_percpu_irq(unsigned int irq, struct irq_desc *desc, struct pt_regs *regs) if (desc->chip->ack) desc->chip->ack(irq); - action_ret = handle_IRQ_event(irq, regs, desc->action); + action_ret = handle_IRQ_event(irq, desc->action); if (!noirqdebug) - note_interrupt(irq, desc, action_ret, regs); + note_interrupt(irq, desc, action_ret); if (desc->chip->eoi) desc->chip->eoi(irq); @@ -442,10 +499,8 @@ handle_percpu_irq(unsigned int irq, struct irq_desc *desc, struct pt_regs *regs) #endif /* CONFIG_SMP */ void -__set_irq_handler(unsigned int irq, - void fastcall (*handle)(unsigned int, irq_desc_t *, - struct pt_regs *), - int is_chained) +__set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained, + const char *name) { struct irq_desc *desc; unsigned long flags; @@ -486,6 +541,7 @@ __set_irq_handler(unsigned int irq, desc->depth = 1; } desc->handle_irq = handle; + desc->name = name; if (handle != handle_bad_irq && is_chained) { desc->status &= ~IRQ_DISABLED; @@ -498,36 +554,16 @@ __set_irq_handler(unsigned int irq, void set_irq_chip_and_handler(unsigned int irq, struct irq_chip *chip, - void fastcall (*handle)(unsigned int, - struct irq_desc *, - struct pt_regs *)) + irq_flow_handler_t handle) { set_irq_chip(irq, chip); - __set_irq_handler(irq, handle, 0); + __set_irq_handler(irq, handle, 0, NULL); } -/* - * Get a descriptive string for the highlevel handler, for - * /proc/interrupts output: - */ -const char * -handle_irq_name(void fastcall (*handle)(unsigned int, struct irq_desc *, - struct pt_regs *)) +void +set_irq_chip_and_handler_name(unsigned int irq, struct irq_chip *chip, + irq_flow_handler_t handle, const char *name) { - if (handle == handle_level_irq) - return "level "; - if (handle == handle_fasteoi_irq) - return "fasteoi"; - if (handle == handle_edge_irq) - return "edge "; - if (handle == handle_simple_irq) - return "simple "; -#ifdef CONFIG_SMP - if (handle == handle_percpu_irq) - return "percpu "; -#endif - if (handle == handle_bad_irq) - return "bad "; - - return NULL; + set_irq_chip(irq, chip); + __set_irq_handler(irq, handle, 0, name); } diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index 4c6cdbaed661..42aa6f1a3f0f 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c @@ -27,7 +27,7 @@ * Handles spurious and unhandled IRQ's. It also prints a debugmessage. 
*/ void fastcall -handle_bad_irq(unsigned int irq, struct irq_desc *desc, struct pt_regs *regs) +handle_bad_irq(unsigned int irq, struct irq_desc *desc) { print_irq_desc(irq, desc); kstat_this_cpu.irqs[irq]++; @@ -115,7 +115,7 @@ struct irq_chip dummy_irq_chip = { /* * Special, empty irq handler: */ -irqreturn_t no_action(int cpl, void *dev_id, struct pt_regs *regs) +irqreturn_t no_action(int cpl, void *dev_id) { return IRQ_NONE; } @@ -123,13 +123,11 @@ irqreturn_t no_action(int cpl, void *dev_id, struct pt_regs *regs) /** * handle_IRQ_event - irq action chain handler * @irq: the interrupt number - * @regs: pointer to a register structure * @action: the interrupt action chain for this irq * * Handles the action chain of an irq event */ -irqreturn_t handle_IRQ_event(unsigned int irq, struct pt_regs *regs, - struct irqaction *action) +irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action) { irqreturn_t ret, retval = IRQ_NONE; unsigned int status = 0; @@ -140,7 +138,7 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct pt_regs *regs, local_irq_enable_in_hardirq(); do { - ret = action->handler(irq, action->dev_id, regs); + ret = action->handler(irq, action->dev_id); if (ret == IRQ_HANDLED) status |= action->flags; retval |= ret; @@ -158,7 +156,6 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct pt_regs *regs, /** * __do_IRQ - original all in one highlevel IRQ handler * @irq: the interrupt number - * @regs: pointer to a register structure * * __do_IRQ handles all normal device IRQ's (the special * SMP cross-CPU interrupts have their own specific @@ -167,7 +164,7 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct pt_regs *regs, * This is the original x86 implementation which is used for every * interrupt type. */ -fastcall unsigned int __do_IRQ(unsigned int irq, struct pt_regs *regs) +fastcall unsigned int __do_IRQ(unsigned int irq) { struct irq_desc *desc = irq_desc + irq; struct irqaction *action; @@ -182,7 +179,7 @@ fastcall unsigned int __do_IRQ(unsigned int irq, struct pt_regs *regs) */ if (desc->chip->ack) desc->chip->ack(irq); - action_ret = handle_IRQ_event(irq, regs, desc->action); + action_ret = handle_IRQ_event(irq, desc->action); desc->chip->end(irq); return 1; } @@ -233,11 +230,11 @@ fastcall unsigned int __do_IRQ(unsigned int irq, struct pt_regs *regs) spin_unlock(&desc->lock); - action_ret = handle_IRQ_event(irq, regs, action); + action_ret = handle_IRQ_event(irq, action); spin_lock(&desc->lock); if (!noirqdebug) - note_interrupt(irq, desc, action_ret, regs); + note_interrupt(irq, desc, action_ret); if (likely(!(desc->status & IRQ_PENDING))) break; desc->status &= ~IRQ_PENDING; diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 92be519eff26..6879202afe9a 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -427,8 +427,7 @@ EXPORT_SYMBOL(free_irq); * IRQF_SAMPLE_RANDOM The interrupt can be used for entropy * */ -int request_irq(unsigned int irq, - irqreturn_t (*handler)(int, void *, struct pt_regs *), +int request_irq(unsigned int irq, irq_handler_t handler, unsigned long irqflags, const char *devname, void *dev_id) { struct irqaction *action; @@ -475,4 +474,3 @@ int request_irq(unsigned int irq, return retval; } EXPORT_SYMBOL(request_irq); - diff --git a/kernel/irq/migration.c b/kernel/irq/migration.c index a57ebe9fa6f6..4baa3bbcd25a 100644 --- a/kernel/irq/migration.c +++ b/kernel/irq/migration.c @@ -7,17 +7,17 @@ void set_pending_irq(unsigned int irq, cpumask_t mask) unsigned long flags; spin_lock_irqsave(&desc->lock, 
flags); - desc->move_irq = 1; + desc->status |= IRQ_MOVE_PENDING; irq_desc[irq].pending_mask = mask; spin_unlock_irqrestore(&desc->lock, flags); } -void move_native_irq(int irq) +void move_masked_irq(int irq) { struct irq_desc *desc = irq_desc + irq; cpumask_t tmp; - if (likely(!desc->move_irq)) + if (likely(!(desc->status & IRQ_MOVE_PENDING))) return; /* @@ -28,7 +28,7 @@ void move_native_irq(int irq) return; } - desc->move_irq = 0; + desc->status &= ~IRQ_MOVE_PENDING; if (unlikely(cpus_empty(irq_desc[irq].pending_mask))) return; @@ -48,15 +48,29 @@ void move_native_irq(int irq) * when an active trigger is comming in. This could * cause some ioapics to mal-function. * Being paranoid i guess! + * + * For correct operation this depends on the caller + * masking the irqs. */ if (likely(!cpus_empty(tmp))) { - if (likely(!(desc->status & IRQ_DISABLED))) - desc->chip->disable(irq); - desc->chip->set_affinity(irq,tmp); - - if (likely(!(desc->status & IRQ_DISABLED))) - desc->chip->enable(irq); } cpus_clear(irq_desc[irq].pending_mask); } + +void move_native_irq(int irq) +{ + struct irq_desc *desc = irq_desc + irq; + + if (likely(!(desc->status & IRQ_MOVE_PENDING))) + return; + + if (likely(!(desc->status & IRQ_DISABLED))) + desc->chip->disable(irq); + + move_masked_irq(irq); + + if (likely(!(desc->status & IRQ_DISABLED))) + desc->chip->enable(irq); +} + diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c index 607c7809ad01..9a352667007c 100644 --- a/kernel/irq/proc.c +++ b/kernel/irq/proc.c @@ -57,7 +57,7 @@ static int irq_affinity_write_proc(struct file *file, const char __user *buffer, if (!irq_desc[irq].chip->set_affinity || no_irq_affinity) return -EIO; - err = cpumask_parse(buffer, count, new_value); + err = cpumask_parse_user(buffer, count, new_value); if (err) return err; diff --git a/kernel/irq/resend.c b/kernel/irq/resend.c index 35f10f7ff94a..5bfeaed7e487 100644 --- a/kernel/irq/resend.c +++ b/kernel/irq/resend.c @@ -38,7 +38,7 @@ static void resend_irqs(unsigned long arg) clear_bit(irq, irqs_resend); desc = irq_desc + irq; local_irq_disable(); - desc->handle_irq(irq, desc, NULL); + desc->handle_irq(irq, desc); local_irq_enable(); } } diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c index 417e98092cf2..543ea2e5ad93 100644 --- a/kernel/irq/spurious.c +++ b/kernel/irq/spurious.c @@ -16,7 +16,7 @@ static int irqfixup __read_mostly; /* * Recovery handler for misrouted interrupts. 
*/ -static int misrouted_irq(int irq, struct pt_regs *regs) +static int misrouted_irq(int irq) { int i; int ok = 0; @@ -49,7 +49,7 @@ static int misrouted_irq(int irq, struct pt_regs *regs) while (action) { /* Only shared IRQ handlers are safe to call */ if (action->flags & IRQF_SHARED) { - if (action->handler(i, action->dev_id, regs) == + if (action->handler(i, action->dev_id) == IRQ_HANDLED) ok = 1; } @@ -70,7 +70,7 @@ static int misrouted_irq(int irq, struct pt_regs *regs) */ work = 1; spin_unlock(&desc->lock); - handle_IRQ_event(i, regs, action); + handle_IRQ_event(i, action); spin_lock(&desc->lock); desc->status &= ~IRQ_PENDING; } @@ -136,7 +136,7 @@ report_bad_irq(unsigned int irq, struct irq_desc *desc, irqreturn_t action_ret) } void note_interrupt(unsigned int irq, struct irq_desc *desc, - irqreturn_t action_ret, struct pt_regs *regs) + irqreturn_t action_ret) { if (unlikely(action_ret != IRQ_HANDLED)) { desc->irqs_unhandled++; @@ -147,7 +147,7 @@ void note_interrupt(unsigned int irq, struct irq_desc *desc, if (unlikely(irqfixup)) { /* Don't punish working computers */ if ((irqfixup == 2 && irq == 0) || action_ret == IRQ_NONE) { - int ok = misrouted_irq(irq, regs); + int ok = misrouted_irq(irq); if (action_ret == IRQ_NONE) desc->irqs_unhandled -= ok; } diff --git a/kernel/lockdep.c b/kernel/lockdep.c index 4c0553461000..b739be2a6dc9 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c @@ -575,6 +575,8 @@ static noinline int print_circular_bug_tail(void) return 0; } +#define RECURSION_LIMIT 40 + static int noinline print_infinite_recursion_bug(void) { __raw_spin_unlock(&hash_lock); @@ -595,7 +597,7 @@ check_noncircular(struct lock_class *source, unsigned int depth) debug_atomic_inc(&nr_cyclic_check_recursions); if (depth > max_recursion_depth) max_recursion_depth = depth; - if (depth >= 20) + if (depth >= RECURSION_LIMIT) return print_infinite_recursion_bug(); /* * Check this lock's dependency list: @@ -645,7 +647,7 @@ find_usage_forwards(struct lock_class *source, unsigned int depth) if (depth > max_recursion_depth) max_recursion_depth = depth; - if (depth >= 20) + if (depth >= RECURSION_LIMIT) return print_infinite_recursion_bug(); debug_atomic_inc(&nr_find_usage_forwards_checks); @@ -684,7 +686,7 @@ find_usage_backwards(struct lock_class *source, unsigned int depth) if (depth > max_recursion_depth) max_recursion_depth = depth; - if (depth >= 20) + if (depth >= RECURSION_LIMIT) return print_infinite_recursion_bug(); debug_atomic_inc(&nr_find_usage_backwards_checks); @@ -1114,8 +1116,6 @@ static int count_matching_names(struct lock_class *new_class) return count + 1; } -extern void __error_too_big_MAX_LOCKDEP_SUBCLASSES(void); - /* * Register a lock's class in the hash-table, if the class is not present * yet. Otherwise we look it up. We cache the result in the lock object @@ -1153,8 +1153,7 @@ look_up_lock_class(struct lockdep_map *lock, unsigned int subclass) * (or spin_lock_init()) call - which acts as the key. For static * locks we use the lock object itself as the key. */ - if (sizeof(struct lock_class_key) > sizeof(struct lock_class)) - __error_too_big_MAX_LOCKDEP_SUBCLASSES(); + BUILD_BUG_ON(sizeof(struct lock_class_key) > sizeof(struct lock_class)); key = lock->key->subkeys + subclass; @@ -1177,7 +1176,7 @@ look_up_lock_class(struct lockdep_map *lock, unsigned int subclass) * itself, so actual lookup of the hash should be once per lock object. 
*/ static inline struct lock_class * -register_lock_class(struct lockdep_map *lock, unsigned int subclass) +register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force) { struct lockdep_subclass_key *key; struct list_head *hash_head; @@ -1249,7 +1248,7 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass) out_unlock_set: __raw_spin_unlock(&hash_lock); - if (!subclass) + if (!subclass || force) lock->class_cache = class; DEBUG_LOCKS_WARN_ON(class->subclass != subclass); @@ -1937,7 +1936,7 @@ void trace_softirqs_off(unsigned long ip) * Initialize a lock instance's lock-class mapping info: */ void lockdep_init_map(struct lockdep_map *lock, const char *name, - struct lock_class_key *key) + struct lock_class_key *key, int subclass) { if (unlikely(!debug_locks)) return; @@ -1957,6 +1956,8 @@ void lockdep_init_map(struct lockdep_map *lock, const char *name, lock->name = name; lock->key = key; lock->class_cache = NULL; + if (subclass) + register_lock_class(lock, subclass, 1); } EXPORT_SYMBOL_GPL(lockdep_init_map); @@ -1995,7 +1996,7 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, * Not cached yet or subclass? */ if (unlikely(!class)) { - class = register_lock_class(lock, subclass); + class = register_lock_class(lock, subclass, 0); if (!class) return 0; } diff --git a/kernel/module.c b/kernel/module.c index 7f60e782de1e..67009bd56c52 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -87,6 +87,12 @@ static inline int strong_try_module_get(struct module *mod) return try_module_get(mod); } +static inline void add_taint_module(struct module *mod, unsigned flag) +{ + add_taint(flag); + mod->taints |= flag; +} + /* A thread that wants to hold a reference to a module only while it * is running can call ths to safely exit. * nfsd and lockd use this. @@ -847,12 +853,10 @@ static int check_version(Elf_Shdr *sechdrs, return 0; } /* Not in module's version table. OK, but that taints the kernel. */ - if (!(tainted & TAINT_FORCED_MODULE)) { + if (!(tainted & TAINT_FORCED_MODULE)) printk("%s: no version for \"%s\" found: kernel tainted.\n", mod->name, symname); - add_taint(TAINT_FORCED_MODULE); - mod->taints |= TAINT_FORCED_MODULE; - } + add_taint_module(mod, TAINT_FORCED_MODULE); return 1; } @@ -910,7 +914,8 @@ static unsigned long resolve_symbol(Elf_Shdr *sechdrs, unsigned long ret; const unsigned long *crc; - ret = __find_symbol(name, &owner, &crc, mod->license_gplok); + ret = __find_symbol(name, &owner, &crc, + !(mod->taints & TAINT_PROPRIETARY_MODULE)); if (ret) { /* use_module can fail due to OOM, or module unloading */ if (!check_version(sechdrs, versindex, name, mod, crc) || @@ -1335,12 +1340,11 @@ static void set_license(struct module *mod, const char *license) if (!license) license = "unspecified"; - mod->license_gplok = license_is_gpl_compatible(license); - if (!mod->license_gplok && !(tainted & TAINT_PROPRIETARY_MODULE)) { - printk(KERN_WARNING "%s: module license '%s' taints kernel.\n", - mod->name, license); - add_taint(TAINT_PROPRIETARY_MODULE); - mod->taints |= TAINT_PROPRIETARY_MODULE; + if (!license_is_gpl_compatible(license)) { + if (!(tainted & TAINT_PROPRIETARY_MODULE)) + printk(KERN_WARNING "%s: module license '%s' taints" + "kernel.\n", mod->name, license); + add_taint_module(mod, TAINT_PROPRIETARY_MODULE); } } @@ -1619,8 +1623,7 @@ static struct module *load_module(void __user *umod, modmagic = get_modinfo(sechdrs, infoindex, "vermagic"); /* This is allowed: modprobe --force will invalidate it. 
*/ if (!modmagic) { - add_taint(TAINT_FORCED_MODULE); - mod->taints |= TAINT_FORCED_MODULE; + add_taint_module(mod, TAINT_FORCED_MODULE); printk(KERN_WARNING "%s: no version magic, tainting kernel.\n", mod->name); } else if (!same_magic(modmagic, vermagic)) { @@ -1714,14 +1717,10 @@ static struct module *load_module(void __user *umod, /* Set up license info based on the info section */ set_license(mod, get_modinfo(sechdrs, infoindex, "license")); - if (strcmp(mod->name, "ndiswrapper") == 0) { - add_taint(TAINT_PROPRIETARY_MODULE); - mod->taints |= TAINT_PROPRIETARY_MODULE; - } - if (strcmp(mod->name, "driverloader") == 0) { - add_taint(TAINT_PROPRIETARY_MODULE); - mod->taints |= TAINT_PROPRIETARY_MODULE; - } + if (strcmp(mod->name, "ndiswrapper") == 0) + add_taint_module(mod, TAINT_PROPRIETARY_MODULE); + if (strcmp(mod->name, "driverloader") == 0) + add_taint_module(mod, TAINT_PROPRIETARY_MODULE); /* Set up MODINFO_ATTR fields */ setup_modinfo(mod, sechdrs, infoindex); @@ -1766,8 +1765,7 @@ static struct module *load_module(void __user *umod, (mod->num_unused_gpl_syms && !unusedgplcrcindex)) { printk(KERN_WARNING "%s: No versions for exported symbols." " Tainting kernel.\n", mod->name); - add_taint(TAINT_FORCED_MODULE); - mod->taints |= TAINT_FORCED_MODULE; + add_taint_module(mod, TAINT_FORCED_MODULE); } #endif @@ -2132,9 +2130,33 @@ static void m_stop(struct seq_file *m, void *p) mutex_unlock(&module_mutex); } +static char *taint_flags(unsigned int taints, char *buf) +{ + int bx = 0; + + if (taints) { + buf[bx++] = '('; + if (taints & TAINT_PROPRIETARY_MODULE) + buf[bx++] = 'P'; + if (taints & TAINT_FORCED_MODULE) + buf[bx++] = 'F'; + /* + * TAINT_FORCED_RMMOD: could be added. + * TAINT_UNSAFE_SMP, TAINT_MACHINE_CHECK, TAINT_BAD_PAGE don't + * apply to modules. + */ + buf[bx++] = ')'; + } + buf[bx] = '\0'; + + return buf; +} + static int m_show(struct seq_file *m, void *p) { struct module *mod = list_entry(p, struct module, list); + char buf[8]; + seq_printf(m, "%s %lu", mod->name, mod->init_size + mod->core_size); print_unload_info(m, mod); @@ -2147,6 +2169,10 @@ static int m_show(struct seq_file *m, void *p) /* Used by oprofile and other similar tools. */ seq_printf(m, " 0x%p", mod->module_core); + /* Taints info */ + if (mod->taints) + seq_printf(m, " %s", taint_flags(mod->taints, buf)); + seq_printf(m, "\n"); return 0; } @@ -2235,28 +2261,6 @@ struct module *module_text_address(unsigned long addr) return mod; } -static char *taint_flags(unsigned int taints, char *buf) -{ - *buf = '\0'; - if (taints) { - int bx; - - buf[0] = '('; - bx = 1; - if (taints & TAINT_PROPRIETARY_MODULE) - buf[bx++] = 'P'; - if (taints & TAINT_FORCED_MODULE) - buf[bx++] = 'F'; - /* - * TAINT_FORCED_RMMOD: could be added. - * TAINT_UNSAFE_SMP, TAINT_MACHINE_CHECK, TAINT_BAD_PAGE don't - * apply to modules. - */ - buf[bx] = ')'; - } - return buf; -} - /* Don't grab lock, we're oopsing. 
*/ void print_modules(void) { diff --git a/kernel/mutex-debug.c b/kernel/mutex-debug.c index e3203c654dda..18651641a7b5 100644 --- a/kernel/mutex-debug.c +++ b/kernel/mutex-debug.c @@ -91,7 +91,7 @@ void debug_mutex_init(struct mutex *lock, const char *name, * Make sure we are not reinitializing a held lock: */ debug_check_no_locks_freed((void *)lock, sizeof(*lock)); - lockdep_init_map(&lock->dep_map, name, key); + lockdep_init_map(&lock->dep_map, name, key, 0); #endif lock->owner = NULL; lock->magic = lock; diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c index 6ebdb82a0ce4..674aceb7335a 100644 --- a/kernel/nsproxy.c +++ b/kernel/nsproxy.c @@ -44,11 +44,9 @@ static inline struct nsproxy *clone_namespaces(struct nsproxy *orig) { struct nsproxy *ns; - ns = kmalloc(sizeof(struct nsproxy), GFP_KERNEL); - if (ns) { - memcpy(ns, orig, sizeof(struct nsproxy)); + ns = kmemdup(orig, sizeof(struct nsproxy), GFP_KERNEL); + if (ns) atomic_set(&ns->count, 1); - } return ns; } diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index 479b16b44f79..7c3e1e6dfb5b 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c @@ -88,6 +88,19 @@ static inline union cpu_time_count cpu_time_sub(const clockid_t which_clock, } /* + * Divide and limit the result to res >= 1 + * + * This is necessary to prevent signal delivery starvation, when the result of + * the division would be rounded down to 0. + */ +static inline cputime_t cputime_div_non_zero(cputime_t time, unsigned long div) +{ + cputime_t res = cputime_div(time, div); + + return max_t(cputime_t, res, 1); +} + +/* * Update expiry time from increment, and increase overrun count, * given the current clock sample. */ @@ -483,8 +496,8 @@ static void process_timer_rebalance(struct task_struct *p, BUG(); break; case CPUCLOCK_PROF: - left = cputime_div(cputime_sub(expires.cpu, val.cpu), - nthreads); + left = cputime_div_non_zero(cputime_sub(expires.cpu, val.cpu), + nthreads); do { if (likely(!(t->flags & PF_EXITING))) { ticks = cputime_add(prof_ticks(t), left); @@ -498,8 +511,8 @@ static void process_timer_rebalance(struct task_struct *p, } while (t != p); break; case CPUCLOCK_VIRT: - left = cputime_div(cputime_sub(expires.cpu, val.cpu), - nthreads); + left = cputime_div_non_zero(cputime_sub(expires.cpu, val.cpu), + nthreads); do { if (likely(!(t->flags & PF_EXITING))) { ticks = cputime_add(virt_ticks(t), left); @@ -515,6 +528,7 @@ static void process_timer_rebalance(struct task_struct *p, case CPUCLOCK_SCHED: nsleft = expires.sched - val.sched; do_div(nsleft, nthreads); + nsleft = max_t(unsigned long long, nsleft, 1); do { if (likely(!(t->flags & PF_EXITING))) { ns = t->sched_time + nsleft; @@ -1159,12 +1173,13 @@ static void check_process_timers(struct task_struct *tsk, prof_left = cputime_sub(prof_expires, utime); prof_left = cputime_sub(prof_left, stime); - prof_left = cputime_div(prof_left, nthreads); + prof_left = cputime_div_non_zero(prof_left, nthreads); virt_left = cputime_sub(virt_expires, utime); - virt_left = cputime_div(virt_left, nthreads); + virt_left = cputime_div_non_zero(virt_left, nthreads); if (sched_expires) { sched_left = sched_expires - sched_time; do_div(sched_left, nthreads); + sched_left = max_t(unsigned long long, sched_left, 1); } else { sched_left = 0; } diff --git a/kernel/power/disk.c b/kernel/power/disk.c index d72234942798..d3a158a60312 100644 --- a/kernel/power/disk.c +++ b/kernel/power/disk.c @@ -18,6 +18,7 @@ #include <linux/fs.h> #include <linux/mount.h> #include <linux/pm.h> +#include 
<linux/console.h> #include <linux/cpu.h> #include "power.h" @@ -119,8 +120,10 @@ int pm_suspend_disk(void) if (error) return error; + suspend_console(); error = device_suspend(PMSG_FREEZE); if (error) { + resume_console(); printk("Some devices failed to suspend\n"); unprepare_processes(); return error; @@ -133,6 +136,7 @@ int pm_suspend_disk(void) if (in_suspend) { device_resume(); + resume_console(); pr_debug("PM: writing image.\n"); error = swsusp_write(); if (!error) @@ -148,6 +152,7 @@ int pm_suspend_disk(void) swsusp_free(); Done: device_resume(); + resume_console(); unprepare_processes(); return error; } @@ -212,7 +217,9 @@ static int software_resume(void) pr_debug("PM: Preparing devices for restore.\n"); + suspend_console(); if ((error = device_suspend(PMSG_PRETHAW))) { + resume_console(); printk("Some devices failed to suspend\n"); swsusp_free(); goto Thaw; @@ -224,6 +231,7 @@ static int software_resume(void) swsusp_resume(); pr_debug("PM: Restore failed, recovering.n"); device_resume(); + resume_console(); Thaw: unprepare_processes(); Done: diff --git a/kernel/power/poweroff.c b/kernel/power/poweroff.c index 7a4144ba3afd..f1f900ac3164 100644 --- a/kernel/power/poweroff.c +++ b/kernel/power/poweroff.c @@ -23,8 +23,7 @@ static void do_poweroff(void *dummy) static DECLARE_WORK(poweroff_work, do_poweroff, NULL); -static void handle_poweroff(int key, struct pt_regs *pt_regs, - struct tty_struct *tty) +static void handle_poweroff(int key, struct tty_struct *tty) { schedule_work(&poweroff_work); } diff --git a/kernel/power/swap.c b/kernel/power/swap.c index 9b2ee5344dee..1a3b0dd2c3fc 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c @@ -425,7 +425,8 @@ static int submit(int rw, pgoff_t page_off, struct page *page, bio_set_pages_dirty(bio); bio_put(bio); } else { - get_page(page); + if (rw == READ) + get_page(page); /* These pages are freed later */ bio->bi_private = *bio_chain; *bio_chain = bio; submit_bio(rw | (1 << BIO_RW_SYNC), bio); diff --git a/kernel/power/user.c b/kernel/power/user.c index 72825c853cd7..d991d3b0e5a4 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c @@ -19,6 +19,7 @@ #include <linux/swapops.h> #include <linux/pm.h> #include <linux/fs.h> +#include <linux/console.h> #include <linux/cpu.h> #include <asm/uaccess.h> @@ -145,10 +146,10 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp, error = freeze_processes(); if (error) { thaw_processes(); + enable_nonboot_cpus(); error = -EBUSY; } } - enable_nonboot_cpus(); up(&pm_sem); if (!error) data->frozen = 1; @@ -173,12 +174,14 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp, /* Free memory before shutting down devices. 
*/ error = swsusp_shrink_memory(); if (!error) { + suspend_console(); error = device_suspend(PMSG_FREEZE); if (!error) { in_suspend = 1; error = swsusp_suspend(); device_resume(); } + resume_console(); } up(&pm_sem); if (!error) @@ -196,11 +199,13 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp, snapshot_free_unused_memory(&data->handle); down(&pm_sem); pm_prepare_console(); + suspend_console(); error = device_suspend(PMSG_PRETHAW); if (!error) { error = swsusp_resume(); device_resume(); } + resume_console(); pm_restore_console(); up(&pm_sem); break; @@ -289,6 +294,7 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp, } /* Put devices to sleep */ + suspend_console(); error = device_suspend(PMSG_SUSPEND); if (error) { printk(KERN_ERR "Failed to suspend some devices.\n"); @@ -299,7 +305,7 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp, /* Wake up devices */ device_resume(); } - + resume_console(); if (pm_ops->finish) pm_ops->finish(PM_SUSPEND_MEM); diff --git a/kernel/printk.c b/kernel/printk.c index 771f5e861bcd..f7d427ef5038 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -820,15 +820,8 @@ void release_console_sem(void) console_locked = 0; up(&console_sem); spin_unlock_irqrestore(&logbuf_lock, flags); - if (wake_klogd && !oops_in_progress && waitqueue_active(&log_wait)) { - /* - * If we printk from within the lock dependency code, - * from within the scheduler code, then do not lock - * up due to self-recursion: - */ - if (!lockdep_internal()) - wake_up_interruptible(&log_wait); - } + if (wake_klogd && !oops_in_progress && waitqueue_active(&log_wait)) + wake_up_interruptible(&log_wait); } EXPORT_SYMBOL(release_console_sem); diff --git a/kernel/profile.c b/kernel/profile.c index fb660c7d35ba..f940b462eec9 100644 --- a/kernel/profile.c +++ b/kernel/profile.c @@ -25,6 +25,7 @@ #include <linux/mutex.h> #include <asm/sections.h> #include <asm/semaphore.h> +#include <asm/irq_regs.h> struct profile_hit { u32 pc, hits; @@ -366,8 +367,10 @@ void profile_hit(int type, void *__pc) } #endif /* !CONFIG_SMP */ -void profile_tick(int type, struct pt_regs *regs) +void profile_tick(int type) { + struct pt_regs *regs = get_irq_regs(); + if (type == CPU_PROFILING && timer_hook) timer_hook(regs); if (!user_mode(regs) && cpu_isset(smp_processor_id(), prof_cpu_mask)) @@ -396,7 +399,7 @@ static int prof_cpu_mask_write_proc (struct file *file, const char __user *buffe unsigned long full_count = count, err; cpumask_t new_value; - err = cpumask_parse(buffer, count, new_value); + err = cpumask_parse_user(buffer, count, new_value); if (err) return err; diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c index 523e46483b99..26bb5ffe1ef1 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcupdate.c @@ -71,9 +71,6 @@ static DEFINE_PER_CPU(struct tasklet_struct, rcu_tasklet) = {NULL}; static int blimit = 10; static int qhimark = 10000; static int qlowmark = 100; -#ifdef CONFIG_SMP -static int rsinterval = 1000; -#endif static atomic_t rcu_barrier_cpu_count; static DEFINE_MUTEX(rcu_barrier_mutex); @@ -86,8 +83,8 @@ static void force_quiescent_state(struct rcu_data *rdp, int cpu; cpumask_t cpumask; set_need_resched(); - if (unlikely(rdp->qlen - rdp->last_rs_qlen > rsinterval)) { - rdp->last_rs_qlen = rdp->qlen; + if (unlikely(!rcp->signaled)) { + rcp->signaled = 1; /* * Don't send IPI to itself. With irqs disabled, * rdp->cpu is the current cpu. 
@@ -301,6 +298,7 @@ static void rcu_start_batch(struct rcu_ctrlblk *rcp) smp_mb(); cpus_andnot(rcp->cpumask, cpu_online_map, nohz_cpu_mask); + rcp->signaled = 0; } } @@ -628,9 +626,6 @@ void synchronize_rcu(void) module_param(blimit, int, 0); module_param(qhimark, int, 0); module_param(qlowmark, int, 0); -#ifdef CONFIG_SMP -module_param(rsinterval, int, 0); -#endif EXPORT_SYMBOL_GPL(rcu_batches_completed); EXPORT_SYMBOL_GPL(rcu_batches_completed_bh); EXPORT_SYMBOL_GPL(call_rcu); diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c index 23446e91cded..e2bda18f6f42 100644 --- a/kernel/rcutorture.c +++ b/kernel/rcutorture.c @@ -15,9 +15,10 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * - * Copyright (C) IBM Corporation, 2005 + * Copyright (C) IBM Corporation, 2005, 2006 * * Authors: Paul E. McKenney <paulmck@us.ibm.com> + * Josh Triplett <josh@freedesktop.org> * * See also: Documentation/RCU/torture.txt */ @@ -44,19 +45,25 @@ #include <linux/delay.h> #include <linux/byteorder/swabb.h> #include <linux/stat.h> +#include <linux/srcu.h> MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Paul E. McKenney <paulmck@us.ibm.com> and " + "Josh Triplett <josh@freedesktop.org>"); static int nreaders = -1; /* # reader threads, defaults to 2*ncpus */ +static int nfakewriters = 4; /* # fake writer threads */ static int stat_interval; /* Interval between stats, in seconds. */ /* Defaults to "only at end of test". */ static int verbose; /* Print more debug info. */ static int test_no_idle_hz; /* Test RCU's support for tickless idle CPUs. */ static int shuffle_interval = 5; /* Interval between shuffles (in sec)*/ -static char *torture_type = "rcu"; /* What to torture. */ +static char *torture_type = "rcu"; /* What RCU implementation to torture. 
*/ module_param(nreaders, int, 0); MODULE_PARM_DESC(nreaders, "Number of RCU reader threads"); +module_param(nfakewriters, int, 0); +MODULE_PARM_DESC(nfakewriters, "Number of RCU fake writer threads"); module_param(stat_interval, int, 0); MODULE_PARM_DESC(stat_interval, "Number of seconds between stats printk()s"); module_param(verbose, bool, 0); @@ -66,7 +73,7 @@ MODULE_PARM_DESC(test_no_idle_hz, "Test support for tickless idle CPUs"); module_param(shuffle_interval, int, 0); MODULE_PARM_DESC(shuffle_interval, "Number of seconds between shuffles"); module_param(torture_type, charp, 0); -MODULE_PARM_DESC(torture_type, "Type of RCU to torture (rcu, rcu_bh)"); +MODULE_PARM_DESC(torture_type, "Type of RCU to torture (rcu, rcu_bh, srcu)"); #define TORTURE_FLAG "-torture:" #define PRINTK_STRING(s) \ @@ -80,6 +87,7 @@ static char printk_buf[4096]; static int nrealreaders; static struct task_struct *writer_task; +static struct task_struct **fakewriter_tasks; static struct task_struct **reader_tasks; static struct task_struct *stats_task; static struct task_struct *shuffler_task; @@ -104,11 +112,12 @@ static DEFINE_PER_CPU(long [RCU_TORTURE_PIPE_LEN + 1], rcu_torture_count) = static DEFINE_PER_CPU(long [RCU_TORTURE_PIPE_LEN + 1], rcu_torture_batch) = { 0 }; static atomic_t rcu_torture_wcount[RCU_TORTURE_PIPE_LEN + 1]; -atomic_t n_rcu_torture_alloc; -atomic_t n_rcu_torture_alloc_fail; -atomic_t n_rcu_torture_free; -atomic_t n_rcu_torture_mberror; -atomic_t n_rcu_torture_error; +static atomic_t n_rcu_torture_alloc; +static atomic_t n_rcu_torture_alloc_fail; +static atomic_t n_rcu_torture_free; +static atomic_t n_rcu_torture_mberror; +static atomic_t n_rcu_torture_error; +static struct list_head rcu_torture_removed; /* * Allocate an element from the rcu_tortures pool. @@ -145,7 +154,7 @@ rcu_torture_free(struct rcu_torture *p) struct rcu_random_state { unsigned long rrs_state; - unsigned long rrs_count; + long rrs_count; }; #define RCU_RANDOM_MULT 39916801 /* prime */ @@ -158,7 +167,7 @@ struct rcu_random_state { * Crude but fast random-number generator. Uses a linear congruential * generator, with occasional help from get_random_bytes(). */ -static long +static unsigned long rcu_random(struct rcu_random_state *rrsp) { long refresh; @@ -180,9 +189,11 @@ struct rcu_torture_ops { void (*init)(void); void (*cleanup)(void); int (*readlock)(void); + void (*readdelay)(struct rcu_random_state *rrsp); void (*readunlock)(int idx); int (*completed)(void); void (*deferredfree)(struct rcu_torture *p); + void (*sync)(void); int (*stats)(char *page); char *name; }; @@ -198,6 +209,18 @@ static int rcu_torture_read_lock(void) __acquires(RCU) return 0; } +static void rcu_read_delay(struct rcu_random_state *rrsp) +{ + long delay; + const long longdelay = 200; + + /* We want there to be long-running readers, but not all the time. 
*/ + + delay = rcu_random(rrsp) % (nrealreaders * 2 * longdelay); + if (!delay) + udelay(longdelay); +} + static void rcu_torture_read_unlock(int idx) __releases(RCU) { rcu_read_unlock(); @@ -239,13 +262,54 @@ static struct rcu_torture_ops rcu_ops = { .init = NULL, .cleanup = NULL, .readlock = rcu_torture_read_lock, + .readdelay = rcu_read_delay, .readunlock = rcu_torture_read_unlock, .completed = rcu_torture_completed, .deferredfree = rcu_torture_deferred_free, + .sync = synchronize_rcu, .stats = NULL, .name = "rcu" }; +static void rcu_sync_torture_deferred_free(struct rcu_torture *p) +{ + int i; + struct rcu_torture *rp; + struct rcu_torture *rp1; + + cur_ops->sync(); + list_add(&p->rtort_free, &rcu_torture_removed); + list_for_each_entry_safe(rp, rp1, &rcu_torture_removed, rtort_free) { + i = rp->rtort_pipe_count; + if (i > RCU_TORTURE_PIPE_LEN) + i = RCU_TORTURE_PIPE_LEN; + atomic_inc(&rcu_torture_wcount[i]); + if (++rp->rtort_pipe_count >= RCU_TORTURE_PIPE_LEN) { + rp->rtort_mbtest = 0; + list_del(&rp->rtort_free); + rcu_torture_free(rp); + } + } +} + +static void rcu_sync_torture_init(void) +{ + INIT_LIST_HEAD(&rcu_torture_removed); +} + +static struct rcu_torture_ops rcu_sync_ops = { + .init = rcu_sync_torture_init, + .cleanup = NULL, + .readlock = rcu_torture_read_lock, + .readdelay = rcu_read_delay, + .readunlock = rcu_torture_read_unlock, + .completed = rcu_torture_completed, + .deferredfree = rcu_sync_torture_deferred_free, + .sync = synchronize_rcu, + .stats = NULL, + .name = "rcu_sync" +}; + /* * Definitions for rcu_bh torture testing. */ @@ -271,19 +335,176 @@ static void rcu_bh_torture_deferred_free(struct rcu_torture *p) call_rcu_bh(&p->rtort_rcu, rcu_torture_cb); } +struct rcu_bh_torture_synchronize { + struct rcu_head head; + struct completion completion; +}; + +static void rcu_bh_torture_wakeme_after_cb(struct rcu_head *head) +{ + struct rcu_bh_torture_synchronize *rcu; + + rcu = container_of(head, struct rcu_bh_torture_synchronize, head); + complete(&rcu->completion); +} + +static void rcu_bh_torture_synchronize(void) +{ + struct rcu_bh_torture_synchronize rcu; + + init_completion(&rcu.completion); + call_rcu_bh(&rcu.head, rcu_bh_torture_wakeme_after_cb); + wait_for_completion(&rcu.completion); +} + static struct rcu_torture_ops rcu_bh_ops = { .init = NULL, .cleanup = NULL, .readlock = rcu_bh_torture_read_lock, + .readdelay = rcu_read_delay, /* just reuse rcu's version. */ .readunlock = rcu_bh_torture_read_unlock, .completed = rcu_bh_torture_completed, .deferredfree = rcu_bh_torture_deferred_free, + .sync = rcu_bh_torture_synchronize, .stats = NULL, .name = "rcu_bh" }; +static struct rcu_torture_ops rcu_bh_sync_ops = { + .init = rcu_sync_torture_init, + .cleanup = NULL, + .readlock = rcu_bh_torture_read_lock, + .readdelay = rcu_read_delay, /* just reuse rcu's version. */ + .readunlock = rcu_bh_torture_read_unlock, + .completed = rcu_bh_torture_completed, + .deferredfree = rcu_sync_torture_deferred_free, + .sync = rcu_bh_torture_synchronize, + .stats = NULL, + .name = "rcu_bh_sync" +}; + +/* + * Definitions for srcu torture testing. 
+ */ + +static struct srcu_struct srcu_ctl; + +static void srcu_torture_init(void) +{ + init_srcu_struct(&srcu_ctl); + rcu_sync_torture_init(); +} + +static void srcu_torture_cleanup(void) +{ + synchronize_srcu(&srcu_ctl); + cleanup_srcu_struct(&srcu_ctl); +} + +static int srcu_torture_read_lock(void) +{ + return srcu_read_lock(&srcu_ctl); +} + +static void srcu_read_delay(struct rcu_random_state *rrsp) +{ + long delay; + const long uspertick = 1000000 / HZ; + const long longdelay = 10; + + /* We want there to be long-running readers, but not all the time. */ + + delay = rcu_random(rrsp) % (nrealreaders * 2 * longdelay * uspertick); + if (!delay) + schedule_timeout_interruptible(longdelay); +} + +static void srcu_torture_read_unlock(int idx) +{ + srcu_read_unlock(&srcu_ctl, idx); +} + +static int srcu_torture_completed(void) +{ + return srcu_batches_completed(&srcu_ctl); +} + +static void srcu_torture_synchronize(void) +{ + synchronize_srcu(&srcu_ctl); +} + +static int srcu_torture_stats(char *page) +{ + int cnt = 0; + int cpu; + int idx = srcu_ctl.completed & 0x1; + + cnt += sprintf(&page[cnt], "%s%s per-CPU(idx=%d):", + torture_type, TORTURE_FLAG, idx); + for_each_possible_cpu(cpu) { + cnt += sprintf(&page[cnt], " %d(%d,%d)", cpu, + per_cpu_ptr(srcu_ctl.per_cpu_ref, cpu)->c[!idx], + per_cpu_ptr(srcu_ctl.per_cpu_ref, cpu)->c[idx]); + } + cnt += sprintf(&page[cnt], "\n"); + return cnt; +} + +static struct rcu_torture_ops srcu_ops = { + .init = srcu_torture_init, + .cleanup = srcu_torture_cleanup, + .readlock = srcu_torture_read_lock, + .readdelay = srcu_read_delay, + .readunlock = srcu_torture_read_unlock, + .completed = srcu_torture_completed, + .deferredfree = rcu_sync_torture_deferred_free, + .sync = srcu_torture_synchronize, + .stats = srcu_torture_stats, + .name = "srcu" +}; + +/* + * Definitions for sched torture testing. + */ + +static int sched_torture_read_lock(void) +{ + preempt_disable(); + return 0; +} + +static void sched_torture_read_unlock(int idx) +{ + preempt_enable(); +} + +static int sched_torture_completed(void) +{ + return 0; +} + +static void sched_torture_synchronize(void) +{ + synchronize_sched(); +} + +static struct rcu_torture_ops sched_ops = { + .init = rcu_sync_torture_init, + .cleanup = NULL, + .readlock = sched_torture_read_lock, + .readdelay = rcu_read_delay, /* just reuse rcu's version. */ + .readunlock = sched_torture_read_unlock, + .completed = sched_torture_completed, + .deferredfree = rcu_sync_torture_deferred_free, + .sync = sched_torture_synchronize, + .stats = NULL, + .name = "sched" +}; + static struct rcu_torture_ops *torture_ops[] = - { &rcu_ops, &rcu_bh_ops, NULL }; + { &rcu_ops, &rcu_sync_ops, &rcu_bh_ops, &rcu_bh_sync_ops, &srcu_ops, + &sched_ops, NULL }; /* * RCU torture writer kthread. Repeatedly substitutes a new structure @@ -330,6 +551,30 @@ rcu_torture_writer(void *arg) } /* + * RCU torture fake writer kthread. Repeatedly calls sync, with a random + * delay between calls. + */ +static int +rcu_torture_fakewriter(void *arg) +{ + DEFINE_RCU_RANDOM(rand); + + VERBOSE_PRINTK_STRING("rcu_torture_fakewriter task started"); + set_user_nice(current, 19); + + do { + schedule_timeout_uninterruptible(1 + rcu_random(&rand)%10); + udelay(rcu_random(&rand) & 0x3ff); + cur_ops->sync(); + } while (!kthread_should_stop() && !fullstop); + + VERBOSE_PRINTK_STRING("rcu_torture_fakewriter task stopping"); + while (!kthread_should_stop()) + schedule_timeout_uninterruptible(1); + return 0; +} + +/* * RCU torture reader kthread. 
Repeatedly dereferences rcu_torture_current, * incrementing the corresponding element of the pipeline array. The * counter in the element should never be greater than 1, otherwise, the @@ -359,7 +604,7 @@ rcu_torture_reader(void *arg) } if (p->rtort_mbtest == 0) atomic_inc(&n_rcu_torture_mberror); - udelay(rcu_random(&rand) & 0x7f); + cur_ops->readdelay(&rand); preempt_disable(); pipe_count = p->rtort_pipe_count; if (pipe_count > RCU_TORTURE_PIPE_LEN) { @@ -483,7 +728,7 @@ static int rcu_idle_cpu; /* Force all torture tasks off this CPU */ /* Shuffle tasks such that we allow @rcu_idle_cpu to become idle. A special case * is when @rcu_idle_cpu = -1, when we allow the tasks to run on all CPUs. */ -void rcu_torture_shuffle_tasks(void) +static void rcu_torture_shuffle_tasks(void) { cpumask_t tmp_mask = CPU_MASK_ALL; int i; @@ -507,6 +752,12 @@ void rcu_torture_shuffle_tasks(void) set_cpus_allowed(reader_tasks[i], tmp_mask); } + if (fakewriter_tasks != NULL) { + for (i = 0; i < nfakewriters; i++) + if (fakewriter_tasks[i]) + set_cpus_allowed(fakewriter_tasks[i], tmp_mask); + } + if (writer_task) set_cpus_allowed(writer_task, tmp_mask); @@ -540,11 +791,12 @@ rcu_torture_shuffle(void *arg) static inline void rcu_torture_print_module_parms(char *tag) { - printk(KERN_ALERT "%s" TORTURE_FLAG "--- %s: nreaders=%d " + printk(KERN_ALERT "%s" TORTURE_FLAG + "--- %s: nreaders=%d nfakewriters=%d " "stat_interval=%d verbose=%d test_no_idle_hz=%d " "shuffle_interval = %d\n", - torture_type, tag, nrealreaders, stat_interval, verbose, - test_no_idle_hz, shuffle_interval); + torture_type, tag, nrealreaders, nfakewriters, + stat_interval, verbose, test_no_idle_hz, shuffle_interval); } static void @@ -579,6 +831,19 @@ rcu_torture_cleanup(void) } rcu_torture_current = NULL; + if (fakewriter_tasks != NULL) { + for (i = 0; i < nfakewriters; i++) { + if (fakewriter_tasks[i] != NULL) { + VERBOSE_PRINTK_STRING( + "Stopping rcu_torture_fakewriter task"); + kthread_stop(fakewriter_tasks[i]); + } + fakewriter_tasks[i] = NULL; + } + kfree(fakewriter_tasks); + fakewriter_tasks = NULL; + } + if (stats_task != NULL) { VERBOSE_PRINTK_STRING("Stopping rcu_torture_stats task"); kthread_stop(stats_task); @@ -666,7 +931,25 @@ rcu_torture_init(void) writer_task = NULL; goto unwind; } - reader_tasks = kmalloc(nrealreaders * sizeof(reader_tasks[0]), + fakewriter_tasks = kzalloc(nfakewriters * sizeof(fakewriter_tasks[0]), + GFP_KERNEL); + if (fakewriter_tasks == NULL) { + VERBOSE_PRINTK_ERRSTRING("out of memory"); + firsterr = -ENOMEM; + goto unwind; + } + for (i = 0; i < nfakewriters; i++) { + VERBOSE_PRINTK_STRING("Creating rcu_torture_fakewriter task"); + fakewriter_tasks[i] = kthread_run(rcu_torture_fakewriter, NULL, + "rcu_torture_fakewriter"); + if (IS_ERR(fakewriter_tasks[i])) { + firsterr = PTR_ERR(fakewriter_tasks[i]); + VERBOSE_PRINTK_ERRSTRING("Failed to create fakewriter"); + fakewriter_tasks[i] = NULL; + goto unwind; + } + } + reader_tasks = kzalloc(nrealreaders * sizeof(reader_tasks[0]), GFP_KERNEL); if (reader_tasks == NULL) { VERBOSE_PRINTK_ERRSTRING("out of memory"); diff --git a/kernel/relay.c b/kernel/relay.c index 1d63ecddfa70..f04bbdb56ac2 100644 --- a/kernel/relay.c +++ b/kernel/relay.c @@ -887,7 +887,7 @@ static int subbuf_read_actor(size_t read_start, from = buf->start + read_start; ret = avail; - if (copy_to_user(desc->arg.data, from, avail)) { + if (copy_to_user(desc->arg.buf, from, avail)) { desc->error = -EFAULT; ret = 0; } @@ -946,24 +946,17 @@ typedef int (*subbuf_actor_t) (size_t read_start, */ static 
inline ssize_t relay_file_read_subbufs(struct file *filp, loff_t *ppos, - size_t count, subbuf_actor_t subbuf_actor, read_actor_t actor, - void *target) + read_descriptor_t *desc) { struct rchan_buf *buf = filp->private_data; size_t read_start, avail; - read_descriptor_t desc; int ret; - if (!count) + if (!desc->count) return 0; - desc.written = 0; - desc.count = count; - desc.arg.data = target; - desc.error = 0; - mutex_lock(&filp->f_dentry->d_inode->i_mutex); do { if (!relay_file_read_avail(buf, *ppos)) @@ -974,19 +967,19 @@ static inline ssize_t relay_file_read_subbufs(struct file *filp, if (!avail) break; - avail = min(desc.count, avail); - ret = subbuf_actor(read_start, buf, avail, &desc, actor); - if (desc.error < 0) + avail = min(desc->count, avail); + ret = subbuf_actor(read_start, buf, avail, desc, actor); + if (desc->error < 0) break; if (ret) { relay_file_read_consume(buf, read_start, ret); *ppos = relay_file_read_end_pos(buf, read_start, ret); } - } while (desc.count && ret); + } while (desc->count && ret); mutex_unlock(&filp->f_dentry->d_inode->i_mutex); - return desc.written; + return desc->written; } static ssize_t relay_file_read(struct file *filp, @@ -994,8 +987,13 @@ static ssize_t relay_file_read(struct file *filp, size_t count, loff_t *ppos) { - return relay_file_read_subbufs(filp, ppos, count, subbuf_read_actor, - NULL, buffer); + read_descriptor_t desc; + desc.written = 0; + desc.count = count; + desc.arg.buf = buffer; + desc.error = 0; + return relay_file_read_subbufs(filp, ppos, subbuf_read_actor, + NULL, &desc); } static ssize_t relay_file_sendfile(struct file *filp, @@ -1004,8 +1002,13 @@ static ssize_t relay_file_sendfile(struct file *filp, read_actor_t actor, void *target) { - return relay_file_read_subbufs(filp, ppos, count, subbuf_send_actor, - actor, target); + read_descriptor_t desc; + desc.written = 0; + desc.count = count; + desc.arg.data = target; + desc.error = 0; + return relay_file_read_subbufs(filp, ppos, subbuf_send_actor, + actor, &desc); } struct file_operations relay_file_operations = { diff --git a/kernel/rtmutex-debug.c b/kernel/rtmutex-debug.c index 0c1faa950af7..da8d6bf46457 100644 --- a/kernel/rtmutex-debug.c +++ b/kernel/rtmutex-debug.c @@ -16,7 +16,6 @@ * * See rt.c in preempt-rt for proper credits and further information */ -#include <linux/config.h> #include <linux/sched.h> #include <linux/delay.h> #include <linux/module.h> diff --git a/kernel/rtmutex-tester.c b/kernel/rtmutex-tester.c index 948bd8f643e2..6dcea9dd8c94 100644 --- a/kernel/rtmutex-tester.c +++ b/kernel/rtmutex-tester.c @@ -6,7 +6,6 @@ * Copyright (C) 2006, Timesys Corp., Thomas Gleixner <tglx@timesys.com> * */ -#include <linux/config.h> #include <linux/kthread.h> #include <linux/module.h> #include <linux/sched.h> diff --git a/kernel/sched.c b/kernel/sched.c index 53608a59d6e3..3399701c680e 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -160,15 +160,6 @@ #define TASK_PREEMPTS_CURR(p, rq) \ ((p)->prio < (rq)->curr->prio) -/* - * task_timeslice() scales user-nice values [ -20 ... 0 ... 19 ] - * to time slice values: [800ms ... 100ms ... 5ms] - * - * The higher a thread's priority, the bigger timeslices - * it gets during one round of execution. But even the lowest - * priority thread gets MIN_TIMESLICE worth of execution time. 
- */ - #define SCALE_PRIO(x, prio) \ max(x * (MAX_PRIO - prio) / (MAX_USER_PRIO / 2), MIN_TIMESLICE) @@ -180,6 +171,15 @@ static unsigned int static_prio_timeslice(int static_prio) return SCALE_PRIO(DEF_TIMESLICE, static_prio); } +/* + * task_timeslice() scales user-nice values [ -20 ... 0 ... 19 ] + * to time slice values: [800ms ... 100ms ... 5ms] + * + * The higher a thread's priority, the bigger timeslices + * it gets during one round of execution. But even the lowest + * priority thread gets MIN_TIMESLICE worth of execution time. + */ + static inline unsigned int task_timeslice(struct task_struct *p) { return static_prio_timeslice(p->static_prio); @@ -1822,14 +1822,14 @@ context_switch(struct rq *rq, struct task_struct *prev, struct mm_struct *mm = next->mm; struct mm_struct *oldmm = prev->active_mm; - if (unlikely(!mm)) { + if (!mm) { next->active_mm = oldmm; atomic_inc(&oldmm->mm_count); enter_lazy_tlb(oldmm, next); } else switch_mm(oldmm, mm, next); - if (unlikely(!prev->mm)) { + if (!prev->mm) { prev->active_mm = NULL; WARN_ON(rq->prev_mm); rq->prev_mm = oldmm; @@ -3491,7 +3491,7 @@ asmlinkage void __sched preempt_schedule(void) * If there is a non-zero preempt_count or interrupts are disabled, * we do not want to preempt the current task. Just return.. */ - if (unlikely(ti->preempt_count || irqs_disabled())) + if (likely(ti->preempt_count || irqs_disabled())) return; need_resched: diff --git a/kernel/srcu.c b/kernel/srcu.c new file mode 100644 index 000000000000..3507cabe963b --- /dev/null +++ b/kernel/srcu.c @@ -0,0 +1,258 @@ +/* + * Sleepable Read-Copy Update mechanism for mutual exclusion. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) IBM Corporation, 2006 + * + * Author: Paul McKenney <paulmck@us.ibm.com> + * + * For detailed explanation of Read-Copy Update mechanism see - + * Documentation/RCU/ *.txt + * + */ + +#include <linux/module.h> +#include <linux/mutex.h> +#include <linux/percpu.h> +#include <linux/preempt.h> +#include <linux/rcupdate.h> +#include <linux/sched.h> +#include <linux/slab.h> +#include <linux/smp.h> +#include <linux/srcu.h> + +/** + * init_srcu_struct - initialize a sleep-RCU structure + * @sp: structure to initialize. + * + * Must invoke this on a given srcu_struct before passing that srcu_struct + * to any other function. Each srcu_struct represents a separate domain + * of SRCU protection. + */ +int init_srcu_struct(struct srcu_struct *sp) +{ + sp->completed = 0; + mutex_init(&sp->mutex); + sp->per_cpu_ref = alloc_percpu(struct srcu_struct_array); + return (sp->per_cpu_ref ? 0 : -ENOMEM); +} + +/* + * srcu_readers_active_idx -- returns approximate number of readers + * active on the specified rank of per-CPU counters. 
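Returning briefly to the sched.c hunk above: the relocated task_timeslice() comment promises a nice-to-timeslice mapping of [800ms ... 100ms ... 5ms]. A small userspace illustration of that arithmetic, using constants defined elsewhere in sched.c and not visible in this hunk (MAX_RT_PRIO = 100, MAX_PRIO = 140, MAX_USER_PRIO = 40, and DEF_TIMESLICE * 4 for negative nice values; HZ is assumed to be 1000 so the results come out in milliseconds):

#include <stdio.h>

#define MAX_RT_PRIO     100
#define MAX_PRIO        (MAX_RT_PRIO + 40)
#define MAX_USER_PRIO   40
#define MIN_TIMESLICE   5                       /* ms, assuming HZ == 1000 */
#define DEF_TIMESLICE   100                     /* ms, assuming HZ == 1000 */
#define NICE_TO_PRIO(n) (MAX_RT_PRIO + (n) + 20)

static int scale_prio(int x, int prio)          /* mirrors SCALE_PRIO() above */
{
        int slice = x * (MAX_PRIO - prio) / (MAX_USER_PRIO / 2);

        return slice > MIN_TIMESLICE ? slice : MIN_TIMESLICE;
}

static int timeslice(int nice)                  /* mirrors static_prio_timeslice() */
{
        int prio = NICE_TO_PRIO(nice);
        int base = prio < NICE_TO_PRIO(0) ? DEF_TIMESLICE * 4 : DEF_TIMESLICE;

        return scale_prio(base, prio);
}

int main(void)
{
        printf("nice -20 -> %d ms\n", timeslice(-20));  /* 800 */
        printf("nice   0 -> %d ms\n", timeslice(0));    /* 100 */
        printf("nice  19 -> %d ms\n", timeslice(19));   /*   5 */
        return 0;
}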
+ */ + +static int srcu_readers_active_idx(struct srcu_struct *sp, int idx) +{ + int cpu; + int sum; + + sum = 0; + for_each_possible_cpu(cpu) + sum += per_cpu_ptr(sp->per_cpu_ref, cpu)->c[idx]; + return sum; +} + +/** + * srcu_readers_active - returns approximate number of readers. + * @sp: which srcu_struct to count active readers (holding srcu_read_lock). + * + * Note that this is not an atomic primitive, and can therefore suffer + * severe errors when invoked on an active srcu_struct. That said, it + * can be useful as an error check at cleanup time. + */ +int srcu_readers_active(struct srcu_struct *sp) +{ + return srcu_readers_active_idx(sp, 0) + srcu_readers_active_idx(sp, 1); +} + +/** + * cleanup_srcu_struct - deconstruct a sleep-RCU structure + * @sp: structure to clean up. + * + * Must invoke this after you are finished using a given srcu_struct that + * was initialized via init_srcu_struct(), else you leak memory. + */ +void cleanup_srcu_struct(struct srcu_struct *sp) +{ + int sum; + + sum = srcu_readers_active(sp); + WARN_ON(sum); /* Leakage unless caller handles error. */ + if (sum != 0) + return; + free_percpu(sp->per_cpu_ref); + sp->per_cpu_ref = NULL; +} + +/** + * srcu_read_lock - register a new reader for an SRCU-protected structure. + * @sp: srcu_struct in which to register the new reader. + * + * Counts the new reader in the appropriate per-CPU element of the + * srcu_struct. Must be called from process context. + * Returns an index that must be passed to the matching srcu_read_unlock(). + */ +int srcu_read_lock(struct srcu_struct *sp) +{ + int idx; + + preempt_disable(); + idx = sp->completed & 0x1; + barrier(); /* ensure compiler looks -once- at sp->completed. */ + per_cpu_ptr(sp->per_cpu_ref, smp_processor_id())->c[idx]++; + srcu_barrier(); /* ensure compiler won't misorder critical section. */ + preempt_enable(); + return idx; +} + +/** + * srcu_read_unlock - unregister an old reader from an SRCU-protected structure. + * @sp: srcu_struct in which to unregister the old reader. + * @idx: return value from corresponding srcu_read_lock(). + * + * Removes the count for the old reader from the appropriate per-CPU + * element of the srcu_struct. Note that this may well be a different + * CPU than that which was incremented by the corresponding srcu_read_lock(). + * Must be called from process context. + */ +void srcu_read_unlock(struct srcu_struct *sp, int idx) +{ + preempt_disable(); + srcu_barrier(); /* ensure compiler won't misorder critical section. */ + per_cpu_ptr(sp->per_cpu_ref, smp_processor_id())->c[idx]--; + preempt_enable(); +} + +/** + * synchronize_srcu - wait for prior SRCU read-side critical-section completion + * @sp: srcu_struct with which to synchronize. + * + * Flip the completed counter, and wait for the old count to drain to zero. + * As with classic RCU, the updater must use some separate means of + * synchronizing concurrent updates. Can block; must be called from + * process context. + * + * Note that it is illegal to call synchronize_srcu() from the corresponding + * SRCU read-side critical section; doing so will result in deadlock. + * However, it is perfectly legal to call synchronize_srcu() on one + * srcu_struct from some other srcu_struct's read-side critical section. + */ +void synchronize_srcu(struct srcu_struct *sp) +{ + int idx; + + idx = sp->completed; + mutex_lock(&sp->mutex); + + /* + * Check to see if someone else did the work for us while we were + * waiting to acquire the lock. We need -two- advances of + * the counter, not just one. 
If there was but one, we might have + * shown up -after- our helper's first synchronize_sched(), thus + * having failed to prevent CPU-reordering races with concurrent + * srcu_read_unlock()s on other CPUs (see comment below). So we + * either (1) wait for two or (2) supply the second ourselves. + */ + + if ((sp->completed - idx) >= 2) { + mutex_unlock(&sp->mutex); + return; + } + + synchronize_sched(); /* Force memory barrier on all CPUs. */ + + /* + * The preceding synchronize_sched() ensures that any CPU that + * sees the new value of sp->completed will also see any preceding + * changes to data structures made by this CPU. This prevents + * some other CPU from reordering the accesses in its SRCU + * read-side critical section to precede the corresponding + * srcu_read_lock() -- ensuring that such references will in + * fact be protected. + * + * So it is now safe to do the flip. + */ + + idx = sp->completed & 0x1; + sp->completed++; + + synchronize_sched(); /* Force memory barrier on all CPUs. */ + + /* + * At this point, because of the preceding synchronize_sched(), + * all srcu_read_lock() calls using the old counters have completed. + * Their corresponding critical sections might well be still + * executing, but the srcu_read_lock() primitives themselves + * will have finished executing. + */ + + while (srcu_readers_active_idx(sp, idx)) + schedule_timeout_interruptible(1); + + synchronize_sched(); /* Force memory barrier on all CPUs. */ + + /* + * The preceding synchronize_sched() forces all srcu_read_unlock() + * primitives that were executing concurrently with the preceding + * for_each_possible_cpu() loop to have completed by this point. + * More importantly, it also forces the corresponding SRCU read-side + * critical sections to have also completed, and the corresponding + * references to SRCU-protected data items to be dropped. + * + * Note: + * + * Despite what you might think at first glance, the + * preceding synchronize_sched() -must- be within the + * critical section ended by the following mutex_unlock(). + * Otherwise, a task taking the early exit can race + * with a srcu_read_unlock(), which might have executed + * just before the preceding srcu_readers_active() check, + * and whose CPU might have reordered the srcu_read_unlock() + * with the preceding critical section. In this case, there + * is nothing preventing the synchronize_sched() task that is + * taking the early exit from freeing a data structure that + * is still being referenced (out of order) by the task + * doing the srcu_read_unlock(). + * + * Alternatively, the comparison with "2" on the early exit + * could be changed to "3", but this increases synchronize_srcu() + * latency for bulk loads. So the current code is preferred. + */ + + mutex_unlock(&sp->mutex); +} + +/** + * srcu_batches_completed - return batches completed. + * @sp: srcu_struct on which to report batch completion. + * + * Report the number of batches, correlated with, but not necessarily + * precisely the same as, the number of grace periods that have elapsed. 
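Taken together, the primitives above are used in the familiar RCU shape: readers bracket their accesses with srcu_read_lock()/srcu_read_unlock() and may sleep in between, while updaters publish a new version and call synchronize_srcu() before freeing the old one. A hedged usage sketch (my_srcu, my_config and both functions are invented for illustration; updaters must still serialize among themselves):

#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/srcu.h>

struct my_config {
        int value;
};

static struct srcu_struct my_srcu;              /* init_srcu_struct(&my_srcu) at setup */
static struct my_config *cur_config;

static int read_config_value(void)
{
        struct my_config *p;
        int idx, val;

        idx = srcu_read_lock(&my_srcu);         /* reader may block inside */
        p = rcu_dereference(cur_config);
        val = p ? p->value : -1;
        srcu_read_unlock(&my_srcu, idx);        /* must hand back the same idx */
        return val;
}

static void update_config(struct my_config *new_cfg)    /* caller holds update-side lock */
{
        struct my_config *old = cur_config;

        rcu_assign_pointer(cur_config, new_cfg);        /* publish the new version */
        synchronize_srcu(&my_srcu);                     /* wait out pre-existing readers */
        kfree(old);                                     /* now safe to free */
}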
+ */ + +long srcu_batches_completed(struct srcu_struct *sp) +{ + return sp->completed; +} + +EXPORT_SYMBOL_GPL(init_srcu_struct); +EXPORT_SYMBOL_GPL(cleanup_srcu_struct); +EXPORT_SYMBOL_GPL(srcu_read_lock); +EXPORT_SYMBOL_GPL(srcu_read_unlock); +EXPORT_SYMBOL_GPL(synchronize_srcu); +EXPORT_SYMBOL_GPL(srcu_batches_completed); +EXPORT_SYMBOL_GPL(srcu_readers_active); diff --git a/kernel/sys.c b/kernel/sys.c index 2314867ae34f..98489d82801b 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -153,7 +153,7 @@ static int __kprobes notifier_call_chain(struct notifier_block **nl, /* * Atomic notifier chain routines. Registration and unregistration - * use a mutex, and call_chain is synchronized by RCU (no locks). + * use a spinlock, and call_chain is synchronized by RCU (no locks). */ /** @@ -401,6 +401,129 @@ int raw_notifier_call_chain(struct raw_notifier_head *nh, EXPORT_SYMBOL_GPL(raw_notifier_call_chain); +/* + * SRCU notifier chain routines. Registration and unregistration + * use a mutex, and call_chain is synchronized by SRCU (no locks). + */ + +/** + * srcu_notifier_chain_register - Add notifier to an SRCU notifier chain + * @nh: Pointer to head of the SRCU notifier chain + * @n: New entry in notifier chain + * + * Adds a notifier to an SRCU notifier chain. + * Must be called in process context. + * + * Currently always returns zero. + */ + +int srcu_notifier_chain_register(struct srcu_notifier_head *nh, + struct notifier_block *n) +{ + int ret; + + /* + * This code gets used during boot-up, when task switching is + * not yet working and interrupts must remain disabled. At + * such times we must not call mutex_lock(). + */ + if (unlikely(system_state == SYSTEM_BOOTING)) + return notifier_chain_register(&nh->head, n); + + mutex_lock(&nh->mutex); + ret = notifier_chain_register(&nh->head, n); + mutex_unlock(&nh->mutex); + return ret; +} + +EXPORT_SYMBOL_GPL(srcu_notifier_chain_register); + +/** + * srcu_notifier_chain_unregister - Remove notifier from an SRCU notifier chain + * @nh: Pointer to head of the SRCU notifier chain + * @n: Entry to remove from notifier chain + * + * Removes a notifier from an SRCU notifier chain. + * Must be called from process context. + * + * Returns zero on success or %-ENOENT on failure. + */ +int srcu_notifier_chain_unregister(struct srcu_notifier_head *nh, + struct notifier_block *n) +{ + int ret; + + /* + * This code gets used during boot-up, when task switching is + * not yet working and interrupts must remain disabled. At + * such times we must not call mutex_lock(). + */ + if (unlikely(system_state == SYSTEM_BOOTING)) + return notifier_chain_unregister(&nh->head, n); + + mutex_lock(&nh->mutex); + ret = notifier_chain_unregister(&nh->head, n); + mutex_unlock(&nh->mutex); + synchronize_srcu(&nh->srcu); + return ret; +} + +EXPORT_SYMBOL_GPL(srcu_notifier_chain_unregister); + +/** + * srcu_notifier_call_chain - Call functions in an SRCU notifier chain + * @nh: Pointer to head of the SRCU notifier chain + * @val: Value passed unmodified to notifier function + * @v: Pointer passed unmodified to notifier function + * + * Calls each function in a notifier chain in turn. The functions + * run in a process context, so they are allowed to block. + * + * If the return value of the notifier can be and'ed + * with %NOTIFY_STOP_MASK then srcu_notifier_call_chain + * will return immediately, with the return value of + * the notifier function which halted execution. + * Otherwise the return value is the return value + * of the last notifier function called. 
+ */ + +int srcu_notifier_call_chain(struct srcu_notifier_head *nh, + unsigned long val, void *v) +{ + int ret; + int idx; + + idx = srcu_read_lock(&nh->srcu); + ret = notifier_call_chain(&nh->head, val, v); + srcu_read_unlock(&nh->srcu, idx); + return ret; +} + +EXPORT_SYMBOL_GPL(srcu_notifier_call_chain); + +/** + * srcu_init_notifier_head - Initialize an SRCU notifier head + * @nh: Pointer to head of the srcu notifier chain + * + * Unlike other sorts of notifier heads, SRCU notifier heads require + * dynamic initialization. Be sure to call this routine before + * calling any of the other SRCU notifier routines for this head. + * + * If an SRCU notifier head is deallocated, it must first be cleaned + * up by calling srcu_cleanup_notifier_head(). Otherwise the head's + * per-cpu data (used by the SRCU mechanism) will leak. + */ + +void srcu_init_notifier_head(struct srcu_notifier_head *nh) +{ + mutex_init(&nh->mutex); + if (init_srcu_struct(&nh->srcu) < 0) + BUG(); + nh->head = NULL; +} + +EXPORT_SYMBOL_GPL(srcu_init_notifier_head); + /** * register_reboot_notifier - Register function to be called at reboot time * @nb: Info about notifier function to be called diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index 7a3b2e75f040..0e53314b14de 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -49,6 +49,7 @@ cond_syscall(compat_sys_get_robust_list); cond_syscall(sys_epoll_create); cond_syscall(sys_epoll_ctl); cond_syscall(sys_epoll_wait); +cond_syscall(sys_epoll_pwait); cond_syscall(sys_semget); cond_syscall(sys_semop); cond_syscall(sys_semtimedop); diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 8020fb273c4f..8bff2c18fb5a 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -136,8 +136,10 @@ static int parse_table(int __user *, int, void __user *, size_t __user *, static int proc_do_uts_string(ctl_table *table, int write, struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos); +#ifdef CONFIG_PROC_SYSCTL static int proc_do_cad_pid(ctl_table *table, int write, struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos); +#endif static ctl_table root_table[]; static struct ctl_table_header root_table_header = @@ -542,6 +544,7 @@ static ctl_table kern_table[] = { .proc_handler = &proc_dointvec, }, #endif +#ifdef CONFIG_PROC_SYSCTL { .ctl_name = KERN_CADPID, .procname = "cad_pid", @@ -550,6 +553,7 @@ static ctl_table kern_table[] = { .mode = 0600, .proc_handler = &proc_do_cad_pid, }, +#endif { .ctl_name = KERN_MAX_THREADS, .procname = "threads-max", diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c index 126bb30c4afe..a99b2a6e6a07 100644 --- a/kernel/time/jiffies.c +++ b/kernel/time/jiffies.c @@ -57,7 +57,7 @@ static cycle_t jiffies_read(void) struct clocksource clocksource_jiffies = { .name = "jiffies", - .rating = 0, /* lowest rating*/ + .rating = 1, /* lowest valid rating*/ .read = jiffies_read, .mask = 0xffffffff, /*32bits*/ .mult = NSEC_PER_JIFFY << JIFFIES_SHIFT, /* details above */ diff --git a/kernel/workqueue.c b/kernel/workqueue.c index cfc737bffe6d..3df9bfc7ff78 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -28,6 +28,7 @@ #include <linux/notifier.h> #include <linux/kthread.h> #include <linux/hardirq.h> +#include <linux/mempolicy.h> /* * The per-CPU workqueue (if single thread, we always use the first @@ -245,6 +246,12 @@ static int worker_thread(void *__cwq) sigprocmask(SIG_BLOCK, &blocked, NULL); flush_signals(current); + /* + * We inherited MPOL_INTERLEAVE from the booting kernel. 
+ * Set MPOL_DEFAULT to ensure node-local allocations. + */ + numa_default_policy(); + /* SIG_IGN makes children autoreap: see do_notify_parent(). */ sa.sa.sa_handler = SIG_IGN; sa.sa.sa_flags = 0;
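For the SRCU notifier chain added to kernel/sys.c above, the intended usage follows the other notifier flavors, with the extra requirement that the head be initialized dynamically via srcu_init_notifier_head(). A hedged sketch (the chain head, callback and event value are invented for illustration):

#include <linux/notifier.h>

static struct srcu_notifier_head my_chain;      /* needs srcu_init_notifier_head() first */

static int my_event_handler(struct notifier_block *nb,
                            unsigned long event, void *data)
{
        /* SRCU notifier callouts run in process context, so blocking is allowed. */
        return NOTIFY_OK;
}

static struct notifier_block my_nb = {
        .notifier_call = my_event_handler,
};

static void example(void)
{
        srcu_init_notifier_head(&my_chain);
        srcu_notifier_chain_register(&my_chain, &my_nb);
        srcu_notifier_call_chain(&my_chain, 0 /* event */, NULL);
        srcu_notifier_chain_unregister(&my_chain, &my_nb);
        /* Per the kernel-doc above, srcu_cleanup_notifier_head(&my_chain)
         * must be called before the head itself is deallocated. */
}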