Diffstat (limited to 'kernel')
-rw-r--r--  kernel/compat.c            |  35
-rw-r--r--  kernel/cpu.c               |  36
-rw-r--r--  kernel/exit.c              |   1
-rw-r--r--  kernel/fork.c              |   7
-rw-r--r--  kernel/futex.c             |   7
-rw-r--r--  kernel/irq/chip.c          |  33
-rw-r--r--  kernel/irq/proc.c          |   2
-rw-r--r--  kernel/lockdep.c           |  13
-rw-r--r--  kernel/module.c            |  92
-rw-r--r--  kernel/nsproxy.c           |   6
-rw-r--r--  kernel/posix-cpu-timers.c  |  27
-rw-r--r--  kernel/power/disk.c        |  45
-rw-r--r--  kernel/power/swap.c        |   3
-rw-r--r--  kernel/power/user.c        |   8
-rw-r--r--  kernel/printk.c            |  32
-rw-r--r--  kernel/profile.c           |   2
-rw-r--r--  kernel/sched.c             |  24
-rw-r--r--  kernel/signal.c            |  15
-rw-r--r--  kernel/sys_ni.c            |   2
-rw-r--r--  kernel/sysctl.c            |   4
-rw-r--r--  kernel/taskstats.c         |  87
-rw-r--r--  kernel/time/jiffies.c      |   2
-rw-r--r--  kernel/time/ntp.c          |   2
-rw-r--r--  kernel/tsacct.c            |  17
-rw-r--r--  kernel/unwind.c            | 318
-rw-r--r--  kernel/user.c              |  11
-rw-r--r--  kernel/workqueue.c         |  13
27 files changed, 604 insertions(+), 240 deletions(-)
diff --git a/kernel/compat.c b/kernel/compat.c
index 75573e5d27b0..6952dd057300 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -678,7 +678,7 @@ int get_compat_sigevent(struct sigevent *event,
? -EFAULT : 0;
}
-long compat_get_bitmap(unsigned long *mask, compat_ulong_t __user *umask,
+long compat_get_bitmap(unsigned long *mask, const compat_ulong_t __user *umask,
unsigned long bitmap_size)
{
int i, j;
@@ -982,4 +982,37 @@ asmlinkage long compat_sys_move_pages(pid_t pid, unsigned long nr_pages,
}
return sys_move_pages(pid, nr_pages, pages, nodes, status, flags);
}
+
+asmlinkage long compat_sys_migrate_pages(compat_pid_t pid,
+ compat_ulong_t maxnode,
+ const compat_ulong_t __user *old_nodes,
+ const compat_ulong_t __user *new_nodes)
+{
+ unsigned long __user *old = NULL;
+ unsigned long __user *new = NULL;
+ nodemask_t tmp_mask;
+ unsigned long nr_bits;
+ unsigned long size;
+
+ nr_bits = min_t(unsigned long, maxnode - 1, MAX_NUMNODES);
+ size = ALIGN(nr_bits, BITS_PER_LONG) / 8;
+ if (old_nodes) {
+ if (compat_get_bitmap(nodes_addr(tmp_mask), old_nodes, nr_bits))
+ return -EFAULT;
+ old = compat_alloc_user_space(new_nodes ? size * 2 : size);
+ if (new_nodes)
+ new = old + size / sizeof(unsigned long);
+ if (copy_to_user(old, nodes_addr(tmp_mask), size))
+ return -EFAULT;
+ }
+ if (new_nodes) {
+ if (compat_get_bitmap(nodes_addr(tmp_mask), new_nodes, nr_bits))
+ return -EFAULT;
+ if (new == NULL)
+ new = compat_alloc_user_space(size);
+ if (copy_to_user(new, nodes_addr(tmp_mask), size))
+ return -EFAULT;
+ }
+ return sys_migrate_pages(pid, nr_bits + 1, old, new);
+}
#endif
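
Note: the native interface this compat shim marshals into is the migrate_pages(2) syscall. A minimal userspace sketch, assuming libnuma's wrapper and a two-node machine (node numbers illustrative, error handling abbreviated):

#include <numaif.h>	/* migrate_pages() wrapper from libnuma */
#include <stdio.h>

int main(void)
{
	unsigned long old_nodes = 1UL << 0;	/* pages currently on node 0 */
	unsigned long new_nodes = 1UL << 1;	/* move them to node 1 */

	/* maxnode is the width of the masks in bits; the kernel side
	 * derives nr_bits = maxnode - 1 from it, as in the shim above */
	long rc = migrate_pages(0 /* self */, 8 * sizeof(unsigned long),
				&old_nodes, &new_nodes);
	if (rc < 0)
		perror("migrate_pages");
	return 0;
}

Link with -lnuma.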
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 32c96628463e..663c920b2234 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -19,7 +19,7 @@
static DEFINE_MUTEX(cpu_add_remove_lock);
static DEFINE_MUTEX(cpu_bitmask_lock);
-static __cpuinitdata BLOCKING_NOTIFIER_HEAD(cpu_chain);
+static __cpuinitdata RAW_NOTIFIER_HEAD(cpu_chain);
/* If set, cpu_up and cpu_down will return -EBUSY and do nothing.
* Should always be manipulated under cpu_add_remove_lock
@@ -68,7 +68,11 @@ EXPORT_SYMBOL_GPL(unlock_cpu_hotplug);
/* Need to know about CPUs going up/down? */
int __cpuinit register_cpu_notifier(struct notifier_block *nb)
{
- return blocking_notifier_chain_register(&cpu_chain, nb);
+ int ret;
+ mutex_lock(&cpu_add_remove_lock);
+ ret = raw_notifier_chain_register(&cpu_chain, nb);
+ mutex_unlock(&cpu_add_remove_lock);
+ return ret;
}
#ifdef CONFIG_HOTPLUG_CPU
@@ -77,7 +81,9 @@ EXPORT_SYMBOL(register_cpu_notifier);
void unregister_cpu_notifier(struct notifier_block *nb)
{
- blocking_notifier_chain_unregister(&cpu_chain, nb);
+ mutex_lock(&cpu_add_remove_lock);
+ raw_notifier_chain_unregister(&cpu_chain, nb);
+ mutex_unlock(&cpu_add_remove_lock);
}
EXPORT_SYMBOL(unregister_cpu_notifier);
@@ -126,7 +132,7 @@ static int _cpu_down(unsigned int cpu)
if (!cpu_online(cpu))
return -EINVAL;
- err = blocking_notifier_call_chain(&cpu_chain, CPU_DOWN_PREPARE,
+ err = raw_notifier_call_chain(&cpu_chain, CPU_DOWN_PREPARE,
(void *)(long)cpu);
if (err == NOTIFY_BAD) {
printk("%s: attempt to take down CPU %u failed\n",
@@ -144,18 +150,18 @@ static int _cpu_down(unsigned int cpu)
p = __stop_machine_run(take_cpu_down, NULL, cpu);
mutex_unlock(&cpu_bitmask_lock);
- if (IS_ERR(p)) {
+ if (IS_ERR(p) || cpu_online(cpu)) {
/* CPU didn't die: tell everyone. Can't complain. */
- if (blocking_notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED,
+ if (raw_notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED,
(void *)(long)cpu) == NOTIFY_BAD)
BUG();
- err = PTR_ERR(p);
- goto out_allowed;
- }
-
- if (cpu_online(cpu))
+ if (IS_ERR(p)) {
+ err = PTR_ERR(p);
+ goto out_allowed;
+ }
goto out_thread;
+ }
/* Wait for it to sleep (leaving idle task). */
while (!idle_cpu(cpu))
@@ -169,7 +175,7 @@ static int _cpu_down(unsigned int cpu)
put_cpu();
/* CPU is completely dead: tell everyone. Too late to complain. */
- if (blocking_notifier_call_chain(&cpu_chain, CPU_DEAD,
+ if (raw_notifier_call_chain(&cpu_chain, CPU_DEAD,
(void *)(long)cpu) == NOTIFY_BAD)
BUG();
@@ -206,7 +212,7 @@ static int __devinit _cpu_up(unsigned int cpu)
if (cpu_online(cpu) || !cpu_present(cpu))
return -EINVAL;
- ret = blocking_notifier_call_chain(&cpu_chain, CPU_UP_PREPARE, hcpu);
+ ret = raw_notifier_call_chain(&cpu_chain, CPU_UP_PREPARE, hcpu);
if (ret == NOTIFY_BAD) {
printk("%s: attempt to bring up CPU %u failed\n",
__FUNCTION__, cpu);
@@ -223,11 +229,11 @@ static int __devinit _cpu_up(unsigned int cpu)
BUG_ON(!cpu_online(cpu));
/* Now call notifier in preparation. */
- blocking_notifier_call_chain(&cpu_chain, CPU_ONLINE, hcpu);
+ raw_notifier_call_chain(&cpu_chain, CPU_ONLINE, hcpu);
out_notify:
if (ret != 0)
- blocking_notifier_call_chain(&cpu_chain,
+ raw_notifier_call_chain(&cpu_chain,
CPU_UP_CANCELED, hcpu);
return ret;
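
Note: the switch from a blocking notifier to a raw one removes the chain's internal rwsem; serialization now comes from cpu_add_remove_lock, which every hotplug path already holds. The subscriber-side API is unchanged. A minimal sketch of a hotplug callback, assuming module context and printk-only handling:

#include <linux/cpu.h>
#include <linux/notifier.h>
#include <linux/kernel.h>

static int my_cpu_callback(struct notifier_block *nb,
			   unsigned long action, void *hcpu)
{
	unsigned int cpu = (unsigned long)hcpu;

	switch (action) {
	case CPU_ONLINE:
		printk(KERN_INFO "cpu %u is up\n", cpu);
		break;
	case CPU_DEAD:
		printk(KERN_INFO "cpu %u is down\n", cpu);
		break;
	}
	return NOTIFY_OK;
}

static struct notifier_block my_cpu_nb = {
	.notifier_call = my_cpu_callback,
};

/* in module init: register_cpu_notifier(&my_cpu_nb);
 * in module exit: unregister_cpu_notifier(&my_cpu_nb); */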
diff --git a/kernel/exit.c b/kernel/exit.c
index f250a5e3e281..06de6c4e8ca3 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -128,6 +128,7 @@ static void __exit_signal(struct task_struct *tsk)
flush_sigqueue(&tsk->pending);
if (sig) {
flush_sigqueue(&sig->shared_pending);
+ taskstats_tgid_free(sig);
__cleanup_signal(sig);
}
}
diff --git a/kernel/fork.c b/kernel/fork.c
index 7dc6140baac6..3da978eec791 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -830,7 +830,7 @@ static inline int copy_signal(unsigned long clone_flags, struct task_struct * ts
if (clone_flags & CLONE_THREAD) {
atomic_inc(&current->signal->count);
atomic_inc(&current->signal->live);
- taskstats_tgid_alloc(current->signal);
+ taskstats_tgid_alloc(current);
return 0;
}
sig = kmem_cache_alloc(signal_cachep, GFP_KERNEL);
@@ -897,7 +897,6 @@ static inline int copy_signal(unsigned long clone_flags, struct task_struct * ts
void __cleanup_signal(struct signal_struct *sig)
{
exit_thread_group_keys(sig);
- taskstats_tgid_free(sig);
kmem_cache_free(signal_cachep, sig);
}
@@ -984,6 +983,8 @@ static struct task_struct *copy_process(unsigned long clone_flags,
if (!p)
goto fork_out;
+ rt_mutex_init_task(p);
+
#ifdef CONFIG_TRACE_IRQFLAGS
DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled);
DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled);
@@ -1088,8 +1089,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
p->lockdep_recursion = 0;
#endif
- rt_mutex_init_task(p);
-
#ifdef CONFIG_DEBUG_MUTEXES
p->blocked_on = NULL; /* not blocked yet */
#endif
diff --git a/kernel/futex.c b/kernel/futex.c
index b364e0026191..93ef30ba209f 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1507,6 +1507,13 @@ static int futex_fd(u32 __user *uaddr, int signal)
struct futex_q *q;
struct file *filp;
int ret, err;
+ static unsigned long printk_interval;
+
+ if (printk_timed_ratelimit(&printk_interval, 60 * 60 * 1000)) {
+ printk(KERN_WARNING "Process `%s' used FUTEX_FD, which "
+ "will be removed from the kernel in June 2007\n",
+ current->comm);
+ }
ret = -EINVAL;
if (!valid_signal(signal))
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index 11c99697acfe..2d0dc3efe813 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -499,7 +499,8 @@ handle_percpu_irq(unsigned int irq, struct irq_desc *desc)
#endif /* CONFIG_SMP */
void
-__set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained)
+__set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained,
+ const char *name)
{
struct irq_desc *desc;
unsigned long flags;
@@ -540,6 +541,7 @@ __set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained)
desc->depth = 1;
}
desc->handle_irq = handle;
+ desc->name = name;
if (handle != handle_bad_irq && is_chained) {
desc->status &= ~IRQ_DISABLED;
@@ -555,30 +557,13 @@ set_irq_chip_and_handler(unsigned int irq, struct irq_chip *chip,
irq_flow_handler_t handle)
{
set_irq_chip(irq, chip);
- __set_irq_handler(irq, handle, 0);
+ __set_irq_handler(irq, handle, 0, NULL);
}
-/*
- * Get a descriptive string for the highlevel handler, for
- * /proc/interrupts output:
- */
-const char *
-handle_irq_name(irq_flow_handler_t handle)
+void
+set_irq_chip_and_handler_name(unsigned int irq, struct irq_chip *chip,
+ irq_flow_handler_t handle, const char *name)
{
- if (handle == handle_level_irq)
- return "level ";
- if (handle == handle_fasteoi_irq)
- return "fasteoi";
- if (handle == handle_edge_irq)
- return "edge ";
- if (handle == handle_simple_irq)
- return "simple ";
-#ifdef CONFIG_SMP
- if (handle == handle_percpu_irq)
- return "percpu ";
-#endif
- if (handle == handle_bad_irq)
- return "bad ";
-
- return NULL;
+ set_irq_chip(irq, chip);
+ __set_irq_handler(irq, handle, 0, name);
}
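
Note: with handle_irq_name() gone, /proc/interrupts takes the flow-handler label from desc->name, so callers now pass it explicitly. A sketch of the new call, assuming a chip driver that previously used set_irq_chip_and_handler():

#include <linux/irq.h>

static struct irq_chip my_chip;	/* .mask/.unmask/.ack filled in elsewhere */

static void my_setup_irq(unsigned int irq)
{
	/* "level" is what shows up in /proc/interrupts; previously it
	 * was reverse-mapped from the handler pointer */
	set_irq_chip_and_handler_name(irq, &my_chip, handle_level_irq,
				      "level");
}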
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index 607c7809ad01..9a352667007c 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -57,7 +57,7 @@ static int irq_affinity_write_proc(struct file *file, const char __user *buffer,
if (!irq_desc[irq].chip->set_affinity || no_irq_affinity)
return -EIO;
- err = cpumask_parse(buffer, count, new_value);
+ err = cpumask_parse_user(buffer, count, new_value);
if (err)
return err;
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index ba7156ac70c1..b739be2a6dc9 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -575,6 +575,8 @@ static noinline int print_circular_bug_tail(void)
return 0;
}
+#define RECURSION_LIMIT 40
+
static int noinline print_infinite_recursion_bug(void)
{
__raw_spin_unlock(&hash_lock);
@@ -595,7 +597,7 @@ check_noncircular(struct lock_class *source, unsigned int depth)
debug_atomic_inc(&nr_cyclic_check_recursions);
if (depth > max_recursion_depth)
max_recursion_depth = depth;
- if (depth >= 20)
+ if (depth >= RECURSION_LIMIT)
return print_infinite_recursion_bug();
/*
* Check this lock's dependency list:
@@ -645,7 +647,7 @@ find_usage_forwards(struct lock_class *source, unsigned int depth)
if (depth > max_recursion_depth)
max_recursion_depth = depth;
- if (depth >= 20)
+ if (depth >= RECURSION_LIMIT)
return print_infinite_recursion_bug();
debug_atomic_inc(&nr_find_usage_forwards_checks);
@@ -684,7 +686,7 @@ find_usage_backwards(struct lock_class *source, unsigned int depth)
if (depth > max_recursion_depth)
max_recursion_depth = depth;
- if (depth >= 20)
+ if (depth >= RECURSION_LIMIT)
return print_infinite_recursion_bug();
debug_atomic_inc(&nr_find_usage_backwards_checks);
@@ -1114,8 +1116,6 @@ static int count_matching_names(struct lock_class *new_class)
return count + 1;
}
-extern void __error_too_big_MAX_LOCKDEP_SUBCLASSES(void);
-
/*
* Register a lock's class in the hash-table, if the class is not present
* yet. Otherwise we look it up. We cache the result in the lock object
@@ -1153,8 +1153,7 @@ look_up_lock_class(struct lockdep_map *lock, unsigned int subclass)
* (or spin_lock_init()) call - which acts as the key. For static
* locks we use the lock object itself as the key.
*/
- if (sizeof(struct lock_class_key) > sizeof(struct lock_class))
- __error_too_big_MAX_LOCKDEP_SUBCLASSES();
+ BUILD_BUG_ON(sizeof(struct lock_class_key) > sizeof(struct lock_class));
key = lock->key->subkeys + subclass;
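
Note: BUILD_BUG_ON() replaces the old trick of referencing an undefined function, turning the size check into a compile-time failure instead of a link-time one. Its definition in this era's <linux/kernel.h> is roughly:

#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)]))

/* When the condition is true, the array size becomes negative and
 * compilation of the offending translation unit fails on the spot. */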
diff --git a/kernel/module.c b/kernel/module.c
index 7f60e782de1e..f0166563c602 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -87,6 +87,12 @@ static inline int strong_try_module_get(struct module *mod)
return try_module_get(mod);
}
+static inline void add_taint_module(struct module *mod, unsigned flag)
+{
+ add_taint(flag);
+ mod->taints |= flag;
+}
+
/* A thread that wants to hold a reference to a module only while it
 * is running can call this to safely exit.
* nfsd and lockd use this.
@@ -847,12 +853,10 @@ static int check_version(Elf_Shdr *sechdrs,
return 0;
}
/* Not in module's version table. OK, but that taints the kernel. */
- if (!(tainted & TAINT_FORCED_MODULE)) {
+ if (!(tainted & TAINT_FORCED_MODULE))
printk("%s: no version for \"%s\" found: kernel tainted.\n",
mod->name, symname);
- add_taint(TAINT_FORCED_MODULE);
- mod->taints |= TAINT_FORCED_MODULE;
- }
+ add_taint_module(mod, TAINT_FORCED_MODULE);
return 1;
}
@@ -910,7 +914,8 @@ static unsigned long resolve_symbol(Elf_Shdr *sechdrs,
unsigned long ret;
const unsigned long *crc;
- ret = __find_symbol(name, &owner, &crc, mod->license_gplok);
+ ret = __find_symbol(name, &owner, &crc,
+ !(mod->taints & TAINT_PROPRIETARY_MODULE));
if (ret) {
/* use_module can fail due to OOM, or module unloading */
if (!check_version(sechdrs, versindex, name, mod, crc) ||
@@ -1335,12 +1340,11 @@ static void set_license(struct module *mod, const char *license)
if (!license)
license = "unspecified";
- mod->license_gplok = license_is_gpl_compatible(license);
- if (!mod->license_gplok && !(tainted & TAINT_PROPRIETARY_MODULE)) {
- printk(KERN_WARNING "%s: module license '%s' taints kernel.\n",
- mod->name, license);
- add_taint(TAINT_PROPRIETARY_MODULE);
- mod->taints |= TAINT_PROPRIETARY_MODULE;
+ if (!license_is_gpl_compatible(license)) {
+ if (!(tainted & TAINT_PROPRIETARY_MODULE))
+ printk(KERN_WARNING "%s: module license '%s' taints "
+ "kernel.\n", mod->name, license);
+ add_taint_module(mod, TAINT_PROPRIETARY_MODULE);
}
}
@@ -1619,8 +1623,7 @@ static struct module *load_module(void __user *umod,
modmagic = get_modinfo(sechdrs, infoindex, "vermagic");
/* This is allowed: modprobe --force will invalidate it. */
if (!modmagic) {
- add_taint(TAINT_FORCED_MODULE);
- mod->taints |= TAINT_FORCED_MODULE;
+ add_taint_module(mod, TAINT_FORCED_MODULE);
printk(KERN_WARNING "%s: no version magic, tainting kernel.\n",
mod->name);
} else if (!same_magic(modmagic, vermagic)) {
@@ -1714,14 +1717,10 @@ static struct module *load_module(void __user *umod,
/* Set up license info based on the info section */
set_license(mod, get_modinfo(sechdrs, infoindex, "license"));
- if (strcmp(mod->name, "ndiswrapper") == 0) {
- add_taint(TAINT_PROPRIETARY_MODULE);
- mod->taints |= TAINT_PROPRIETARY_MODULE;
- }
- if (strcmp(mod->name, "driverloader") == 0) {
+ if (strcmp(mod->name, "ndiswrapper") == 0)
add_taint(TAINT_PROPRIETARY_MODULE);
- mod->taints |= TAINT_PROPRIETARY_MODULE;
- }
+ if (strcmp(mod->name, "driverloader") == 0)
+ add_taint_module(mod, TAINT_PROPRIETARY_MODULE);
/* Set up MODINFO_ATTR fields */
setup_modinfo(mod, sechdrs, infoindex);
@@ -1766,8 +1765,7 @@ static struct module *load_module(void __user *umod,
(mod->num_unused_gpl_syms && !unusedgplcrcindex)) {
printk(KERN_WARNING "%s: No versions for exported symbols."
" Tainting kernel.\n", mod->name);
- add_taint(TAINT_FORCED_MODULE);
- mod->taints |= TAINT_FORCED_MODULE;
+ add_taint_module(mod, TAINT_FORCED_MODULE);
}
#endif
@@ -2132,9 +2130,33 @@ static void m_stop(struct seq_file *m, void *p)
mutex_unlock(&module_mutex);
}
+static char *taint_flags(unsigned int taints, char *buf)
+{
+ int bx = 0;
+
+ if (taints) {
+ buf[bx++] = '(';
+ if (taints & TAINT_PROPRIETARY_MODULE)
+ buf[bx++] = 'P';
+ if (taints & TAINT_FORCED_MODULE)
+ buf[bx++] = 'F';
+ /*
+ * TAINT_FORCED_RMMOD: could be added.
+ * TAINT_UNSAFE_SMP, TAINT_MACHINE_CHECK, TAINT_BAD_PAGE don't
+ * apply to modules.
+ */
+ buf[bx++] = ')';
+ }
+ buf[bx] = '\0';
+
+ return buf;
+}
+
static int m_show(struct seq_file *m, void *p)
{
struct module *mod = list_entry(p, struct module, list);
+ char buf[8];
+
seq_printf(m, "%s %lu",
mod->name, mod->init_size + mod->core_size);
print_unload_info(m, mod);
@@ -2147,6 +2169,10 @@ static int m_show(struct seq_file *m, void *p)
/* Used by oprofile and other similar tools. */
seq_printf(m, " 0x%p", mod->module_core);
+ /* Taints info */
+ if (mod->taints)
+ seq_printf(m, " %s", taint_flags(mod->taints, buf));
+
seq_printf(m, "\n");
return 0;
}
@@ -2235,28 +2261,6 @@ struct module *module_text_address(unsigned long addr)
return mod;
}
-static char *taint_flags(unsigned int taints, char *buf)
-{
- *buf = '\0';
- if (taints) {
- int bx;
-
- buf[0] = '(';
- bx = 1;
- if (taints & TAINT_PROPRIETARY_MODULE)
- buf[bx++] = 'P';
- if (taints & TAINT_FORCED_MODULE)
- buf[bx++] = 'F';
- /*
- * TAINT_FORCED_RMMOD: could be added.
- * TAINT_UNSAFE_SMP, TAINT_MACHINE_CHECK, TAINT_BAD_PAGE don't
- * apply to modules.
- */
- buf[bx] = ')';
- }
- return buf;
-}
-
/* Don't grab lock, we're oopsing. */
void print_modules(void)
{
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
index 6ebdb82a0ce4..674aceb7335a 100644
--- a/kernel/nsproxy.c
+++ b/kernel/nsproxy.c
@@ -44,11 +44,9 @@ static inline struct nsproxy *clone_namespaces(struct nsproxy *orig)
{
struct nsproxy *ns;
- ns = kmalloc(sizeof(struct nsproxy), GFP_KERNEL);
- if (ns) {
- memcpy(ns, orig, sizeof(struct nsproxy));
+ ns = kmemdup(orig, sizeof(struct nsproxy), GFP_KERNEL);
+ if (ns)
atomic_set(&ns->count, 1);
- }
return ns;
}
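
Note: kmemdup() folds the allocate-and-copy idiom into a single call; the conversion is mechanical. A before/after sketch:

#include <linux/slab.h>
#include <linux/string.h>

/* before */
static void *dup_old(const void *src, size_t len)
{
	void *p = kmalloc(len, GFP_KERNEL);

	if (p)
		memcpy(p, src, len);
	return p;
}

/* after: identical semantics, including the NULL return on failure */
static void *dup_new(const void *src, size_t len)
{
	return kmemdup(src, len, GFP_KERNEL);
}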
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index 479b16b44f79..7c3e1e6dfb5b 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -88,6 +88,19 @@ static inline union cpu_time_count cpu_time_sub(const clockid_t which_clock,
}
/*
+ * Divide and limit the result to res >= 1
+ *
+ * This is necessary to prevent signal delivery starvation, when the result of
+ * the division would be rounded down to 0.
+ */
+static inline cputime_t cputime_div_non_zero(cputime_t time, unsigned long div)
+{
+ cputime_t res = cputime_div(time, div);
+
+ return max_t(cputime_t, res, 1);
+}
+
+/*
* Update expiry time from increment, and increase overrun count,
* given the current clock sample.
*/
@@ -483,8 +496,8 @@ static void process_timer_rebalance(struct task_struct *p,
BUG();
break;
case CPUCLOCK_PROF:
- left = cputime_div(cputime_sub(expires.cpu, val.cpu),
- nthreads);
+ left = cputime_div_non_zero(cputime_sub(expires.cpu, val.cpu),
+ nthreads);
do {
if (likely(!(t->flags & PF_EXITING))) {
ticks = cputime_add(prof_ticks(t), left);
@@ -498,8 +511,8 @@ static void process_timer_rebalance(struct task_struct *p,
} while (t != p);
break;
case CPUCLOCK_VIRT:
- left = cputime_div(cputime_sub(expires.cpu, val.cpu),
- nthreads);
+ left = cputime_div_non_zero(cputime_sub(expires.cpu, val.cpu),
+ nthreads);
do {
if (likely(!(t->flags & PF_EXITING))) {
ticks = cputime_add(virt_ticks(t), left);
@@ -515,6 +528,7 @@ static void process_timer_rebalance(struct task_struct *p,
case CPUCLOCK_SCHED:
nsleft = expires.sched - val.sched;
do_div(nsleft, nthreads);
+ nsleft = max_t(unsigned long long, nsleft, 1);
do {
if (likely(!(t->flags & PF_EXITING))) {
ns = t->sched_time + nsleft;
@@ -1159,12 +1173,13 @@ static void check_process_timers(struct task_struct *tsk,
prof_left = cputime_sub(prof_expires, utime);
prof_left = cputime_sub(prof_left, stime);
- prof_left = cputime_div(prof_left, nthreads);
+ prof_left = cputime_div_non_zero(prof_left, nthreads);
virt_left = cputime_sub(virt_expires, utime);
- virt_left = cputime_div(virt_left, nthreads);
+ virt_left = cputime_div_non_zero(virt_left, nthreads);
if (sched_expires) {
sched_left = sched_expires - sched_time;
do_div(sched_left, nthreads);
+ sched_left = max_t(unsigned long long, sched_left, 1);
} else {
sched_left = 0;
}
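
Note: the starvation the clamp prevents: with more threads than remaining ticks, plain integer division hands each thread a zero increment, so the expiry never advances and the timer fires again immediately. A userspace sketch of the arithmetic (numbers illustrative):

#include <stdio.h>

static unsigned long div_non_zero(unsigned long time, unsigned long div)
{
	unsigned long res = time / div;

	return res ? res : 1;	/* mirrors max_t(cputime_t, res, 1) */
}

int main(void)
{
	/* 30 ticks left, 50 threads */
	printf("plain:   %lu\n", 30UL / 50UL);		/* 0: no progress */
	printf("clamped: %lu\n", div_non_zero(30, 50));	/* 1: progress   */
	return 0;
}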
diff --git a/kernel/power/disk.c b/kernel/power/disk.c
index d72234942798..b1fb7866b0b3 100644
--- a/kernel/power/disk.c
+++ b/kernel/power/disk.c
@@ -18,6 +18,7 @@
#include <linux/fs.h>
#include <linux/mount.h>
#include <linux/pm.h>
+#include <linux/console.h>
#include <linux/cpu.h>
#include "power.h"
@@ -70,7 +71,7 @@ static inline void platform_finish(void)
static int prepare_processes(void)
{
- int error;
+ int error = 0;
pm_prepare_console();
@@ -83,6 +84,12 @@ static int prepare_processes(void)
goto thaw;
}
+ if (pm_disk_mode == PM_DISK_TESTPROC) {
+ printk("swsusp debug: Waiting for 5 seconds.\n");
+ mdelay(5000);
+ goto thaw;
+ }
+
/* Free memory before shutting down devices. */
if (!(error = swsusp_shrink_memory()))
return 0;
@@ -119,11 +126,21 @@ int pm_suspend_disk(void)
if (error)
return error;
+ if (pm_disk_mode == PM_DISK_TESTPROC)
+ goto Thaw;
+
+ suspend_console();
error = device_suspend(PMSG_FREEZE);
if (error) {
+ resume_console();
printk("Some devices failed to suspend\n");
- unprepare_processes();
- return error;
+ goto Thaw;
+ }
+
+ if (pm_disk_mode == PM_DISK_TEST) {
+ printk("swsusp debug: Waiting for 5 seconds.\n");
+ mdelay(5000);
+ goto Done;
}
pr_debug("PM: snapshotting memory.\n");
@@ -133,21 +150,24 @@ int pm_suspend_disk(void)
if (in_suspend) {
device_resume();
+ resume_console();
pr_debug("PM: writing image.\n");
error = swsusp_write();
if (!error)
power_down(pm_disk_mode);
else {
swsusp_free();
- unprepare_processes();
- return error;
+ goto Thaw;
}
- } else
+ } else {
pr_debug("PM: Image restored successfully.\n");
+ }
swsusp_free();
Done:
device_resume();
+ resume_console();
+ Thaw:
unprepare_processes();
return error;
}
@@ -212,7 +232,9 @@ static int software_resume(void)
pr_debug("PM: Preparing devices for restore.\n");
+ suspend_console();
if ((error = device_suspend(PMSG_PRETHAW))) {
+ resume_console();
printk("Some devices failed to suspend\n");
swsusp_free();
goto Thaw;
@@ -224,6 +246,7 @@ static int software_resume(void)
swsusp_resume();
pr_debug("PM: Restore failed, recovering.n");
device_resume();
+ resume_console();
Thaw:
unprepare_processes();
Done:
@@ -241,6 +264,8 @@ static const char * const pm_disk_modes[] = {
[PM_DISK_PLATFORM] = "platform",
[PM_DISK_SHUTDOWN] = "shutdown",
[PM_DISK_REBOOT] = "reboot",
+ [PM_DISK_TEST] = "test",
+ [PM_DISK_TESTPROC] = "testproc",
};
/**
@@ -295,17 +320,19 @@ static ssize_t disk_store(struct subsystem * s, const char * buf, size_t n)
}
}
if (mode) {
- if (mode == PM_DISK_SHUTDOWN || mode == PM_DISK_REBOOT)
+ if (mode == PM_DISK_SHUTDOWN || mode == PM_DISK_REBOOT ||
+ mode == PM_DISK_TEST || mode == PM_DISK_TESTPROC) {
pm_disk_mode = mode;
- else {
+ } else {
if (pm_ops && pm_ops->enter &&
(mode == pm_ops->pm_disk_mode))
pm_disk_mode = mode;
else
error = -EINVAL;
}
- } else
+ } else {
error = -EINVAL;
+ }
pr_debug("PM: suspend-to-disk mode set to '%s'\n",
pm_disk_modes[mode]);
diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index 9b2ee5344dee..1a3b0dd2c3fc 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -425,7 +425,8 @@ static int submit(int rw, pgoff_t page_off, struct page *page,
bio_set_pages_dirty(bio);
bio_put(bio);
} else {
- get_page(page);
+ if (rw == READ)
+ get_page(page); /* These pages are freed later */
bio->bi_private = *bio_chain;
*bio_chain = bio;
submit_bio(rw | (1 << BIO_RW_SYNC), bio);
diff --git a/kernel/power/user.c b/kernel/power/user.c
index 93b5dd283dea..d991d3b0e5a4 100644
--- a/kernel/power/user.c
+++ b/kernel/power/user.c
@@ -19,6 +19,7 @@
#include <linux/swapops.h>
#include <linux/pm.h>
#include <linux/fs.h>
+#include <linux/console.h>
#include <linux/cpu.h>
#include <asm/uaccess.h>
@@ -173,12 +174,14 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp,
/* Free memory before shutting down devices. */
error = swsusp_shrink_memory();
if (!error) {
+ suspend_console();
error = device_suspend(PMSG_FREEZE);
if (!error) {
in_suspend = 1;
error = swsusp_suspend();
device_resume();
}
+ resume_console();
}
up(&pm_sem);
if (!error)
@@ -196,11 +199,13 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp,
snapshot_free_unused_memory(&data->handle);
down(&pm_sem);
pm_prepare_console();
+ suspend_console();
error = device_suspend(PMSG_PRETHAW);
if (!error) {
error = swsusp_resume();
device_resume();
}
+ resume_console();
pm_restore_console();
up(&pm_sem);
break;
@@ -289,6 +294,7 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp,
}
/* Put devices to sleep */
+ suspend_console();
error = device_suspend(PMSG_SUSPEND);
if (error) {
printk(KERN_ERR "Failed to suspend some devices.\n");
@@ -299,7 +305,7 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp,
/* Wake up devices */
device_resume();
}
-
+ resume_console();
if (pm_ops->finish)
pm_ops->finish(PM_SUSPEND_MEM);
diff --git a/kernel/printk.c b/kernel/printk.c
index 771f5e861bcd..66426552fbfe 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -31,6 +31,7 @@
#include <linux/security.h>
#include <linux/bootmem.h>
#include <linux/syscalls.h>
+#include <linux/jiffies.h>
#include <asm/uaccess.h>
@@ -820,15 +821,8 @@ void release_console_sem(void)
console_locked = 0;
up(&console_sem);
spin_unlock_irqrestore(&logbuf_lock, flags);
- if (wake_klogd && !oops_in_progress && waitqueue_active(&log_wait)) {
- /*
- * If we printk from within the lock dependency code,
- * from within the scheduler code, then do not lock
- * up due to self-recursion:
- */
- if (!lockdep_internal())
- wake_up_interruptible(&log_wait);
- }
+ if (wake_klogd && !oops_in_progress && waitqueue_active(&log_wait))
+ wake_up_interruptible(&log_wait);
}
EXPORT_SYMBOL(release_console_sem);
@@ -1108,3 +1102,23 @@ int printk_ratelimit(void)
printk_ratelimit_burst);
}
EXPORT_SYMBOL(printk_ratelimit);
+
+/**
+ * printk_timed_ratelimit - caller-controlled printk ratelimiting
+ * @caller_jiffies: pointer to caller's state
+ * @interval_msecs: minimum interval between prints
+ *
+ * printk_timed_ratelimit() returns true if more than @interval_msecs
+ * milliseconds have elapsed since the last time printk_timed_ratelimit()
+ * returned true.
+ */
+bool printk_timed_ratelimit(unsigned long *caller_jiffies,
+ unsigned int interval_msecs)
+{
+ if (*caller_jiffies == 0 || time_after(jiffies, *caller_jiffies)) {
+ *caller_jiffies = jiffies + msecs_to_jiffies(interval_msecs);
+ return true;
+ }
+ return false;
+}
+EXPORT_SYMBOL(printk_timed_ratelimit);
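
Note: the caller owns the state word, so each call site gets its own interval (the futex.c hunk above uses one hour). A minimal usage sketch:

#include <linux/kernel.h>

static void warn_at_most_once_a_minute(void)
{
	static unsigned long last;	/* zero-initialized: first call prints */

	if (printk_timed_ratelimit(&last, 60 * 1000))
		printk(KERN_WARNING "something worth saying once a minute\n");
}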
diff --git a/kernel/profile.c b/kernel/profile.c
index 857300a2afec..f940b462eec9 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -399,7 +399,7 @@ static int prof_cpu_mask_write_proc (struct file *file, const char __user *buffe
unsigned long full_count = count, err;
cpumask_t new_value;
- err = cpumask_parse(buffer, count, new_value);
+ err = cpumask_parse_user(buffer, count, new_value);
if (err)
return err;
diff --git a/kernel/sched.c b/kernel/sched.c
index 53608a59d6e3..3399701c680e 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -160,15 +160,6 @@
#define TASK_PREEMPTS_CURR(p, rq) \
((p)->prio < (rq)->curr->prio)
-/*
- * task_timeslice() scales user-nice values [ -20 ... 0 ... 19 ]
- * to time slice values: [800ms ... 100ms ... 5ms]
- *
- * The higher a thread's priority, the bigger timeslices
- * it gets during one round of execution. But even the lowest
- * priority thread gets MIN_TIMESLICE worth of execution time.
- */
-
#define SCALE_PRIO(x, prio) \
max(x * (MAX_PRIO - prio) / (MAX_USER_PRIO / 2), MIN_TIMESLICE)
@@ -180,6 +171,15 @@ static unsigned int static_prio_timeslice(int static_prio)
return SCALE_PRIO(DEF_TIMESLICE, static_prio);
}
+/*
+ * task_timeslice() scales user-nice values [ -20 ... 0 ... 19 ]
+ * to time slice values: [800ms ... 100ms ... 5ms]
+ *
+ * The higher a thread's priority, the bigger timeslices
+ * it gets during one round of execution. But even the lowest
+ * priority thread gets MIN_TIMESLICE worth of execution time.
+ */
+
static inline unsigned int task_timeslice(struct task_struct *p)
{
return static_prio_timeslice(p->static_prio);
@@ -1822,14 +1822,14 @@ context_switch(struct rq *rq, struct task_struct *prev,
struct mm_struct *mm = next->mm;
struct mm_struct *oldmm = prev->active_mm;
- if (unlikely(!mm)) {
+ if (!mm) {
next->active_mm = oldmm;
atomic_inc(&oldmm->mm_count);
enter_lazy_tlb(oldmm, next);
} else
switch_mm(oldmm, mm, next);
- if (unlikely(!prev->mm)) {
+ if (!prev->mm) {
prev->active_mm = NULL;
WARN_ON(rq->prev_mm);
rq->prev_mm = oldmm;
@@ -3491,7 +3491,7 @@ asmlinkage void __sched preempt_schedule(void)
* If there is a non-zero preempt_count or interrupts are disabled,
* we do not want to preempt the current task. Just return..
*/
- if (unlikely(ti->preempt_count || irqs_disabled()))
+ if (likely(ti->preempt_count || irqs_disabled()))
return;
need_resched:
diff --git a/kernel/signal.c b/kernel/signal.c
index 7ed8d5304bec..df18c167a2a7 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -267,18 +267,25 @@ static struct sigqueue *__sigqueue_alloc(struct task_struct *t, gfp_t flags,
int override_rlimit)
{
struct sigqueue *q = NULL;
+ struct user_struct *user;
- atomic_inc(&t->user->sigpending);
+ /*
+ * In order to avoid problems with "switch_user()", we want to make
+ * sure that the compiler doesn't re-load "t->user"
+ */
+ user = t->user;
+ barrier();
+ atomic_inc(&user->sigpending);
if (override_rlimit ||
- atomic_read(&t->user->sigpending) <=
+ atomic_read(&user->sigpending) <=
t->signal->rlim[RLIMIT_SIGPENDING].rlim_cur)
q = kmem_cache_alloc(sigqueue_cachep, flags);
if (unlikely(q == NULL)) {
- atomic_dec(&t->user->sigpending);
+ atomic_dec(&user->sigpending);
} else {
INIT_LIST_HEAD(&q->list);
q->flags = 0;
- q->user = get_uid(t->user);
+ q->user = get_uid(user);
}
return(q);
}
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 7a3b2e75f040..d7306d0f3dfc 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -49,6 +49,7 @@ cond_syscall(compat_sys_get_robust_list);
cond_syscall(sys_epoll_create);
cond_syscall(sys_epoll_ctl);
cond_syscall(sys_epoll_wait);
+cond_syscall(sys_epoll_pwait);
cond_syscall(sys_semget);
cond_syscall(sys_semop);
cond_syscall(sys_semtimedop);
@@ -134,6 +135,7 @@ cond_syscall(sys_madvise);
cond_syscall(sys_mremap);
cond_syscall(sys_remap_file_pages);
cond_syscall(compat_sys_move_pages);
+cond_syscall(compat_sys_migrate_pages);
/* block-layer dependent */
cond_syscall(sys_bdflush);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 8020fb273c4f..8bff2c18fb5a 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -136,8 +136,10 @@ static int parse_table(int __user *, int, void __user *, size_t __user *,
static int proc_do_uts_string(ctl_table *table, int write, struct file *filp,
void __user *buffer, size_t *lenp, loff_t *ppos);
+#ifdef CONFIG_PROC_SYSCTL
static int proc_do_cad_pid(ctl_table *table, int write, struct file *filp,
void __user *buffer, size_t *lenp, loff_t *ppos);
+#endif
static ctl_table root_table[];
static struct ctl_table_header root_table_header =
@@ -542,6 +544,7 @@ static ctl_table kern_table[] = {
.proc_handler = &proc_dointvec,
},
#endif
+#ifdef CONFIG_PROC_SYSCTL
{
.ctl_name = KERN_CADPID,
.procname = "cad_pid",
@@ -550,6 +553,7 @@ static ctl_table kern_table[] = {
.mode = 0600,
.proc_handler = &proc_do_cad_pid,
},
+#endif
{
.ctl_name = KERN_MAX_THREADS,
.procname = "threads-max",
diff --git a/kernel/taskstats.c b/kernel/taskstats.c
index 5d6a8c54ee85..f45c5e70773c 100644
--- a/kernel/taskstats.c
+++ b/kernel/taskstats.c
@@ -77,7 +77,8 @@ static int prepare_reply(struct genl_info *info, u8 cmd, struct sk_buff **skbp,
/*
* If new attributes are added, please revisit this allocation
*/
- skb = nlmsg_new(genlmsg_total_size(size), GFP_KERNEL);
+ size = nlmsg_total_size(genlmsg_total_size(size));
+ skb = nlmsg_new(size, GFP_KERNEL);
if (!skb)
return -ENOMEM;
@@ -174,21 +175,19 @@ static void send_cpu_listeners(struct sk_buff *skb, unsigned int cpu)
up_write(&listeners->sem);
}
-static int fill_pid(pid_t pid, struct task_struct *pidtsk,
+static int fill_pid(pid_t pid, struct task_struct *tsk,
struct taskstats *stats)
{
int rc = 0;
- struct task_struct *tsk = pidtsk;
- if (!pidtsk) {
- read_lock(&tasklist_lock);
+ if (!tsk) {
+ rcu_read_lock();
tsk = find_task_by_pid(pid);
- if (!tsk) {
- read_unlock(&tasklist_lock);
+ if (tsk)
+ get_task_struct(tsk);
+ rcu_read_unlock();
+ if (!tsk)
return -ESRCH;
- }
- get_task_struct(tsk);
- read_unlock(&tasklist_lock);
} else
get_task_struct(tsk);
@@ -214,39 +213,30 @@ static int fill_pid(pid_t pid, struct task_struct *pidtsk,
}
-static int fill_tgid(pid_t tgid, struct task_struct *tgidtsk,
+static int fill_tgid(pid_t tgid, struct task_struct *first,
struct taskstats *stats)
{
- struct task_struct *tsk, *first;
+ struct task_struct *tsk;
unsigned long flags;
+ int rc = -ESRCH;
/*
* Add additional stats from live tasks except zombie thread group
* leaders who are already counted with the dead tasks
*/
- first = tgidtsk;
- if (!first) {
- read_lock(&tasklist_lock);
+ rcu_read_lock();
+ if (!first)
first = find_task_by_pid(tgid);
- if (!first) {
- read_unlock(&tasklist_lock);
- return -ESRCH;
- }
- get_task_struct(first);
- read_unlock(&tasklist_lock);
- } else
- get_task_struct(first);
- /* Start with stats from dead tasks */
- spin_lock_irqsave(&first->signal->stats_lock, flags);
+ if (!first || !lock_task_sighand(first, &flags))
+ goto out;
+
if (first->signal->stats)
memcpy(stats, first->signal->stats, sizeof(*stats));
- spin_unlock_irqrestore(&first->signal->stats_lock, flags);
tsk = first;
- read_lock(&tasklist_lock);
do {
- if (tsk->exit_state == EXIT_ZOMBIE && thread_group_leader(tsk))
+ if (tsk->exit_state)
continue;
/*
* Accounting subsystem can call its functions here to
@@ -257,15 +247,18 @@ static int fill_tgid(pid_t tgid, struct task_struct *tgidtsk,
delayacct_add_tsk(stats, tsk);
} while_each_thread(first, tsk);
- read_unlock(&tasklist_lock);
- stats->version = TASKSTATS_VERSION;
+ unlock_task_sighand(first, &flags);
+ rc = 0;
+out:
+ rcu_read_unlock();
+
+ stats->version = TASKSTATS_VERSION;
/*
 * Accounting subsystems can also add calls here to modify
* fields of taskstats.
*/
-
- return 0;
+ return rc;
}
@@ -273,7 +266,7 @@ static void fill_tgid_exit(struct task_struct *tsk)
{
unsigned long flags;
- spin_lock_irqsave(&tsk->signal->stats_lock, flags);
+ spin_lock_irqsave(&tsk->sighand->siglock, flags);
if (!tsk->signal->stats)
goto ret;
@@ -285,7 +278,7 @@ static void fill_tgid_exit(struct task_struct *tsk)
*/
delayacct_add_tsk(tsk->signal->stats, tsk);
ret:
- spin_unlock_irqrestore(&tsk->signal->stats_lock, flags);
+ spin_unlock_irqrestore(&tsk->sighand->siglock, flags);
return;
}
@@ -419,7 +412,7 @@ static int taskstats_user_cmd(struct sk_buff *skb, struct genl_info *info)
return send_reply(rep_skb, info->snd_pid);
nla_put_failure:
- return genlmsg_cancel(rep_skb, reply);
+ rc = genlmsg_cancel(rep_skb, reply);
err:
nlmsg_free(rep_skb);
return rc;
@@ -461,24 +454,26 @@ void taskstats_exit_send(struct task_struct *tsk, struct taskstats *tidstats,
size_t size;
int is_thread_group;
struct nlattr *na;
- unsigned long flags;
- if (!family_registered || !tidstats)
+ if (!family_registered)
return;
- spin_lock_irqsave(&tsk->signal->stats_lock, flags);
- is_thread_group = tsk->signal->stats ? 1 : 0;
- spin_unlock_irqrestore(&tsk->signal->stats_lock, flags);
-
- rc = 0;
/*
* Size includes space for nested attributes
*/
size = nla_total_size(sizeof(u32)) +
nla_total_size(sizeof(struct taskstats)) + nla_total_size(0);
- if (is_thread_group)
- size = 2 * size; /* PID + STATS + TGID + STATS */
+ is_thread_group = (tsk->signal->stats != NULL);
+ if (is_thread_group) {
+ /* PID + STATS + TGID + STATS */
+ size = 2 * size;
+ /* fill the tsk->signal->stats structure */
+ fill_tgid_exit(tsk);
+ }
+
+ if (!tidstats)
+ return;
rc = prepare_reply(NULL, TASKSTATS_CMD_NEW, &rep_skb, &reply, size);
if (rc < 0)
@@ -498,11 +493,8 @@ void taskstats_exit_send(struct task_struct *tsk, struct taskstats *tidstats,
goto send;
/*
- * tsk has/had a thread group so fill the tsk->signal->stats structure
* Doesn't matter if tsk is the leader or the last group member leaving
*/
-
- fill_tgid_exit(tsk);
if (!group_dead)
goto send;
@@ -519,7 +511,6 @@ send:
nla_put_failure:
genlmsg_cancel(rep_skb, reply);
- goto ret;
err_skb:
nlmsg_free(rep_skb);
ret:
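
Note: the fill_pid()/fill_tgid() rewrite drops tasklist_lock in favor of RCU plus a reference count. The lookup pattern it now relies on, sketched in isolation:

#include <linux/sched.h>
#include <linux/rcupdate.h>

static struct task_struct *grab_task(pid_t pid)
{
	struct task_struct *tsk;

	rcu_read_lock();
	tsk = find_task_by_pid(pid);
	if (tsk)
		get_task_struct(tsk);	/* pin it before leaving the
					 * RCU read-side section */
	rcu_read_unlock();
	return tsk;	/* caller must put_task_struct() when done */
}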
diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c
index 126bb30c4afe..a99b2a6e6a07 100644
--- a/kernel/time/jiffies.c
+++ b/kernel/time/jiffies.c
@@ -57,7 +57,7 @@ static cycle_t jiffies_read(void)
struct clocksource clocksource_jiffies = {
.name = "jiffies",
- .rating = 0, /* lowest rating*/
+ .rating = 1, /* lowest valid rating*/
.read = jiffies_read,
.mask = 0xffffffff, /*32bits*/
.mult = NSEC_PER_JIFFY << JIFFIES_SHIFT, /* details above */
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index 47195fa0ec4f..3afeaa3a73f9 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -161,9 +161,9 @@ void second_overflow(void)
time_adjust += MAX_TICKADJ;
tick_length -= MAX_TICKADJ_SCALED;
} else {
- time_adjust = 0;
tick_length += (s64)(time_adjust * NSEC_PER_USEC /
HZ) << TICK_LENGTH_SHIFT;
+ time_adjust = 0;
}
}
}
diff --git a/kernel/tsacct.c b/kernel/tsacct.c
index db443221ba5b..96f77013d3f0 100644
--- a/kernel/tsacct.c
+++ b/kernel/tsacct.c
@@ -36,7 +36,7 @@ void bacct_add_tsk(struct taskstats *stats, struct task_struct *tsk)
/* calculate task elapsed time in timespec */
do_posix_clock_monotonic_gettime(&uptime);
- ts = timespec_sub(uptime, current->group_leader->start_time);
+ ts = timespec_sub(uptime, tsk->start_time);
/* rebase elapsed time to usec */
ac_etime = timespec_to_ns(&ts);
do_div(ac_etime, NSEC_PER_USEC);
@@ -58,7 +58,10 @@ void bacct_add_tsk(struct taskstats *stats, struct task_struct *tsk)
stats->ac_uid = tsk->uid;
stats->ac_gid = tsk->gid;
stats->ac_pid = tsk->pid;
- stats->ac_ppid = (tsk->parent) ? tsk->parent->pid : 0;
+ rcu_read_lock();
+ stats->ac_ppid = pid_alive(tsk) ?
+ rcu_dereference(tsk->real_parent)->tgid : 0;
+ rcu_read_unlock();
stats->ac_utime = cputime_to_msecs(tsk->utime) * USEC_PER_MSEC;
stats->ac_stime = cputime_to_msecs(tsk->stime) * USEC_PER_MSEC;
stats->ac_minflt = tsk->min_flt;
@@ -77,13 +80,17 @@ void bacct_add_tsk(struct taskstats *stats, struct task_struct *tsk)
*/
void xacct_add_tsk(struct taskstats *stats, struct task_struct *p)
{
+ struct mm_struct *mm;
+
/* convert pages-jiffies to Mbyte-usec */
stats->coremem = jiffies_to_usecs(p->acct_rss_mem1) * PAGE_SIZE / MB;
stats->virtmem = jiffies_to_usecs(p->acct_vm_mem1) * PAGE_SIZE / MB;
- if (p->mm) {
+ mm = get_task_mm(p);
+ if (mm) {
/* adjust to KB unit */
- stats->hiwater_rss = p->mm->hiwater_rss * PAGE_SIZE / KB;
- stats->hiwater_vm = p->mm->hiwater_vm * PAGE_SIZE / KB;
+ stats->hiwater_rss = mm->hiwater_rss * PAGE_SIZE / KB;
+ stats->hiwater_vm = mm->hiwater_vm * PAGE_SIZE / KB;
+ mmput(mm);
}
stats->read_char = p->rchar;
stats->write_char = p->wchar;
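
Note: reading p->mm directly is racy against exit_mm() on another CPU; get_task_mm() takes a reference on the mm (or returns NULL for kernel threads), which is why xacct_add_tsk() now pairs it with mmput(). The pattern in isolation:

#include <linux/sched.h>
#include <linux/mm.h>

static unsigned long sample_hiwater_rss(struct task_struct *p)
{
	struct mm_struct *mm = get_task_mm(p);	/* NULL for kernel threads
						 * or if the mm is gone */
	unsigned long rss = 0;

	if (mm) {
		rss = mm->hiwater_rss;
		mmput(mm);	/* drop the reference taken above */
	}
	return rss;
}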
diff --git a/kernel/unwind.c b/kernel/unwind.c
index 2e2368607aab..f7e50d16dbf6 100644
--- a/kernel/unwind.c
+++ b/kernel/unwind.c
@@ -11,13 +11,15 @@
#include <linux/unwind.h>
#include <linux/module.h>
-#include <linux/delay.h>
+#include <linux/bootmem.h>
+#include <linux/sort.h>
#include <linux/stop_machine.h>
#include <asm/sections.h>
#include <asm/uaccess.h>
#include <asm/unaligned.h>
extern char __start_unwind[], __end_unwind[];
+extern const u8 __start_unwind_hdr[], __end_unwind_hdr[];
#define MAX_STACK_DEPTH 8
@@ -100,6 +102,8 @@ static struct unwind_table {
} core, init;
const void *address;
unsigned long size;
+ const unsigned char *header;
+ unsigned long hdrsz;
struct unwind_table *link;
const char *name;
} root_table;
@@ -145,6 +149,10 @@ static struct unwind_table *find_table(unsigned long pc)
return table;
}
+static unsigned long read_pointer(const u8 **pLoc,
+ const void *end,
+ signed ptrType);
+
static void init_unwind_table(struct unwind_table *table,
const char *name,
const void *core_start,
@@ -152,14 +160,30 @@ static void init_unwind_table(struct unwind_table *table,
const void *init_start,
unsigned long init_size,
const void *table_start,
- unsigned long table_size)
+ unsigned long table_size,
+ const u8 *header_start,
+ unsigned long header_size)
{
+ const u8 *ptr = header_start + 4;
+ const u8 *end = header_start + header_size;
+
table->core.pc = (unsigned long)core_start;
table->core.range = core_size;
table->init.pc = (unsigned long)init_start;
table->init.range = init_size;
table->address = table_start;
table->size = table_size;
+ /* See if the linker provided table looks valid. */
+ if (header_size <= 4
+ || header_start[0] != 1
+ || (void *)read_pointer(&ptr, end, header_start[1]) != table_start
+ || header_start[2] == DW_EH_PE_omit
+ || read_pointer(&ptr, end, header_start[2]) <= 0
+ || header_start[3] == DW_EH_PE_omit)
+ header_start = NULL;
+ table->hdrsz = header_size;
+ smp_wmb();
+ table->header = header_start;
table->link = NULL;
table->name = name;
}
@@ -169,7 +193,143 @@ void __init unwind_init(void)
init_unwind_table(&root_table, "kernel",
_text, _end - _text,
NULL, 0,
- __start_unwind, __end_unwind - __start_unwind);
+ __start_unwind, __end_unwind - __start_unwind,
+ __start_unwind_hdr, __end_unwind_hdr - __start_unwind_hdr);
+}
+
+static const u32 bad_cie, not_fde;
+static const u32 *cie_for_fde(const u32 *fde, const struct unwind_table *);
+static signed fde_pointer_type(const u32 *cie);
+
+struct eh_frame_hdr_table_entry {
+ unsigned long start, fde;
+};
+
+static int cmp_eh_frame_hdr_table_entries(const void *p1, const void *p2)
+{
+ const struct eh_frame_hdr_table_entry *e1 = p1;
+ const struct eh_frame_hdr_table_entry *e2 = p2;
+
+ return (e1->start > e2->start) - (e1->start < e2->start);
+}
+
+static void swap_eh_frame_hdr_table_entries(void *p1, void *p2, int size)
+{
+ struct eh_frame_hdr_table_entry *e1 = p1;
+ struct eh_frame_hdr_table_entry *e2 = p2;
+ unsigned long v;
+
+ v = e1->start;
+ e1->start = e2->start;
+ e2->start = v;
+ v = e1->fde;
+ e1->fde = e2->fde;
+ e2->fde = v;
+}
+
+static void __init setup_unwind_table(struct unwind_table *table,
+ void *(*alloc)(unsigned long))
+{
+ const u8 *ptr;
+ unsigned long tableSize = table->size, hdrSize;
+ unsigned n;
+ const u32 *fde;
+ struct {
+ u8 version;
+ u8 eh_frame_ptr_enc;
+ u8 fde_count_enc;
+ u8 table_enc;
+ unsigned long eh_frame_ptr;
+ unsigned int fde_count;
+ struct eh_frame_hdr_table_entry table[];
+ } __attribute__((__packed__)) *header;
+
+ if (table->header)
+ return;
+
+ if (table->hdrsz)
+ printk(KERN_WARNING ".eh_frame_hdr for '%s' present but unusable\n",
+ table->name);
+
+ if (tableSize & (sizeof(*fde) - 1))
+ return;
+
+ for (fde = table->address, n = 0;
+ tableSize > sizeof(*fde) && tableSize - sizeof(*fde) >= *fde;
+ tableSize -= sizeof(*fde) + *fde, fde += 1 + *fde / sizeof(*fde)) {
+ const u32 *cie = cie_for_fde(fde, table);
+ signed ptrType;
+
+ if (cie == &not_fde)
+ continue;
+ if (cie == NULL
+ || cie == &bad_cie
+ || (ptrType = fde_pointer_type(cie)) < 0)
+ return;
+ ptr = (const u8 *)(fde + 2);
+ if (!read_pointer(&ptr,
+ (const u8 *)(fde + 1) + *fde,
+ ptrType))
+ return;
+ ++n;
+ }
+
+ if (tableSize || !n)
+ return;
+
+ hdrSize = 4 + sizeof(unsigned long) + sizeof(unsigned int)
+ + 2 * n * sizeof(unsigned long);
+ header = alloc(hdrSize);
+ if (!header)
+ return;
+ header->version = 1;
+ header->eh_frame_ptr_enc = DW_EH_PE_abs|DW_EH_PE_native;
+ header->fde_count_enc = DW_EH_PE_abs|DW_EH_PE_data4;
+ header->table_enc = DW_EH_PE_abs|DW_EH_PE_native;
+ put_unaligned((unsigned long)table->address, &header->eh_frame_ptr);
+ BUILD_BUG_ON(offsetof(typeof(*header), fde_count)
+ % __alignof(typeof(header->fde_count)));
+ header->fde_count = n;
+
+ BUILD_BUG_ON(offsetof(typeof(*header), table)
+ % __alignof(typeof(*header->table)));
+ for (fde = table->address, tableSize = table->size, n = 0;
+ tableSize;
+ tableSize -= sizeof(*fde) + *fde, fde += 1 + *fde / sizeof(*fde)) {
+ const u32 *cie = fde + 1 - fde[1] / sizeof(*fde);
+
+ if (!fde[1])
+ continue; /* this is a CIE */
+ ptr = (const u8 *)(fde + 2);
+ header->table[n].start = read_pointer(&ptr,
+ (const u8 *)(fde + 1) + *fde,
+ fde_pointer_type(cie));
+ header->table[n].fde = (unsigned long)fde;
+ ++n;
+ }
+ WARN_ON(n != header->fde_count);
+
+ sort(header->table,
+ n,
+ sizeof(*header->table),
+ cmp_eh_frame_hdr_table_entries,
+ swap_eh_frame_hdr_table_entries);
+
+ table->hdrsz = hdrSize;
+ smp_wmb();
+ table->header = (const void *)header;
+}
+
+static void *__init balloc(unsigned long sz)
+{
+ return __alloc_bootmem_nopanic(sz,
+ sizeof(unsigned int),
+ __pa(MAX_DMA_ADDRESS));
+}
+
+void __init unwind_setup(void)
+{
+ setup_unwind_table(&root_table, balloc);
}
#ifdef CONFIG_MODULES
@@ -193,7 +353,8 @@ void *unwind_add_table(struct module *module,
init_unwind_table(table, module->name,
module->module_core, module->core_size,
module->module_init, module->init_size,
- table_start, table_size);
+ table_start, table_size,
+ NULL, 0);
if (last_table)
last_table->link = table;
@@ -303,6 +464,26 @@ static sleb128_t get_sleb128(const u8 **pcur, const u8 *end)
return value;
}
+static const u32 *cie_for_fde(const u32 *fde, const struct unwind_table *table)
+{
+ const u32 *cie;
+
+ if (!*fde || (*fde & (sizeof(*fde) - 1)))
+ return &bad_cie;
+ if (!fde[1])
+ return &not_fde; /* this is a CIE */
+ if ((fde[1] & (sizeof(*fde) - 1))
+ || fde[1] > (unsigned long)(fde + 1) - (unsigned long)table->address)
+ return NULL; /* this is not a valid FDE */
+ cie = fde + 1 - fde[1] / sizeof(*fde);
+ if (*cie <= sizeof(*cie) + 4
+ || *cie >= fde[1] - sizeof(*fde)
+ || (*cie & (sizeof(*cie) - 1))
+ || cie[1])
+ return NULL; /* this is not a (valid) CIE */
+ return cie;
+}
+
static unsigned long read_pointer(const u8 **pLoc,
const void *end,
signed ptrType)
@@ -610,49 +791,108 @@ int unwind(struct unwind_frame_info *frame)
unsigned i;
signed ptrType = -1;
uleb128_t retAddrReg = 0;
- struct unwind_table *table;
+ const struct unwind_table *table;
struct unwind_state state;
if (UNW_PC(frame) == 0)
return -EINVAL;
if ((table = find_table(pc)) != NULL
&& !(table->size & (sizeof(*fde) - 1))) {
- unsigned long tableSize = table->size;
-
- for (fde = table->address;
- tableSize > sizeof(*fde) && tableSize - sizeof(*fde) >= *fde;
- tableSize -= sizeof(*fde) + *fde,
- fde += 1 + *fde / sizeof(*fde)) {
- if (!*fde || (*fde & (sizeof(*fde) - 1)))
- break;
- if (!fde[1])
- continue; /* this is a CIE */
- if ((fde[1] & (sizeof(*fde) - 1))
- || fde[1] > (unsigned long)(fde + 1)
- - (unsigned long)table->address)
- continue; /* this is not a valid FDE */
- cie = fde + 1 - fde[1] / sizeof(*fde);
- if (*cie <= sizeof(*cie) + 4
- || *cie >= fde[1] - sizeof(*fde)
- || (*cie & (sizeof(*cie) - 1))
- || cie[1]
- || (ptrType = fde_pointer_type(cie)) < 0) {
- cie = NULL; /* this is not a (valid) CIE */
- continue;
+ const u8 *hdr = table->header;
+ unsigned long tableSize;
+
+ smp_rmb();
+ if (hdr && hdr[0] == 1) {
+ switch(hdr[3] & DW_EH_PE_FORM) {
+ case DW_EH_PE_native: tableSize = sizeof(unsigned long); break;
+ case DW_EH_PE_data2: tableSize = 2; break;
+ case DW_EH_PE_data4: tableSize = 4; break;
+ case DW_EH_PE_data8: tableSize = 8; break;
+ default: tableSize = 0; break;
+ }
+ ptr = hdr + 4;
+ end = hdr + table->hdrsz;
+ if (tableSize
+ && read_pointer(&ptr, end, hdr[1])
+ == (unsigned long)table->address
+ && (i = read_pointer(&ptr, end, hdr[2])) > 0
+ && i == (end - ptr) / (2 * tableSize)
+ && !((end - ptr) % (2 * tableSize))) {
+ do {
+ const u8 *cur = ptr + (i / 2) * (2 * tableSize);
+
+ startLoc = read_pointer(&cur,
+ cur + tableSize,
+ hdr[3]);
+ if (pc < startLoc)
+ i /= 2;
+ else {
+ ptr = cur - tableSize;
+ i = (i + 1) / 2;
+ }
+ } while (startLoc && i > 1);
+ if (i == 1
+ && (startLoc = read_pointer(&ptr,
+ ptr + tableSize,
+ hdr[3])) != 0
+ && pc >= startLoc)
+ fde = (void *)read_pointer(&ptr,
+ ptr + tableSize,
+ hdr[3]);
}
+ }
+
+ if (fde != NULL) {
+ cie = cie_for_fde(fde, table);
ptr = (const u8 *)(fde + 2);
- startLoc = read_pointer(&ptr,
- (const u8 *)(fde + 1) + *fde,
- ptrType);
- endLoc = startLoc
- + read_pointer(&ptr,
- (const u8 *)(fde + 1) + *fde,
- ptrType & DW_EH_PE_indirect
- ? ptrType
- : ptrType & (DW_EH_PE_FORM|DW_EH_PE_signed));
- if (pc >= startLoc && pc < endLoc)
- break;
- cie = NULL;
+ if(cie != NULL
+ && cie != &bad_cie
+ && cie != &not_fde
+ && (ptrType = fde_pointer_type(cie)) >= 0
+ && read_pointer(&ptr,
+ (const u8 *)(fde + 1) + *fde,
+ ptrType) == startLoc) {
+ if (!(ptrType & DW_EH_PE_indirect))
+ ptrType &= DW_EH_PE_FORM|DW_EH_PE_signed;
+ endLoc = startLoc
+ + read_pointer(&ptr,
+ (const u8 *)(fde + 1) + *fde,
+ ptrType);
+ if(pc >= endLoc)
+ fde = NULL;
+ } else
+ fde = NULL;
+ }
+ if (fde == NULL) {
+ for (fde = table->address, tableSize = table->size;
+ cie = NULL, tableSize > sizeof(*fde)
+ && tableSize - sizeof(*fde) >= *fde;
+ tableSize -= sizeof(*fde) + *fde,
+ fde += 1 + *fde / sizeof(*fde)) {
+ cie = cie_for_fde(fde, table);
+ if (cie == &bad_cie) {
+ cie = NULL;
+ break;
+ }
+ if (cie == NULL
+ || cie == &not_fde
+ || (ptrType = fde_pointer_type(cie)) < 0)
+ continue;
+ ptr = (const u8 *)(fde + 2);
+ startLoc = read_pointer(&ptr,
+ (const u8 *)(fde + 1) + *fde,
+ ptrType);
+ if (!startLoc)
+ continue;
+ if (!(ptrType & DW_EH_PE_indirect))
+ ptrType &= DW_EH_PE_FORM|DW_EH_PE_signed;
+ endLoc = startLoc
+ + read_pointer(&ptr,
+ (const u8 *)(fde + 1) + *fde,
+ ptrType);
+ if (pc >= startLoc && pc < endLoc)
+ break;
+ }
}
}
if (cie != NULL) {
diff --git a/kernel/user.c b/kernel/user.c
index 6408c0424291..220e586127a0 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -187,6 +187,17 @@ void switch_uid(struct user_struct *new_user)
atomic_dec(&old_user->processes);
switch_uid_keyring(new_user);
current->user = new_user;
+
+ /*
+ * We need to synchronize with __sigqueue_alloc()
+ * doing a get_uid(p->user).. If that saw the old
+ * user value, we need to wait until it has exited
+ * its critical region before we can free the old
+ * structure.
+ */
+ smp_mb();
+ spin_unlock_wait(&current->sighand->siglock);
+
free_uid(old_user);
suid_keys(current);
}
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index cfc737bffe6d..17c2f03d2c27 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -28,6 +28,7 @@
#include <linux/notifier.h>
#include <linux/kthread.h>
#include <linux/hardirq.h>
+#include <linux/mempolicy.h>
/*
* The per-CPU workqueue (if single thread, we always use the first
@@ -98,7 +99,7 @@ static void __queue_work(struct cpu_workqueue_struct *cwq,
* @wq: workqueue to use
* @work: work to queue
*
- * Returns non-zero if it was successfully added.
+ * Returns 0 if @work was already on a queue, non-zero otherwise.
*
* We queue the work to the CPU it was submitted, but there is no
* guarantee that it will be processed by that CPU.
@@ -137,7 +138,7 @@ static void delayed_work_timer_fn(unsigned long __data)
* @work: work to queue
* @delay: number of jiffies to wait before queueing
*
- * Returns non-zero if it was successfully added.
+ * Returns 0 if @work was already on a queue, non-zero otherwise.
*/
int fastcall queue_delayed_work(struct workqueue_struct *wq,
struct work_struct *work, unsigned long delay)
@@ -168,7 +169,7 @@ EXPORT_SYMBOL_GPL(queue_delayed_work);
* @work: work to queue
* @delay: number of jiffies to wait before queueing
*
- * Returns non-zero if it was successfully added.
+ * Returns 0 if @work was already on a queue, non-zero otherwise.
*/
int queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
struct work_struct *work, unsigned long delay)
@@ -245,6 +246,12 @@ static int worker_thread(void *__cwq)
sigprocmask(SIG_BLOCK, &blocked, NULL);
flush_signals(current);
+ /*
+ * We inherited MPOL_INTERLEAVE from the booting kernel.
+ * Set MPOL_DEFAULT to insure node local allocations.
+ */
+ numa_default_policy();
+
/* SIG_IGN makes children autoreap: see do_notify_parent(). */
sa.sa.sa_handler = SIG_IGN;
sa.sa.sa_flags = 0;