summaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/Makefile4
-rw-r--r--kernel/cgroup.c21
-rw-r--r--kernel/cpu.c2
-rw-r--r--kernel/cpuset.c33
-rw-r--r--kernel/irq/internals.h2
-rw-r--r--kernel/irq/manage.c68
-rw-r--r--kernel/irq/migration.c11
-rw-r--r--kernel/irq/proc.c2
-rw-r--r--kernel/kallsyms.c17
-rw-r--r--kernel/lockdep.c4
-rw-r--r--kernel/panic.c1
-rw-r--r--kernel/posix-cpu-timers.c9
-rw-r--r--kernel/power/main.c2
-rw-r--r--kernel/power/swap.c2
-rw-r--r--kernel/profile.c6
-rw-r--r--kernel/ptrace.c4
-rw-r--r--kernel/relay.c9
-rw-r--r--kernel/sched.c18
-rw-r--r--kernel/sched_debug.c5
-rw-r--r--kernel/sched_stats.h15
-rw-r--r--kernel/softlockup.c2
-rw-r--r--kernel/sys_ni.c2
-rw-r--r--kernel/sysctl.c10
-rw-r--r--kernel/time/timekeeping.c22
-rw-r--r--kernel/trace/ftrace.c147
-rw-r--r--kernel/trace/ring_buffer.c118
-rw-r--r--kernel/trace/trace.c1
-rw-r--r--kernel/trace/trace_mmiotrace.c16
-rw-r--r--kernel/trace/trace_stack.c24
29 files changed, 397 insertions, 180 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index 9a3ec66a9d84..19fad003b19d 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -11,8 +11,6 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o \
hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \
notifier.o ksysfs.o pm_qos_params.o sched_clock.o
-CFLAGS_REMOVE_sched.o = -mno-spe
-
ifdef CONFIG_FUNCTION_TRACER
# Do not trace debug files and internal ftrace files
CFLAGS_REMOVE_lockdep.o = -pg
@@ -21,7 +19,7 @@ CFLAGS_REMOVE_mutex-debug.o = -pg
CFLAGS_REMOVE_rtmutex-debug.o = -pg
CFLAGS_REMOVE_cgroup-debug.o = -pg
CFLAGS_REMOVE_sched_clock.o = -pg
-CFLAGS_REMOVE_sched.o = -mno-spe -pg
+CFLAGS_REMOVE_sched.o = -pg
endif
obj-$(CONFIG_FREEZER) += freezer.o
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 358e77564e6f..fe00b3b983a8 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -2039,10 +2039,13 @@ int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry)
struct cgroup *cgrp;
struct cgroup_iter it;
struct task_struct *tsk;
+
/*
- * Validate dentry by checking the superblock operations
+ * Validate dentry by checking the superblock operations,
+ * and make sure it's a directory.
*/
- if (dentry->d_sb->s_op != &cgroup_ops)
+ if (dentry->d_sb->s_op != &cgroup_ops ||
+ !S_ISDIR(dentry->d_inode->i_mode))
goto err;
ret = 0;
@@ -2472,10 +2475,7 @@ static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry)
mutex_unlock(&cgroup_mutex);
return -EBUSY;
}
-
- parent = cgrp->parent;
- root = cgrp->root;
- sb = root->sb;
+ mutex_unlock(&cgroup_mutex);
/*
* Call pre_destroy handlers of subsys. Notify subsystems
@@ -2483,7 +2483,14 @@ static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry)
*/
cgroup_call_pre_destroy(cgrp);
- if (cgroup_has_css_refs(cgrp)) {
+ mutex_lock(&cgroup_mutex);
+ parent = cgrp->parent;
+ root = cgrp->root;
+ sb = root->sb;
+
+ if (atomic_read(&cgrp->count)
+ || !list_empty(&cgrp->children)
+ || cgroup_has_css_refs(cgrp)) {
mutex_unlock(&cgroup_mutex);
return -EBUSY;
}
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 5a732c5ef08b..8ea32e8d68b0 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -462,7 +462,7 @@ out:
* It must be called by the arch code on the new cpu, before the new cpu
* enables interrupts and before the "boot" cpu returns from __cpu_up().
*/
-void notify_cpu_starting(unsigned int cpu)
+void __cpuinit notify_cpu_starting(unsigned int cpu)
{
unsigned long val = CPU_STARTING;
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 3e00526f52ec..96c0ba13b8cd 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -36,6 +36,7 @@
#include <linux/list.h>
#include <linux/mempolicy.h>
#include <linux/mm.h>
+#include <linux/memory.h>
#include <linux/module.h>
#include <linux/mount.h>
#include <linux/namei.h>
@@ -584,10 +585,9 @@ static int generate_sched_domains(cpumask_t **domains,
int i, j, k; /* indices for partition finding loops */
cpumask_t *doms; /* resulting partition; i.e. sched domains */
struct sched_domain_attr *dattr; /* attributes for custom domains */
- int ndoms; /* number of sched domains in result */
+ int ndoms = 0; /* number of sched domains in result */
int nslot; /* next empty doms[] cpumask_t slot */
- ndoms = 0;
doms = NULL;
dattr = NULL;
csa = NULL;
@@ -674,10 +674,8 @@ restart:
* Convert <csn, csa> to <ndoms, doms> and populate cpu masks.
*/
doms = kmalloc(ndoms * sizeof(cpumask_t), GFP_KERNEL);
- if (!doms) {
- ndoms = 0;
+ if (!doms)
goto done;
- }
/*
* The rest of the code, including the scheduler, can deal with
@@ -732,6 +730,13 @@ restart:
done:
kfree(csa);
+ /*
+ * Fallback to the default domain if kmalloc() failed.
+ * See comments in partition_sched_domains().
+ */
+ if (doms == NULL)
+ ndoms = 1;
+
*domains = doms;
*attributes = dattr;
return ndoms;
@@ -2011,12 +2016,23 @@ static int cpuset_track_online_cpus(struct notifier_block *unused_nb,
* Call this routine anytime after node_states[N_HIGH_MEMORY] changes.
* See also the previous routine cpuset_track_online_cpus().
*/
-void cpuset_track_online_nodes(void)
+static int cpuset_track_online_nodes(struct notifier_block *self,
+ unsigned long action, void *arg)
{
cgroup_lock();
- top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY];
- scan_for_empty_cpusets(&top_cpuset);
+ switch (action) {
+ case MEM_ONLINE:
+ top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY];
+ break;
+ case MEM_OFFLINE:
+ top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY];
+ scan_for_empty_cpusets(&top_cpuset);
+ break;
+ default:
+ break;
+ }
cgroup_unlock();
+ return NOTIFY_OK;
}
#endif
@@ -2032,6 +2048,7 @@ void __init cpuset_init_smp(void)
top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY];
hotcpu_notifier(cpuset_track_online_cpus, 0);
+ hotplug_memory_notifier(cpuset_track_online_nodes, 10);
}
/**
diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h
index c9767e641980..64c1c7253dae 100644
--- a/kernel/irq/internals.h
+++ b/kernel/irq/internals.h
@@ -25,6 +25,8 @@ static inline void unregister_handler_proc(unsigned int irq,
struct irqaction *action) { }
#endif
+extern int irq_select_affinity_usr(unsigned int irq);
+
/*
* Debugging printout:
*/
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index c498a1b8c621..801addda3c43 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -82,24 +82,27 @@ int irq_can_set_affinity(unsigned int irq)
int irq_set_affinity(unsigned int irq, cpumask_t cpumask)
{
struct irq_desc *desc = irq_to_desc(irq);
+ unsigned long flags;
if (!desc->chip->set_affinity)
return -EINVAL;
+ spin_lock_irqsave(&desc->lock, flags);
+
#ifdef CONFIG_GENERIC_PENDING_IRQ
if (desc->status & IRQ_MOVE_PCNTXT || desc->status & IRQ_DISABLED) {
- unsigned long flags;
-
- spin_lock_irqsave(&desc->lock, flags);
desc->affinity = cpumask;
desc->chip->set_affinity(irq, cpumask);
- spin_unlock_irqrestore(&desc->lock, flags);
- } else
- set_pending_irq(irq, cpumask);
+ } else {
+ desc->status |= IRQ_MOVE_PENDING;
+ desc->pending_mask = cpumask;
+ }
#else
desc->affinity = cpumask;
desc->chip->set_affinity(irq, cpumask);
#endif
+ desc->status |= IRQ_AFFINITY_SET;
+ spin_unlock_irqrestore(&desc->lock, flags);
return 0;
}
@@ -107,24 +110,59 @@ int irq_set_affinity(unsigned int irq, cpumask_t cpumask)
/*
* Generic version of the affinity autoselector.
*/
-int irq_select_affinity(unsigned int irq)
+int do_irq_select_affinity(unsigned int irq, struct irq_desc *desc)
{
cpumask_t mask;
- struct irq_desc *desc;
if (!irq_can_set_affinity(irq))
return 0;
cpus_and(mask, cpu_online_map, irq_default_affinity);
- desc = irq_to_desc(irq);
+ /*
+ * Preserve an userspace affinity setup, but make sure that
+ * one of the targets is online.
+ */
+ if (desc->status & (IRQ_AFFINITY_SET | IRQ_NO_BALANCING)) {
+ if (cpus_intersects(desc->affinity, cpu_online_map))
+ mask = desc->affinity;
+ else
+ desc->status &= ~IRQ_AFFINITY_SET;
+ }
+
desc->affinity = mask;
desc->chip->set_affinity(irq, mask);
return 0;
}
+#else
+static inline int do_irq_select_affinity(unsigned int irq, struct irq_desc *d)
+{
+ return irq_select_affinity(irq);
+}
#endif
+/*
+ * Called when affinity is set via /proc/irq
+ */
+int irq_select_affinity_usr(unsigned int irq)
+{
+ struct irq_desc *desc = irq_to_desc(irq);
+ unsigned long flags;
+ int ret;
+
+ spin_lock_irqsave(&desc->lock, flags);
+ ret = do_irq_select_affinity(irq, desc);
+ spin_unlock_irqrestore(&desc->lock, flags);
+
+ return ret;
+}
+
+#else
+static inline int do_irq_select_affinity(int irq, struct irq_desc *desc)
+{
+ return 0;
+}
#endif
/**
@@ -327,7 +365,7 @@ int __irq_set_trigger(struct irq_desc *desc, unsigned int irq,
* IRQF_TRIGGER_* but the PIC does not support multiple
* flow-types?
*/
- pr_warning("No set_type function for IRQ %d (%s)\n", irq,
+ pr_debug("No set_type function for IRQ %d (%s)\n", irq,
chip ? (chip->name ? : "unknown") : "unknown");
return 0;
}
@@ -445,8 +483,12 @@ __setup_irq(unsigned int irq, struct irq_desc * desc, struct irqaction *new)
/* Undo nested disables: */
desc->depth = 1;
+ /* Exclude IRQ from balancing if requested */
+ if (new->flags & IRQF_NOBALANCING)
+ desc->status |= IRQ_NO_BALANCING;
+
/* Set default affinity mask once everything is setup */
- irq_select_affinity(irq);
+ do_irq_select_affinity(irq, desc);
} else if ((new->flags & IRQF_TRIGGER_MASK)
&& (new->flags & IRQF_TRIGGER_MASK)
@@ -459,10 +501,6 @@ __setup_irq(unsigned int irq, struct irq_desc * desc, struct irqaction *new)
*p = new;
- /* Exclude IRQ from balancing */
- if (new->flags & IRQF_NOBALANCING)
- desc->status |= IRQ_NO_BALANCING;
-
/* Reset broken irq detection when installing new handler */
desc->irq_count = 0;
desc->irqs_unhandled = 0;
diff --git a/kernel/irq/migration.c b/kernel/irq/migration.c
index 90b920d3f52b..9db681d95814 100644
--- a/kernel/irq/migration.c
+++ b/kernel/irq/migration.c
@@ -1,17 +1,6 @@
#include <linux/irq.h>
-void set_pending_irq(unsigned int irq, cpumask_t mask)
-{
- struct irq_desc *desc = irq_to_desc(irq);
- unsigned long flags;
-
- spin_lock_irqsave(&desc->lock, flags);
- desc->status |= IRQ_MOVE_PENDING;
- desc->pending_mask = mask;
- spin_unlock_irqrestore(&desc->lock, flags);
-}
-
void move_masked_irq(int irq)
{
struct irq_desc *desc = irq_to_desc(irq);
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index 4d161c70ba55..d257e7d6a8a4 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -62,7 +62,7 @@ static ssize_t irq_affinity_proc_write(struct file *file,
if (!cpus_intersects(new_value, cpu_online_map))
/* Special case for empty set - allow the architecture
code to set default SMP affinity. */
- return irq_select_affinity(irq) ? -EINVAL : count;
+ return irq_select_affinity_usr(irq) ? -EINVAL : count;
irq_set_affinity(irq, new_value);
diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
index 5072cf1685a2..7b8b0f21a5b1 100644
--- a/kernel/kallsyms.c
+++ b/kernel/kallsyms.c
@@ -304,17 +304,24 @@ int sprint_symbol(char *buffer, unsigned long address)
char *modname;
const char *name;
unsigned long offset, size;
- char namebuf[KSYM_NAME_LEN];
+ int len;
- name = kallsyms_lookup(address, &size, &offset, &modname, namebuf);
+ name = kallsyms_lookup(address, &size, &offset, &modname, buffer);
if (!name)
return sprintf(buffer, "0x%lx", address);
+ if (name != buffer)
+ strcpy(buffer, name);
+ len = strlen(buffer);
+ buffer += len;
+
if (modname)
- return sprintf(buffer, "%s+%#lx/%#lx [%s]", name, offset,
- size, modname);
+ len += sprintf(buffer, "+%#lx/%#lx [%s]",
+ offset, size, modname);
else
- return sprintf(buffer, "%s+%#lx/%#lx", name, offset, size);
+ len += sprintf(buffer, "+%#lx/%#lx", offset, size);
+
+ return len;
}
/* Look up a kernel symbol and print it to the kernel messages. */
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 06e157119d2b..46a404173db2 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -3276,10 +3276,10 @@ void __init lockdep_info(void)
{
printk("Lock dependency validator: Copyright (c) 2006 Red Hat, Inc., Ingo Molnar\n");
- printk("... MAX_LOCKDEP_SUBCLASSES: %lu\n", MAX_LOCKDEP_SUBCLASSES);
+ printk("... MAX_LOCKDEP_SUBCLASSES: %lu\n", MAX_LOCKDEP_SUBCLASSES);
printk("... MAX_LOCK_DEPTH: %lu\n", MAX_LOCK_DEPTH);
printk("... MAX_LOCKDEP_KEYS: %lu\n", MAX_LOCKDEP_KEYS);
- printk("... CLASSHASH_SIZE: %lu\n", CLASSHASH_SIZE);
+ printk("... CLASSHASH_SIZE: %lu\n", CLASSHASH_SIZE);
printk("... MAX_LOCKDEP_ENTRIES: %lu\n", MAX_LOCKDEP_ENTRIES);
printk("... MAX_LOCKDEP_CHAINS: %lu\n", MAX_LOCKDEP_CHAINS);
printk("... CHAINHASH_SIZE: %lu\n", CHAINHASH_SIZE);
diff --git a/kernel/panic.c b/kernel/panic.c
index 6513aac8e992..4d5088355bfe 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -167,6 +167,7 @@ static const struct tnt tnts[] = {
* 'M' - System experienced a machine check exception.
* 'B' - System has hit bad_page.
* 'U' - Userspace-defined naughtiness.
+ * 'D' - Kernel has oopsed before
* 'A' - ACPI table overridden.
* 'W' - Taint on warning.
* 'C' - modules from drivers/staging are loaded.
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index 153dcb2639c3..4e5288a831de 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -311,7 +311,7 @@ static int cpu_clock_sample_group(const clockid_t which_clock,
struct task_cputime cputime;
thread_group_cputime(p, &cputime);
- switch (which_clock) {
+ switch (CPUCLOCK_WHICH(which_clock)) {
default:
return -EINVAL;
case CPUCLOCK_PROF:
@@ -1308,9 +1308,10 @@ static inline int task_cputime_expired(const struct task_cputime *sample,
*/
static inline int fastpath_timer_check(struct task_struct *tsk)
{
- struct signal_struct *sig = tsk->signal;
+ struct signal_struct *sig;
- if (unlikely(!sig))
+ /* tsk == current, ensure it is safe to use ->signal/sighand */
+ if (unlikely(tsk->exit_state))
return 0;
if (!task_cputime_zero(&tsk->cputime_expires)) {
@@ -1323,6 +1324,8 @@ static inline int fastpath_timer_check(struct task_struct *tsk)
if (task_cputime_expired(&task_sample, &tsk->cputime_expires))
return 1;
}
+
+ sig = tsk->signal;
if (!task_cputime_zero(&sig->cputime_expires)) {
struct task_cputime group_sample;
diff --git a/kernel/power/main.c b/kernel/power/main.c
index 19122cf6d827..b8f7ce9473e8 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -174,7 +174,7 @@ static void suspend_test_finish(const char *label)
* has some performance issues. The stack dump of a WARN_ON
* is more likely to get the right attention than a printk...
*/
- WARN_ON(msec > (TEST_SUSPEND_SECONDS * 1000));
+ WARN(msec > (TEST_SUSPEND_SECONDS * 1000), "Component: %s\n", label);
}
#else
diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index b7713b53d07a..6da14358537c 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -633,7 +633,7 @@ void swsusp_close(fmode_t mode)
return;
}
- blkdev_put(resume_bdev, mode); /* move up */
+ blkdev_put(resume_bdev, mode);
}
static int swsusp_header_init(void)
diff --git a/kernel/profile.c b/kernel/profile.c
index 9830a037d8db..dc41827fbfee 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -351,7 +351,7 @@ out:
put_cpu();
}
-static int __devinit profile_cpu_callback(struct notifier_block *info,
+static int __cpuinit profile_cpu_callback(struct notifier_block *info,
unsigned long action, void *__cpu)
{
int node, cpu = (unsigned long)__cpu;
@@ -544,7 +544,7 @@ static const struct file_operations proc_profile_operations = {
};
#ifdef CONFIG_SMP
-static void __init profile_nop(void *unused)
+static inline void profile_nop(void *unused)
{
}
@@ -596,7 +596,7 @@ out_cleanup:
#define create_hash_tables() ({ 0; })
#endif
-int create_proc_profile(void)
+int __ref create_proc_profile(void) /* false positive from hotcpu_notifier */
{
struct proc_dir_entry *entry;
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 1e68e4c39e2c..4c8bcd7dd8e0 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -612,7 +612,7 @@ int generic_ptrace_pokedata(struct task_struct *tsk, long addr, long data)
return (copied == sizeof(data)) ? 0 : -EIO;
}
-#if defined CONFIG_COMPAT && defined __ARCH_WANT_COMPAT_SYS_PTRACE
+#if defined CONFIG_COMPAT
#include <linux/compat.h>
int compat_ptrace_request(struct task_struct *child, compat_long_t request,
@@ -709,4 +709,4 @@ asmlinkage long compat_sys_ptrace(compat_long_t request, compat_long_t pid,
unlock_kernel();
return ret;
}
-#endif /* CONFIG_COMPAT && __ARCH_WANT_COMPAT_SYS_PTRACE */
+#endif /* CONFIG_COMPAT */
diff --git a/kernel/relay.c b/kernel/relay.c
index 8d13a7855c08..32b0befdcb6a 100644
--- a/kernel/relay.c
+++ b/kernel/relay.c
@@ -400,7 +400,7 @@ void relay_reset(struct rchan *chan)
}
mutex_lock(&relay_channels_mutex);
- for_each_online_cpu(i)
+ for_each_possible_cpu(i)
if (chan->buf[i])
__relay_reset(chan->buf[i], 0);
mutex_unlock(&relay_channels_mutex);
@@ -611,10 +611,9 @@ struct rchan *relay_open(const char *base_filename,
return chan;
free_bufs:
- for_each_online_cpu(i) {
- if (!chan->buf[i])
- break;
- relay_close_buf(chan->buf[i]);
+ for_each_possible_cpu(i) {
+ if (chan->buf[i])
+ relay_close_buf(chan->buf[i]);
}
kref_put(&chan->kref, relay_destroy_channel);
diff --git a/kernel/sched.c b/kernel/sched.c
index c94baf2969e7..b7480fb5c3dc 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1453,9 +1453,10 @@ static int task_hot(struct task_struct *p, u64 now, struct sched_domain *sd);
static unsigned long cpu_avg_load_per_task(int cpu)
{
struct rq *rq = cpu_rq(cpu);
+ unsigned long nr_running = ACCESS_ONCE(rq->nr_running);
- if (rq->nr_running)
- rq->avg_load_per_task = rq->load.weight / rq->nr_running;
+ if (nr_running)
+ rq->avg_load_per_task = rq->load.weight / nr_running;
else
rq->avg_load_per_task = 0;
@@ -7789,13 +7790,14 @@ static int dattrs_equal(struct sched_domain_attr *cur, int idx_cur,
*
* The passed in 'doms_new' should be kmalloc'd. This routine takes
* ownership of it and will kfree it when done with it. If the caller
- * failed the kmalloc call, then it can pass in doms_new == NULL,
- * and partition_sched_domains() will fallback to the single partition
- * 'fallback_doms', it also forces the domains to be rebuilt.
+ * failed the kmalloc call, then it can pass in doms_new == NULL &&
+ * ndoms_new == 1, and partition_sched_domains() will fallback to
+ * the single partition 'fallback_doms', it also forces the domains
+ * to be rebuilt.
*
- * If doms_new==NULL it will be replaced with cpu_online_map.
- * ndoms_new==0 is a special case for destroying existing domains.
- * It will not create the default domain.
+ * If doms_new == NULL it will be replaced with cpu_online_map.
+ * ndoms_new == 0 is a special case for destroying existing domains,
+ * and it will not create the default domain.
*
* Call with hotplug lock held
*/
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c
index 48ecc51e7701..26ed8e3d1c15 100644
--- a/kernel/sched_debug.c
+++ b/kernel/sched_debug.c
@@ -423,10 +423,11 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m)
#undef __P
{
+ unsigned int this_cpu = raw_smp_processor_id();
u64 t0, t1;
- t0 = sched_clock();
- t1 = sched_clock();
+ t0 = cpu_clock(this_cpu);
+ t1 = cpu_clock(this_cpu);
SEQ_printf(m, "%-35s:%21Ld\n",
"clock-delta", (long long)(t1-t0));
}
diff --git a/kernel/sched_stats.h b/kernel/sched_stats.h
index ee71bec1da66..7dbf72a2b02c 100644
--- a/kernel/sched_stats.h
+++ b/kernel/sched_stats.h
@@ -298,9 +298,11 @@ static inline void account_group_user_time(struct task_struct *tsk,
{
struct signal_struct *sig;
- sig = tsk->signal;
- if (unlikely(!sig))
+ /* tsk == current, ensure it is safe to use ->signal */
+ if (unlikely(tsk->exit_state))
return;
+
+ sig = tsk->signal;
if (sig->cputime.totals) {
struct task_cputime *times;
@@ -325,9 +327,11 @@ static inline void account_group_system_time(struct task_struct *tsk,
{
struct signal_struct *sig;
- sig = tsk->signal;
- if (unlikely(!sig))
+ /* tsk == current, ensure it is safe to use ->signal */
+ if (unlikely(tsk->exit_state))
return;
+
+ sig = tsk->signal;
if (sig->cputime.totals) {
struct task_cputime *times;
@@ -353,8 +357,11 @@ static inline void account_group_exec_runtime(struct task_struct *tsk,
struct signal_struct *sig;
sig = tsk->signal;
+ /* see __exit_signal()->task_rq_unlock_wait() */
+ barrier();
if (unlikely(!sig))
return;
+
if (sig->cputime.totals) {
struct task_cputime *times;
diff --git a/kernel/softlockup.c b/kernel/softlockup.c
index 3953e4aed733..dc0b3be6b7d5 100644
--- a/kernel/softlockup.c
+++ b/kernel/softlockup.c
@@ -188,7 +188,7 @@ static void check_hung_task(struct task_struct *t, unsigned long now)
if ((long)(now - t->last_switch_timestamp) <
sysctl_hung_task_timeout_secs)
return;
- if (sysctl_hung_task_warnings < 0)
+ if (!sysctl_hung_task_warnings)
return;
sysctl_hung_task_warnings--;
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index a77b27b11b04..e14a23281707 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -31,7 +31,7 @@ cond_syscall(sys_socketpair);
cond_syscall(sys_bind);
cond_syscall(sys_listen);
cond_syscall(sys_accept);
-cond_syscall(sys_paccept);
+cond_syscall(sys_accept4);
cond_syscall(sys_connect);
cond_syscall(sys_getsockname);
cond_syscall(sys_getpeername);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 9d048fa2d902..3d56fe7570da 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -176,6 +176,9 @@ extern struct ctl_table random_table[];
#ifdef CONFIG_INOTIFY_USER
extern struct ctl_table inotify_table[];
#endif
+#ifdef CONFIG_EPOLL
+extern struct ctl_table epoll_table[];
+#endif
#ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
int sysctl_legacy_va_layout;
@@ -1325,6 +1328,13 @@ static struct ctl_table fs_table[] = {
.child = inotify_table,
},
#endif
+#ifdef CONFIG_EPOLL
+ {
+ .procname = "epoll",
+ .mode = 0555,
+ .child = epoll_table,
+ },
+#endif
#endif
{
.ctl_name = KERN_SETUID_DUMPABLE,
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index e7acfb482a68..fa05e88aa76f 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -518,6 +518,28 @@ void update_wall_time(void)
/* correct the clock when NTP error is too big */
clocksource_adjust(offset);
+ /*
+ * Since in the loop above, we accumulate any amount of time
+ * in xtime_nsec over a second into xtime.tv_sec, its possible for
+ * xtime_nsec to be fairly small after the loop. Further, if we're
+ * slightly speeding the clocksource up in clocksource_adjust(),
+ * its possible the required corrective factor to xtime_nsec could
+ * cause it to underflow.
+ *
+ * Now, we cannot simply roll the accumulated second back, since
+ * the NTP subsystem has been notified via second_overflow. So
+ * instead we push xtime_nsec forward by the amount we underflowed,
+ * and add that amount into the error.
+ *
+ * We'll correct this error next time through this function, when
+ * xtime_nsec is not as small.
+ */
+ if (unlikely((s64)clock->xtime_nsec < 0)) {
+ s64 neg = -(s64)clock->xtime_nsec;
+ clock->xtime_nsec = 0;
+ clock->error += neg << (NTP_SCALE_SHIFT - clock->shift);
+ }
+
/* store full nanoseconds into xtime after rounding it up and
* add the remainder to the error difference.
*/
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 4a39d24568c8..78db083390f0 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -185,7 +185,6 @@ enum {
};
static int ftrace_filtered;
-static int tracing_on;
static LIST_HEAD(ftrace_new_addrs);
@@ -327,96 +326,89 @@ ftrace_record_ip(unsigned long ip)
static int
__ftrace_replace_code(struct dyn_ftrace *rec,
- unsigned char *old, unsigned char *new, int enable)
+ unsigned char *nop, int enable)
{
unsigned long ip, fl;
+ unsigned char *call, *old, *new;
ip = rec->ip;
- if (ftrace_filtered && enable) {
+ /*
+ * If this record is not to be traced and
+ * it is not enabled then do nothing.
+ *
+ * If this record is not to be traced and
+ * it is enabled then disabled it.
+ *
+ */
+ if (rec->flags & FTRACE_FL_NOTRACE) {
+ if (rec->flags & FTRACE_FL_ENABLED)
+ rec->flags &= ~FTRACE_FL_ENABLED;
+ else
+ return 0;
+
+ } else if (ftrace_filtered && enable) {
/*
- * If filtering is on:
- *
- * If this record is set to be filtered and
- * is enabled then do nothing.
- *
- * If this record is set to be filtered and
- * it is not enabled, enable it.
- *
- * If this record is not set to be filtered
- * and it is not enabled do nothing.
- *
- * If this record is set not to trace then
- * do nothing.
- *
- * If this record is set not to trace and
- * it is enabled then disable it.
- *
- * If this record is not set to be filtered and
- * it is enabled, disable it.
+ * Filtering is on:
*/
- fl = rec->flags & (FTRACE_FL_FILTER | FTRACE_FL_NOTRACE |
- FTRACE_FL_ENABLED);
+ fl = rec->flags & (FTRACE_FL_FILTER | FTRACE_FL_ENABLED);
- if ((fl == (FTRACE_FL_FILTER | FTRACE_FL_ENABLED)) ||
- (fl == (FTRACE_FL_FILTER | FTRACE_FL_NOTRACE)) ||
- !fl || (fl == FTRACE_FL_NOTRACE))
+ /* Record is filtered and enabled, do nothing */
+ if (fl == (FTRACE_FL_FILTER | FTRACE_FL_ENABLED))
return 0;
- /*
- * If it is enabled disable it,
- * otherwise enable it!
- */
- if (fl & FTRACE_FL_ENABLED) {
- /* swap new and old */
- new = old;
- old = ftrace_call_replace(ip, FTRACE_ADDR);
+ /* Record is not filtered and is not enabled do nothing */
+ if (!fl)
+ return 0;
+
+ /* Record is not filtered but enabled, disable it */
+ if (fl == FTRACE_FL_ENABLED)
rec->flags &= ~FTRACE_FL_ENABLED;
- } else {
- new = ftrace_call_replace(ip, FTRACE_ADDR);
+ else
+ /* Otherwise record is filtered but not enabled, enable it */
rec->flags |= FTRACE_FL_ENABLED;
- }
} else {
+ /* Disable or not filtered */
if (enable) {
- /*
- * If this record is set not to trace and is
- * not enabled, do nothing.
- */
- fl = rec->flags & (FTRACE_FL_NOTRACE | FTRACE_FL_ENABLED);
- if (fl == FTRACE_FL_NOTRACE)
- return 0;
-
- new = ftrace_call_replace(ip, FTRACE_ADDR);
- } else
- old = ftrace_call_replace(ip, FTRACE_ADDR);
-
- if (enable) {
+ /* if record is enabled, do nothing */
if (rec->flags & FTRACE_FL_ENABLED)
return 0;
+
rec->flags |= FTRACE_FL_ENABLED;
+
} else {
+
+ /* if record is not enabled do nothing */
if (!(rec->flags & FTRACE_FL_ENABLED))
return 0;
+
rec->flags &= ~FTRACE_FL_ENABLED;
}
}
+ call = ftrace_call_replace(ip, FTRACE_ADDR);
+
+ if (rec->flags & FTRACE_FL_ENABLED) {
+ old = nop;
+ new = call;
+ } else {
+ old = call;
+ new = nop;
+ }
+
return ftrace_modify_code(ip, old, new);
}
static void ftrace_replace_code(int enable)
{
int i, failed;
- unsigned char *new = NULL, *old = NULL;
+ unsigned char *nop = NULL;
struct dyn_ftrace *rec;
struct ftrace_page *pg;
- if (enable)
- old = ftrace_nop_replace();
- else
- new = ftrace_nop_replace();
+ nop = ftrace_nop_replace();
for (pg = ftrace_pages_start; pg; pg = pg->next) {
for (i = 0; i < pg->index; i++) {
@@ -434,7 +426,7 @@ static void ftrace_replace_code(int enable)
unfreeze_record(rec);
}
- failed = __ftrace_replace_code(rec, old, new, enable);
+ failed = __ftrace_replace_code(rec, nop, enable);
if (failed && (rec->flags & FTRACE_FL_CONVERTED)) {
rec->flags |= FTRACE_FL_FAILED;
if ((system_state == SYSTEM_BOOTING) ||
@@ -506,13 +498,10 @@ static int __ftrace_modify_code(void *data)
{
int *command = data;
- if (*command & FTRACE_ENABLE_CALLS) {
+ if (*command & FTRACE_ENABLE_CALLS)
ftrace_replace_code(1);
- tracing_on = 1;
- } else if (*command & FTRACE_DISABLE_CALLS) {
+ else if (*command & FTRACE_DISABLE_CALLS)
ftrace_replace_code(0);
- tracing_on = 0;
- }
if (*command & FTRACE_UPDATE_TRACE_FUNC)
ftrace_update_ftrace_func(ftrace_trace_function);
@@ -538,8 +527,7 @@ static void ftrace_startup(void)
mutex_lock(&ftrace_start_lock);
ftrace_start++;
- if (ftrace_start == 1)
- command |= FTRACE_ENABLE_CALLS;
+ command |= FTRACE_ENABLE_CALLS;
if (saved_ftrace_func != ftrace_trace_function) {
saved_ftrace_func = ftrace_trace_function;
@@ -677,7 +665,7 @@ static int __init ftrace_dyn_table_alloc(unsigned long num_to_init)
cnt = num_to_init / ENTRIES_PER_PAGE;
pr_info("ftrace: allocating %ld entries in %d pages\n",
- num_to_init, cnt);
+ num_to_init, cnt + 1);
for (i = 0; i < cnt; i++) {
pg->next = (void *)get_zeroed_page(GFP_KERNEL);
@@ -738,6 +726,9 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
((iter->flags & FTRACE_ITER_FAILURES) &&
!(rec->flags & FTRACE_FL_FAILED)) ||
+ ((iter->flags & FTRACE_ITER_FILTER) &&
+ !(rec->flags & FTRACE_FL_FILTER)) ||
+
((iter->flags & FTRACE_ITER_NOTRACE) &&
!(rec->flags & FTRACE_FL_NOTRACE))) {
rec = NULL;
@@ -757,13 +748,11 @@ static void *t_start(struct seq_file *m, loff_t *pos)
void *p = NULL;
loff_t l = -1;
- if (*pos != iter->pos) {
- for (p = t_next(m, p, &l); p && l < *pos; p = t_next(m, p, &l))
- ;
- } else {
- l = *pos;
- p = t_next(m, p, &l);
- }
+ if (*pos > iter->pos)
+ *pos = iter->pos;
+
+ l = *pos;
+ p = t_next(m, p, &l);
return p;
}
@@ -774,15 +763,21 @@ static void t_stop(struct seq_file *m, void *p)
static int t_show(struct seq_file *m, void *v)
{
+ struct ftrace_iterator *iter = m->private;
struct dyn_ftrace *rec = v;
char str[KSYM_SYMBOL_LEN];
+ int ret = 0;
if (!rec)
return 0;
kallsyms_lookup(rec->ip, NULL, NULL, NULL, str);
- seq_printf(m, "%s\n", str);
+ ret = seq_printf(m, "%s\n", str);
+ if (ret < 0) {
+ iter->pos--;
+ iter->idx--;
+ }
return 0;
}
@@ -808,7 +803,7 @@ ftrace_avail_open(struct inode *inode, struct file *file)
return -ENOMEM;
iter->pg = ftrace_pages_start;
- iter->pos = -1;
+ iter->pos = 0;
ret = seq_open(file, &show_ftrace_seq_ops);
if (!ret) {
@@ -895,7 +890,7 @@ ftrace_regex_open(struct inode *inode, struct file *file, int enable)
if (file->f_mode & FMODE_READ) {
iter->pg = ftrace_pages_start;
- iter->pos = -1;
+ iter->pos = 0;
iter->flags = enable ? FTRACE_ITER_FILTER :
FTRACE_ITER_NOTRACE;
@@ -1186,7 +1181,7 @@ ftrace_regex_release(struct inode *inode, struct file *file, int enable)
mutex_lock(&ftrace_sysctl_lock);
mutex_lock(&ftrace_start_lock);
- if (iter->filtered && ftrace_start && ftrace_enabled)
+ if (ftrace_start && ftrace_enabled)
ftrace_run_update_code(FTRACE_ENABLE_CALLS);
mutex_unlock(&ftrace_start_lock);
mutex_unlock(&ftrace_sysctl_lock);
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 2f76193c3489..668bbb5ef2bd 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -16,14 +16,49 @@
#include <linux/list.h>
#include <linux/fs.h>
+#include "trace.h"
+
+/* Global flag to disable all recording to ring buffers */
+static int ring_buffers_off __read_mostly;
+
+/**
+ * tracing_on - enable all tracing buffers
+ *
+ * This function enables all tracing buffers that may have been
+ * disabled with tracing_off.
+ */
+void tracing_on(void)
+{
+ ring_buffers_off = 0;
+}
+
+/**
+ * tracing_off - turn off all tracing buffers
+ *
+ * This function stops all tracing buffers from recording data.
+ * It does not disable any overhead the tracers themselves may
+ * be causing. This function simply causes all recording to
+ * the ring buffers to fail.
+ */
+void tracing_off(void)
+{
+ ring_buffers_off = 1;
+}
+
/* Up this if you want to test the TIME_EXTENTS and normalization */
#define DEBUG_SHIFT 0
/* FIXME!!! */
u64 ring_buffer_time_stamp(int cpu)
{
+ u64 time;
+
+ preempt_disable_notrace();
/* shift to debug/test normalization and TIME_EXTENTS */
- return sched_clock() << DEBUG_SHIFT;
+ time = sched_clock() << DEBUG_SHIFT;
+ preempt_enable_notrace();
+
+ return time;
}
void ring_buffer_normalize_time_stamp(int cpu, u64 *ts)
@@ -503,6 +538,12 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
LIST_HEAD(pages);
int i, cpu;
+ /*
+ * Always succeed at resizing a non-existent buffer:
+ */
+ if (!buffer)
+ return size;
+
size = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
size *= BUF_PAGE_SIZE;
buffer_size = buffer->pages * BUF_PAGE_SIZE;
@@ -576,6 +617,7 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
list_del_init(&page->list);
free_buffer_page(page);
}
+ mutex_unlock(&buffer->mutex);
return -ENOMEM;
}
@@ -1133,6 +1175,9 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer,
struct ring_buffer_event *event;
int cpu, resched;
+ if (ring_buffers_off)
+ return NULL;
+
if (atomic_read(&buffer->record_disabled))
return NULL;
@@ -1170,7 +1215,7 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer,
out:
if (resched)
- preempt_enable_notrace();
+ preempt_enable_no_resched_notrace();
else
preempt_enable_notrace();
return NULL;
@@ -1249,6 +1294,9 @@ int ring_buffer_write(struct ring_buffer *buffer,
int ret = -EBUSY;
int cpu, resched;
+ if (ring_buffers_off)
+ return -EBUSY;
+
if (atomic_read(&buffer->record_disabled))
return -EBUSY;
@@ -2070,3 +2118,69 @@ int ring_buffer_swap_cpu(struct ring_buffer *buffer_a,
return 0;
}
+static ssize_t
+rb_simple_read(struct file *filp, char __user *ubuf,
+ size_t cnt, loff_t *ppos)
+{
+ int *p = filp->private_data;
+ char buf[64];
+ int r;
+
+ /* !ring_buffers_off == tracing_on */
+ r = sprintf(buf, "%d\n", !*p);
+
+ return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+}
+
+static ssize_t
+rb_simple_write(struct file *filp, const char __user *ubuf,
+ size_t cnt, loff_t *ppos)
+{
+ int *p = filp->private_data;
+ char buf[64];
+ long val;
+ int ret;
+
+ if (cnt >= sizeof(buf))
+ return -EINVAL;
+
+ if (copy_from_user(&buf, ubuf, cnt))
+ return -EFAULT;
+
+ buf[cnt] = 0;
+
+ ret = strict_strtoul(buf, 10, &val);
+ if (ret < 0)
+ return ret;
+
+ /* !ring_buffers_off == tracing_on */
+ *p = !val;
+
+ (*ppos)++;
+
+ return cnt;
+}
+
+static struct file_operations rb_simple_fops = {
+ .open = tracing_open_generic,
+ .read = rb_simple_read,
+ .write = rb_simple_write,
+};
+
+
+static __init int rb_init_debugfs(void)
+{
+ struct dentry *d_tracer;
+ struct dentry *entry;
+
+ d_tracer = tracing_init_dentry();
+
+ entry = debugfs_create_file("tracing_on", 0644, d_tracer,
+ &ring_buffers_off, &rb_simple_fops);
+ if (!entry)
+ pr_warning("Could not create debugfs 'tracing_on' entry\n");
+
+ return 0;
+}
+
+fs_initcall(rb_init_debugfs);
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 697eda36b86a..d86e3252f300 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1936,6 +1936,7 @@ __tracing_open(struct inode *inode, struct file *file, int *ret)
ring_buffer_read_finish(iter->buffer_iter[cpu]);
}
mutex_unlock(&trace_types_lock);
+ kfree(iter);
return ERR_PTR(-ENOMEM);
}
diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c
index f28484618ff0..e62cbf78eab6 100644
--- a/kernel/trace/trace_mmiotrace.c
+++ b/kernel/trace/trace_mmiotrace.c
@@ -18,12 +18,14 @@ struct header_iter {
static struct trace_array *mmio_trace_array;
static bool overrun_detected;
+static unsigned long prev_overruns;
static void mmio_reset_data(struct trace_array *tr)
{
int cpu;
overrun_detected = false;
+ prev_overruns = 0;
tr->time_start = ftrace_now(tr->cpu);
for_each_online_cpu(cpu)
@@ -128,16 +130,12 @@ static void mmio_close(struct trace_iterator *iter)
static unsigned long count_overruns(struct trace_iterator *iter)
{
- int cpu;
unsigned long cnt = 0;
-/* FIXME: */
-#if 0
- for_each_online_cpu(cpu) {
- cnt += iter->overrun[cpu];
- iter->overrun[cpu] = 0;
- }
-#endif
- (void)cpu;
+ unsigned long over = ring_buffer_overruns(iter->tr->buffer);
+
+ if (over > prev_overruns)
+ cnt = over - prev_overruns;
+ prev_overruns = over;
return cnt;
}
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index be682b62fe58..3bdb44bde4b7 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -184,11 +184,16 @@ static struct file_operations stack_max_size_fops = {
static void *
t_next(struct seq_file *m, void *v, loff_t *pos)
{
- long i = (long)m->private;
+ long i;
(*pos)++;
- i++;
+ if (v == SEQ_START_TOKEN)
+ i = 0;
+ else {
+ i = *(long *)v;
+ i++;
+ }
if (i >= max_stack_trace.nr_entries ||
stack_dump_trace[i] == ULONG_MAX)
@@ -201,12 +206,15 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
static void *t_start(struct seq_file *m, loff_t *pos)
{
- void *t = &m->private;
+ void *t = SEQ_START_TOKEN;
loff_t l = 0;
local_irq_disable();
__raw_spin_lock(&max_stack_lock);
+ if (*pos == 0)
+ return SEQ_START_TOKEN;
+
for (; t && l < *pos; t = t_next(m, t, &l))
;
@@ -235,10 +243,10 @@ static int trace_lookup_stack(struct seq_file *m, long i)
static int t_show(struct seq_file *m, void *v)
{
- long i = *(long *)v;
+ long i;
int size;
- if (i < 0) {
+ if (v == SEQ_START_TOKEN) {
seq_printf(m, " Depth Size Location"
" (%d entries)\n"
" ----- ---- --------\n",
@@ -246,6 +254,8 @@ static int t_show(struct seq_file *m, void *v)
return 0;
}
+ i = *(long *)v;
+
if (i >= max_stack_trace.nr_entries ||
stack_dump_trace[i] == ULONG_MAX)
return 0;
@@ -275,10 +285,6 @@ static int stack_trace_open(struct inode *inode, struct file *file)
int ret;
ret = seq_open(file, &stack_trace_seq_ops);
- if (!ret) {
- struct seq_file *m = file->private_data;
- m->private = (void *)-1;
- }
return ret;
}
OpenPOWER on IntegriCloud