summaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
authorJiri Kosina <jkosina@suse.cz>2010-04-23 02:08:44 +0200
committerJiri Kosina <jkosina@suse.cz>2010-04-23 02:08:44 +0200
commit6c9468e9eb1252eaefd94ce7f06e1be9b0b641b1 (patch)
tree797676a336b050bfa1ef879377c07e541b9075d6 /kernel
parent4cb3ca7cd7e2cae8d1daf5345ec99a1e8502cf3f (diff)
parentc81eddb0e3728661d1585fbc564449c94165cc36 (diff)
downloadblackbird-op-linux-6c9468e9eb1252eaefd94ce7f06e1be9b0b641b1.tar.gz
blackbird-op-linux-6c9468e9eb1252eaefd94ce7f06e1be9b0b641b1.zip
Merge branch 'master' into for-next
Diffstat (limited to 'kernel')
-rw-r--r--kernel/async.c1
-rw-r--r--kernel/audit.c1
-rw-r--r--kernel/audit_tree.c1
-rw-r--r--kernel/audit_watch.c1
-rw-r--r--kernel/auditfilter.c1
-rw-r--r--kernel/auditsc.c3
-rw-r--r--kernel/cgroup.c1
-rw-r--r--kernel/cgroup_freezer.c10
-rw-r--r--kernel/compat.c1
-rw-r--r--kernel/cpu.c1
-rw-r--r--kernel/cpuset.c106
-rw-r--r--kernel/cred.c11
-rw-r--r--kernel/early_res.c6
-rw-r--r--kernel/exit.c5
-rw-r--r--kernel/fork.c12
-rw-r--r--kernel/hw_breakpoint.c11
-rw-r--r--kernel/irq/chip.c35
-rw-r--r--kernel/irq/manage.c32
-rw-r--r--kernel/irq/numa_migrate.c1
-rw-r--r--kernel/irq/proc.c1
-rw-r--r--kernel/kallsyms.c1
-rw-r--r--kernel/kgdb.c205
-rw-r--r--kernel/kprobes.c3
-rw-r--r--kernel/kthread.c2
-rw-r--r--kernel/latencytop.c1
-rw-r--r--kernel/lockdep.c32
-rw-r--r--kernel/module.c139
-rw-r--r--kernel/nsproxy.c1
-rw-r--r--kernel/padata.c1
-rw-r--r--kernel/perf_event.c137
-rw-r--r--kernel/pid.c4
-rw-r--r--kernel/pid_namespace.c1
-rw-r--r--kernel/posix-cpu-timers.c10
-rw-r--r--kernel/power/hibernate.c1
-rw-r--r--kernel/power/hibernate_nvs.c1
-rw-r--r--kernel/power/process.c5
-rw-r--r--kernel/power/snapshot.c1
-rw-r--r--kernel/power/suspend.c1
-rw-r--r--kernel/power/swap.c1
-rw-r--r--kernel/power/user.c2
-rw-r--r--kernel/rcupdate.c30
-rw-r--r--kernel/rcutree.h21
-rw-r--r--kernel/rcutree_plugin.h8
-rw-r--r--kernel/res_counter.c1
-rw-r--r--kernel/resource.c44
-rw-r--r--kernel/sched.c19
-rw-r--r--kernel/sched_cpupri.c1
-rw-r--r--kernel/sched_debug.c4
-rw-r--r--kernel/sched_fair.c2
-rw-r--r--kernel/sched_rt.c7
-rw-r--r--kernel/slow-work.c2
-rw-r--r--kernel/slow-work.h8
-rw-r--r--kernel/smp.c1
-rw-r--r--kernel/softlockup.c4
-rw-r--r--kernel/srcu.c1
-rw-r--r--kernel/sys.c1
-rw-r--r--kernel/sysctl_binary.c1
-rw-r--r--kernel/taskstats.c1
-rw-r--r--kernel/time.c1
-rw-r--r--kernel/time/tick-oneshot.c52
-rw-r--r--kernel/time/timecompare.c1
-rw-r--r--kernel/time/timekeeping.c3
-rw-r--r--kernel/time/timer_list.c3
-rw-r--r--kernel/timer.c2
-rw-r--r--kernel/trace/Makefile2
-rw-r--r--kernel/trace/blktrace.c1
-rw-r--r--kernel/trace/ftrace.c31
-rw-r--r--kernel/trace/power-traces.c1
-rw-r--r--kernel/trace/ring_buffer.c35
-rw-r--r--kernel/trace/trace.c51
-rw-r--r--kernel/trace/trace.h3
-rw-r--r--kernel/trace/trace_clock.c5
-rw-r--r--kernel/trace/trace_event_perf.c (renamed from kernel/trace/trace_event_profile.c)63
-rw-r--r--kernel/trace/trace_events.c3
-rw-r--r--kernel/trace/trace_events_filter.c1
-rw-r--r--kernel/trace/trace_functions_graph.c28
-rw-r--r--kernel/trace/trace_kprobe.c29
-rw-r--r--kernel/trace/trace_ksym.c1
-rw-r--r--kernel/trace/trace_mmiotrace.c1
-rw-r--r--kernel/trace/trace_selftest.c1
-rw-r--r--kernel/trace/trace_stat.c1
-rw-r--r--kernel/trace/trace_syscalls.c73
-rw-r--r--kernel/trace/trace_workqueue.c1
83 files changed, 856 insertions, 483 deletions
diff --git a/kernel/async.c b/kernel/async.c
index 27235f5de198..15319d6c18fe 100644
--- a/kernel/async.c
+++ b/kernel/async.c
@@ -56,6 +56,7 @@ asynchronous and synchronous parts of the kernel.
#include <linux/init.h>
#include <linux/kthread.h>
#include <linux/delay.h>
+#include <linux/slab.h>
#include <asm/atomic.h>
static async_cookie_t next_cookie = 1;
diff --git a/kernel/audit.c b/kernel/audit.c
index 78f7f86aa238..c71bd26631a2 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -46,6 +46,7 @@
#include <asm/atomic.h>
#include <linux/mm.h>
#include <linux/module.h>
+#include <linux/slab.h>
#include <linux/err.h>
#include <linux/kthread.h>
diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index 028e85663f27..46a57b57a335 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -3,6 +3,7 @@
#include <linux/namei.h>
#include <linux/mount.h>
#include <linux/kthread.h>
+#include <linux/slab.h>
struct audit_tree;
struct audit_chunk;
diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c
index cc7e87936cbc..8df43696f4ba 100644
--- a/kernel/audit_watch.c
+++ b/kernel/audit_watch.c
@@ -27,6 +27,7 @@
#include <linux/namei.h>
#include <linux/netlink.h>
#include <linux/sched.h>
+#include <linux/slab.h>
#include <linux/inotify.h>
#include <linux/security.h>
#include "audit.h"
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index a70604047f3c..ce08041f578d 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -27,6 +27,7 @@
#include <linux/namei.h>
#include <linux/netlink.h>
#include <linux/sched.h>
+#include <linux/slab.h>
#include <linux/security.h>
#include "audit.h"
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index f3a461c0970a..3828ad5fb8f1 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -49,6 +49,7 @@
#include <linux/namei.h>
#include <linux/mm.h>
#include <linux/module.h>
+#include <linux/slab.h>
#include <linux/mount.h>
#include <linux/socket.h>
#include <linux/mqueue.h>
@@ -1893,7 +1894,7 @@ static int audit_inc_name_count(struct audit_context *context,
{
if (context->name_count >= AUDIT_NAMES) {
if (inode)
- printk(KERN_DEBUG "name_count maxed, losing inode data: "
+ printk(KERN_DEBUG "audit: name_count maxed, losing inode data: "
"dev=%02x:%02x, inode=%lu\n",
MAJOR(inode->i_sb->s_dev),
MINOR(inode->i_sb->s_dev),
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index a3b0f24bddbb..06dbf97a7590 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -27,7 +27,6 @@
*/
#include <linux/cgroup.h>
-#include <linux/module.h>
#include <linux/ctype.h>
#include <linux/errno.h>
#include <linux/fs.h>
diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c
index 59e9ef6aab40..da5e13975531 100644
--- a/kernel/cgroup_freezer.c
+++ b/kernel/cgroup_freezer.c
@@ -15,6 +15,7 @@
*/
#include <linux/module.h>
+#include <linux/slab.h>
#include <linux/cgroup.h>
#include <linux/fs.h>
#include <linux/uaccess.h>
@@ -47,17 +48,20 @@ static inline struct freezer *task_freezer(struct task_struct *task)
struct freezer, css);
}
-int cgroup_frozen(struct task_struct *task)
+int cgroup_freezing_or_frozen(struct task_struct *task)
{
struct freezer *freezer;
enum freezer_state state;
task_lock(task);
freezer = task_freezer(task);
- state = freezer->state;
+ if (!freezer->css.cgroup->parent)
+ state = CGROUP_THAWED; /* root cgroup can't be frozen */
+ else
+ state = freezer->state;
task_unlock(task);
- return state == CGROUP_FROZEN;
+ return (state == CGROUP_FREEZING) || (state == CGROUP_FROZEN);
}
/*
diff --git a/kernel/compat.c b/kernel/compat.c
index f6c204f07ea6..7f40e9275fd9 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -25,6 +25,7 @@
#include <linux/posix-timers.h>
#include <linux/times.h>
#include <linux/ptrace.h>
+#include <linux/gfp.h>
#include <asm/uaccess.h>
diff --git a/kernel/cpu.c b/kernel/cpu.c
index f8cced2692b3..25bba73b1be3 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -14,6 +14,7 @@
#include <linux/kthread.h>
#include <linux/stop_machine.h>
#include <linux/mutex.h>
+#include <linux/gfp.h>
#ifdef CONFIG_SMP
/* Serializes the updates to cpu_online_mask, cpu_present_mask */
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index ba401fab459f..d10946748ec2 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -920,9 +920,6 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs,
* call to guarantee_online_mems(), as we know no one is changing
* our task's cpuset.
*
- * Hold callback_mutex around the two modifications of our tasks
- * mems_allowed to synchronize with cpuset_mems_allowed().
- *
* While the mm_struct we are migrating is typically from some
* other task, the task_struct mems_allowed that we are hacking
* is for our current task, which must allocate new pages for that
@@ -973,15 +970,20 @@ static void cpuset_change_nodemask(struct task_struct *p,
struct cpuset *cs;
int migrate;
const nodemask_t *oldmem = scan->data;
- nodemask_t newmems;
+ NODEMASK_ALLOC(nodemask_t, newmems, GFP_KERNEL);
+
+ if (!newmems)
+ return;
cs = cgroup_cs(scan->cg);
- guarantee_online_mems(cs, &newmems);
+ guarantee_online_mems(cs, newmems);
task_lock(p);
- cpuset_change_task_nodemask(p, &newmems);
+ cpuset_change_task_nodemask(p, newmems);
task_unlock(p);
+ NODEMASK_FREE(newmems);
+
mm = get_task_mm(p);
if (!mm)
return;
@@ -1051,16 +1053,21 @@ static void update_tasks_nodemask(struct cpuset *cs, const nodemask_t *oldmem,
static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs,
const char *buf)
{
- nodemask_t oldmem;
+ NODEMASK_ALLOC(nodemask_t, oldmem, GFP_KERNEL);
int retval;
struct ptr_heap heap;
+ if (!oldmem)
+ return -ENOMEM;
+
/*
* top_cpuset.mems_allowed tracks node_stats[N_HIGH_MEMORY];
* it's read-only
*/
- if (cs == &top_cpuset)
- return -EACCES;
+ if (cs == &top_cpuset) {
+ retval = -EACCES;
+ goto done;
+ }
/*
* An empty mems_allowed is ok iff there are no tasks in the cpuset.
@@ -1076,11 +1083,13 @@ static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs,
goto done;
if (!nodes_subset(trialcs->mems_allowed,
- node_states[N_HIGH_MEMORY]))
- return -EINVAL;
+ node_states[N_HIGH_MEMORY])) {
+ retval = -EINVAL;
+ goto done;
+ }
}
- oldmem = cs->mems_allowed;
- if (nodes_equal(oldmem, trialcs->mems_allowed)) {
+ *oldmem = cs->mems_allowed;
+ if (nodes_equal(*oldmem, trialcs->mems_allowed)) {
retval = 0; /* Too easy - nothing to do */
goto done;
}
@@ -1096,10 +1105,11 @@ static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs,
cs->mems_allowed = trialcs->mems_allowed;
mutex_unlock(&callback_mutex);
- update_tasks_nodemask(cs, &oldmem, &heap);
+ update_tasks_nodemask(cs, oldmem, &heap);
heap_free(&heap);
done:
+ NODEMASK_FREE(oldmem);
return retval;
}
@@ -1384,40 +1394,47 @@ static void cpuset_attach(struct cgroup_subsys *ss, struct cgroup *cont,
struct cgroup *oldcont, struct task_struct *tsk,
bool threadgroup)
{
- nodemask_t from, to;
struct mm_struct *mm;
struct cpuset *cs = cgroup_cs(cont);
struct cpuset *oldcs = cgroup_cs(oldcont);
+ NODEMASK_ALLOC(nodemask_t, from, GFP_KERNEL);
+ NODEMASK_ALLOC(nodemask_t, to, GFP_KERNEL);
+
+ if (from == NULL || to == NULL)
+ goto alloc_fail;
if (cs == &top_cpuset) {
cpumask_copy(cpus_attach, cpu_possible_mask);
- to = node_possible_map;
} else {
guarantee_online_cpus(cs, cpus_attach);
- guarantee_online_mems(cs, &to);
}
+ guarantee_online_mems(cs, to);
/* do per-task migration stuff possibly for each in the threadgroup */
- cpuset_attach_task(tsk, &to, cs);
+ cpuset_attach_task(tsk, to, cs);
if (threadgroup) {
struct task_struct *c;
rcu_read_lock();
list_for_each_entry_rcu(c, &tsk->thread_group, thread_group) {
- cpuset_attach_task(c, &to, cs);
+ cpuset_attach_task(c, to, cs);
}
rcu_read_unlock();
}
/* change mm; only needs to be done once even if threadgroup */
- from = oldcs->mems_allowed;
- to = cs->mems_allowed;
+ *from = oldcs->mems_allowed;
+ *to = cs->mems_allowed;
mm = get_task_mm(tsk);
if (mm) {
- mpol_rebind_mm(mm, &to);
+ mpol_rebind_mm(mm, to);
if (is_memory_migrate(cs))
- cpuset_migrate_mm(mm, &from, &to);
+ cpuset_migrate_mm(mm, from, to);
mmput(mm);
}
+
+alloc_fail:
+ NODEMASK_FREE(from);
+ NODEMASK_FREE(to);
}
/* The various types of files and directories in a cpuset file system */
@@ -1562,13 +1579,21 @@ static int cpuset_sprintf_cpulist(char *page, struct cpuset *cs)
static int cpuset_sprintf_memlist(char *page, struct cpuset *cs)
{
- nodemask_t mask;
+ NODEMASK_ALLOC(nodemask_t, mask, GFP_KERNEL);
+ int retval;
+
+ if (mask == NULL)
+ return -ENOMEM;
mutex_lock(&callback_mutex);
- mask = cs->mems_allowed;
+ *mask = cs->mems_allowed;
mutex_unlock(&callback_mutex);
- return nodelist_scnprintf(page, PAGE_SIZE, mask);
+ retval = nodelist_scnprintf(page, PAGE_SIZE, *mask);
+
+ NODEMASK_FREE(mask);
+
+ return retval;
}
static ssize_t cpuset_common_file_read(struct cgroup *cont,
@@ -1997,7 +2022,10 @@ static void scan_for_empty_cpusets(struct cpuset *root)
struct cpuset *cp; /* scans cpusets being updated */
struct cpuset *child; /* scans child cpusets of cp */
struct cgroup *cont;
- nodemask_t oldmems;
+ NODEMASK_ALLOC(nodemask_t, oldmems, GFP_KERNEL);
+
+ if (oldmems == NULL)
+ return;
list_add_tail((struct list_head *)&root->stack_list, &queue);
@@ -2014,7 +2042,7 @@ static void scan_for_empty_cpusets(struct cpuset *root)
nodes_subset(cp->mems_allowed, node_states[N_HIGH_MEMORY]))
continue;
- oldmems = cp->mems_allowed;
+ *oldmems = cp->mems_allowed;
/* Remove offline cpus and mems from this cpuset. */
mutex_lock(&callback_mutex);
@@ -2030,9 +2058,10 @@ static void scan_for_empty_cpusets(struct cpuset *root)
remove_tasks_in_empty_cpuset(cp);
else {
update_tasks_cpumask(cp, NULL);
- update_tasks_nodemask(cp, &oldmems, NULL);
+ update_tasks_nodemask(cp, oldmems, NULL);
}
}
+ NODEMASK_FREE(oldmems);
}
/*
@@ -2090,20 +2119,33 @@ static int cpuset_track_online_cpus(struct notifier_block *unused_nb,
static int cpuset_track_online_nodes(struct notifier_block *self,
unsigned long action, void *arg)
{
+ NODEMASK_ALLOC(nodemask_t, oldmems, GFP_KERNEL);
+
+ if (oldmems == NULL)
+ return NOTIFY_DONE;
+
cgroup_lock();
switch (action) {
case MEM_ONLINE:
- case MEM_OFFLINE:
+ *oldmems = top_cpuset.mems_allowed;
mutex_lock(&callback_mutex);
top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY];
mutex_unlock(&callback_mutex);
- if (action == MEM_OFFLINE)
- scan_for_empty_cpusets(&top_cpuset);
+ update_tasks_nodemask(&top_cpuset, oldmems, NULL);
+ break;
+ case MEM_OFFLINE:
+ /*
+ * needn't update top_cpuset.mems_allowed explicitly because
+ * scan_for_empty_cpusets() will update it.
+ */
+ scan_for_empty_cpusets(&top_cpuset);
break;
default:
break;
}
cgroup_unlock();
+
+ NODEMASK_FREE(oldmems);
return NOTIFY_OK;
}
#endif
diff --git a/kernel/cred.c b/kernel/cred.c
index 1ed8ca18790c..62af1816c235 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -10,6 +10,7 @@
*/
#include <linux/module.h>
#include <linux/cred.h>
+#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/key.h>
#include <linux/keyctl.h>
@@ -364,7 +365,7 @@ struct cred *prepare_usermodehelper_creds(void)
new = kmem_cache_alloc(cred_jar, GFP_ATOMIC);
if (!new)
- return NULL;
+ goto free_tgcred;
kdebug("prepare_usermodehelper_creds() alloc %p", new);
@@ -398,6 +399,12 @@ struct cred *prepare_usermodehelper_creds(void)
error:
put_cred(new);
return NULL;
+
+free_tgcred:
+#ifdef CONFIG_KEYS
+ kfree(tgcred);
+#endif
+ return NULL;
}
/*
@@ -786,8 +793,6 @@ bool creds_are_invalid(const struct cred *cred)
{
if (cred->magic != CRED_MAGIC)
return true;
- if (atomic_read(&cred->usage) < atomic_read(&cred->subscribers))
- return true;
#ifdef CONFIG_SECURITY_SELINUX
if (selinux_is_enabled()) {
if ((unsigned long) cred->security < PAGE_SIZE)
diff --git a/kernel/early_res.c b/kernel/early_res.c
index 3cb2c661bb78..31aa9332ef3f 100644
--- a/kernel/early_res.c
+++ b/kernel/early_res.c
@@ -333,6 +333,12 @@ void __init free_early_partial(u64 start, u64 end)
struct early_res *r;
int i;
+ if (start == end)
+ return;
+
+ if (WARN_ONCE(start > end, " wrong range [%#llx, %#llx]\n", start, end))
+ return;
+
try_next:
i = find_overlapped_early(start, end);
if (i >= max_early_res)
diff --git a/kernel/exit.c b/kernel/exit.c
index ce1e48c2d93d..7f2683a10ac4 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -87,7 +87,7 @@ static void __exit_signal(struct task_struct *tsk)
sighand = rcu_dereference_check(tsk->sighand,
rcu_read_lock_held() ||
- lockdep_is_held(&tasklist_lock));
+ lockdep_tasklist_lock_is_held());
spin_lock(&sighand->siglock);
posix_cpu_timers_exit(tsk);
@@ -953,7 +953,8 @@ NORET_TYPE void do_exit(long code)
acct_update_integrals(tsk);
/* sync mm's RSS info before statistics gathering */
- sync_mm_rss(tsk, tsk->mm);
+ if (tsk->mm)
+ sync_mm_rss(tsk, tsk->mm);
group_dead = atomic_dec_and_test(&tsk->signal->live);
if (group_dead) {
hrtimer_cancel(&tsk->signal->real_timer);
diff --git a/kernel/fork.c b/kernel/fork.c
index 1beb6c303c41..44b0791b0a2e 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -86,7 +86,14 @@ int max_threads; /* tunable limit on nr_threads */
DEFINE_PER_CPU(unsigned long, process_counts) = 0;
__cacheline_aligned DEFINE_RWLOCK(tasklist_lock); /* outer */
-EXPORT_SYMBOL_GPL(tasklist_lock);
+
+#ifdef CONFIG_PROVE_RCU
+int lockdep_tasklist_lock_is_held(void)
+{
+ return lockdep_is_held(&tasklist_lock);
+}
+EXPORT_SYMBOL_GPL(lockdep_tasklist_lock_is_held);
+#endif /* #ifdef CONFIG_PROVE_RCU */
int nr_processes(void)
{
@@ -1045,6 +1052,9 @@ static struct task_struct *copy_process(unsigned long clone_flags,
p->prev_utime = cputime_zero;
p->prev_stime = cputime_zero;
#endif
+#if defined(SPLIT_RSS_COUNTING)
+ memset(&p->rss_stat, 0, sizeof(p->rss_stat));
+#endif
p->default_timer_slack_ns = current->timer_slack_ns;
diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c
index 967e66143e11..03808ed342a6 100644
--- a/kernel/hw_breakpoint.c
+++ b/kernel/hw_breakpoint.c
@@ -413,17 +413,17 @@ EXPORT_SYMBOL_GPL(unregister_hw_breakpoint);
*
* @return a set of per_cpu pointers to perf events
*/
-struct perf_event **
+struct perf_event * __percpu *
register_wide_hw_breakpoint(struct perf_event_attr *attr,
perf_overflow_handler_t triggered)
{
- struct perf_event **cpu_events, **pevent, *bp;
+ struct perf_event * __percpu *cpu_events, **pevent, *bp;
long err;
int cpu;
cpu_events = alloc_percpu(typeof(*cpu_events));
if (!cpu_events)
- return ERR_PTR(-ENOMEM);
+ return (void __percpu __force *)ERR_PTR(-ENOMEM);
get_online_cpus();
for_each_online_cpu(cpu) {
@@ -451,7 +451,7 @@ fail:
put_online_cpus();
free_percpu(cpu_events);
- return ERR_PTR(err);
+ return (void __percpu __force *)ERR_PTR(err);
}
EXPORT_SYMBOL_GPL(register_wide_hw_breakpoint);
@@ -459,7 +459,7 @@ EXPORT_SYMBOL_GPL(register_wide_hw_breakpoint);
* unregister_wide_hw_breakpoint - unregister a wide breakpoint in the kernel
* @cpu_events: the per cpu set of events to unregister
*/
-void unregister_wide_hw_breakpoint(struct perf_event **cpu_events)
+void unregister_wide_hw_breakpoint(struct perf_event * __percpu *cpu_events)
{
int cpu;
struct perf_event **pevent;
@@ -489,5 +489,4 @@ struct pmu perf_ops_bp = {
.enable = arch_install_hw_breakpoint,
.disable = arch_uninstall_hw_breakpoint,
.read = hw_breakpoint_pmu_read,
- .unthrottle = hw_breakpoint_pmu_unthrottle
};
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index 42ec11b2af8a..b7091d5ca2f8 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -359,6 +359,23 @@ static inline void mask_ack_irq(struct irq_desc *desc, int irq)
if (desc->chip->ack)
desc->chip->ack(irq);
}
+ desc->status |= IRQ_MASKED;
+}
+
+static inline void mask_irq(struct irq_desc *desc, int irq)
+{
+ if (desc->chip->mask) {
+ desc->chip->mask(irq);
+ desc->status |= IRQ_MASKED;
+ }
+}
+
+static inline void unmask_irq(struct irq_desc *desc, int irq)
+{
+ if (desc->chip->unmask) {
+ desc->chip->unmask(irq);
+ desc->status &= ~IRQ_MASKED;
+ }
}
/*
@@ -484,10 +501,8 @@ handle_level_irq(unsigned int irq, struct irq_desc *desc)
raw_spin_lock(&desc->lock);
desc->status &= ~IRQ_INPROGRESS;
- if (unlikely(desc->status & IRQ_ONESHOT))
- desc->status |= IRQ_MASKED;
- else if (!(desc->status & IRQ_DISABLED) && desc->chip->unmask)
- desc->chip->unmask(irq);
+ if (!(desc->status & (IRQ_DISABLED | IRQ_ONESHOT)))
+ unmask_irq(desc, irq);
out_unlock:
raw_spin_unlock(&desc->lock);
}
@@ -524,8 +539,7 @@ handle_fasteoi_irq(unsigned int irq, struct irq_desc *desc)
action = desc->action;
if (unlikely(!action || (desc->status & IRQ_DISABLED))) {
desc->status |= IRQ_PENDING;
- if (desc->chip->mask)
- desc->chip->mask(irq);
+ mask_irq(desc, irq);
goto out;
}
@@ -593,7 +607,7 @@ handle_edge_irq(unsigned int irq, struct irq_desc *desc)
irqreturn_t action_ret;
if (unlikely(!action)) {
- desc->chip->mask(irq);
+ mask_irq(desc, irq);
goto out_unlock;
}
@@ -605,8 +619,7 @@ handle_edge_irq(unsigned int irq, struct irq_desc *desc)
if (unlikely((desc->status &
(IRQ_PENDING | IRQ_MASKED | IRQ_DISABLED)) ==
(IRQ_PENDING | IRQ_MASKED))) {
- desc->chip->unmask(irq);
- desc->status &= ~IRQ_MASKED;
+ unmask_irq(desc, irq);
}
desc->status &= ~IRQ_PENDING;
@@ -716,7 +729,7 @@ set_irq_chip_and_handler_name(unsigned int irq, struct irq_chip *chip,
__set_irq_handler(irq, handle, 0, name);
}
-void __init set_irq_noprobe(unsigned int irq)
+void set_irq_noprobe(unsigned int irq)
{
struct irq_desc *desc = irq_to_desc(irq);
unsigned long flags;
@@ -731,7 +744,7 @@ void __init set_irq_noprobe(unsigned int irq)
raw_spin_unlock_irqrestore(&desc->lock, flags);
}
-void __init set_irq_probe(unsigned int irq)
+void set_irq_probe(unsigned int irq)
{
struct irq_desc *desc = irq_to_desc(irq);
unsigned long flags;
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index eb6078ca60c7..704e488730a5 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -382,6 +382,7 @@ int can_request_irq(unsigned int irq, unsigned long irqflags)
{
struct irq_desc *desc = irq_to_desc(irq);
struct irqaction *action;
+ unsigned long flags;
if (!desc)
return 0;
@@ -389,11 +390,14 @@ int can_request_irq(unsigned int irq, unsigned long irqflags)
if (desc->status & IRQ_NOREQUEST)
return 0;
+ raw_spin_lock_irqsave(&desc->lock, flags);
action = desc->action;
if (action)
if (irqflags & action->flags & IRQF_SHARED)
action = NULL;
+ raw_spin_unlock_irqrestore(&desc->lock, flags);
+
return !action;
}
@@ -483,8 +487,26 @@ static int irq_wait_for_interrupt(struct irqaction *action)
*/
static void irq_finalize_oneshot(unsigned int irq, struct irq_desc *desc)
{
+again:
chip_bus_lock(irq, desc);
raw_spin_lock_irq(&desc->lock);
+
+ /*
+ * Implausible though it may be we need to protect us against
+ * the following scenario:
+ *
+ * The thread is faster done than the hard interrupt handler
+ * on the other CPU. If we unmask the irq line then the
+ * interrupt can come in again and masks the line, leaves due
+ * to IRQ_INPROGRESS and the irq line is masked forever.
+ */
+ if (unlikely(desc->status & IRQ_INPROGRESS)) {
+ raw_spin_unlock_irq(&desc->lock);
+ chip_bus_sync_unlock(irq, desc);
+ cpu_relax();
+ goto again;
+ }
+
if (!(desc->status & IRQ_DISABLED) && (desc->status & IRQ_MASKED)) {
desc->status &= ~IRQ_MASKED;
desc->chip->unmask(irq);
@@ -735,6 +757,16 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
if (new->flags & IRQF_ONESHOT)
desc->status |= IRQ_ONESHOT;
+ /*
+ * Force MSI interrupts to run with interrupts
+ * disabled. The multi vector cards can cause stack
+ * overflows due to nested interrupts when enough of
+ * them are directed to a core and fire at the same
+ * time.
+ */
+ if (desc->msi_desc)
+ new->flags |= IRQF_DISABLED;
+
if (!(desc->status & IRQ_NOAUTOEN)) {
desc->depth = 0;
desc->status &= ~IRQ_DISABLED;
diff --git a/kernel/irq/numa_migrate.c b/kernel/irq/numa_migrate.c
index 963559dbd858..65d3845665ac 100644
--- a/kernel/irq/numa_migrate.c
+++ b/kernel/irq/numa_migrate.c
@@ -6,6 +6,7 @@
*/
#include <linux/irq.h>
+#include <linux/slab.h>
#include <linux/module.h>
#include <linux/random.h>
#include <linux/interrupt.h>
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index 6f50eccc79c0..7a6eb04ef6b5 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -7,6 +7,7 @@
*/
#include <linux/irq.h>
+#include <linux/gfp.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/interrupt.h>
diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
index 8e5288a8a355..13aff293f4de 100644
--- a/kernel/kallsyms.c
+++ b/kernel/kallsyms.c
@@ -21,6 +21,7 @@
#include <linux/sched.h> /* for cond_resched */
#include <linux/mm.h>
#include <linux/ctype.h>
+#include <linux/slab.h>
#include <asm/sections.h>
diff --git a/kernel/kgdb.c b/kernel/kgdb.c
index 761fdd2b3034..11f3515ca83f 100644
--- a/kernel/kgdb.c
+++ b/kernel/kgdb.c
@@ -69,9 +69,16 @@ struct kgdb_state {
struct pt_regs *linux_regs;
};
+/* Exception state values */
+#define DCPU_WANT_MASTER 0x1 /* Waiting to become a master kgdb cpu */
+#define DCPU_NEXT_MASTER 0x2 /* Transition from one master cpu to another */
+#define DCPU_IS_SLAVE 0x4 /* Slave cpu enter exception */
+#define DCPU_SSTEP 0x8 /* CPU is single stepping */
+
static struct debuggerinfo_struct {
void *debuggerinfo;
struct task_struct *task;
+ int exception_state;
} kgdb_info[NR_CPUS];
/**
@@ -391,27 +398,22 @@ int kgdb_mem2hex(char *mem, char *buf, int count)
/*
* Copy the binary array pointed to by buf into mem. Fix $, #, and
- * 0x7d escaped with 0x7d. Return a pointer to the character after
- * the last byte written.
+ * 0x7d escaped with 0x7d. Return -EFAULT on failure or 0 on success.
+ * The input buf is overwitten with the result to write to mem.
*/
static int kgdb_ebin2mem(char *buf, char *mem, int count)
{
- int err = 0;
- char c;
+ int size = 0;
+ char *c = buf;
while (count-- > 0) {
- c = *buf++;
- if (c == 0x7d)
- c = *buf++ ^ 0x20;
-
- err = probe_kernel_write(mem, &c, 1);
- if (err)
- break;
-
- mem++;
+ c[size] = *buf++;
+ if (c[size] == 0x7d)
+ c[size] = *buf++ ^ 0x20;
+ size++;
}
- return err;
+ return probe_kernel_write(mem, c, size);
}
/*
@@ -563,49 +565,6 @@ static struct task_struct *getthread(struct pt_regs *regs, int tid)
}
/*
- * CPU debug state control:
- */
-
-#ifdef CONFIG_SMP
-static void kgdb_wait(struct pt_regs *regs)
-{
- unsigned long flags;
- int cpu;
-
- local_irq_save(flags);
- cpu = raw_smp_processor_id();
- kgdb_info[cpu].debuggerinfo = regs;
- kgdb_info[cpu].task = current;
- /*
- * Make sure the above info reaches the primary CPU before
- * our cpu_in_kgdb[] flag setting does:
- */
- smp_wmb();
- atomic_set(&cpu_in_kgdb[cpu], 1);
-
- /* Disable any cpu specific hw breakpoints */
- kgdb_disable_hw_debug(regs);
-
- /* Wait till primary CPU is done with debugging */
- while (atomic_read(&passive_cpu_wait[cpu]))
- cpu_relax();
-
- kgdb_info[cpu].debuggerinfo = NULL;
- kgdb_info[cpu].task = NULL;
-
- /* fix up hardware debug registers on local cpu */
- if (arch_kgdb_ops.correct_hw_break)
- arch_kgdb_ops.correct_hw_break();
-
- /* Signal the primary CPU that we are done: */
- atomic_set(&cpu_in_kgdb[cpu], 0);
- touch_softlockup_watchdog_sync();
- clocksource_touch_watchdog();
- local_irq_restore(flags);
-}
-#endif
-
-/*
* Some architectures need cache flushes when we set/clear a
* breakpoint:
*/
@@ -1400,34 +1359,13 @@ static int kgdb_reenter_check(struct kgdb_state *ks)
return 1;
}
-/*
- * kgdb_handle_exception() - main entry point from a kernel exception
- *
- * Locking hierarchy:
- * interface locks, if any (begin_session)
- * kgdb lock (kgdb_active)
- */
-int
-kgdb_handle_exception(int evector, int signo, int ecode, struct pt_regs *regs)
+static int kgdb_cpu_enter(struct kgdb_state *ks, struct pt_regs *regs)
{
- struct kgdb_state kgdb_var;
- struct kgdb_state *ks = &kgdb_var;
unsigned long flags;
int sstep_tries = 100;
int error = 0;
int i, cpu;
-
- ks->cpu = raw_smp_processor_id();
- ks->ex_vector = evector;
- ks->signo = signo;
- ks->ex_vector = evector;
- ks->err_code = ecode;
- ks->kgdb_usethreadid = 0;
- ks->linux_regs = regs;
-
- if (kgdb_reenter_check(ks))
- return 0; /* Ouch, double exception ! */
-
+ int trace_on = 0;
acquirelock:
/*
* Interrupts will be restored by the 'trap return' code, except when
@@ -1435,13 +1373,43 @@ acquirelock:
*/
local_irq_save(flags);
- cpu = raw_smp_processor_id();
+ cpu = ks->cpu;
+ kgdb_info[cpu].debuggerinfo = regs;
+ kgdb_info[cpu].task = current;
+ /*
+ * Make sure the above info reaches the primary CPU before
+ * our cpu_in_kgdb[] flag setting does:
+ */
+ atomic_inc(&cpu_in_kgdb[cpu]);
/*
- * Acquire the kgdb_active lock:
+ * CPU will loop if it is a slave or request to become a kgdb
+ * master cpu and acquire the kgdb_active lock:
*/
- while (atomic_cmpxchg(&kgdb_active, -1, cpu) != -1)
+ while (1) {
+ if (kgdb_info[cpu].exception_state & DCPU_WANT_MASTER) {
+ if (atomic_cmpxchg(&kgdb_active, -1, cpu) == cpu)
+ break;
+ } else if (kgdb_info[cpu].exception_state & DCPU_IS_SLAVE) {
+ if (!atomic_read(&passive_cpu_wait[cpu]))
+ goto return_normal;
+ } else {
+return_normal:
+ /* Return to normal operation by executing any
+ * hw breakpoint fixup.
+ */
+ if (arch_kgdb_ops.correct_hw_break)
+ arch_kgdb_ops.correct_hw_break();
+ if (trace_on)
+ tracing_on();
+ atomic_dec(&cpu_in_kgdb[cpu]);
+ touch_softlockup_watchdog_sync();
+ clocksource_touch_watchdog();
+ local_irq_restore(flags);
+ return 0;
+ }
cpu_relax();
+ }
/*
* For single stepping, try to only enter on the processor
@@ -1475,9 +1443,6 @@ acquirelock:
if (kgdb_io_ops->pre_exception)
kgdb_io_ops->pre_exception();
- kgdb_info[ks->cpu].debuggerinfo = ks->linux_regs;
- kgdb_info[ks->cpu].task = current;
-
kgdb_disable_hw_debug(ks->linux_regs);
/*
@@ -1486,15 +1451,9 @@ acquirelock:
*/
if (!kgdb_single_step) {
for (i = 0; i < NR_CPUS; i++)
- atomic_set(&passive_cpu_wait[i], 1);
+ atomic_inc(&passive_cpu_wait[i]);
}
- /*
- * spin_lock code is good enough as a barrier so we don't
- * need one here:
- */
- atomic_set(&cpu_in_kgdb[ks->cpu], 1);
-
#ifdef CONFIG_SMP
/* Signal the other CPUs to enter kgdb_wait() */
if ((!kgdb_single_step) && kgdb_do_roundup)
@@ -1518,6 +1477,9 @@ acquirelock:
kgdb_single_step = 0;
kgdb_contthread = current;
exception_level = 0;
+ trace_on = tracing_is_on();
+ if (trace_on)
+ tracing_off();
/* Talk to debugger with gdbserial protocol */
error = gdb_serial_stub(ks);
@@ -1526,13 +1488,11 @@ acquirelock:
if (kgdb_io_ops->post_exception)
kgdb_io_ops->post_exception();
- kgdb_info[ks->cpu].debuggerinfo = NULL;
- kgdb_info[ks->cpu].task = NULL;
- atomic_set(&cpu_in_kgdb[ks->cpu], 0);
+ atomic_dec(&cpu_in_kgdb[ks->cpu]);
if (!kgdb_single_step) {
for (i = NR_CPUS-1; i >= 0; i--)
- atomic_set(&passive_cpu_wait[i], 0);
+ atomic_dec(&passive_cpu_wait[i]);
/*
* Wait till all the CPUs have quit
* from the debugger.
@@ -1551,6 +1511,8 @@ kgdb_restore:
else
kgdb_sstep_pid = 0;
}
+ if (trace_on)
+ tracing_on();
/* Free kgdb_active */
atomic_set(&kgdb_active, -1);
touch_softlockup_watchdog_sync();
@@ -1560,13 +1522,52 @@ kgdb_restore:
return error;
}
+/*
+ * kgdb_handle_exception() - main entry point from a kernel exception
+ *
+ * Locking hierarchy:
+ * interface locks, if any (begin_session)
+ * kgdb lock (kgdb_active)
+ */
+int
+kgdb_handle_exception(int evector, int signo, int ecode, struct pt_regs *regs)
+{
+ struct kgdb_state kgdb_var;
+ struct kgdb_state *ks = &kgdb_var;
+ int ret;
+
+ ks->cpu = raw_smp_processor_id();
+ ks->ex_vector = evector;
+ ks->signo = signo;
+ ks->ex_vector = evector;
+ ks->err_code = ecode;
+ ks->kgdb_usethreadid = 0;
+ ks->linux_regs = regs;
+
+ if (kgdb_reenter_check(ks))
+ return 0; /* Ouch, double exception ! */
+ kgdb_info[ks->cpu].exception_state |= DCPU_WANT_MASTER;
+ ret = kgdb_cpu_enter(ks, regs);
+ kgdb_info[ks->cpu].exception_state &= ~DCPU_WANT_MASTER;
+ return ret;
+}
+
int kgdb_nmicallback(int cpu, void *regs)
{
#ifdef CONFIG_SMP
+ struct kgdb_state kgdb_var;
+ struct kgdb_state *ks = &kgdb_var;
+
+ memset(ks, 0, sizeof(struct kgdb_state));
+ ks->cpu = cpu;
+ ks->linux_regs = regs;
+
if (!atomic_read(&cpu_in_kgdb[cpu]) &&
- atomic_read(&kgdb_active) != cpu &&
- atomic_read(&cpu_in_kgdb[atomic_read(&kgdb_active)])) {
- kgdb_wait((struct pt_regs *)regs);
+ atomic_read(&kgdb_active) != -1 &&
+ atomic_read(&kgdb_active) != cpu) {
+ kgdb_info[cpu].exception_state |= DCPU_IS_SLAVE;
+ kgdb_cpu_enter(ks, regs);
+ kgdb_info[cpu].exception_state &= ~DCPU_IS_SLAVE;
return 0;
}
#endif
@@ -1742,11 +1743,11 @@ EXPORT_SYMBOL_GPL(kgdb_unregister_io_module);
*/
void kgdb_breakpoint(void)
{
- atomic_set(&kgdb_setting_breakpoint, 1);
+ atomic_inc(&kgdb_setting_breakpoint);
wmb(); /* Sync point before breakpoint */
arch_kgdb_breakpoint();
wmb(); /* Sync point after breakpoint */
- atomic_set(&kgdb_setting_breakpoint, 0);
+ atomic_dec(&kgdb_setting_breakpoint);
}
EXPORT_SYMBOL_GPL(kgdb_breakpoint);
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index fa034d29cf73..0ed46f3e51e9 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -259,7 +259,8 @@ static void __kprobes __free_insn_slot(struct kprobe_insn_cache *c,
struct kprobe_insn_page *kip;
list_for_each_entry(kip, &c->pages, list) {
- long idx = ((long)slot - (long)kip->insns) / c->insn_size;
+ long idx = ((long)slot - (long)kip->insns) /
+ (c->insn_size * sizeof(kprobe_opcode_t));
if (idx >= 0 && idx < slots_per_page(c)) {
WARN_ON(kip->slot_used[idx] != SLOT_USED);
if (dirty) {
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 82ed0ea15194..83911c780175 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -219,7 +219,7 @@ int kthreadd(void *unused)
set_task_comm(tsk, "kthreadd");
ignore_signals(tsk);
set_cpus_allowed_ptr(tsk, cpu_all_mask);
- set_mems_allowed(node_possible_map);
+ set_mems_allowed(node_states[N_HIGH_MEMORY]);
current->flags |= PF_NOFREEZE | PF_FREEZER_NOSIG;
diff --git a/kernel/latencytop.c b/kernel/latencytop.c
index ca07c5c0c914..877fb306d415 100644
--- a/kernel/latencytop.c
+++ b/kernel/latencytop.c
@@ -56,7 +56,6 @@
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/list.h>
-#include <linux/slab.h>
#include <linux/stacktrace.h>
static DEFINE_SPINLOCK(latency_lock);
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 0c30d0455de1..2594e1ce41cb 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -43,6 +43,7 @@
#include <linux/ftrace.h>
#include <linux/stringify.h>
#include <linux/bitops.h>
+#include <linux/gfp.h>
#include <asm/sections.h>
@@ -582,9 +583,6 @@ static int static_obj(void *obj)
unsigned long start = (unsigned long) &_stext,
end = (unsigned long) &_end,
addr = (unsigned long) obj;
-#ifdef CONFIG_SMP
- int i;
-#endif
/*
* static variable?
@@ -595,24 +593,16 @@ static int static_obj(void *obj)
if (arch_is_kernel_data(addr))
return 1;
-#ifdef CONFIG_SMP
/*
- * percpu var?
+ * in-kernel percpu var?
*/
- for_each_possible_cpu(i) {
- start = (unsigned long) &__per_cpu_start + per_cpu_offset(i);
- end = (unsigned long) &__per_cpu_start + PERCPU_ENOUGH_ROOM
- + per_cpu_offset(i);
-
- if ((addr >= start) && (addr < end))
- return 1;
- }
-#endif
+ if (is_kernel_percpu_address(addr))
+ return 1;
/*
- * module var?
+ * module static or percpu var?
*/
- return is_module_address(addr);
+ return is_module_address(addr) || is_module_percpu_address(addr);
}
/*
@@ -3211,8 +3201,6 @@ void lock_acquire(struct lockdep_map *lock, unsigned int subclass,
{
unsigned long flags;
- trace_lock_acquire(lock, subclass, trylock, read, check, nest_lock, ip);
-
if (unlikely(current->lockdep_recursion))
return;
@@ -3220,6 +3208,7 @@ void lock_acquire(struct lockdep_map *lock, unsigned int subclass,
check_flags(flags);
current->lockdep_recursion = 1;
+ trace_lock_acquire(lock, subclass, trylock, read, check, nest_lock, ip);
__lock_acquire(lock, subclass, trylock, read, check,
irqs_disabled_flags(flags), nest_lock, ip, 0);
current->lockdep_recursion = 0;
@@ -3232,14 +3221,13 @@ void lock_release(struct lockdep_map *lock, int nested,
{
unsigned long flags;
- trace_lock_release(lock, nested, ip);
-
if (unlikely(current->lockdep_recursion))
return;
raw_local_irq_save(flags);
check_flags(flags);
current->lockdep_recursion = 1;
+ trace_lock_release(lock, nested, ip);
__lock_release(lock, nested, ip);
current->lockdep_recursion = 0;
raw_local_irq_restore(flags);
@@ -3413,8 +3401,6 @@ void lock_contended(struct lockdep_map *lock, unsigned long ip)
{
unsigned long flags;
- trace_lock_contended(lock, ip);
-
if (unlikely(!lock_stat))
return;
@@ -3424,6 +3410,7 @@ void lock_contended(struct lockdep_map *lock, unsigned long ip)
raw_local_irq_save(flags);
check_flags(flags);
current->lockdep_recursion = 1;
+ trace_lock_contended(lock, ip);
__lock_contended(lock, ip);
current->lockdep_recursion = 0;
raw_local_irq_restore(flags);
@@ -3822,6 +3809,7 @@ void lockdep_rcu_dereference(const char *file, const int line)
printk("%s:%d invoked rcu_dereference_check() without protection!\n",
file, line);
printk("\nother info that might help us debug this:\n\n");
+ printk("\nrcu_scheduler_active = %d, debug_locks = %d\n", rcu_scheduler_active, debug_locks);
lockdep_print_held_locks(curr);
printk("\nstack backtrace:\n");
dump_stack();
diff --git a/kernel/module.c b/kernel/module.c
index c968d3606dca..1016b75b026a 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -370,27 +370,33 @@ EXPORT_SYMBOL_GPL(find_module);
#ifdef CONFIG_SMP
-static void *percpu_modalloc(unsigned long size, unsigned long align,
- const char *name)
+static inline void __percpu *mod_percpu(struct module *mod)
{
- void *ptr;
+ return mod->percpu;
+}
+static int percpu_modalloc(struct module *mod,
+ unsigned long size, unsigned long align)
+{
if (align > PAGE_SIZE) {
printk(KERN_WARNING "%s: per-cpu alignment %li > %li\n",
- name, align, PAGE_SIZE);
+ mod->name, align, PAGE_SIZE);
align = PAGE_SIZE;
}
- ptr = __alloc_reserved_percpu(size, align);
- if (!ptr)
+ mod->percpu = __alloc_reserved_percpu(size, align);
+ if (!mod->percpu) {
printk(KERN_WARNING
"Could not allocate %lu bytes percpu data\n", size);
- return ptr;
+ return -ENOMEM;
+ }
+ mod->percpu_size = size;
+ return 0;
}
-static void percpu_modfree(void *freeme)
+static void percpu_modfree(struct module *mod)
{
- free_percpu(freeme);
+ free_percpu(mod->percpu);
}
static unsigned int find_pcpusec(Elf_Ehdr *hdr,
@@ -400,24 +406,62 @@ static unsigned int find_pcpusec(Elf_Ehdr *hdr,
return find_sec(hdr, sechdrs, secstrings, ".data.percpu");
}
-static void percpu_modcopy(void *pcpudest, const void *from, unsigned long size)
+static void percpu_modcopy(struct module *mod,
+ const void *from, unsigned long size)
{
int cpu;
for_each_possible_cpu(cpu)
- memcpy(pcpudest + per_cpu_offset(cpu), from, size);
+ memcpy(per_cpu_ptr(mod->percpu, cpu), from, size);
+}
+
+/**
+ * is_module_percpu_address - test whether address is from module static percpu
+ * @addr: address to test
+ *
+ * Test whether @addr belongs to module static percpu area.
+ *
+ * RETURNS:
+ * %true if @addr is from module static percpu area
+ */
+bool is_module_percpu_address(unsigned long addr)
+{
+ struct module *mod;
+ unsigned int cpu;
+
+ preempt_disable();
+
+ list_for_each_entry_rcu(mod, &modules, list) {
+ if (!mod->percpu_size)
+ continue;
+ for_each_possible_cpu(cpu) {
+ void *start = per_cpu_ptr(mod->percpu, cpu);
+
+ if ((void *)addr >= start &&
+ (void *)addr < start + mod->percpu_size) {
+ preempt_enable();
+ return true;
+ }
+ }
+ }
+
+ preempt_enable();
+ return false;
}
#else /* ... !CONFIG_SMP */
-static inline void *percpu_modalloc(unsigned long size, unsigned long align,
- const char *name)
+static inline void __percpu *mod_percpu(struct module *mod)
{
return NULL;
}
-static inline void percpu_modfree(void *pcpuptr)
+static inline int percpu_modalloc(struct module *mod,
+ unsigned long size, unsigned long align)
+{
+ return -ENOMEM;
+}
+static inline void percpu_modfree(struct module *mod)
{
- BUG();
}
static inline unsigned int find_pcpusec(Elf_Ehdr *hdr,
Elf_Shdr *sechdrs,
@@ -425,12 +469,16 @@ static inline unsigned int find_pcpusec(Elf_Ehdr *hdr,
{
return 0;
}
-static inline void percpu_modcopy(void *pcpudst, const void *src,
- unsigned long size)
+static inline void percpu_modcopy(struct module *mod,
+ const void *from, unsigned long size)
{
/* pcpusec should be 0, and size of that section should be 0. */
BUG_ON(size != 0);
}
+bool is_module_percpu_address(unsigned long addr)
+{
+ return false;
+}
#endif /* CONFIG_SMP */
@@ -473,11 +521,13 @@ static void module_unload_init(struct module *mod)
int cpu;
INIT_LIST_HEAD(&mod->modules_which_use_me);
- for_each_possible_cpu(cpu)
- per_cpu_ptr(mod->refptr, cpu)->count = 0;
+ for_each_possible_cpu(cpu) {
+ per_cpu_ptr(mod->refptr, cpu)->incs = 0;
+ per_cpu_ptr(mod->refptr, cpu)->decs = 0;
+ }
/* Hold reference count during initialization. */
- __this_cpu_write(mod->refptr->count, 1);
+ __this_cpu_write(mod->refptr->incs, 1);
/* Backwards compatibility macros put refcount during init. */
mod->waiter = current;
}
@@ -616,12 +666,28 @@ static int try_stop_module(struct module *mod, int flags, int *forced)
unsigned int module_refcount(struct module *mod)
{
- unsigned int total = 0;
+ unsigned int incs = 0, decs = 0;
int cpu;
for_each_possible_cpu(cpu)
- total += per_cpu_ptr(mod->refptr, cpu)->count;
- return total;
+ decs += per_cpu_ptr(mod->refptr, cpu)->decs;
+ /*
+ * ensure the incs are added up after the decs.
+ * module_put ensures incs are visible before decs with smp_wmb.
+ *
+ * This 2-count scheme avoids the situation where the refcount
+ * for CPU0 is read, then CPU0 increments the module refcount,
+ * then CPU1 drops that refcount, then the refcount for CPU1 is
+ * read. We would record a decrement but not its corresponding
+ * increment so we would see a low count (disaster).
+ *
+ * Rare situation? But module_refcount can be preempted, and we
+ * might be tallying up 4096+ CPUs. So it is not impossible.
+ */
+ smp_rmb();
+ for_each_possible_cpu(cpu)
+ incs += per_cpu_ptr(mod->refptr, cpu)->incs;
+ return incs - decs;
}
EXPORT_SYMBOL(module_refcount);
@@ -798,10 +864,11 @@ void module_put(struct module *module)
{
if (module) {
preempt_disable();
- __this_cpu_dec(module->refptr->count);
+ smp_wmb(); /* see comment in module_refcount */
+ __this_cpu_inc(module->refptr->decs);
trace_module_put(module, _RET_IP_,
- __this_cpu_read(module->refptr->count));
+ __this_cpu_read(module->refptr->decs));
/* Maybe they're waiting for us to drop reference? */
if (unlikely(!module_is_live(module)))
wake_up_process(module->waiter);
@@ -1400,8 +1467,7 @@ static void free_module(struct module *mod)
/* This may be NULL, but that's OK */
module_free(mod, mod->module_init);
kfree(mod->args);
- if (mod->percpu)
- percpu_modfree(mod->percpu);
+ percpu_modfree(mod);
#if defined(CONFIG_MODULE_UNLOAD)
if (mod->refptr)
free_percpu(mod->refptr);
@@ -1520,7 +1586,7 @@ static int simplify_symbols(Elf_Shdr *sechdrs,
default:
/* Divert to percpu allocation if a percpu var. */
if (sym[i].st_shndx == pcpuindex)
- secbase = (unsigned long)mod->percpu;
+ secbase = (unsigned long)mod_percpu(mod);
else
secbase = sechdrs[sym[i].st_shndx].sh_addr;
sym[i].st_value += secbase;
@@ -1954,7 +2020,7 @@ static noinline struct module *load_module(void __user *umod,
unsigned int modindex, versindex, infoindex, pcpuindex;
struct module *mod;
long err = 0;
- void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */
+ void *ptr = NULL; /* Stops spurious gcc warning */
unsigned long symoffs, stroffs, *strmap;
mm_segment_t old_fs;
@@ -2094,15 +2160,11 @@ static noinline struct module *load_module(void __user *umod,
if (pcpuindex) {
/* We have a special allocation for this section. */
- percpu = percpu_modalloc(sechdrs[pcpuindex].sh_size,
- sechdrs[pcpuindex].sh_addralign,
- mod->name);
- if (!percpu) {
- err = -ENOMEM;
+ err = percpu_modalloc(mod, sechdrs[pcpuindex].sh_size,
+ sechdrs[pcpuindex].sh_addralign);
+ if (err)
goto free_mod;
- }
sechdrs[pcpuindex].sh_flags &= ~(unsigned long)SHF_ALLOC;
- mod->percpu = percpu;
}
/* Determine total sizes, and put offsets in sh_entsize. For now
@@ -2317,7 +2379,7 @@ static noinline struct module *load_module(void __user *umod,
sort_extable(mod->extable, mod->extable + mod->num_exentries);
/* Finally, copy percpu area over. */
- percpu_modcopy(mod->percpu, (void *)sechdrs[pcpuindex].sh_addr,
+ percpu_modcopy(mod, (void *)sechdrs[pcpuindex].sh_addr,
sechdrs[pcpuindex].sh_size);
add_kallsyms(mod, sechdrs, hdr->e_shnum, symindex, strindex,
@@ -2409,8 +2471,7 @@ static noinline struct module *load_module(void __user *umod,
module_free(mod, mod->module_core);
/* mod will be freed with core. Don't access it beyond this line! */
free_percpu:
- if (percpu)
- percpu_modfree(percpu);
+ percpu_modfree(mod);
free_mod:
kfree(args);
kfree(strmap);
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
index 2ab67233ee8f..f74e6c00e26d 100644
--- a/kernel/nsproxy.c
+++ b/kernel/nsproxy.c
@@ -13,6 +13,7 @@
* Pavel Emelianov <xemul@openvz.org>
*/
+#include <linux/slab.h>
#include <linux/module.h>
#include <linux/nsproxy.h>
#include <linux/init_task.h>
diff --git a/kernel/padata.c b/kernel/padata.c
index 93caf65ff57c..fd03513c7327 100644
--- a/kernel/padata.c
+++ b/kernel/padata.c
@@ -25,6 +25,7 @@
#include <linux/padata.h>
#include <linux/mutex.h>
#include <linux/sched.h>
+#include <linux/slab.h>
#include <linux/rcupdate.h>
#define MAX_SEQ_NR INT_MAX - NR_CPUS
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index f40560b86544..2f3fbf84215a 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -15,6 +15,7 @@
#include <linux/smp.h>
#include <linux/file.h>
#include <linux/poll.h>
+#include <linux/slab.h>
#include <linux/sysfs.h>
#include <linux/dcache.h>
#include <linux/percpu.h>
@@ -56,21 +57,6 @@ static atomic_t nr_task_events __read_mostly;
*/
int sysctl_perf_event_paranoid __read_mostly = 1;
-static inline bool perf_paranoid_tracepoint_raw(void)
-{
- return sysctl_perf_event_paranoid > -1;
-}
-
-static inline bool perf_paranoid_cpu(void)
-{
- return sysctl_perf_event_paranoid > 0;
-}
-
-static inline bool perf_paranoid_kernel(void)
-{
- return sysctl_perf_event_paranoid > 1;
-}
-
int sysctl_perf_event_mlock __read_mostly = 512; /* 'free' kb per user */
/*
@@ -96,10 +82,6 @@ extern __weak const struct pmu *hw_perf_event_init(struct perf_event *event)
void __weak hw_perf_disable(void) { barrier(); }
void __weak hw_perf_enable(void) { barrier(); }
-void __weak hw_perf_event_setup(int cpu) { barrier(); }
-void __weak hw_perf_event_setup_online(int cpu) { barrier(); }
-void __weak hw_perf_event_setup_offline(int cpu) { barrier(); }
-
int __weak
hw_perf_group_sched_in(struct perf_event *group_leader,
struct perf_cpu_context *cpuctx,
@@ -112,25 +94,15 @@ void __weak perf_event_print_debug(void) { }
static DEFINE_PER_CPU(int, perf_disable_count);
-void __perf_disable(void)
-{
- __get_cpu_var(perf_disable_count)++;
-}
-
-bool __perf_enable(void)
-{
- return !--__get_cpu_var(perf_disable_count);
-}
-
void perf_disable(void)
{
- __perf_disable();
- hw_perf_disable();
+ if (!__get_cpu_var(perf_disable_count)++)
+ hw_perf_disable();
}
void perf_enable(void)
{
- if (__perf_enable())
+ if (!--__get_cpu_var(perf_disable_count))
hw_perf_enable();
}
@@ -1193,11 +1165,9 @@ void perf_event_task_sched_out(struct task_struct *task,
struct perf_event_context *ctx = task->perf_event_ctxp;
struct perf_event_context *next_ctx;
struct perf_event_context *parent;
- struct pt_regs *regs;
int do_switch = 1;
- regs = task_pt_regs(task);
- perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 1, regs, 0);
+ perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 1, NULL, 0);
if (likely(!ctx || !cpuctx->task_ctx))
return;
@@ -1553,12 +1523,15 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx)
*/
if (interrupts == MAX_INTERRUPTS) {
perf_log_throttle(event, 1);
+ perf_disable();
event->pmu->unthrottle(event);
+ perf_enable();
}
if (!event->attr.freq || !event->attr.sample_freq)
continue;
+ perf_disable();
event->pmu->read(event);
now = atomic64_read(&event->count);
delta = now - hwc->freq_count_stamp;
@@ -1566,6 +1539,7 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx)
if (delta > 0)
perf_adjust_period(event, TICK_NSEC, delta);
+ perf_enable();
}
raw_spin_unlock(&ctx->lock);
}
@@ -1575,9 +1549,6 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx)
*/
static void rotate_ctx(struct perf_event_context *ctx)
{
- if (!ctx->nr_events)
- return;
-
raw_spin_lock(&ctx->lock);
/* Rotate the first entry last of non-pinned groups */
@@ -1590,19 +1561,28 @@ void perf_event_task_tick(struct task_struct *curr)
{
struct perf_cpu_context *cpuctx;
struct perf_event_context *ctx;
+ int rotate = 0;
if (!atomic_read(&nr_events))
return;
cpuctx = &__get_cpu_var(perf_cpu_context);
- ctx = curr->perf_event_ctxp;
+ if (cpuctx->ctx.nr_events &&
+ cpuctx->ctx.nr_events != cpuctx->ctx.nr_active)
+ rotate = 1;
- perf_disable();
+ ctx = curr->perf_event_ctxp;
+ if (ctx && ctx->nr_events && ctx->nr_events != ctx->nr_active)
+ rotate = 1;
perf_ctx_adjust_freq(&cpuctx->ctx);
if (ctx)
perf_ctx_adjust_freq(ctx);
+ if (!rotate)
+ return;
+
+ perf_disable();
cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE);
if (ctx)
task_ctx_sched_out(ctx, EVENT_FLEXIBLE);
@@ -1614,7 +1594,6 @@ void perf_event_task_tick(struct task_struct *curr)
cpu_ctx_sched_in(cpuctx, EVENT_FLEXIBLE);
if (ctx)
task_ctx_sched_in(curr, EVENT_FLEXIBLE);
-
perf_enable();
}
@@ -2806,6 +2785,12 @@ __weak struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
return NULL;
}
+__weak
+void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int skip)
+{
+}
+
+
/*
* Output
*/
@@ -3391,15 +3376,23 @@ static void perf_event_task_output(struct perf_event *event,
struct perf_task_event *task_event)
{
struct perf_output_handle handle;
- int size;
struct task_struct *task = task_event->task;
- int ret;
+ unsigned long flags;
+ int size, ret;
+
+ /*
+ * If this CPU attempts to acquire an rq lock held by a CPU spinning
+ * in perf_output_lock() from interrupt context, it's game over.
+ */
+ local_irq_save(flags);
size = task_event->event_id.header.size;
ret = perf_output_begin(&handle, event, size, 0, 0);
- if (ret)
+ if (ret) {
+ local_irq_restore(flags);
return;
+ }
task_event->event_id.pid = perf_event_pid(event, task);
task_event->event_id.ppid = perf_event_pid(event, current);
@@ -3410,6 +3403,7 @@ static void perf_event_task_output(struct perf_event *event,
perf_output_put(&handle, task_event->event_id);
perf_output_end(&handle);
+ local_irq_restore(flags);
}
static int perf_event_task_match(struct perf_event *event)
@@ -4123,8 +4117,7 @@ void __perf_sw_event(u32 event_id, u64 nr, int nmi,
if (rctx < 0)
return;
- data.addr = addr;
- data.raw = NULL;
+ perf_sample_data_init(&data, addr);
do_perf_sw_event(PERF_TYPE_SOFTWARE, event_id, nr, nmi, &data, regs);
@@ -4169,11 +4162,10 @@ static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer)
struct perf_event *event;
u64 period;
- event = container_of(hrtimer, struct perf_event, hw.hrtimer);
+ event = container_of(hrtimer, struct perf_event, hw.hrtimer);
event->pmu->read(event);
- data.addr = 0;
- data.raw = NULL;
+ perf_sample_data_init(&data, 0);
data.period = event->hw.last_period;
regs = get_irq_regs();
/*
@@ -4335,26 +4327,20 @@ static const struct pmu perf_ops_task_clock = {
#ifdef CONFIG_EVENT_TRACING
void perf_tp_event(int event_id, u64 addr, u64 count, void *record,
- int entry_size)
+ int entry_size, struct pt_regs *regs)
{
+ struct perf_sample_data data;
struct perf_raw_record raw = {
.size = entry_size,
.data = record,
};
- struct perf_sample_data data = {
- .addr = addr,
- .raw = &raw,
- };
-
- struct pt_regs *regs = get_irq_regs();
-
- if (!regs)
- regs = task_pt_regs(current);
+ perf_sample_data_init(&data, addr);
+ data.raw = &raw;
/* Trace events already protected against recursion */
do_perf_sw_event(PERF_TYPE_TRACEPOINT, event_id, count, 1,
- &data, regs);
+ &data, regs);
}
EXPORT_SYMBOL_GPL(perf_tp_event);
@@ -4370,7 +4356,7 @@ static int perf_tp_event_match(struct perf_event *event,
static void tp_perf_event_destroy(struct perf_event *event)
{
- ftrace_profile_disable(event->attr.config);
+ perf_trace_disable(event->attr.config);
}
static const struct pmu *tp_perf_event_init(struct perf_event *event)
@@ -4384,7 +4370,7 @@ static const struct pmu *tp_perf_event_init(struct perf_event *event)
!capable(CAP_SYS_ADMIN))
return ERR_PTR(-EPERM);
- if (ftrace_profile_enable(event->attr.config))
+ if (perf_trace_enable(event->attr.config))
return NULL;
event->destroy = tp_perf_event_destroy;
@@ -4463,8 +4449,7 @@ void perf_bp_event(struct perf_event *bp, void *data)
struct perf_sample_data sample;
struct pt_regs *regs = data;
- sample.raw = NULL;
- sample.addr = bp->attr.bp_addr;
+ perf_sample_data_init(&sample, bp->attr.bp_addr);
if (!perf_exclude_event(bp, regs))
perf_swevent_add(bp, 1, 1, &sample, regs);
@@ -5392,18 +5377,26 @@ int perf_event_init_task(struct task_struct *child)
return ret;
}
+static void __init perf_event_init_all_cpus(void)
+{
+ int cpu;
+ struct perf_cpu_context *cpuctx;
+
+ for_each_possible_cpu(cpu) {
+ cpuctx = &per_cpu(perf_cpu_context, cpu);
+ __perf_event_init_context(&cpuctx->ctx, NULL);
+ }
+}
+
static void __cpuinit perf_event_init_cpu(int cpu)
{
struct perf_cpu_context *cpuctx;
cpuctx = &per_cpu(perf_cpu_context, cpu);
- __perf_event_init_context(&cpuctx->ctx, NULL);
spin_lock(&perf_resource_lock);
cpuctx->max_pertask = perf_max_events - perf_reserved_percpu;
spin_unlock(&perf_resource_lock);
-
- hw_perf_event_setup(cpu);
}
#ifdef CONFIG_HOTPLUG_CPU
@@ -5443,20 +5436,11 @@ perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu)
perf_event_init_cpu(cpu);
break;
- case CPU_ONLINE:
- case CPU_ONLINE_FROZEN:
- hw_perf_event_setup_online(cpu);
- break;
-
case CPU_DOWN_PREPARE:
case CPU_DOWN_PREPARE_FROZEN:
perf_event_exit_cpu(cpu);
break;
- case CPU_DEAD:
- hw_perf_event_setup_offline(cpu);
- break;
-
default:
break;
}
@@ -5474,6 +5458,7 @@ static struct notifier_block __cpuinitdata perf_cpu_nb = {
void __init perf_event_init(void)
{
+ perf_event_init_all_cpus();
perf_cpu_notify(&perf_cpu_nb, (unsigned long)CPU_UP_PREPARE,
(void *)(long)smp_processor_id());
perf_cpu_notify(&perf_cpu_nb, (unsigned long)CPU_ONLINE,
diff --git a/kernel/pid.c b/kernel/pid.c
index 86b296943e5f..aebb30d9c233 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -367,7 +367,9 @@ struct task_struct *pid_task(struct pid *pid, enum pid_type type)
struct task_struct *result = NULL;
if (pid) {
struct hlist_node *first;
- first = rcu_dereference_check(pid->tasks[type].first, rcu_read_lock_held() || lockdep_is_held(&tasklist_lock));
+ first = rcu_dereference_check(pid->tasks[type].first,
+ rcu_read_lock_held() ||
+ lockdep_tasklist_lock_is_held());
if (first)
result = hlist_entry(first, struct task_struct, pids[(type)].node);
}
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index 79aac93acf99..a5aff94e1f0b 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -13,6 +13,7 @@
#include <linux/syscalls.h>
#include <linux/err.h>
#include <linux/acct.h>
+#include <linux/slab.h>
#define BITS_PER_PAGE (PAGE_SIZE*8)
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index 1a22dfd42df9..bc7704b3a443 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -1061,9 +1061,9 @@ static void check_thread_timers(struct task_struct *tsk,
}
}
-static void stop_process_timers(struct task_struct *tsk)
+static void stop_process_timers(struct signal_struct *sig)
{
- struct thread_group_cputimer *cputimer = &tsk->signal->cputimer;
+ struct thread_group_cputimer *cputimer = &sig->cputimer;
unsigned long flags;
if (!cputimer->running)
@@ -1072,6 +1072,10 @@ static void stop_process_timers(struct task_struct *tsk)
spin_lock_irqsave(&cputimer->lock, flags);
cputimer->running = 0;
spin_unlock_irqrestore(&cputimer->lock, flags);
+
+ sig->cputime_expires.prof_exp = cputime_zero;
+ sig->cputime_expires.virt_exp = cputime_zero;
+ sig->cputime_expires.sched_exp = 0;
}
static u32 onecputick;
@@ -1133,7 +1137,7 @@ static void check_process_timers(struct task_struct *tsk,
list_empty(&timers[CPUCLOCK_VIRT]) &&
cputime_eq(sig->it[CPUCLOCK_VIRT].expires, cputime_zero) &&
list_empty(&timers[CPUCLOCK_SCHED])) {
- stop_process_timers(tsk);
+ stop_process_timers(sig);
return;
}
diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
index da5288ec2392..aa9e916da4d5 100644
--- a/kernel/power/hibernate.c
+++ b/kernel/power/hibernate.c
@@ -22,6 +22,7 @@
#include <linux/console.h>
#include <linux/cpu.h>
#include <linux/freezer.h>
+#include <linux/gfp.h>
#include <scsi/scsi_scan.h>
#include <asm/suspend.h>
diff --git a/kernel/power/hibernate_nvs.c b/kernel/power/hibernate_nvs.c
index 39ac698ef836..fdcad9ed5a7b 100644
--- a/kernel/power/hibernate_nvs.c
+++ b/kernel/power/hibernate_nvs.c
@@ -10,6 +10,7 @@
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/mm.h>
+#include <linux/slab.h>
#include <linux/suspend.h>
/*
diff --git a/kernel/power/process.c b/kernel/power/process.c
index 5ade1bdcf366..71ae29052ab6 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -88,12 +88,11 @@ static int try_to_freeze_tasks(bool sig_only)
printk(KERN_ERR "Freezing of tasks failed after %d.%02d seconds "
"(%d tasks refusing to freeze):\n",
elapsed_csecs / 100, elapsed_csecs % 100, todo);
- show_state();
read_lock(&tasklist_lock);
do_each_thread(g, p) {
task_lock(p);
if (freezing(p) && !freezer_should_skip(p))
- printk(KERN_ERR " %s\n", p->comm);
+ sched_show_task(p);
cancel_freezing(p);
task_unlock(p);
} while_each_thread(g, p);
@@ -145,7 +144,7 @@ static void thaw_tasks(bool nosig_only)
if (nosig_only && should_send_signal(p))
continue;
- if (cgroup_frozen(p))
+ if (cgroup_freezing_or_frozen(p))
continue;
thaw_process(p);
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index 830cadecbdfc..be861c26dda7 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -26,6 +26,7 @@
#include <linux/console.h>
#include <linux/highmem.h>
#include <linux/list.h>
+#include <linux/slab.h>
#include <asm/uaccess.h>
#include <asm/mmu_context.h>
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index 44cce10b582d..56e7dbb8b996 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -15,6 +15,7 @@
#include <linux/console.h>
#include <linux/cpu.h>
#include <linux/syscalls.h>
+#include <linux/gfp.h>
#include "power.h"
diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index 1d575733d4e1..66824d71983a 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -23,6 +23,7 @@
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/pm.h>
+#include <linux/slab.h>
#include "power.h"
diff --git a/kernel/power/user.c b/kernel/power/user.c
index 4d2289626a84..a8c96212bc1b 100644
--- a/kernel/power/user.c
+++ b/kernel/power/user.c
@@ -420,7 +420,7 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd,
* User space encodes device types as two-byte values,
* so we need to recode them
*/
- swdev = old_decode_dev(swap_area.dev);
+ swdev = new_decode_dev(swap_area.dev);
if (swdev) {
offset = swap_area.offset;
data->swap = swap_type_of(swdev, offset, NULL);
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index f1125c1a6321..03a7ea1579f6 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -45,6 +45,7 @@
#include <linux/mutex.h>
#include <linux/module.h>
#include <linux/kernel_stat.h>
+#include <linux/hardirq.h>
#ifdef CONFIG_DEBUG_LOCK_ALLOC
static struct lock_class_key rcu_lock_key;
@@ -66,6 +67,35 @@ EXPORT_SYMBOL_GPL(rcu_sched_lock_map);
int rcu_scheduler_active __read_mostly;
EXPORT_SYMBOL_GPL(rcu_scheduler_active);
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+
+int debug_lockdep_rcu_enabled(void)
+{
+ return rcu_scheduler_active && debug_locks &&
+ current->lockdep_recursion == 0;
+}
+EXPORT_SYMBOL_GPL(debug_lockdep_rcu_enabled);
+
+/**
+ * rcu_read_lock_bh_held - might we be in RCU-bh read-side critical section?
+ *
+ * Check for bottom half being disabled, which covers both the
+ * CONFIG_PROVE_RCU and not cases. Note that if someone uses
+ * rcu_read_lock_bh(), but then later enables BH, lockdep (if enabled)
+ * will show the situation.
+ *
+ * Check debug_lockdep_rcu_enabled() to prevent false positives during boot.
+ */
+int rcu_read_lock_bh_held(void)
+{
+ if (!debug_lockdep_rcu_enabled())
+ return 1;
+ return in_softirq();
+}
+EXPORT_SYMBOL_GPL(rcu_read_lock_bh_held);
+
+#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
+
/*
* This function is invoked towards the end of the scheduler's initialization
* process. Before this is called, the idle task might contain
diff --git a/kernel/rcutree.h b/kernel/rcutree.h
index 1439eb504c22..4a525a30e08e 100644
--- a/kernel/rcutree.h
+++ b/kernel/rcutree.h
@@ -246,12 +246,21 @@ struct rcu_data {
#define RCU_JIFFIES_TILL_FORCE_QS 3 /* for rsp->jiffies_force_qs */
#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
-#define RCU_SECONDS_TILL_STALL_CHECK (10 * HZ) /* for rsp->jiffies_stall */
-#define RCU_SECONDS_TILL_STALL_RECHECK (30 * HZ) /* for rsp->jiffies_stall */
-#define RCU_STALL_RAT_DELAY 2 /* Allow other CPUs time */
- /* to take at least one */
- /* scheduling clock irq */
- /* before ratting on them. */
+
+#ifdef CONFIG_PROVE_RCU
+#define RCU_STALL_DELAY_DELTA (5 * HZ)
+#else
+#define RCU_STALL_DELAY_DELTA 0
+#endif
+
+#define RCU_SECONDS_TILL_STALL_CHECK (10 * HZ + RCU_STALL_DELAY_DELTA)
+ /* for rsp->jiffies_stall */
+#define RCU_SECONDS_TILL_STALL_RECHECK (30 * HZ + RCU_STALL_DELAY_DELTA)
+ /* for rsp->jiffies_stall */
+#define RCU_STALL_RAT_DELAY 2 /* Allow other CPUs time */
+ /* to take at least one */
+ /* scheduling clock irq */
+ /* before ratting on them. */
#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index 464ad2cdee00..79b53bda8943 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -1010,6 +1010,10 @@ int rcu_needs_cpu(int cpu)
int c = 0;
int thatcpu;
+ /* Check for being in the holdoff period. */
+ if (per_cpu(rcu_dyntick_holdoff, cpu) == jiffies)
+ return rcu_needs_cpu_quick_check(cpu);
+
/* Don't bother unless we are the last non-dyntick-idle CPU. */
for_each_cpu_not(thatcpu, nohz_cpu_mask)
if (thatcpu != cpu) {
@@ -1041,10 +1045,8 @@ int rcu_needs_cpu(int cpu)
}
/* If RCU callbacks are still pending, RCU still needs this CPU. */
- if (c) {
+ if (c)
raise_softirq(RCU_SOFTIRQ);
- per_cpu(rcu_dyntick_holdoff, cpu) = jiffies;
- }
return c;
}
diff --git a/kernel/res_counter.c b/kernel/res_counter.c
index bcdabf37c40b..c7eaa37a768b 100644
--- a/kernel/res_counter.c
+++ b/kernel/res_counter.c
@@ -10,7 +10,6 @@
#include <linux/types.h>
#include <linux/parser.h>
#include <linux/fs.h>
-#include <linux/slab.h>
#include <linux/res_counter.h>
#include <linux/uaccess.h>
#include <linux/mm.h>
diff --git a/kernel/resource.c b/kernel/resource.c
index 2d5be5d9bf5f..9c358e263534 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -219,19 +219,34 @@ void release_child_resources(struct resource *r)
}
/**
- * request_resource - request and reserve an I/O or memory resource
+ * request_resource_conflict - request and reserve an I/O or memory resource
* @root: root resource descriptor
* @new: resource descriptor desired by caller
*
- * Returns 0 for success, negative error code on error.
+ * Returns 0 for success, conflict resource on error.
*/
-int request_resource(struct resource *root, struct resource *new)
+struct resource *request_resource_conflict(struct resource *root, struct resource *new)
{
struct resource *conflict;
write_lock(&resource_lock);
conflict = __request_resource(root, new);
write_unlock(&resource_lock);
+ return conflict;
+}
+
+/**
+ * request_resource - request and reserve an I/O or memory resource
+ * @root: root resource descriptor
+ * @new: resource descriptor desired by caller
+ *
+ * Returns 0 for success, negative error code on error.
+ */
+int request_resource(struct resource *root, struct resource *new)
+{
+ struct resource *conflict;
+
+ conflict = request_resource_conflict(root, new);
return conflict ? -EBUSY : 0;
}
@@ -474,25 +489,40 @@ static struct resource * __insert_resource(struct resource *parent, struct resou
}
/**
- * insert_resource - Inserts a resource in the resource tree
+ * insert_resource_conflict - Inserts resource in the resource tree
* @parent: parent of the new resource
* @new: new resource to insert
*
- * Returns 0 on success, -EBUSY if the resource can't be inserted.
+ * Returns 0 on success, conflict resource if the resource can't be inserted.
*
- * This function is equivalent to request_resource when no conflict
+ * This function is equivalent to request_resource_conflict when no conflict
* happens. If a conflict happens, and the conflicting resources
* entirely fit within the range of the new resource, then the new
* resource is inserted and the conflicting resources become children of
* the new resource.
*/
-int insert_resource(struct resource *parent, struct resource *new)
+struct resource *insert_resource_conflict(struct resource *parent, struct resource *new)
{
struct resource *conflict;
write_lock(&resource_lock);
conflict = __insert_resource(parent, new);
write_unlock(&resource_lock);
+ return conflict;
+}
+
+/**
+ * insert_resource - Inserts a resource in the resource tree
+ * @parent: parent of the new resource
+ * @new: new resource to insert
+ *
+ * Returns 0 on success, -EBUSY if the resource can't be inserted.
+ */
+int insert_resource(struct resource *parent, struct resource *new)
+{
+ struct resource *conflict;
+
+ conflict = insert_resource_conflict(parent, new);
return conflict ? -EBUSY : 0;
}
diff --git a/kernel/sched.c b/kernel/sched.c
index 150b6988de49..6af210a7de70 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -71,6 +71,7 @@
#include <linux/debugfs.h>
#include <linux/ctype.h>
#include <linux/ftrace.h>
+#include <linux/slab.h>
#include <asm/tlb.h>
#include <asm/irq_regs.h>
@@ -2359,7 +2360,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state,
{
int cpu, orig_cpu, this_cpu, success = 0;
unsigned long flags;
- struct rq *rq, *orig_rq;
+ struct rq *rq;
if (!sched_feat(SYNC_WAKEUPS))
wake_flags &= ~WF_SYNC;
@@ -2367,7 +2368,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state,
this_cpu = get_cpu();
smp_wmb();
- rq = orig_rq = task_rq_lock(p, &flags);
+ rq = task_rq_lock(p, &flags);
update_rq_clock(rq);
if (!(p->state & state))
goto out;
@@ -2650,7 +2651,7 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
{
unsigned long flags;
struct rq *rq;
- int cpu = get_cpu();
+ int cpu __maybe_unused = get_cpu();
#ifdef CONFIG_SMP
/*
@@ -4902,7 +4903,9 @@ SYSCALL_DEFINE3(sched_getaffinity, pid_t, pid, unsigned int, len,
int ret;
cpumask_var_t mask;
- if (len < cpumask_size())
+ if ((len * BITS_PER_BYTE) < nr_cpu_ids)
+ return -EINVAL;
+ if (len & (sizeof(unsigned long)-1))
return -EINVAL;
if (!alloc_cpumask_var(&mask, GFP_KERNEL))
@@ -4910,10 +4913,12 @@ SYSCALL_DEFINE3(sched_getaffinity, pid_t, pid, unsigned int, len,
ret = sched_getaffinity(pid, mask);
if (ret == 0) {
- if (copy_to_user(user_mask_ptr, mask, cpumask_size()))
+ size_t retlen = min_t(size_t, len, cpumask_size());
+
+ if (copy_to_user(user_mask_ptr, mask, retlen))
ret = -EFAULT;
else
- ret = cpumask_size();
+ ret = retlen;
}
free_cpumask_var(mask);
@@ -5383,7 +5388,7 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
get_task_struct(mt);
task_rq_unlock(rq, &flags);
- wake_up_process(rq->migration_thread);
+ wake_up_process(mt);
put_task_struct(mt);
wait_for_completion(&req.done);
tlb_migrate_finish(p->mm);
diff --git a/kernel/sched_cpupri.c b/kernel/sched_cpupri.c
index fccf9fbb0d7b..e6871cb3fc83 100644
--- a/kernel/sched_cpupri.c
+++ b/kernel/sched_cpupri.c
@@ -27,6 +27,7 @@
* of the License.
*/
+#include <linux/gfp.h>
#include "sched_cpupri.h"
/* Convert between a 140 based task->prio, and our 102 based cpupri */
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c
index 67f95aada4b9..9b49db144037 100644
--- a/kernel/sched_debug.c
+++ b/kernel/sched_debug.c
@@ -518,8 +518,4 @@ void proc_sched_set_task(struct task_struct *p)
p->se.nr_wakeups_idle = 0;
p->sched_info.bkl_count = 0;
#endif
- p->se.sum_exec_runtime = 0;
- p->se.prev_sum_exec_runtime = 0;
- p->nvcsw = 0;
- p->nivcsw = 0;
}
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 3e1fd96c6cf9..5a5ea2cd924f 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -3476,7 +3476,7 @@ static void run_rebalance_domains(struct softirq_action *h)
static inline int on_null_domain(int cpu)
{
- return !rcu_dereference(cpu_rq(cpu)->sd);
+ return !rcu_dereference_sched(cpu_rq(cpu)->sd);
}
/*
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 5a6ed1f0990a..b5b920ae2ea7 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -1146,7 +1146,12 @@ static struct task_struct *pick_next_highest_task_rt(struct rq *rq, int cpu)
if (next && next->prio < idx)
continue;
list_for_each_entry(rt_se, array->queue + idx, run_list) {
- struct task_struct *p = rt_task_of(rt_se);
+ struct task_struct *p;
+
+ if (!rt_entity_is_task(rt_se))
+ continue;
+
+ p = rt_task_of(rt_se);
if (pick_rt_task(rq, p, cpu)) {
next = p;
break;
diff --git a/kernel/slow-work.c b/kernel/slow-work.c
index 7494bbf5a270..7d3f4fa9ef4f 100644
--- a/kernel/slow-work.c
+++ b/kernel/slow-work.c
@@ -637,7 +637,7 @@ int delayed_slow_work_enqueue(struct delayed_slow_work *dwork,
goto cancelled;
/* the timer holds a reference whilst it is pending */
- ret = work->ops->get_ref(work);
+ ret = slow_work_get_ref(work);
if (ret < 0)
goto cant_get_ref;
diff --git a/kernel/slow-work.h b/kernel/slow-work.h
index 321f3c59d732..a29ebd1ef41d 100644
--- a/kernel/slow-work.h
+++ b/kernel/slow-work.h
@@ -43,28 +43,28 @@ extern void slow_work_new_thread_desc(struct slow_work *, struct seq_file *);
*/
static inline void slow_work_set_thread_pid(int id, pid_t pid)
{
-#ifdef CONFIG_SLOW_WORK_PROC
+#ifdef CONFIG_SLOW_WORK_DEBUG
slow_work_pids[id] = pid;
#endif
}
static inline void slow_work_mark_time(struct slow_work *work)
{
-#ifdef CONFIG_SLOW_WORK_PROC
+#ifdef CONFIG_SLOW_WORK_DEBUG
work->mark = CURRENT_TIME;
#endif
}
static inline void slow_work_begin_exec(int id, struct slow_work *work)
{
-#ifdef CONFIG_SLOW_WORK_PROC
+#ifdef CONFIG_SLOW_WORK_DEBUG
slow_work_execs[id] = work;
#endif
}
static inline void slow_work_end_exec(int id, struct slow_work *work)
{
-#ifdef CONFIG_SLOW_WORK_PROC
+#ifdef CONFIG_SLOW_WORK_DEBUG
write_lock(&slow_work_execs_lock);
slow_work_execs[id] = NULL;
write_unlock(&slow_work_execs_lock);
diff --git a/kernel/smp.c b/kernel/smp.c
index 9867b6bfefce..3fc697336183 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -9,6 +9,7 @@
#include <linux/module.h>
#include <linux/percpu.h>
#include <linux/init.h>
+#include <linux/gfp.h>
#include <linux/smp.h>
#include <linux/cpu.h>
diff --git a/kernel/softlockup.c b/kernel/softlockup.c
index 0d4c7898ab80..4b493f67dcb5 100644
--- a/kernel/softlockup.c
+++ b/kernel/softlockup.c
@@ -155,11 +155,11 @@ void softlockup_tick(void)
* Wake up the high-prio watchdog task twice per
* threshold timespan.
*/
- if (now > touch_ts + softlockup_thresh/2)
+ if (time_after(now - softlockup_thresh/2, touch_ts))
wake_up_process(per_cpu(softlockup_watchdog, this_cpu));
/* Warn about unreasonable delays: */
- if (now <= (touch_ts + softlockup_thresh))
+ if (time_before_eq(now - softlockup_thresh, touch_ts))
return;
per_cpu(softlockup_print_ts, this_cpu) = touch_ts;
diff --git a/kernel/srcu.c b/kernel/srcu.c
index bde4295774c8..2980da3fd509 100644
--- a/kernel/srcu.c
+++ b/kernel/srcu.c
@@ -30,7 +30,6 @@
#include <linux/preempt.h>
#include <linux/rcupdate.h>
#include <linux/sched.h>
-#include <linux/slab.h>
#include <linux/smp.h>
#include <linux/srcu.h>
diff --git a/kernel/sys.c b/kernel/sys.c
index 8298878f4f71..6d1a7e0f9d5b 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -36,6 +36,7 @@
#include <linux/personality.h>
#include <linux/ptrace.h>
#include <linux/fs_struct.h>
+#include <linux/gfp.h>
#include <linux/compat.h>
#include <linux/syscalls.h>
diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c
index 8cd50d8f9bde..59030570f5ca 100644
--- a/kernel/sysctl_binary.c
+++ b/kernel/sysctl_binary.c
@@ -13,6 +13,7 @@
#include <linux/file.h>
#include <linux/ctype.h>
#include <linux/netdevice.h>
+#include <linux/slab.h>
#ifdef CONFIG_SYSCTL_SYSCALL
diff --git a/kernel/taskstats.c b/kernel/taskstats.c
index 899ca51be5e8..11281d5792bd 100644
--- a/kernel/taskstats.c
+++ b/kernel/taskstats.c
@@ -22,6 +22,7 @@
#include <linux/delayacct.h>
#include <linux/cpumask.h>
#include <linux/percpu.h>
+#include <linux/slab.h>
#include <linux/cgroupstats.h>
#include <linux/cgroup.h>
#include <linux/fs.h>
diff --git a/kernel/time.c b/kernel/time.c
index 804798005d19..656dccfe1cbb 100644
--- a/kernel/time.c
+++ b/kernel/time.c
@@ -35,7 +35,6 @@
#include <linux/syscalls.h>
#include <linux/security.h>
#include <linux/fs.h>
-#include <linux/slab.h>
#include <linux/math64.h>
#include <linux/ptrace.h>
diff --git a/kernel/time/tick-oneshot.c b/kernel/time/tick-oneshot.c
index 0a8a213016f0..aada0e52680a 100644
--- a/kernel/time/tick-oneshot.c
+++ b/kernel/time/tick-oneshot.c
@@ -22,6 +22,29 @@
#include "tick-internal.h"
+/* Limit min_delta to a jiffie */
+#define MIN_DELTA_LIMIT (NSEC_PER_SEC / HZ)
+
+static int tick_increase_min_delta(struct clock_event_device *dev)
+{
+ /* Nothing to do if we already reached the limit */
+ if (dev->min_delta_ns >= MIN_DELTA_LIMIT)
+ return -ETIME;
+
+ if (dev->min_delta_ns < 5000)
+ dev->min_delta_ns = 5000;
+ else
+ dev->min_delta_ns += dev->min_delta_ns >> 1;
+
+ if (dev->min_delta_ns > MIN_DELTA_LIMIT)
+ dev->min_delta_ns = MIN_DELTA_LIMIT;
+
+ printk(KERN_WARNING "CE: %s increased min_delta_ns to %llu nsec\n",
+ dev->name ? dev->name : "?",
+ (unsigned long long) dev->min_delta_ns);
+ return 0;
+}
+
/**
* tick_program_event internal worker function
*/
@@ -37,23 +60,28 @@ int tick_dev_program_event(struct clock_event_device *dev, ktime_t expires,
if (!ret || !force)
return ret;
+ dev->retries++;
/*
- * We tried 2 times to program the device with the given
- * min_delta_ns. If that's not working then we double it
+ * We tried 3 times to program the device with the given
+ * min_delta_ns. If that's not working then we increase it
* and emit a warning.
*/
if (++i > 2) {
/* Increase the min. delta and try again */
- if (!dev->min_delta_ns)
- dev->min_delta_ns = 5000;
- else
- dev->min_delta_ns += dev->min_delta_ns >> 1;
-
- printk(KERN_WARNING
- "CE: %s increasing min_delta_ns to %llu nsec\n",
- dev->name ? dev->name : "?",
- (unsigned long long) dev->min_delta_ns << 1);
-
+ if (tick_increase_min_delta(dev)) {
+ /*
+ * Get out of the loop if min_delta_ns
+ * hit the limit already. That's
+ * better than staying here forever.
+ *
+ * We clear next_event so we have a
+ * chance that the box survives.
+ */
+ printk(KERN_WARNING
+ "CE: Reprogramming failure. Giving up\n");
+ dev->next_event.tv64 = KTIME_MAX;
+ return -ETIME;
+ }
i = 0;
}
diff --git a/kernel/time/timecompare.c b/kernel/time/timecompare.c
index 12f5c55090be..ac38fbb176cc 100644
--- a/kernel/time/timecompare.c
+++ b/kernel/time/timecompare.c
@@ -19,6 +19,7 @@
#include <linux/timecompare.h>
#include <linux/module.h>
+#include <linux/slab.h>
#include <linux/math64.h>
/*
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 16736379a9ca..39f6177fafac 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -818,7 +818,8 @@ void update_wall_time(void)
shift = min(shift, maxshift);
while (offset >= timekeeper.cycle_interval) {
offset = logarithmic_accumulation(offset, shift);
- shift--;
+ if(offset < timekeeper.cycle_interval<<shift)
+ shift--;
}
/* correct the clock when NTP error is too big */
diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c
index bdfb8dd1050c..1a4a7dd78777 100644
--- a/kernel/time/timer_list.c
+++ b/kernel/time/timer_list.c
@@ -228,6 +228,7 @@ print_tickdevice(struct seq_file *m, struct tick_device *td, int cpu)
SEQ_printf(m, " event_handler: ");
print_name_offset(m, dev->event_handler);
SEQ_printf(m, "\n");
+ SEQ_printf(m, " retries: %lu\n", dev->retries);
}
static void timer_list_show_tickdevices(struct seq_file *m)
@@ -257,7 +258,7 @@ static int timer_list_show(struct seq_file *m, void *v)
u64 now = ktime_to_ns(ktime_get());
int cpu;
- SEQ_printf(m, "Timer List Version: v0.5\n");
+ SEQ_printf(m, "Timer List Version: v0.6\n");
SEQ_printf(m, "HRTIMER_MAX_CLOCK_BASES: %d\n", HRTIMER_MAX_CLOCK_BASES);
SEQ_printf(m, "now at %Ld nsecs\n", (unsigned long long)now);
diff --git a/kernel/timer.c b/kernel/timer.c
index c61a7949387f..aeb6a54f2771 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -39,6 +39,7 @@
#include <linux/kallsyms.h>
#include <linux/perf_event.h>
#include <linux/sched.h>
+#include <linux/slab.h>
#include <asm/uaccess.h>
#include <asm/unistd.h>
@@ -880,6 +881,7 @@ int try_to_del_timer_sync(struct timer_list *timer)
if (base->running_timer == timer)
goto out;
+ timer_stats_timer_clear_start_info(timer);
ret = 0;
if (timer_pending(timer)) {
detach_timer(timer, 1);
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index d00c6fe23f54..78edc6490038 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -52,7 +52,7 @@ obj-$(CONFIG_EVENT_TRACING) += trace_events.o
obj-$(CONFIG_EVENT_TRACING) += trace_export.o
obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o
ifeq ($(CONFIG_PERF_EVENTS),y)
-obj-$(CONFIG_EVENT_TRACING) += trace_event_profile.o
+obj-$(CONFIG_EVENT_TRACING) += trace_event_perf.o
endif
obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o
obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 07f945a99430..b3bc91a3f510 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -21,6 +21,7 @@
#include <linux/percpu.h>
#include <linux/init.h>
#include <linux/mutex.h>
+#include <linux/slab.h>
#include <linux/debugfs.h>
#include <linux/smp_lock.h>
#include <linux/time.h>
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 83783579378f..2404b59b3097 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -24,9 +24,11 @@
#include <linux/uaccess.h>
#include <linux/ftrace.h>
#include <linux/sysctl.h>
+#include <linux/slab.h>
#include <linux/ctype.h>
#include <linux/list.h>
#include <linux/hash.h>
+#include <linux/rcupdate.h>
#include <trace/events/sched.h>
@@ -84,22 +86,22 @@ ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub;
ftrace_func_t __ftrace_trace_function __read_mostly = ftrace_stub;
ftrace_func_t ftrace_pid_function __read_mostly = ftrace_stub;
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-static int ftrace_set_func(unsigned long *array, int *idx, char *buffer);
-#endif
-
+/*
+ * Traverse the ftrace_list, invoking all entries. The reason that we
+ * can use rcu_dereference_raw() is that elements removed from this list
+ * are simply leaked, so there is no need to interact with a grace-period
+ * mechanism. The rcu_dereference_raw() calls are needed to handle
+ * concurrent insertions into the ftrace_list.
+ *
+ * Silly Alpha and silly pointer-speculation compiler optimizations!
+ */
static void ftrace_list_func(unsigned long ip, unsigned long parent_ip)
{
- struct ftrace_ops *op = ftrace_list;
-
- /* in case someone actually ports this to alpha! */
- read_barrier_depends();
+ struct ftrace_ops *op = rcu_dereference_raw(ftrace_list); /*see above*/
while (op != &ftrace_list_end) {
- /* silly alpha */
- read_barrier_depends();
op->func(ip, parent_ip);
- op = op->next;
+ op = rcu_dereference_raw(op->next); /*see above*/
};
}
@@ -154,8 +156,7 @@ static int __register_ftrace_function(struct ftrace_ops *ops)
* the ops->next pointer is valid before another CPU sees
* the ops pointer included into the ftrace_list.
*/
- smp_wmb();
- ftrace_list = ops;
+ rcu_assign_pointer(ftrace_list, ops);
if (ftrace_enabled) {
ftrace_func_t func;
@@ -2276,6 +2277,8 @@ __setup("ftrace_filter=", set_ftrace_filter);
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
static char ftrace_graph_buf[FTRACE_FILTER_SIZE] __initdata;
+static int ftrace_set_func(unsigned long *array, int *idx, char *buffer);
+
static int __init set_graph_function(char *str)
{
strlcpy(ftrace_graph_buf, str, FTRACE_FILTER_SIZE);
@@ -3351,6 +3354,7 @@ void ftrace_graph_init_task(struct task_struct *t)
{
/* Make sure we do not use the parent ret_stack */
t->ret_stack = NULL;
+ t->curr_ret_stack = -1;
if (ftrace_graph_active) {
struct ftrace_ret_stack *ret_stack;
@@ -3360,7 +3364,6 @@ void ftrace_graph_init_task(struct task_struct *t)
GFP_KERNEL);
if (!ret_stack)
return;
- t->curr_ret_stack = -1;
atomic_set(&t->tracing_graph_pause, 0);
atomic_set(&t->trace_overrun, 0);
t->ftrace_timestamp = 0;
diff --git a/kernel/trace/power-traces.c b/kernel/trace/power-traces.c
index 9f4f565b01e6..a22582a06161 100644
--- a/kernel/trace/power-traces.c
+++ b/kernel/trace/power-traces.c
@@ -9,7 +9,6 @@
#include <linux/workqueue.h>
#include <linux/sched.h>
#include <linux/module.h>
-#include <linux/slab.h>
#define CREATE_TRACE_POINTS
#include <trace/events/power.h>
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index a2f0fe951831..41ca394feb22 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -14,6 +14,7 @@
#include <linux/module.h>
#include <linux/percpu.h>
#include <linux/mutex.h>
+#include <linux/slab.h>
#include <linux/init.h>
#include <linux/hash.h>
#include <linux/list.h>
@@ -207,6 +208,14 @@ EXPORT_SYMBOL_GPL(tracing_is_on);
#define RB_MAX_SMALL_DATA (RB_ALIGNMENT * RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
#define RB_EVNT_MIN_SIZE 8U /* two 32bit words */
+#if !defined(CONFIG_64BIT) || defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
+# define RB_FORCE_8BYTE_ALIGNMENT 0
+# define RB_ARCH_ALIGNMENT RB_ALIGNMENT
+#else
+# define RB_FORCE_8BYTE_ALIGNMENT 1
+# define RB_ARCH_ALIGNMENT 8U
+#endif
+
/* define RINGBUF_TYPE_DATA for 'case RINGBUF_TYPE_DATA:' */
#define RINGBUF_TYPE_DATA 0 ... RINGBUF_TYPE_DATA_TYPE_LEN_MAX
@@ -1201,18 +1210,19 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages)
for (i = 0; i < nr_pages; i++) {
if (RB_WARN_ON(cpu_buffer, list_empty(cpu_buffer->pages)))
- return;
+ goto out;
p = cpu_buffer->pages->next;
bpage = list_entry(p, struct buffer_page, list);
list_del_init(&bpage->list);
free_buffer_page(bpage);
}
if (RB_WARN_ON(cpu_buffer, list_empty(cpu_buffer->pages)))
- return;
+ goto out;
rb_reset_cpu(cpu_buffer);
rb_check_pages(cpu_buffer);
+out:
spin_unlock_irq(&cpu_buffer->reader_lock);
}
@@ -1229,7 +1239,7 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer,
for (i = 0; i < nr_pages; i++) {
if (RB_WARN_ON(cpu_buffer, list_empty(pages)))
- return;
+ goto out;
p = pages->next;
bpage = list_entry(p, struct buffer_page, list);
list_del_init(&bpage->list);
@@ -1238,6 +1248,7 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer,
rb_reset_cpu(cpu_buffer);
rb_check_pages(cpu_buffer);
+out:
spin_unlock_irq(&cpu_buffer->reader_lock);
}
@@ -1547,7 +1558,7 @@ rb_update_event(struct ring_buffer_event *event,
case 0:
length -= RB_EVNT_HDR_SIZE;
- if (length > RB_MAX_SMALL_DATA)
+ if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT)
event->array[0] = length;
else
event->type_len = DIV_ROUND_UP(length, RB_ALIGNMENT);
@@ -1722,11 +1733,11 @@ static unsigned rb_calculate_event_length(unsigned length)
if (!length)
length = 1;
- if (length > RB_MAX_SMALL_DATA)
+ if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT)
length += sizeof(event.array[0]);
length += RB_EVNT_HDR_SIZE;
- length = ALIGN(length, RB_ALIGNMENT);
+ length = ALIGN(length, RB_ARCH_ALIGNMENT);
return length;
}
@@ -2233,12 +2244,12 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length)
if (ring_buffer_flags != RB_BUFFERS_ON)
return NULL;
- if (atomic_read(&buffer->record_disabled))
- return NULL;
-
/* If we are tracing schedule, we don't want to recurse */
resched = ftrace_preempt_disable();
+ if (atomic_read(&buffer->record_disabled))
+ goto out_nocheck;
+
if (trace_recursive_lock())
goto out_nocheck;
@@ -2470,11 +2481,11 @@ int ring_buffer_write(struct ring_buffer *buffer,
if (ring_buffer_flags != RB_BUFFERS_ON)
return -EBUSY;
- if (atomic_read(&buffer->record_disabled))
- return -EBUSY;
-
resched = ftrace_preempt_disable();
+ if (atomic_read(&buffer->record_disabled))
+ goto out;
+
cpu = raw_smp_processor_id();
if (!cpumask_test_cpu(cpu, buffer->cpumask))
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index ed01fdba4a55..44f916a04065 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -33,10 +33,10 @@
#include <linux/kdebug.h>
#include <linux/string.h>
#include <linux/rwsem.h>
+#include <linux/slab.h>
#include <linux/ctype.h>
#include <linux/init.h>
#include <linux/poll.h>
-#include <linux/gfp.h>
#include <linux/fs.h>
#include "trace.h"
@@ -374,6 +374,21 @@ static int __init set_buf_size(char *str)
}
__setup("trace_buf_size=", set_buf_size);
+static int __init set_tracing_thresh(char *str)
+{
+ unsigned long threshhold;
+ int ret;
+
+ if (!str)
+ return 0;
+ ret = strict_strtoul(str, 0, &threshhold);
+ if (ret < 0)
+ return 0;
+ tracing_thresh = threshhold * 1000;
+ return 1;
+}
+__setup("tracing_thresh=", set_tracing_thresh);
+
unsigned long nsecs_to_usecs(unsigned long nsecs)
{
return nsecs / 1000;
@@ -579,9 +594,10 @@ static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
static arch_spinlock_t ftrace_max_lock =
(arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
+unsigned long __read_mostly tracing_thresh;
+
#ifdef CONFIG_TRACER_MAX_TRACE
unsigned long __read_mostly tracing_max_latency;
-unsigned long __read_mostly tracing_thresh;
/*
* Copy the new maximum trace into the separate maximum-trace
@@ -592,7 +608,7 @@ static void
__update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
{
struct trace_array_cpu *data = tr->data[cpu];
- struct trace_array_cpu *max_data = tr->data[cpu];
+ struct trace_array_cpu *max_data;
max_tr.cpu = cpu;
max_tr.time_start = data->preempt_timestamp;
@@ -602,7 +618,7 @@ __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
max_data->critical_start = data->critical_start;
max_data->critical_end = data->critical_end;
- memcpy(data->comm, tsk->comm, TASK_COMM_LEN);
+ memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
max_data->pid = tsk->pid;
max_data->uid = task_uid(tsk);
max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
@@ -824,10 +840,10 @@ out:
mutex_unlock(&trace_types_lock);
}
-static void __tracing_reset(struct trace_array *tr, int cpu)
+static void __tracing_reset(struct ring_buffer *buffer, int cpu)
{
ftrace_disable_cpu();
- ring_buffer_reset_cpu(tr->buffer, cpu);
+ ring_buffer_reset_cpu(buffer, cpu);
ftrace_enable_cpu();
}
@@ -839,7 +855,7 @@ void tracing_reset(struct trace_array *tr, int cpu)
/* Make sure all commits have finished */
synchronize_sched();
- __tracing_reset(tr, cpu);
+ __tracing_reset(buffer, cpu);
ring_buffer_record_enable(buffer);
}
@@ -857,7 +873,7 @@ void tracing_reset_online_cpus(struct trace_array *tr)
tr->time_start = ftrace_now(tr->cpu);
for_each_online_cpu(cpu)
- __tracing_reset(tr, cpu);
+ __tracing_reset(buffer, cpu);
ring_buffer_record_enable(buffer);
}
@@ -934,6 +950,8 @@ void tracing_start(void)
goto out;
}
+ /* Prevent the buffers from switching */
+ arch_spin_lock(&ftrace_max_lock);
buffer = global_trace.buffer;
if (buffer)
@@ -943,6 +961,8 @@ void tracing_start(void)
if (buffer)
ring_buffer_record_enable(buffer);
+ arch_spin_unlock(&ftrace_max_lock);
+
ftrace_start();
out:
spin_unlock_irqrestore(&tracing_start_lock, flags);
@@ -964,6 +984,9 @@ void tracing_stop(void)
if (trace_stop_count++)
goto out;
+ /* Prevent the buffers from switching */
+ arch_spin_lock(&ftrace_max_lock);
+
buffer = global_trace.buffer;
if (buffer)
ring_buffer_record_disable(buffer);
@@ -972,6 +995,8 @@ void tracing_stop(void)
if (buffer)
ring_buffer_record_disable(buffer);
+ arch_spin_unlock(&ftrace_max_lock);
+
out:
spin_unlock_irqrestore(&tracing_start_lock, flags);
}
@@ -1259,6 +1284,13 @@ ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
if (!(trace_flags & TRACE_ITER_USERSTACKTRACE))
return;
+ /*
+ * NMIs can not handle page faults, even with fix ups.
+ * The save user stack can (and often does) fault.
+ */
+ if (unlikely(in_nmi()))
+ return;
+
event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
sizeof(*entry), flags, pc);
if (!event)
@@ -1703,6 +1735,7 @@ static void *s_start(struct seq_file *m, loff_t *pos)
ftrace_enable_cpu();
+ iter->leftover = 0;
for (p = iter; p && l < *pos; p = s_next(m, p, &l))
;
@@ -4248,10 +4281,10 @@ static __init int tracer_init_debugfs(void)
#ifdef CONFIG_TRACER_MAX_TRACE
trace_create_file("tracing_max_latency", 0644, d_tracer,
&tracing_max_latency, &tracing_max_lat_fops);
+#endif
trace_create_file("tracing_thresh", 0644, d_tracer,
&tracing_thresh, &tracing_max_lat_fops);
-#endif
trace_create_file("README", 0444, d_tracer,
NULL, &tracing_readme_fops);
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 09b39112a5e2..2825ef2c0b15 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -396,9 +396,10 @@ extern int process_new_ksym_entry(char *ksymname, int op, unsigned long addr);
extern unsigned long nsecs_to_usecs(unsigned long nsecs);
+extern unsigned long tracing_thresh;
+
#ifdef CONFIG_TRACER_MAX_TRACE
extern unsigned long tracing_max_latency;
-extern unsigned long tracing_thresh;
void update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu);
void update_max_tr_single(struct trace_array *tr,
diff --git a/kernel/trace/trace_clock.c b/kernel/trace/trace_clock.c
index 84a3a7ba072a..9d589d8dcd1a 100644
--- a/kernel/trace/trace_clock.c
+++ b/kernel/trace/trace_clock.c
@@ -13,6 +13,7 @@
* Tracer plugins will chose a default from these clocks.
*/
#include <linux/spinlock.h>
+#include <linux/irqflags.h>
#include <linux/hardirq.h>
#include <linux/module.h>
#include <linux/percpu.h>
@@ -83,7 +84,7 @@ u64 notrace trace_clock_global(void)
int this_cpu;
u64 now;
- raw_local_irq_save(flags);
+ local_irq_save(flags);
this_cpu = raw_smp_processor_id();
now = cpu_clock(this_cpu);
@@ -109,7 +110,7 @@ u64 notrace trace_clock_global(void)
arch_spin_unlock(&trace_clock_struct.lock);
out:
- raw_local_irq_restore(flags);
+ local_irq_restore(flags);
return now;
}
diff --git a/kernel/trace/trace_event_profile.c b/kernel/trace/trace_event_perf.c
index f0d693005075..0565bb42566f 100644
--- a/kernel/trace/trace_event_profile.c
+++ b/kernel/trace/trace_event_perf.c
@@ -1,32 +1,41 @@
/*
- * trace event based perf counter profiling
+ * trace event based perf event profiling/tracing
*
* Copyright (C) 2009 Red Hat Inc, Peter Zijlstra <pzijlstr@redhat.com>
- *
+ * Copyright (C) 2009-2010 Frederic Weisbecker <fweisbec@gmail.com>
*/
#include <linux/module.h>
#include <linux/kprobes.h>
#include "trace.h"
+DEFINE_PER_CPU(struct pt_regs, perf_trace_regs);
+EXPORT_PER_CPU_SYMBOL_GPL(perf_trace_regs);
+
+EXPORT_SYMBOL_GPL(perf_arch_fetch_caller_regs);
static char *perf_trace_buf;
static char *perf_trace_buf_nmi;
-typedef typeof(char [FTRACE_MAX_PROFILE_SIZE]) perf_trace_t ;
+/*
+ * Force it to be aligned to unsigned long to avoid misaligned accesses
+ * suprises
+ */
+typedef typeof(unsigned long [PERF_MAX_TRACE_SIZE / sizeof(unsigned long)])
+ perf_trace_t;
/* Count the events in use (per event id, not per instance) */
-static int total_profile_count;
+static int total_ref_count;
-static int ftrace_profile_enable_event(struct ftrace_event_call *event)
+static int perf_trace_event_enable(struct ftrace_event_call *event)
{
char *buf;
int ret = -ENOMEM;
- if (event->profile_count++ > 0)
+ if (event->perf_refcount++ > 0)
return 0;
- if (!total_profile_count) {
+ if (!total_ref_count) {
buf = (char *)alloc_percpu(perf_trace_t);
if (!buf)
goto fail_buf;
@@ -40,35 +49,35 @@ static int ftrace_profile_enable_event(struct ftrace_event_call *event)
rcu_assign_pointer(perf_trace_buf_nmi, buf);
}
- ret = event->profile_enable(event);
+ ret = event->perf_event_enable(event);
if (!ret) {
- total_profile_count++;
+ total_ref_count++;
return 0;
}
fail_buf_nmi:
- if (!total_profile_count) {
+ if (!total_ref_count) {
free_percpu(perf_trace_buf_nmi);
free_percpu(perf_trace_buf);
perf_trace_buf_nmi = NULL;
perf_trace_buf = NULL;
}
fail_buf:
- event->profile_count--;
+ event->perf_refcount--;
return ret;
}
-int ftrace_profile_enable(int event_id)
+int perf_trace_enable(int event_id)
{
struct ftrace_event_call *event;
int ret = -EINVAL;
mutex_lock(&event_mutex);
list_for_each_entry(event, &ftrace_events, list) {
- if (event->id == event_id && event->profile_enable &&
+ if (event->id == event_id && event->perf_event_enable &&
try_module_get(event->mod)) {
- ret = ftrace_profile_enable_event(event);
+ ret = perf_trace_event_enable(event);
break;
}
}
@@ -77,16 +86,16 @@ int ftrace_profile_enable(int event_id)
return ret;
}
-static void ftrace_profile_disable_event(struct ftrace_event_call *event)
+static void perf_trace_event_disable(struct ftrace_event_call *event)
{
char *buf, *nmi_buf;
- if (--event->profile_count > 0)
+ if (--event->perf_refcount > 0)
return;
- event->profile_disable(event);
+ event->perf_event_disable(event);
- if (!--total_profile_count) {
+ if (!--total_ref_count) {
buf = perf_trace_buf;
rcu_assign_pointer(perf_trace_buf, NULL);
@@ -104,14 +113,14 @@ static void ftrace_profile_disable_event(struct ftrace_event_call *event)
}
}
-void ftrace_profile_disable(int event_id)
+void perf_trace_disable(int event_id)
{
struct ftrace_event_call *event;
mutex_lock(&event_mutex);
list_for_each_entry(event, &ftrace_events, list) {
if (event->id == event_id) {
- ftrace_profile_disable_event(event);
+ perf_trace_event_disable(event);
module_put(event->mod);
break;
}
@@ -119,13 +128,15 @@ void ftrace_profile_disable(int event_id)
mutex_unlock(&event_mutex);
}
-__kprobes void *ftrace_perf_buf_prepare(int size, unsigned short type,
- int *rctxp, unsigned long *irq_flags)
+__kprobes void *perf_trace_buf_prepare(int size, unsigned short type,
+ int *rctxp, unsigned long *irq_flags)
{
struct trace_entry *entry;
char *trace_buf, *raw_data;
int pc, cpu;
+ BUILD_BUG_ON(PERF_MAX_TRACE_SIZE % sizeof(unsigned long));
+
pc = preempt_count();
/* Protect the per cpu buffer, begin the rcu read side */
@@ -138,9 +149,9 @@ __kprobes void *ftrace_perf_buf_prepare(int size, unsigned short type,
cpu = smp_processor_id();
if (in_nmi())
- trace_buf = rcu_dereference(perf_trace_buf_nmi);
+ trace_buf = rcu_dereference_sched(perf_trace_buf_nmi);
else
- trace_buf = rcu_dereference(perf_trace_buf);
+ trace_buf = rcu_dereference_sched(perf_trace_buf);
if (!trace_buf)
goto err;
@@ -148,7 +159,7 @@ __kprobes void *ftrace_perf_buf_prepare(int size, unsigned short type,
raw_data = per_cpu_ptr(trace_buf, cpu);
/* zero the dead bytes from align to not leak stack to user */
- *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
+ memset(&raw_data[size - sizeof(u64)], 0, sizeof(u64));
entry = (struct trace_entry *)raw_data;
tracing_generic_entry_update(entry, *irq_flags, pc);
@@ -161,4 +172,4 @@ err_recursion:
local_irq_restore(*irq_flags);
return NULL;
}
-EXPORT_SYMBOL_GPL(ftrace_perf_buf_prepare);
+EXPORT_SYMBOL_GPL(perf_trace_buf_prepare);
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 3f972ad98d04..c697c7043349 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -15,6 +15,7 @@
#include <linux/uaccess.h>
#include <linux/module.h>
#include <linux/ctype.h>
+#include <linux/slab.h>
#include <linux/delay.h>
#include <asm/setup.h>
@@ -938,7 +939,7 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events,
trace_create_file("enable", 0644, call->dir, call,
enable);
- if (call->id && call->profile_enable)
+ if (call->id && call->perf_event_enable)
trace_create_file("id", 0444, call->dir, call,
id);
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index 4615f62a04f1..88c0b6dbd7fe 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -22,6 +22,7 @@
#include <linux/ctype.h>
#include <linux/mutex.h>
#include <linux/perf_event.h>
+#include <linux/slab.h>
#include "trace.h"
#include "trace_output.h"
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 3fc2a575664f..9aed1a5cf553 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -9,6 +9,7 @@
#include <linux/debugfs.h>
#include <linux/uaccess.h>
#include <linux/ftrace.h>
+#include <linux/slab.h>
#include <linux/fs.h>
#include "trace.h"
@@ -237,6 +238,14 @@ int trace_graph_entry(struct ftrace_graph_ent *trace)
return ret;
}
+int trace_graph_thresh_entry(struct ftrace_graph_ent *trace)
+{
+ if (tracing_thresh)
+ return 1;
+ else
+ return trace_graph_entry(trace);
+}
+
static void __trace_graph_return(struct trace_array *tr,
struct ftrace_graph_ret *trace,
unsigned long flags,
@@ -290,13 +299,26 @@ void set_graph_array(struct trace_array *tr)
smp_mb();
}
+void trace_graph_thresh_return(struct ftrace_graph_ret *trace)
+{
+ if (tracing_thresh &&
+ (trace->rettime - trace->calltime < tracing_thresh))
+ return;
+ else
+ trace_graph_return(trace);
+}
+
static int graph_trace_init(struct trace_array *tr)
{
int ret;
set_graph_array(tr);
- ret = register_ftrace_graph(&trace_graph_return,
- &trace_graph_entry);
+ if (tracing_thresh)
+ ret = register_ftrace_graph(&trace_graph_thresh_return,
+ &trace_graph_thresh_entry);
+ else
+ ret = register_ftrace_graph(&trace_graph_return,
+ &trace_graph_entry);
if (ret)
return ret;
tracing_start_cmdline_record();
@@ -920,7 +942,7 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s,
if (!ret)
return TRACE_TYPE_PARTIAL_LINE;
} else {
- ret = trace_seq_printf(s, "} (%ps)\n", (void *)trace->func);
+ ret = trace_seq_printf(s, "} /* %ps */\n", (void *)trace->func);
if (!ret)
return TRACE_TYPE_PARTIAL_LINE;
}
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 505c92273b1a..1251e367bae9 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -1214,7 +1214,7 @@ static int set_print_fmt(struct trace_probe *tp)
#ifdef CONFIG_PERF_EVENTS
/* Kprobe profile handler */
-static __kprobes void kprobe_profile_func(struct kprobe *kp,
+static __kprobes void kprobe_perf_func(struct kprobe *kp,
struct pt_regs *regs)
{
struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
@@ -1227,11 +1227,11 @@ static __kprobes void kprobe_profile_func(struct kprobe *kp,
__size = SIZEOF_KPROBE_TRACE_ENTRY(tp->nr_args);
size = ALIGN(__size + sizeof(u32), sizeof(u64));
size -= sizeof(u32);
- if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE,
+ if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
"profile buffer not large enough"))
return;
- entry = ftrace_perf_buf_prepare(size, call->id, &rctx, &irq_flags);
+ entry = perf_trace_buf_prepare(size, call->id, &rctx, &irq_flags);
if (!entry)
return;
@@ -1240,11 +1240,11 @@ static __kprobes void kprobe_profile_func(struct kprobe *kp,
for (i = 0; i < tp->nr_args; i++)
entry->args[i] = call_fetch(&tp->args[i].fetch, regs);
- ftrace_perf_buf_submit(entry, size, rctx, entry->ip, 1, irq_flags);
+ perf_trace_buf_submit(entry, size, rctx, entry->ip, 1, irq_flags, regs);
}
/* Kretprobe profile handler */
-static __kprobes void kretprobe_profile_func(struct kretprobe_instance *ri,
+static __kprobes void kretprobe_perf_func(struct kretprobe_instance *ri,
struct pt_regs *regs)
{
struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);
@@ -1257,11 +1257,11 @@ static __kprobes void kretprobe_profile_func(struct kretprobe_instance *ri,
__size = SIZEOF_KRETPROBE_TRACE_ENTRY(tp->nr_args);
size = ALIGN(__size + sizeof(u32), sizeof(u64));
size -= sizeof(u32);
- if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE,
+ if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
"profile buffer not large enough"))
return;
- entry = ftrace_perf_buf_prepare(size, call->id, &rctx, &irq_flags);
+ entry = perf_trace_buf_prepare(size, call->id, &rctx, &irq_flags);
if (!entry)
return;
@@ -1271,10 +1271,11 @@ static __kprobes void kretprobe_profile_func(struct kretprobe_instance *ri,
for (i = 0; i < tp->nr_args; i++)
entry->args[i] = call_fetch(&tp->args[i].fetch, regs);
- ftrace_perf_buf_submit(entry, size, rctx, entry->ret_ip, 1, irq_flags);
+ perf_trace_buf_submit(entry, size, rctx, entry->ret_ip, 1,
+ irq_flags, regs);
}
-static int probe_profile_enable(struct ftrace_event_call *call)
+static int probe_perf_enable(struct ftrace_event_call *call)
{
struct trace_probe *tp = (struct trace_probe *)call->data;
@@ -1286,7 +1287,7 @@ static int probe_profile_enable(struct ftrace_event_call *call)
return enable_kprobe(&tp->rp.kp);
}
-static void probe_profile_disable(struct ftrace_event_call *call)
+static void probe_perf_disable(struct ftrace_event_call *call)
{
struct trace_probe *tp = (struct trace_probe *)call->data;
@@ -1311,7 +1312,7 @@ int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs)
kprobe_trace_func(kp, regs);
#ifdef CONFIG_PERF_EVENTS
if (tp->flags & TP_FLAG_PROFILE)
- kprobe_profile_func(kp, regs);
+ kprobe_perf_func(kp, regs);
#endif
return 0; /* We don't tweek kernel, so just return 0 */
}
@@ -1325,7 +1326,7 @@ int kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs)
kretprobe_trace_func(ri, regs);
#ifdef CONFIG_PERF_EVENTS
if (tp->flags & TP_FLAG_PROFILE)
- kretprobe_profile_func(ri, regs);
+ kretprobe_perf_func(ri, regs);
#endif
return 0; /* We don't tweek kernel, so just return 0 */
}
@@ -1358,8 +1359,8 @@ static int register_probe_event(struct trace_probe *tp)
call->unregfunc = probe_event_disable;
#ifdef CONFIG_PERF_EVENTS
- call->profile_enable = probe_profile_enable;
- call->profile_disable = probe_profile_disable;
+ call->perf_event_enable = probe_perf_enable;
+ call->perf_event_disable = probe_perf_disable;
#endif
call->data = tp;
ret = trace_add_event_call(call);
diff --git a/kernel/trace/trace_ksym.c b/kernel/trace/trace_ksym.c
index 94103cdcf9d8..d59cd6879477 100644
--- a/kernel/trace/trace_ksym.c
+++ b/kernel/trace/trace_ksym.c
@@ -23,6 +23,7 @@
#include <linux/debugfs.h>
#include <linux/ftrace.h>
#include <linux/module.h>
+#include <linux/slab.h>
#include <linux/fs.h>
#include "trace_output.h"
diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c
index 0acd834659ed..017fa376505d 100644
--- a/kernel/trace/trace_mmiotrace.c
+++ b/kernel/trace/trace_mmiotrace.c
@@ -9,6 +9,7 @@
#include <linux/kernel.h>
#include <linux/mmiotrace.h>
#include <linux/pci.h>
+#include <linux/slab.h>
#include <linux/time.h>
#include <asm/atomic.h>
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index 280fea470d67..81003b4d617f 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -3,6 +3,7 @@
#include <linux/stringify.h>
#include <linux/kthread.h>
#include <linux/delay.h>
+#include <linux/slab.h>
static inline int trace_valid_entry(struct trace_entry *entry)
{
diff --git a/kernel/trace/trace_stat.c b/kernel/trace/trace_stat.c
index a4bb239eb987..96cffb269e73 100644
--- a/kernel/trace/trace_stat.c
+++ b/kernel/trace/trace_stat.c
@@ -10,6 +10,7 @@
#include <linux/list.h>
+#include <linux/slab.h>
#include <linux/rbtree.h>
#include <linux/debugfs.h>
#include "trace_stat.h"
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index cba47d7935cc..4d6d711717f2 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -1,5 +1,6 @@
#include <trace/syscall.h>
#include <trace/events/syscalls.h>
+#include <linux/slab.h>
#include <linux/kernel.h>
#include <linux/ftrace.h>
#include <linux/perf_event.h>
@@ -428,12 +429,12 @@ core_initcall(init_ftrace_syscalls);
#ifdef CONFIG_PERF_EVENTS
-static DECLARE_BITMAP(enabled_prof_enter_syscalls, NR_syscalls);
-static DECLARE_BITMAP(enabled_prof_exit_syscalls, NR_syscalls);
-static int sys_prof_refcount_enter;
-static int sys_prof_refcount_exit;
+static DECLARE_BITMAP(enabled_perf_enter_syscalls, NR_syscalls);
+static DECLARE_BITMAP(enabled_perf_exit_syscalls, NR_syscalls);
+static int sys_perf_refcount_enter;
+static int sys_perf_refcount_exit;
-static void prof_syscall_enter(struct pt_regs *regs, long id)
+static void perf_syscall_enter(struct pt_regs *regs, long id)
{
struct syscall_metadata *sys_data;
struct syscall_trace_enter *rec;
@@ -443,7 +444,7 @@ static void prof_syscall_enter(struct pt_regs *regs, long id)
int size;
syscall_nr = syscall_get_nr(current, regs);
- if (!test_bit(syscall_nr, enabled_prof_enter_syscalls))
+ if (!test_bit(syscall_nr, enabled_perf_enter_syscalls))
return;
sys_data = syscall_nr_to_meta(syscall_nr);
@@ -455,11 +456,11 @@ static void prof_syscall_enter(struct pt_regs *regs, long id)
size = ALIGN(size + sizeof(u32), sizeof(u64));
size -= sizeof(u32);
- if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE,
- "profile buffer not large enough"))
+ if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
+ "perf buffer not large enough"))
return;
- rec = (struct syscall_trace_enter *)ftrace_perf_buf_prepare(size,
+ rec = (struct syscall_trace_enter *)perf_trace_buf_prepare(size,
sys_data->enter_event->id, &rctx, &flags);
if (!rec)
return;
@@ -467,10 +468,10 @@ static void prof_syscall_enter(struct pt_regs *regs, long id)
rec->nr = syscall_nr;
syscall_get_arguments(current, regs, 0, sys_data->nb_args,
(unsigned long *)&rec->args);
- ftrace_perf_buf_submit(rec, size, rctx, 0, 1, flags);
+ perf_trace_buf_submit(rec, size, rctx, 0, 1, flags, regs);
}
-int prof_sysenter_enable(struct ftrace_event_call *call)
+int perf_sysenter_enable(struct ftrace_event_call *call)
{
int ret = 0;
int num;
@@ -478,34 +479,34 @@ int prof_sysenter_enable(struct ftrace_event_call *call)
num = ((struct syscall_metadata *)call->data)->syscall_nr;
mutex_lock(&syscall_trace_lock);
- if (!sys_prof_refcount_enter)
- ret = register_trace_sys_enter(prof_syscall_enter);
+ if (!sys_perf_refcount_enter)
+ ret = register_trace_sys_enter(perf_syscall_enter);
if (ret) {
pr_info("event trace: Could not activate"
"syscall entry trace point");
} else {
- set_bit(num, enabled_prof_enter_syscalls);
- sys_prof_refcount_enter++;
+ set_bit(num, enabled_perf_enter_syscalls);
+ sys_perf_refcount_enter++;
}
mutex_unlock(&syscall_trace_lock);
return ret;
}
-void prof_sysenter_disable(struct ftrace_event_call *call)
+void perf_sysenter_disable(struct ftrace_event_call *call)
{
int num;
num = ((struct syscall_metadata *)call->data)->syscall_nr;
mutex_lock(&syscall_trace_lock);
- sys_prof_refcount_enter--;
- clear_bit(num, enabled_prof_enter_syscalls);
- if (!sys_prof_refcount_enter)
- unregister_trace_sys_enter(prof_syscall_enter);
+ sys_perf_refcount_enter--;
+ clear_bit(num, enabled_perf_enter_syscalls);
+ if (!sys_perf_refcount_enter)
+ unregister_trace_sys_enter(perf_syscall_enter);
mutex_unlock(&syscall_trace_lock);
}
-static void prof_syscall_exit(struct pt_regs *regs, long ret)
+static void perf_syscall_exit(struct pt_regs *regs, long ret)
{
struct syscall_metadata *sys_data;
struct syscall_trace_exit *rec;
@@ -515,7 +516,7 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret)
int size;
syscall_nr = syscall_get_nr(current, regs);
- if (!test_bit(syscall_nr, enabled_prof_exit_syscalls))
+ if (!test_bit(syscall_nr, enabled_perf_exit_syscalls))
return;
sys_data = syscall_nr_to_meta(syscall_nr);
@@ -530,11 +531,11 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret)
* Impossible, but be paranoid with the future
* How to put this check outside runtime?
*/
- if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE,
- "exit event has grown above profile buffer size"))
+ if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
+ "exit event has grown above perf buffer size"))
return;
- rec = (struct syscall_trace_exit *)ftrace_perf_buf_prepare(size,
+ rec = (struct syscall_trace_exit *)perf_trace_buf_prepare(size,
sys_data->exit_event->id, &rctx, &flags);
if (!rec)
return;
@@ -542,10 +543,10 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret)
rec->nr = syscall_nr;
rec->ret = syscall_get_return_value(current, regs);
- ftrace_perf_buf_submit(rec, size, rctx, 0, 1, flags);
+ perf_trace_buf_submit(rec, size, rctx, 0, 1, flags, regs);
}
-int prof_sysexit_enable(struct ftrace_event_call *call)
+int perf_sysexit_enable(struct ftrace_event_call *call)
{
int ret = 0;
int num;
@@ -553,30 +554,30 @@ int prof_sysexit_enable(struct ftrace_event_call *call)
num = ((struct syscall_metadata *)call->data)->syscall_nr;
mutex_lock(&syscall_trace_lock);
- if (!sys_prof_refcount_exit)
- ret = register_trace_sys_exit(prof_syscall_exit);
+ if (!sys_perf_refcount_exit)
+ ret = register_trace_sys_exit(perf_syscall_exit);
if (ret) {
pr_info("event trace: Could not activate"
"syscall exit trace point");
} else {
- set_bit(num, enabled_prof_exit_syscalls);
- sys_prof_refcount_exit++;
+ set_bit(num, enabled_perf_exit_syscalls);
+ sys_perf_refcount_exit++;
}
mutex_unlock(&syscall_trace_lock);
return ret;
}
-void prof_sysexit_disable(struct ftrace_event_call *call)
+void perf_sysexit_disable(struct ftrace_event_call *call)
{
int num;
num = ((struct syscall_metadata *)call->data)->syscall_nr;
mutex_lock(&syscall_trace_lock);
- sys_prof_refcount_exit--;
- clear_bit(num, enabled_prof_exit_syscalls);
- if (!sys_prof_refcount_exit)
- unregister_trace_sys_exit(prof_syscall_exit);
+ sys_perf_refcount_exit--;
+ clear_bit(num, enabled_perf_exit_syscalls);
+ if (!sys_perf_refcount_exit)
+ unregister_trace_sys_exit(perf_syscall_exit);
mutex_unlock(&syscall_trace_lock);
}
diff --git a/kernel/trace/trace_workqueue.c b/kernel/trace/trace_workqueue.c
index 40cafb07dffd..cc2d2faa7d9e 100644
--- a/kernel/trace/trace_workqueue.c
+++ b/kernel/trace/trace_workqueue.c
@@ -9,6 +9,7 @@
#include <trace/events/workqueue.h>
#include <linux/list.h>
#include <linux/percpu.h>
+#include <linux/slab.h>
#include <linux/kref.h>
#include "trace_stat.h"
#include "trace.h"
OpenPOWER on IntegriCloud