summaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/cgroup.c58
-rw-r--r--kernel/cpuset.c20
-rw-r--r--kernel/events/core.c2
-rw-r--r--kernel/power/process.c1
-rw-r--r--kernel/power/suspend.c4
-rw-r--r--kernel/trace/ftrace.c4
-rw-r--r--kernel/trace/ring_buffer.c4
-rw-r--r--kernel/trace/trace.c18
-rw-r--r--kernel/trace/trace_events.c1
-rw-r--r--kernel/workqueue.c3
10 files changed, 94 insertions, 21 deletions
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 7868fc3c0bc5..70776aec2562 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1648,10 +1648,13 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
int flags, const char *unused_dev_name,
void *data)
{
+ struct super_block *pinned_sb = NULL;
+ struct cgroup_subsys *ss;
struct cgroup_root *root;
struct cgroup_sb_opts opts;
struct dentry *dentry;
int ret;
+ int i;
bool new_sb;
/*
@@ -1677,6 +1680,27 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
goto out_unlock;
}
+ /*
+ * Destruction of cgroup root is asynchronous, so subsystems may
+ * still be dying after the previous unmount. Let's drain the
+ * dying subsystems. We just need to ensure that the ones
+ * unmounted previously finish dying and don't care about new ones
+ * starting. Testing ref liveliness is good enough.
+ */
+ for_each_subsys(ss, i) {
+ if (!(opts.subsys_mask & (1 << i)) ||
+ ss->root == &cgrp_dfl_root)
+ continue;
+
+ if (!percpu_ref_tryget_live(&ss->root->cgrp.self.refcnt)) {
+ mutex_unlock(&cgroup_mutex);
+ msleep(10);
+ ret = restart_syscall();
+ goto out_free;
+ }
+ cgroup_put(&ss->root->cgrp);
+ }
+
for_each_root(root) {
bool name_match = false;
@@ -1717,15 +1741,23 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
}
/*
- * A root's lifetime is governed by its root cgroup.
- * tryget_live failure indicate that the root is being
- * destroyed. Wait for destruction to complete so that the
- * subsystems are free. We can use wait_queue for the wait
- * but this path is super cold. Let's just sleep for a bit
- * and retry.
+ * We want to reuse @root whose lifetime is governed by its
+ * ->cgrp. Let's check whether @root is alive and keep it
+ * that way. As cgroup_kill_sb() can happen anytime, we
+ * want to block it by pinning the sb so that @root doesn't
+ * get killed before mount is complete.
+ *
+ * With the sb pinned, tryget_live can reliably indicate
+ * whether @root can be reused. If it's being killed,
+ * drain it. We can use wait_queue for the wait but this
+ * path is super cold. Let's just sleep a bit and retry.
*/
- if (!percpu_ref_tryget_live(&root->cgrp.self.refcnt)) {
+ pinned_sb = kernfs_pin_sb(root->kf_root, NULL);
+ if (IS_ERR(pinned_sb) ||
+ !percpu_ref_tryget_live(&root->cgrp.self.refcnt)) {
mutex_unlock(&cgroup_mutex);
+ if (!IS_ERR_OR_NULL(pinned_sb))
+ deactivate_super(pinned_sb);
msleep(10);
ret = restart_syscall();
goto out_free;
@@ -1770,6 +1802,16 @@ out_free:
CGROUP_SUPER_MAGIC, &new_sb);
if (IS_ERR(dentry) || !new_sb)
cgroup_put(&root->cgrp);
+
+ /*
+ * If @pinned_sb, we're reusing an existing root and holding an
+ * extra ref on its sb. Mount is complete. Put the extra ref.
+ */
+ if (pinned_sb) {
+ WARN_ON(new_sb);
+ deactivate_super(pinned_sb);
+ }
+
return dentry;
}
@@ -3328,7 +3370,7 @@ bool css_has_online_children(struct cgroup_subsys_state *css)
rcu_read_lock();
css_for_each_child(child, css) {
- if (css->flags & CSS_ONLINE) {
+ if (child->flags & CSS_ONLINE) {
ret = true;
break;
}
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index f6b33c696224..116a4164720a 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -1181,7 +1181,13 @@ done:
int current_cpuset_is_being_rebound(void)
{
- return task_cs(current) == cpuset_being_rebound;
+ int ret;
+
+ rcu_read_lock();
+ ret = task_cs(current) == cpuset_being_rebound;
+ rcu_read_unlock();
+
+ return ret;
}
static int update_relax_domain_level(struct cpuset *cs, s64 val)
@@ -1617,7 +1623,17 @@ static ssize_t cpuset_write_resmask(struct kernfs_open_file *of,
* resources, wait for the previously scheduled operations before
* proceeding, so that we don't end up keep removing tasks added
* after execution capability is restored.
+ *
+ * cpuset_hotplug_work calls back into cgroup core via
+ * cgroup_transfer_tasks() and waiting for it from a cgroupfs
+ * operation like this one can lead to a deadlock through kernfs
+ * active_ref protection. Let's break the protection. Losing the
+ * protection is okay as we check whether @cs is online after
+ * grabbing cpuset_mutex anyway. This only happens on the legacy
+ * hierarchies.
*/
+ css_get(&cs->css);
+ kernfs_break_active_protection(of->kn);
flush_work(&cpuset_hotplug_work);
mutex_lock(&cpuset_mutex);
@@ -1645,6 +1661,8 @@ static ssize_t cpuset_write_resmask(struct kernfs_open_file *of,
free_trial_cpuset(trialcs);
out_unlock:
mutex_unlock(&cpuset_mutex);
+ kernfs_unbreak_active_protection(of->kn);
+ css_put(&cs->css);
return retval ?: nbytes;
}
diff --git a/kernel/events/core.c b/kernel/events/core.c
index a33d9a2bcbd7..b0c95f0f06fd 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -2320,7 +2320,7 @@ static void perf_event_context_sched_out(struct task_struct *task, int ctxn,
next_parent = rcu_dereference(next_ctx->parent_ctx);
/* If neither context have a parent context; they cannot be clones. */
- if (!parent && !next_parent)
+ if (!parent || !next_parent)
goto unlock;
if (next_parent == ctx || next_ctx == parent || next_parent == parent) {
diff --git a/kernel/power/process.c b/kernel/power/process.c
index 0ca8d83e2369..4ee194eb524b 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -186,6 +186,7 @@ void thaw_processes(void)
printk("Restarting tasks ... ");
+ __usermodehelper_set_disable_depth(UMH_FREEZING);
thaw_workqueues();
read_lock(&tasklist_lock);
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index 4dd8822f732a..ed35a4790afe 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -306,7 +306,7 @@ int suspend_devices_and_enter(suspend_state_t state)
error = suspend_ops->begin(state);
if (error)
goto Close;
- } else if (state == PM_SUSPEND_FREEZE && freeze_ops->begin) {
+ } else if (state == PM_SUSPEND_FREEZE && freeze_ops && freeze_ops->begin) {
error = freeze_ops->begin();
if (error)
goto Close;
@@ -335,7 +335,7 @@ int suspend_devices_and_enter(suspend_state_t state)
Close:
if (need_suspend_ops(state) && suspend_ops->end)
suspend_ops->end();
- else if (state == PM_SUSPEND_FREEZE && freeze_ops->end)
+ else if (state == PM_SUSPEND_FREEZE && freeze_ops && freeze_ops->end)
freeze_ops->end();
return error;
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 5b372e3ed675..ac9d1dad630b 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -265,12 +265,12 @@ static void update_ftrace_function(void)
func = ftrace_ops_list_func;
}
+ update_function_graph_func();
+
/* If there's no change, then do nothing more here */
if (ftrace_trace_function == func)
return;
- update_function_graph_func();
-
/*
* If we are using the list function, it doesn't care
* about the function_trace_ops.
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 7c56c3d06943..ff7027199a9a 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -616,10 +616,6 @@ int ring_buffer_poll_wait(struct ring_buffer *buffer, int cpu,
struct ring_buffer_per_cpu *cpu_buffer;
struct rb_irq_work *work;
- if ((cpu == RING_BUFFER_ALL_CPUS && !ring_buffer_empty(buffer)) ||
- (cpu != RING_BUFFER_ALL_CPUS && !ring_buffer_empty_cpu(buffer, cpu)))
- return POLLIN | POLLRDNORM;
-
if (cpu == RING_BUFFER_ALL_CPUS)
work = &buffer->irq_work;
else {
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index f243444a3772..bda9621638cc 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -466,6 +466,12 @@ int __trace_puts(unsigned long ip, const char *str, int size)
struct print_entry *entry;
unsigned long irq_flags;
int alloc;
+ int pc;
+
+ if (!(trace_flags & TRACE_ITER_PRINTK))
+ return 0;
+
+ pc = preempt_count();
if (unlikely(tracing_selftest_running || tracing_disabled))
return 0;
@@ -475,7 +481,7 @@ int __trace_puts(unsigned long ip, const char *str, int size)
local_save_flags(irq_flags);
buffer = global_trace.trace_buffer.buffer;
event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
- irq_flags, preempt_count());
+ irq_flags, pc);
if (!event)
return 0;
@@ -492,6 +498,7 @@ int __trace_puts(unsigned long ip, const char *str, int size)
entry->buf[size] = '\0';
__buffer_unlock_commit(buffer, event);
+ ftrace_trace_stack(buffer, irq_flags, 4, pc);
return size;
}
@@ -509,6 +516,12 @@ int __trace_bputs(unsigned long ip, const char *str)
struct bputs_entry *entry;
unsigned long irq_flags;
int size = sizeof(struct bputs_entry);
+ int pc;
+
+ if (!(trace_flags & TRACE_ITER_PRINTK))
+ return 0;
+
+ pc = preempt_count();
if (unlikely(tracing_selftest_running || tracing_disabled))
return 0;
@@ -516,7 +529,7 @@ int __trace_bputs(unsigned long ip, const char *str)
local_save_flags(irq_flags);
buffer = global_trace.trace_buffer.buffer;
event = trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
- irq_flags, preempt_count());
+ irq_flags, pc);
if (!event)
return 0;
@@ -525,6 +538,7 @@ int __trace_bputs(unsigned long ip, const char *str)
entry->str = str;
__buffer_unlock_commit(buffer, event);
+ ftrace_trace_stack(buffer, irq_flags, 4, pc);
return 1;
}
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index f99e0b3bca8c..2de53628689f 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -470,6 +470,7 @@ static void remove_event_file_dir(struct ftrace_event_file *file)
list_del(&file->list);
remove_subsystem(file->system);
+ free_event_filter(file->filter);
kmem_cache_free(file_cachep, file);
}
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 6203d2900877..35974ac69600 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -3284,6 +3284,7 @@ int workqueue_sysfs_register(struct workqueue_struct *wq)
}
}
+ dev_set_uevent_suppress(&wq_dev->dev, false);
kobject_uevent(&wq_dev->dev.kobj, KOBJ_ADD);
return 0;
}
@@ -4879,7 +4880,7 @@ static void __init wq_numa_init(void)
BUG_ON(!tbl);
for_each_node(node)
- BUG_ON(!alloc_cpumask_var_node(&tbl[node], GFP_KERNEL,
+ BUG_ON(!zalloc_cpumask_var_node(&tbl[node], GFP_KERNEL,
node_online(node) ? node : NUMA_NO_NODE));
for_each_possible_cpu(cpu) {
OpenPOWER on IntegriCloud