summaryrefslogtreecommitdiffstats
path: root/kernel/stop_machine.c
diff options
context:
space:
mode:
authorRik van Riel <riel@redhat.com>2013-11-01 10:41:46 -0400
committerIngo Molnar <mingo@kernel.org>2013-11-11 12:43:38 +0100
commit7053ea1a34fa8567cb5e3c39e04ace4c5d0fbeaa (patch)
treeed5866f5a2449e6d7b918ce92858f891c8bc1450 /kernel/stop_machine.c
parent37dc6b50cee97954c4e6edcd5b1fa614b76038ee (diff)
downloadtalos-op-linux-7053ea1a34fa8567cb5e3c39e04ace4c5d0fbeaa.tar.gz
talos-op-linux-7053ea1a34fa8567cb5e3c39e04ace4c5d0fbeaa.zip
stop_machine: Fix race between stop_two_cpus() and stop_cpus()
There is a race between stop_two_cpus, and the global stop_cpus. It is possible for two CPUs to get their stopper functions queued "backwards" from one another, resulting in the stopper threads getting stuck, and the system hanging. This can happen because queuing up stoppers is not synchronized. This patch adds synchronization between stop_cpus (a rare operation), and stop_two_cpus. Reported-and-Tested-by: Prarit Bhargava <prarit@redhat.com> Signed-off-by: Rik van Riel <riel@redhat.com> Signed-off-by: Peter Zijlstra <peterz@infradead.org> Acked-by: Mel Gorman <mgorman@suse.de> Link: http://lkml.kernel.org/r/20131101104146.03d1e043@annuminas.surriel.com Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'kernel/stop_machine.c')
-rw-r--r--kernel/stop_machine.c15
1 files changed, 13 insertions, 2 deletions
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index c530bc5be7cf..84571e09c907 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -20,6 +20,7 @@
#include <linux/kallsyms.h>
#include <linux/smpboot.h>
#include <linux/atomic.h>
+#include <linux/lglock.h>
/*
* Structure to determine completion condition and record errors. May
@@ -43,6 +44,14 @@ static DEFINE_PER_CPU(struct cpu_stopper, cpu_stopper);
static DEFINE_PER_CPU(struct task_struct *, cpu_stopper_task);
static bool stop_machine_initialized = false;
+/*
+ * Avoids a race between stop_two_cpus and global stop_cpus, where
+ * the stoppers could get queued up in reverse order, leading to
+ * system deadlock. Using an lglock means stop_two_cpus remains
+ * relatively cheap.
+ */
+DEFINE_STATIC_LGLOCK(stop_cpus_lock);
+
static void cpu_stop_init_done(struct cpu_stop_done *done, unsigned int nr_todo)
{
memset(done, 0, sizeof(*done));
@@ -276,6 +285,7 @@ int stop_two_cpus(unsigned int cpu1, unsigned int cpu2, cpu_stop_fn_t fn, void *
return -ENOENT;
}
+ lg_local_lock(&stop_cpus_lock);
/*
* Queuing needs to be done by the lowest numbered CPU, to ensure
* that works are always queued in the same order on every CPU.
@@ -284,6 +294,7 @@ int stop_two_cpus(unsigned int cpu1, unsigned int cpu2, cpu_stop_fn_t fn, void *
smp_call_function_single(min(cpu1, cpu2),
&irq_cpu_stop_queue_work,
&call_args, 0);
+ lg_local_unlock(&stop_cpus_lock);
preempt_enable();
wait_for_completion(&done.completion);
@@ -335,10 +346,10 @@ static void queue_stop_cpus_work(const struct cpumask *cpumask,
* preempted by a stopper which might wait for other stoppers
* to enter @fn which can lead to deadlock.
*/
- preempt_disable();
+ lg_global_lock(&stop_cpus_lock);
for_each_cpu(cpu, cpumask)
cpu_stop_queue_work(cpu, &per_cpu(stop_cpus_work, cpu));
- preempt_enable();
+ lg_global_unlock(&stop_cpus_lock);
}
static int __stop_cpus(const struct cpumask *cpumask,
OpenPOWER on IntegriCloud