summaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
authorBenjamin Herrenschmidt <benh@kernel.crashing.org>2008-07-16 11:07:59 +1000
committerBenjamin Herrenschmidt <benh@kernel.crashing.org>2008-07-16 11:07:59 +1000
commit84c3d4aaec3338201b449034beac41635866bddf (patch)
tree3412951682fb2dd4feb8a5532f8efbaf8b345933 /kernel
parent43d2548bb2ef7e6d753f91468a746784041e522d (diff)
parentfafa3a3f16723997f039a0193997464d66dafd8f (diff)
downloadblackbird-op-linux-84c3d4aaec3338201b449034beac41635866bddf.tar.gz
blackbird-op-linux-84c3d4aaec3338201b449034beac41635866bddf.zip
Merge commit 'origin/master'
Manual merge of: arch/powerpc/Kconfig arch/powerpc/kernel/stacktrace.c arch/powerpc/mm/slice.c arch/ppc/kernel/smp.c
Diffstat (limited to 'kernel')
-rw-r--r--kernel/Makefile1
-rw-r--r--kernel/backtracetest.c65
-rw-r--r--kernel/hrtimer.c9
-rw-r--r--kernel/irq/manage.c33
-rw-r--r--kernel/irq/proc.c59
-rw-r--r--kernel/pid.c1
-rw-r--r--kernel/posix-cpu-timers.c3
-rw-r--r--kernel/profile.c6
-rw-r--r--kernel/rcuclassic.c34
-rw-r--r--kernel/rcupdate.c71
-rw-r--r--kernel/rcupreempt.c418
-rw-r--r--kernel/rcupreempt_trace.c1
-rw-r--r--kernel/rcutorture.c174
-rw-r--r--kernel/smp.c383
-rw-r--r--kernel/softirq.c4
-rw-r--r--kernel/stacktrace.c14
-rw-r--r--kernel/sysctl.c13
-rw-r--r--kernel/time/tick-broadcast.c2
-rw-r--r--kernel/time/tick-sched.c8
19 files changed, 1182 insertions, 117 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index f6328e16dfdd..0a7ed838984b 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -39,6 +39,7 @@ obj-$(CONFIG_RT_MUTEXES) += rtmutex.o
obj-$(CONFIG_DEBUG_RT_MUTEXES) += rtmutex-debug.o
obj-$(CONFIG_RT_MUTEX_TESTER) += rtmutex-tester.o
obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o
+obj-$(CONFIG_USE_GENERIC_SMP_HELPERS) += smp.o
obj-$(CONFIG_SMP) += spinlock.o
obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o
obj-$(CONFIG_PROVE_LOCKING) += spinlock.o
diff --git a/kernel/backtracetest.c b/kernel/backtracetest.c
index d1a7605c5b8f..a5e026bc45c4 100644
--- a/kernel/backtracetest.c
+++ b/kernel/backtracetest.c
@@ -10,30 +10,73 @@
* of the License.
*/
+#include <linux/completion.h>
+#include <linux/delay.h>
+#include <linux/interrupt.h>
#include <linux/module.h>
#include <linux/sched.h>
-#include <linux/delay.h>
+#include <linux/stacktrace.h>
+
+static void backtrace_test_normal(void)
+{
+ printk("Testing a backtrace from process context.\n");
+ printk("The following trace is a kernel self test and not a bug!\n");
-static struct timer_list backtrace_timer;
+ dump_stack();
+}
-static void backtrace_test_timer(unsigned long data)
+static DECLARE_COMPLETION(backtrace_work);
+
+static void backtrace_test_irq_callback(unsigned long data)
+{
+ dump_stack();
+ complete(&backtrace_work);
+}
+
+static DECLARE_TASKLET(backtrace_tasklet, &backtrace_test_irq_callback, 0);
+
+static void backtrace_test_irq(void)
{
printk("Testing a backtrace from irq context.\n");
printk("The following trace is a kernel self test and not a bug!\n");
- dump_stack();
+
+ init_completion(&backtrace_work);
+ tasklet_schedule(&backtrace_tasklet);
+ wait_for_completion(&backtrace_work);
+}
+
+#ifdef CONFIG_STACKTRACE
+static void backtrace_test_saved(void)
+{
+ struct stack_trace trace;
+ unsigned long entries[8];
+
+ printk("Testing a saved backtrace.\n");
+ printk("The following trace is a kernel self test and not a bug!\n");
+
+ trace.nr_entries = 0;
+ trace.max_entries = ARRAY_SIZE(entries);
+ trace.entries = entries;
+ trace.skip = 0;
+
+ save_stack_trace(&trace);
+ print_stack_trace(&trace, 0);
+}
+#else
+static void backtrace_test_saved(void)
+{
+ printk("Saved backtrace test skipped.\n");
}
+#endif
+
static int backtrace_regression_test(void)
{
printk("====[ backtrace testing ]===========\n");
- printk("Testing a backtrace from process context.\n");
- printk("The following trace is a kernel self test and not a bug!\n");
- dump_stack();
- init_timer(&backtrace_timer);
- backtrace_timer.function = backtrace_test_timer;
- mod_timer(&backtrace_timer, jiffies + 10);
+ backtrace_test_normal();
+ backtrace_test_irq();
+ backtrace_test_saved();
- msleep(10);
printk("====[ end of backtrace testing ]====\n");
return 0;
}
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 27a83ee41443..b8e4dce80a74 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -300,11 +300,10 @@ EXPORT_SYMBOL_GPL(ktime_sub_ns);
*/
u64 ktime_divns(const ktime_t kt, s64 div)
{
- u64 dclc, inc, dns;
+ u64 dclc;
int sft = 0;
- dclc = dns = ktime_to_ns(kt);
- inc = div;
+ dclc = ktime_to_ns(kt);
/* Make sure the divisor is less than 2^32: */
while (div >> 32) {
sft++;
@@ -623,7 +622,7 @@ static void retrigger_next_event(void *arg)
void clock_was_set(void)
{
/* Retrigger the CPU local events everywhere */
- on_each_cpu(retrigger_next_event, NULL, 0, 1);
+ on_each_cpu(retrigger_next_event, NULL, 1);
}
/*
@@ -632,8 +631,6 @@ void clock_was_set(void)
*/
void hres_timers_resume(void)
{
- WARN_ON_ONCE(num_online_cpus() > 1);
-
/* Retrigger the CPU local events: */
retrigger_next_event(NULL);
}
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 46d6611a33bb..77a51be36010 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -17,6 +17,8 @@
#ifdef CONFIG_SMP
+cpumask_t irq_default_affinity = CPU_MASK_ALL;
+
/**
* synchronize_irq - wait for pending IRQ handlers (on other CPUs)
* @irq: interrupt number to wait for
@@ -95,6 +97,27 @@ int irq_set_affinity(unsigned int irq, cpumask_t cpumask)
return 0;
}
+#ifndef CONFIG_AUTO_IRQ_AFFINITY
+/*
+ * Generic version of the affinity autoselector.
+ */
+int irq_select_affinity(unsigned int irq)
+{
+ cpumask_t mask;
+
+ if (!irq_can_set_affinity(irq))
+ return 0;
+
+ cpus_and(mask, cpu_online_map, irq_default_affinity);
+
+ irq_desc[irq].affinity = mask;
+ irq_desc[irq].chip->set_affinity(irq, mask);
+
+ set_balance_irq_affinity(irq, mask);
+ return 0;
+}
+#endif
+
#endif
/**
@@ -354,7 +377,7 @@ int setup_irq(unsigned int irq, struct irqaction *new)
/* Setup the type (level, edge polarity) if configured: */
if (new->flags & IRQF_TRIGGER_MASK) {
- if (desc->chip && desc->chip->set_type)
+ if (desc->chip->set_type)
desc->chip->set_type(irq,
new->flags & IRQF_TRIGGER_MASK);
else
@@ -364,8 +387,7 @@ int setup_irq(unsigned int irq, struct irqaction *new)
*/
printk(KERN_WARNING "No IRQF_TRIGGER set_type "
"function for IRQ %d (%s)\n", irq,
- desc->chip ? desc->chip->name :
- "unknown");
+ desc->chip->name);
} else
compat_irq_chip_set_default_handler(desc);
@@ -382,6 +404,9 @@ int setup_irq(unsigned int irq, struct irqaction *new)
} else
/* Undo nested disables: */
desc->depth = 1;
+
+ /* Set default affinity mask once everything is setup */
+ irq_select_affinity(irq);
}
/* Reset broken irq detection when installing new handler */
desc->irq_count = 0;
@@ -571,8 +596,6 @@ int request_irq(unsigned int irq, irq_handler_t handler,
action->next = NULL;
action->dev_id = dev_id;
- select_smp_affinity(irq);
-
#ifdef CONFIG_DEBUG_SHIRQ
if (irqflags & IRQF_SHARED) {
/*
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index c2f2ccb0549a..6c6d35d68ee9 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -44,7 +44,7 @@ static int irq_affinity_write_proc(struct file *file, const char __user *buffer,
unsigned long count, void *data)
{
unsigned int irq = (int)(long)data, full_count = count, err;
- cpumask_t new_value, tmp;
+ cpumask_t new_value;
if (!irq_desc[irq].chip->set_affinity || no_irq_affinity ||
irq_balancing_disabled(irq))
@@ -62,17 +62,51 @@ static int irq_affinity_write_proc(struct file *file, const char __user *buffer,
* way to make the system unusable accidentally :-) At least
* one online CPU still has to be targeted.
*/
- cpus_and(tmp, new_value, cpu_online_map);
- if (cpus_empty(tmp))
+ if (!cpus_intersects(new_value, cpu_online_map))
/* Special case for empty set - allow the architecture
code to set default SMP affinity. */
- return select_smp_affinity(irq) ? -EINVAL : full_count;
+ return irq_select_affinity(irq) ? -EINVAL : full_count;
irq_set_affinity(irq, new_value);
return full_count;
}
+static int default_affinity_read(char *page, char **start, off_t off,
+ int count, int *eof, void *data)
+{
+ int len = cpumask_scnprintf(page, count, irq_default_affinity);
+ if (count - len < 2)
+ return -EINVAL;
+ len += sprintf(page + len, "\n");
+ return len;
+}
+
+static int default_affinity_write(struct file *file, const char __user *buffer,
+ unsigned long count, void *data)
+{
+ unsigned int full_count = count, err;
+ cpumask_t new_value;
+
+ err = cpumask_parse_user(buffer, count, new_value);
+ if (err)
+ return err;
+
+ if (!is_affinity_mask_valid(new_value))
+ return -EINVAL;
+
+ /*
+ * Do not allow disabling IRQs completely - it's a too easy
+ * way to make the system unusable accidentally :-) At least
+ * one online CPU still has to be targeted.
+ */
+ if (!cpus_intersects(new_value, cpu_online_map))
+ return -EINVAL;
+
+ irq_default_affinity = new_value;
+
+ return full_count;
+}
#endif
static int irq_spurious_read(char *page, char **start, off_t off,
@@ -171,6 +205,21 @@ void unregister_handler_proc(unsigned int irq, struct irqaction *action)
remove_proc_entry(action->dir->name, irq_desc[irq].dir);
}
+void register_default_affinity_proc(void)
+{
+#ifdef CONFIG_SMP
+ struct proc_dir_entry *entry;
+
+ /* create /proc/irq/default_smp_affinity */
+ entry = create_proc_entry("default_smp_affinity", 0600, root_irq_dir);
+ if (entry) {
+ entry->data = NULL;
+ entry->read_proc = default_affinity_read;
+ entry->write_proc = default_affinity_write;
+ }
+#endif
+}
+
void init_irq_proc(void)
{
int i;
@@ -180,6 +229,8 @@ void init_irq_proc(void)
if (!root_irq_dir)
return;
+ register_default_affinity_proc();
+
/*
* Create entries for all existing IRQs.
*/
diff --git a/kernel/pid.c b/kernel/pid.c
index 20d59fa2d493..30bd5d4b2ac7 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -30,6 +30,7 @@
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/init.h>
+#include <linux/rculist.h>
#include <linux/bootmem.h>
#include <linux/hash.h>
#include <linux/pid_namespace.h>
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index f1525ad06cb3..c42a03aef36f 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -1037,6 +1037,9 @@ static void check_thread_timers(struct task_struct *tsk,
sig->rlim[RLIMIT_RTTIME].rlim_cur +=
USEC_PER_SEC;
}
+ printk(KERN_INFO
+ "RT Watchdog Timeout: %s[%d]\n",
+ tsk->comm, task_pid_nr(tsk));
__group_send_sig_info(SIGXCPU, SEND_SIG_PRIV, tsk);
}
}
diff --git a/kernel/profile.c b/kernel/profile.c
index ae7ead82cbc9..58926411eb2a 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -252,7 +252,7 @@ static void profile_flip_buffers(void)
mutex_lock(&profile_flip_mutex);
j = per_cpu(cpu_profile_flip, get_cpu());
put_cpu();
- on_each_cpu(__profile_flip_buffers, NULL, 0, 1);
+ on_each_cpu(__profile_flip_buffers, NULL, 1);
for_each_online_cpu(cpu) {
struct profile_hit *hits = per_cpu(cpu_profile_hits, cpu)[j];
for (i = 0; i < NR_PROFILE_HIT; ++i) {
@@ -275,7 +275,7 @@ static void profile_discard_flip_buffers(void)
mutex_lock(&profile_flip_mutex);
i = per_cpu(cpu_profile_flip, get_cpu());
put_cpu();
- on_each_cpu(__profile_flip_buffers, NULL, 0, 1);
+ on_each_cpu(__profile_flip_buffers, NULL, 1);
for_each_online_cpu(cpu) {
struct profile_hit *hits = per_cpu(cpu_profile_hits, cpu)[i];
memset(hits, 0, NR_PROFILE_HIT*sizeof(struct profile_hit));
@@ -558,7 +558,7 @@ static int __init create_hash_tables(void)
out_cleanup:
prof_on = 0;
smp_mb();
- on_each_cpu(profile_nop, NULL, 0, 1);
+ on_each_cpu(profile_nop, NULL, 1);
for_each_online_cpu(cpu) {
struct page *page;
diff --git a/kernel/rcuclassic.c b/kernel/rcuclassic.c
index 65c0906080ef..16eeeaa9d618 100644
--- a/kernel/rcuclassic.c
+++ b/kernel/rcuclassic.c
@@ -387,6 +387,10 @@ static void __rcu_offline_cpu(struct rcu_data *this_rdp,
rcu_move_batch(this_rdp, rdp->donelist, rdp->donetail);
rcu_move_batch(this_rdp, rdp->curlist, rdp->curtail);
rcu_move_batch(this_rdp, rdp->nxtlist, rdp->nxttail);
+
+ local_irq_disable();
+ this_rdp->qlen += rdp->qlen;
+ local_irq_enable();
}
static void rcu_offline_cpu(int cpu)
@@ -516,10 +520,38 @@ void rcu_check_callbacks(int cpu, int user)
if (user ||
(idle_cpu(cpu) && !in_softirq() &&
hardirq_count() <= (1 << HARDIRQ_SHIFT))) {
+
+ /*
+ * Get here if this CPU took its interrupt from user
+ * mode or from the idle loop, and if this is not a
+ * nested interrupt. In this case, the CPU is in
+ * a quiescent state, so count it.
+ *
+ * Also do a memory barrier. This is needed to handle
+ * the case where writes from a preempt-disable section
+ * of code get reordered into schedule() by this CPU's
+ * write buffer. The memory barrier makes sure that
+ * the rcu_qsctr_inc() and rcu_bh_qsctr_inc() are see
+ * by other CPUs to happen after any such write.
+ */
+
+ smp_mb(); /* See above block comment. */
rcu_qsctr_inc(cpu);
rcu_bh_qsctr_inc(cpu);
- } else if (!in_softirq())
+
+ } else if (!in_softirq()) {
+
+ /*
+ * Get here if this CPU did not take its interrupt from
+ * softirq, in other words, if it is not interrupting
+ * a rcu_bh read-side critical section. This is an _bh
+ * critical section, so count it. The memory barrier
+ * is needed for the same reason as is the above one.
+ */
+
+ smp_mb(); /* See above block comment. */
rcu_bh_qsctr_inc(cpu);
+ }
raise_rcu_softirq();
}
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index c09605f8d16c..f14f372cf6f5 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -39,16 +39,16 @@
#include <linux/sched.h>
#include <asm/atomic.h>
#include <linux/bitops.h>
-#include <linux/completion.h>
#include <linux/percpu.h>
#include <linux/notifier.h>
#include <linux/cpu.h>
#include <linux/mutex.h>
#include <linux/module.h>
-struct rcu_synchronize {
- struct rcu_head head;
- struct completion completion;
+enum rcu_barrier {
+ RCU_BARRIER_STD,
+ RCU_BARRIER_BH,
+ RCU_BARRIER_SCHED,
};
static DEFINE_PER_CPU(struct rcu_head, rcu_barrier_head) = {NULL};
@@ -60,7 +60,7 @@ static struct completion rcu_barrier_completion;
* Awaken the corresponding synchronize_rcu() instance now that a
* grace period has elapsed.
*/
-static void wakeme_after_rcu(struct rcu_head *head)
+void wakeme_after_rcu(struct rcu_head *head)
{
struct rcu_synchronize *rcu;
@@ -77,17 +77,7 @@ static void wakeme_after_rcu(struct rcu_head *head)
* sections are delimited by rcu_read_lock() and rcu_read_unlock(),
* and may be nested.
*/
-void synchronize_rcu(void)
-{
- struct rcu_synchronize rcu;
-
- init_completion(&rcu.completion);
- /* Will wake me after RCU finished */
- call_rcu(&rcu.head, wakeme_after_rcu);
-
- /* Wait for it */
- wait_for_completion(&rcu.completion);
-}
+synchronize_rcu_xxx(synchronize_rcu, call_rcu)
EXPORT_SYMBOL_GPL(synchronize_rcu);
static void rcu_barrier_callback(struct rcu_head *notused)
@@ -99,19 +89,30 @@ static void rcu_barrier_callback(struct rcu_head *notused)
/*
* Called with preemption disabled, and from cross-cpu IRQ context.
*/
-static void rcu_barrier_func(void *notused)
+static void rcu_barrier_func(void *type)
{
int cpu = smp_processor_id();
struct rcu_head *head = &per_cpu(rcu_barrier_head, cpu);
atomic_inc(&rcu_barrier_cpu_count);
- call_rcu(head, rcu_barrier_callback);
+ switch ((enum rcu_barrier)type) {
+ case RCU_BARRIER_STD:
+ call_rcu(head, rcu_barrier_callback);
+ break;
+ case RCU_BARRIER_BH:
+ call_rcu_bh(head, rcu_barrier_callback);
+ break;
+ case RCU_BARRIER_SCHED:
+ call_rcu_sched(head, rcu_barrier_callback);
+ break;
+ }
}
-/**
- * rcu_barrier - Wait until all the in-flight RCUs are complete.
+/*
+ * Orchestrate the specified type of RCU barrier, waiting for all
+ * RCU callbacks of the specified type to complete.
*/
-void rcu_barrier(void)
+static void _rcu_barrier(enum rcu_barrier type)
{
BUG_ON(in_interrupt());
/* Take cpucontrol mutex to protect against CPU hotplug */
@@ -127,13 +128,39 @@ void rcu_barrier(void)
* until all the callbacks are queued.
*/
rcu_read_lock();
- on_each_cpu(rcu_barrier_func, NULL, 0, 1);
+ on_each_cpu(rcu_barrier_func, (void *)type, 1);
rcu_read_unlock();
wait_for_completion(&rcu_barrier_completion);
mutex_unlock(&rcu_barrier_mutex);
}
+
+/**
+ * rcu_barrier - Wait until all in-flight call_rcu() callbacks complete.
+ */
+void rcu_barrier(void)
+{
+ _rcu_barrier(RCU_BARRIER_STD);
+}
EXPORT_SYMBOL_GPL(rcu_barrier);
+/**
+ * rcu_barrier_bh - Wait until all in-flight call_rcu_bh() callbacks complete.
+ */
+void rcu_barrier_bh(void)
+{
+ _rcu_barrier(RCU_BARRIER_BH);
+}
+EXPORT_SYMBOL_GPL(rcu_barrier_bh);
+
+/**
+ * rcu_barrier_sched - Wait for in-flight call_rcu_sched() callbacks.
+ */
+void rcu_barrier_sched(void)
+{
+ _rcu_barrier(RCU_BARRIER_SCHED);
+}
+EXPORT_SYMBOL_GPL(rcu_barrier_sched);
+
void __init rcu_init(void)
{
__rcu_init();
diff --git a/kernel/rcupreempt.c b/kernel/rcupreempt.c
index 9bf445664457..6f62b77d93c4 100644
--- a/kernel/rcupreempt.c
+++ b/kernel/rcupreempt.c
@@ -46,11 +46,11 @@
#include <asm/atomic.h>
#include <linux/bitops.h>
#include <linux/module.h>
+#include <linux/kthread.h>
#include <linux/completion.h>
#include <linux/moduleparam.h>
#include <linux/percpu.h>
#include <linux/notifier.h>
-#include <linux/rcupdate.h>
#include <linux/cpu.h>
#include <linux/random.h>
#include <linux/delay.h>
@@ -82,14 +82,18 @@ struct rcu_data {
spinlock_t lock; /* Protect rcu_data fields. */
long completed; /* Number of last completed batch. */
int waitlistcount;
- struct tasklet_struct rcu_tasklet;
struct rcu_head *nextlist;
struct rcu_head **nexttail;
struct rcu_head *waitlist[GP_STAGES];
struct rcu_head **waittail[GP_STAGES];
- struct rcu_head *donelist;
+ struct rcu_head *donelist; /* from waitlist & waitschedlist */
struct rcu_head **donetail;
long rcu_flipctr[2];
+ struct rcu_head *nextschedlist;
+ struct rcu_head **nextschedtail;
+ struct rcu_head *waitschedlist;
+ struct rcu_head **waitschedtail;
+ int rcu_sched_sleeping;
#ifdef CONFIG_RCU_TRACE
struct rcupreempt_trace trace;
#endif /* #ifdef CONFIG_RCU_TRACE */
@@ -131,11 +135,24 @@ enum rcu_try_flip_states {
rcu_try_flip_waitmb_state,
};
+/*
+ * States for rcu_ctrlblk.rcu_sched_sleep.
+ */
+
+enum rcu_sched_sleep_states {
+ rcu_sched_not_sleeping, /* Not sleeping, callbacks need GP. */
+ rcu_sched_sleep_prep, /* Thinking of sleeping, rechecking. */
+ rcu_sched_sleeping, /* Sleeping, awaken if GP needed. */
+};
+
struct rcu_ctrlblk {
spinlock_t fliplock; /* Protect state-machine transitions. */
long completed; /* Number of last completed batch. */
enum rcu_try_flip_states rcu_try_flip_state; /* The current state of
the rcu state machine */
+ spinlock_t schedlock; /* Protect rcu_sched sleep state. */
+ enum rcu_sched_sleep_states sched_sleep; /* rcu_sched state. */
+ wait_queue_head_t sched_wq; /* Place for rcu_sched to sleep. */
};
static DEFINE_PER_CPU(struct rcu_data, rcu_data);
@@ -143,8 +160,12 @@ static struct rcu_ctrlblk rcu_ctrlblk = {
.fliplock = __SPIN_LOCK_UNLOCKED(rcu_ctrlblk.fliplock),
.completed = 0,
.rcu_try_flip_state = rcu_try_flip_idle_state,
+ .schedlock = __SPIN_LOCK_UNLOCKED(rcu_ctrlblk.schedlock),
+ .sched_sleep = rcu_sched_not_sleeping,
+ .sched_wq = __WAIT_QUEUE_HEAD_INITIALIZER(rcu_ctrlblk.sched_wq),
};
+static struct task_struct *rcu_sched_grace_period_task;
#ifdef CONFIG_RCU_TRACE
static char *rcu_try_flip_state_names[] =
@@ -207,6 +228,8 @@ static DEFINE_PER_CPU_SHARED_ALIGNED(enum rcu_mb_flag_values, rcu_mb_flag)
*/
#define RCU_TRACE_RDP(f, rdp) RCU_TRACE(f, &((rdp)->trace));
+#define RCU_SCHED_BATCH_TIME (HZ / 50)
+
/*
* Return the number of RCU batches processed thus far. Useful
* for debug and statistics.
@@ -411,32 +434,34 @@ static void __rcu_advance_callbacks(struct rcu_data *rdp)
}
}
-#ifdef CONFIG_NO_HZ
+DEFINE_PER_CPU_SHARED_ALIGNED(struct rcu_dyntick_sched, rcu_dyntick_sched) = {
+ .dynticks = 1,
+};
-DEFINE_PER_CPU(long, dynticks_progress_counter) = 1;
-static DEFINE_PER_CPU(long, rcu_dyntick_snapshot);
+#ifdef CONFIG_NO_HZ
static DEFINE_PER_CPU(int, rcu_update_flag);
/**
* rcu_irq_enter - Called from Hard irq handlers and NMI/SMI.
*
* If the CPU was idle with dynamic ticks active, this updates the
- * dynticks_progress_counter to let the RCU handling know that the
+ * rcu_dyntick_sched.dynticks to let the RCU handling know that the
* CPU is active.
*/
void rcu_irq_enter(void)
{
int cpu = smp_processor_id();
+ struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu);
if (per_cpu(rcu_update_flag, cpu))
per_cpu(rcu_update_flag, cpu)++;
/*
* Only update if we are coming from a stopped ticks mode
- * (dynticks_progress_counter is even).
+ * (rcu_dyntick_sched.dynticks is even).
*/
if (!in_interrupt() &&
- (per_cpu(dynticks_progress_counter, cpu) & 0x1) == 0) {
+ (rdssp->dynticks & 0x1) == 0) {
/*
* The following might seem like we could have a race
* with NMI/SMIs. But this really isn't a problem.
@@ -459,12 +484,12 @@ void rcu_irq_enter(void)
* RCU read-side critical sections on this CPU would
* have already completed.
*/
- per_cpu(dynticks_progress_counter, cpu)++;
+ rdssp->dynticks++;
/*
* The following memory barrier ensures that any
* rcu_read_lock() primitives in the irq handler
* are seen by other CPUs to follow the above
- * increment to dynticks_progress_counter. This is
+ * increment to rcu_dyntick_sched.dynticks. This is
* required in order for other CPUs to correctly
* determine when it is safe to advance the RCU
* grace-period state machine.
@@ -472,7 +497,7 @@ void rcu_irq_enter(void)
smp_mb(); /* see above block comment. */
/*
* Since we can't determine the dynamic tick mode from
- * the dynticks_progress_counter after this routine,
+ * the rcu_dyntick_sched.dynticks after this routine,
* we use a second flag to acknowledge that we came
* from an idle state with ticks stopped.
*/
@@ -480,7 +505,7 @@ void rcu_irq_enter(void)
/*
* If we take an NMI/SMI now, they will also increment
* the rcu_update_flag, and will not update the
- * dynticks_progress_counter on exit. That is for
+ * rcu_dyntick_sched.dynticks on exit. That is for
* this IRQ to do.
*/
}
@@ -490,12 +515,13 @@ void rcu_irq_enter(void)
* rcu_irq_exit - Called from exiting Hard irq context.
*
* If the CPU was idle with dynamic ticks active, update the
- * dynticks_progress_counter to put let the RCU handling be
+ * rcu_dyntick_sched.dynticks to put let the RCU handling be
* aware that the CPU is going back to idle with no ticks.
*/
void rcu_irq_exit(void)
{
int cpu = smp_processor_id();
+ struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu);
/*
* rcu_update_flag is set if we interrupted the CPU
@@ -503,7 +529,7 @@ void rcu_irq_exit(void)
* Once this occurs, we keep track of interrupt nesting
* because a NMI/SMI could also come in, and we still
* only want the IRQ that started the increment of the
- * dynticks_progress_counter to be the one that modifies
+ * rcu_dyntick_sched.dynticks to be the one that modifies
* it on exit.
*/
if (per_cpu(rcu_update_flag, cpu)) {
@@ -515,28 +541,29 @@ void rcu_irq_exit(void)
/*
* If an NMI/SMI happens now we are still
- * protected by the dynticks_progress_counter being odd.
+ * protected by the rcu_dyntick_sched.dynticks being odd.
*/
/*
* The following memory barrier ensures that any
* rcu_read_unlock() primitives in the irq handler
* are seen by other CPUs to preceed the following
- * increment to dynticks_progress_counter. This
+ * increment to rcu_dyntick_sched.dynticks. This
* is required in order for other CPUs to determine
* when it is safe to advance the RCU grace-period
* state machine.
*/
smp_mb(); /* see above block comment. */
- per_cpu(dynticks_progress_counter, cpu)++;
- WARN_ON(per_cpu(dynticks_progress_counter, cpu) & 0x1);
+ rdssp->dynticks++;
+ WARN_ON(rdssp->dynticks & 0x1);
}
}
static void dyntick_save_progress_counter(int cpu)
{
- per_cpu(rcu_dyntick_snapshot, cpu) =
- per_cpu(dynticks_progress_counter, cpu);
+ struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu);
+
+ rdssp->dynticks_snap = rdssp->dynticks;
}
static inline int
@@ -544,9 +571,10 @@ rcu_try_flip_waitack_needed(int cpu)
{
long curr;
long snap;
+ struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu);
- curr = per_cpu(dynticks_progress_counter, cpu);
- snap = per_cpu(rcu_dyntick_snapshot, cpu);
+ curr = rdssp->dynticks;
+ snap = rdssp->dynticks_snap;
smp_mb(); /* force ordering with cpu entering/leaving dynticks. */
/*
@@ -567,7 +595,7 @@ rcu_try_flip_waitack_needed(int cpu)
* that this CPU already acknowledged the counter.
*/
- if ((curr - snap) > 2 || (snap & 0x1) == 0)
+ if ((curr - snap) > 2 || (curr & 0x1) == 0)
return 0;
/* We need this CPU to explicitly acknowledge the counter flip. */
@@ -580,9 +608,10 @@ rcu_try_flip_waitmb_needed(int cpu)
{
long curr;
long snap;
+ struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu);
- curr = per_cpu(dynticks_progress_counter, cpu);
- snap = per_cpu(rcu_dyntick_snapshot, cpu);
+ curr = rdssp->dynticks;
+ snap = rdssp->dynticks_snap;
smp_mb(); /* force ordering with cpu entering/leaving dynticks. */
/*
@@ -609,14 +638,86 @@ rcu_try_flip_waitmb_needed(int cpu)
return 1;
}
+static void dyntick_save_progress_counter_sched(int cpu)
+{
+ struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu);
+
+ rdssp->sched_dynticks_snap = rdssp->dynticks;
+}
+
+static int rcu_qsctr_inc_needed_dyntick(int cpu)
+{
+ long curr;
+ long snap;
+ struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu);
+
+ curr = rdssp->dynticks;
+ snap = rdssp->sched_dynticks_snap;
+ smp_mb(); /* force ordering with cpu entering/leaving dynticks. */
+
+ /*
+ * If the CPU remained in dynticks mode for the entire time
+ * and didn't take any interrupts, NMIs, SMIs, or whatever,
+ * then it cannot be in the middle of an rcu_read_lock(), so
+ * the next rcu_read_lock() it executes must use the new value
+ * of the counter. Therefore, this CPU has been in a quiescent
+ * state the entire time, and we don't need to wait for it.
+ */
+
+ if ((curr == snap) && ((curr & 0x1) == 0))
+ return 0;
+
+ /*
+ * If the CPU passed through or entered a dynticks idle phase with
+ * no active irq handlers, then, as above, this CPU has already
+ * passed through a quiescent state.
+ */
+
+ if ((curr - snap) > 2 || (snap & 0x1) == 0)
+ return 0;
+
+ /* We need this CPU to go through a quiescent state. */
+
+ return 1;
+}
+
#else /* !CONFIG_NO_HZ */
-# define dyntick_save_progress_counter(cpu) do { } while (0)
-# define rcu_try_flip_waitack_needed(cpu) (1)
-# define rcu_try_flip_waitmb_needed(cpu) (1)
+# define dyntick_save_progress_counter(cpu) do { } while (0)
+# define rcu_try_flip_waitack_needed(cpu) (1)
+# define rcu_try_flip_waitmb_needed(cpu) (1)
+
+# define dyntick_save_progress_counter_sched(cpu) do { } while (0)
+# define rcu_qsctr_inc_needed_dyntick(cpu) (1)
#endif /* CONFIG_NO_HZ */
+static void save_qsctr_sched(int cpu)
+{
+ struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu);
+
+ rdssp->sched_qs_snap = rdssp->sched_qs;
+}
+
+static inline int rcu_qsctr_inc_needed(int cpu)
+{
+ struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu);
+
+ /*
+ * If there has been a quiescent state, no more need to wait
+ * on this CPU.
+ */
+
+ if (rdssp->sched_qs != rdssp->sched_qs_snap) {
+ smp_mb(); /* force ordering with cpu entering schedule(). */
+ return 0;
+ }
+
+ /* We need this CPU to go through a quiescent state. */
+
+ return 1;
+}
+
/*
* Get here when RCU is idle. Decide whether we need to
* move out of idle state, and return non-zero if so.
@@ -819,6 +920,26 @@ void rcu_check_callbacks(int cpu, int user)
unsigned long flags;
struct rcu_data *rdp = RCU_DATA_CPU(cpu);
+ /*
+ * If this CPU took its interrupt from user mode or from the
+ * idle loop, and this is not a nested interrupt, then
+ * this CPU has to have exited all prior preept-disable
+ * sections of code. So increment the counter to note this.
+ *
+ * The memory barrier is needed to handle the case where
+ * writes from a preempt-disable section of code get reordered
+ * into schedule() by this CPU's write buffer. So the memory
+ * barrier makes sure that the rcu_qsctr_inc() is seen by other
+ * CPUs to happen after any such write.
+ */
+
+ if (user ||
+ (idle_cpu(cpu) && !in_softirq() &&
+ hardirq_count() <= (1 << HARDIRQ_SHIFT))) {
+ smp_mb(); /* Guard against aggressive schedule(). */
+ rcu_qsctr_inc(cpu);
+ }
+
rcu_check_mb(cpu);
if (rcu_ctrlblk.completed == rdp->completed)
rcu_try_flip();
@@ -869,6 +990,8 @@ void rcu_offline_cpu(int cpu)
struct rcu_head *list = NULL;
unsigned long flags;
struct rcu_data *rdp = RCU_DATA_CPU(cpu);
+ struct rcu_head *schedlist = NULL;
+ struct rcu_head **schedtail = &schedlist;
struct rcu_head **tail = &list;
/*
@@ -882,6 +1005,11 @@ void rcu_offline_cpu(int cpu)
rcu_offline_cpu_enqueue(rdp->waitlist[i], rdp->waittail[i],
list, tail);
rcu_offline_cpu_enqueue(rdp->nextlist, rdp->nexttail, list, tail);
+ rcu_offline_cpu_enqueue(rdp->waitschedlist, rdp->waitschedtail,
+ schedlist, schedtail);
+ rcu_offline_cpu_enqueue(rdp->nextschedlist, rdp->nextschedtail,
+ schedlist, schedtail);
+ rdp->rcu_sched_sleeping = 0;
spin_unlock_irqrestore(&rdp->lock, flags);
rdp->waitlistcount = 0;
@@ -916,12 +1044,15 @@ void rcu_offline_cpu(int cpu)
* fix.
*/
- local_irq_save(flags);
+ local_irq_save(flags); /* disable preempt till we know what lock. */
rdp = RCU_DATA_ME();
spin_lock(&rdp->lock);
*rdp->nexttail = list;
if (list)
rdp->nexttail = tail;
+ *rdp->nextschedtail = schedlist;
+ if (schedlist)
+ rdp->nextschedtail = schedtail;
spin_unlock_irqrestore(&rdp->lock, flags);
}
@@ -936,10 +1067,25 @@ void rcu_offline_cpu(int cpu)
void __cpuinit rcu_online_cpu(int cpu)
{
unsigned long flags;
+ struct rcu_data *rdp;
spin_lock_irqsave(&rcu_ctrlblk.fliplock, flags);
cpu_set(cpu, rcu_cpu_online_map);
spin_unlock_irqrestore(&rcu_ctrlblk.fliplock, flags);
+
+ /*
+ * The rcu_sched grace-period processing might have bypassed
+ * this CPU, given that it was not in the rcu_cpu_online_map
+ * when the grace-period scan started. This means that the
+ * grace-period task might sleep. So make sure that if this
+ * should happen, the first callback posted to this CPU will
+ * wake up the grace-period task if need be.
+ */
+
+ rdp = RCU_DATA_CPU(cpu);
+ spin_lock_irqsave(&rdp->lock, flags);
+ rdp->rcu_sched_sleeping = 1;
+ spin_unlock_irqrestore(&rdp->lock, flags);
}
static void rcu_process_callbacks(struct softirq_action *unused)
@@ -982,31 +1128,196 @@ void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
*rdp->nexttail = head;
rdp->nexttail = &head->next;
RCU_TRACE_RDP(rcupreempt_trace_next_add, rdp);
- spin_unlock(&rdp->lock);
- local_irq_restore(flags);
+ spin_unlock_irqrestore(&rdp->lock, flags);
}
EXPORT_SYMBOL_GPL(call_rcu);
+void call_rcu_sched(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
+{
+ unsigned long flags;
+ struct rcu_data *rdp;
+ int wake_gp = 0;
+
+ head->func = func;
+ head->next = NULL;
+ local_irq_save(flags);
+ rdp = RCU_DATA_ME();
+ spin_lock(&rdp->lock);
+ *rdp->nextschedtail = head;
+ rdp->nextschedtail = &head->next;
+ if (rdp->rcu_sched_sleeping) {
+
+ /* Grace-period processing might be sleeping... */
+
+ rdp->rcu_sched_sleeping = 0;
+ wake_gp = 1;
+ }
+ spin_unlock_irqrestore(&rdp->lock, flags);
+ if (wake_gp) {
+
+ /* Wake up grace-period processing, unless someone beat us. */
+
+ spin_lock_irqsave(&rcu_ctrlblk.schedlock, flags);
+ if (rcu_ctrlblk.sched_sleep != rcu_sched_sleeping)
+ wake_gp = 0;
+ rcu_ctrlblk.sched_sleep = rcu_sched_not_sleeping;
+ spin_unlock_irqrestore(&rcu_ctrlblk.schedlock, flags);
+ if (wake_gp)
+ wake_up_interruptible(&rcu_ctrlblk.sched_wq);
+ }
+}
+EXPORT_SYMBOL_GPL(call_rcu_sched);
+
/*
* Wait until all currently running preempt_disable() code segments
* (including hardware-irq-disable segments) complete. Note that
* in -rt this does -not- necessarily result in all currently executing
* interrupt -handlers- having completed.
*/
-void __synchronize_sched(void)
+synchronize_rcu_xxx(__synchronize_sched, call_rcu_sched)
+EXPORT_SYMBOL_GPL(__synchronize_sched);
+
+/*
+ * kthread function that manages call_rcu_sched grace periods.
+ */
+static int rcu_sched_grace_period(void *arg)
{
- cpumask_t oldmask;
+ int couldsleep; /* might sleep after current pass. */
+ int couldsleepnext = 0; /* might sleep after next pass. */
int cpu;
+ unsigned long flags;
+ struct rcu_data *rdp;
+ int ret;
- if (sched_getaffinity(0, &oldmask) < 0)
- oldmask = cpu_possible_map;
- for_each_online_cpu(cpu) {
- sched_setaffinity(0, &cpumask_of_cpu(cpu));
- schedule();
- }
- sched_setaffinity(0, &oldmask);
+ /*
+ * Each pass through the following loop handles one
+ * rcu_sched grace period cycle.
+ */
+ do {
+ /* Save each CPU's current state. */
+
+ for_each_online_cpu(cpu) {
+ dyntick_save_progress_counter_sched(cpu);
+ save_qsctr_sched(cpu);
+ }
+
+ /*
+ * Sleep for about an RCU grace-period's worth to
+ * allow better batching and to consume less CPU.
+ */
+ schedule_timeout_interruptible(RCU_SCHED_BATCH_TIME);
+
+ /*
+ * If there was nothing to do last time, prepare to
+ * sleep at the end of the current grace period cycle.
+ */
+ couldsleep = couldsleepnext;
+ couldsleepnext = 1;
+ if (couldsleep) {
+ spin_lock_irqsave(&rcu_ctrlblk.schedlock, flags);
+ rcu_ctrlblk.sched_sleep = rcu_sched_sleep_prep;
+ spin_unlock_irqrestore(&rcu_ctrlblk.schedlock, flags);
+ }
+
+ /*
+ * Wait on each CPU in turn to have either visited
+ * a quiescent state or been in dynticks-idle mode.
+ */
+ for_each_online_cpu(cpu) {
+ while (rcu_qsctr_inc_needed(cpu) &&
+ rcu_qsctr_inc_needed_dyntick(cpu)) {
+ /* resched_cpu(cpu); @@@ */
+ schedule_timeout_interruptible(1);
+ }
+ }
+
+ /* Advance callbacks for each CPU. */
+
+ for_each_online_cpu(cpu) {
+
+ rdp = RCU_DATA_CPU(cpu);
+ spin_lock_irqsave(&rdp->lock, flags);
+
+ /*
+ * We are running on this CPU irq-disabled, so no
+ * CPU can go offline until we re-enable irqs.
+ * The current CPU might have already gone
+ * offline (between the for_each_offline_cpu and
+ * the spin_lock_irqsave), but in that case all its
+ * callback lists will be empty, so no harm done.
+ *
+ * Advance the callbacks! We share normal RCU's
+ * donelist, since callbacks are invoked the
+ * same way in either case.
+ */
+ if (rdp->waitschedlist != NULL) {
+ *rdp->donetail = rdp->waitschedlist;
+ rdp->donetail = rdp->waitschedtail;
+
+ /*
+ * Next rcu_check_callbacks() will
+ * do the required raise_softirq().
+ */
+ }
+ if (rdp->nextschedlist != NULL) {
+ rdp->waitschedlist = rdp->nextschedlist;
+ rdp->waitschedtail = rdp->nextschedtail;
+ couldsleep = 0;
+ couldsleepnext = 0;
+ } else {
+ rdp->waitschedlist = NULL;
+ rdp->waitschedtail = &rdp->waitschedlist;
+ }
+ rdp->nextschedlist = NULL;
+ rdp->nextschedtail = &rdp->nextschedlist;
+
+ /* Mark sleep intention. */
+
+ rdp->rcu_sched_sleeping = couldsleep;
+
+ spin_unlock_irqrestore(&rdp->lock, flags);
+ }
+
+ /* If we saw callbacks on the last scan, go deal with them. */
+
+ if (!couldsleep)
+ continue;
+
+ /* Attempt to block... */
+
+ spin_lock_irqsave(&rcu_ctrlblk.schedlock, flags);
+ if (rcu_ctrlblk.sched_sleep != rcu_sched_sleep_prep) {
+
+ /*
+ * Someone posted a callback after we scanned.
+ * Go take care of it.
+ */
+ spin_unlock_irqrestore(&rcu_ctrlblk.schedlock, flags);
+ couldsleepnext = 0;
+ continue;
+ }
+
+ /* Block until the next person posts a callback. */
+
+ rcu_ctrlblk.sched_sleep = rcu_sched_sleeping;
+ spin_unlock_irqrestore(&rcu_ctrlblk.schedlock, flags);
+ ret = 0;
+ __wait_event_interruptible(rcu_ctrlblk.sched_wq,
+ rcu_ctrlblk.sched_sleep != rcu_sched_sleeping,
+ ret);
+
+ /*
+ * Signals would prevent us from sleeping, and we cannot
+ * do much with them in any case. So flush them.
+ */
+ if (ret)
+ flush_signals(current);
+ couldsleepnext = 0;
+
+ } while (!kthread_should_stop());
+
+ return (0);
}
-EXPORT_SYMBOL_GPL(__synchronize_sched);
/*
* Check to see if any future RCU-related work will need to be done
@@ -1023,7 +1334,9 @@ int rcu_needs_cpu(int cpu)
return (rdp->donelist != NULL ||
!!rdp->waitlistcount ||
- rdp->nextlist != NULL);
+ rdp->nextlist != NULL ||
+ rdp->nextschedlist != NULL ||
+ rdp->waitschedlist != NULL);
}
int rcu_pending(int cpu)
@@ -1034,7 +1347,9 @@ int rcu_pending(int cpu)
if (rdp->donelist != NULL ||
!!rdp->waitlistcount ||
- rdp->nextlist != NULL)
+ rdp->nextlist != NULL ||
+ rdp->nextschedlist != NULL ||
+ rdp->waitschedlist != NULL)
return 1;
/* The RCU core needs an acknowledgement from this CPU. */
@@ -1101,6 +1416,11 @@ void __init __rcu_init(void)
rdp->donetail = &rdp->donelist;
rdp->rcu_flipctr[0] = 0;
rdp->rcu_flipctr[1] = 0;
+ rdp->nextschedlist = NULL;
+ rdp->nextschedtail = &rdp->nextschedlist;
+ rdp->waitschedlist = NULL;
+ rdp->waitschedtail = &rdp->waitschedlist;
+ rdp->rcu_sched_sleeping = 0;
}
register_cpu_notifier(&rcu_nb);
@@ -1123,11 +1443,15 @@ void __init __rcu_init(void)
}
/*
- * Deprecated, use synchronize_rcu() or synchronize_sched() instead.
+ * Late-boot-time RCU initialization that must wait until after scheduler
+ * has been initialized.
*/
-void synchronize_kernel(void)
+void __init rcu_init_sched(void)
{
- synchronize_rcu();
+ rcu_sched_grace_period_task = kthread_run(rcu_sched_grace_period,
+ NULL,
+ "rcu_sched_grace_period");
+ WARN_ON(IS_ERR(rcu_sched_grace_period_task));
}
#ifdef CONFIG_RCU_TRACE
diff --git a/kernel/rcupreempt_trace.c b/kernel/rcupreempt_trace.c
index 49ac4947af24..5edf82c34bbc 100644
--- a/kernel/rcupreempt_trace.c
+++ b/kernel/rcupreempt_trace.c
@@ -38,7 +38,6 @@
#include <linux/moduleparam.h>
#include <linux/percpu.h>
#include <linux/notifier.h>
-#include <linux/rcupdate.h>
#include <linux/cpu.h>
#include <linux/mutex.h>
#include <linux/rcupreempt_trace.h>
diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c
index 33acc424667e..90b5b123f7a1 100644
--- a/kernel/rcutorture.c
+++ b/kernel/rcutorture.c
@@ -57,7 +57,9 @@ static int stat_interval; /* Interval between stats, in seconds. */
/* Defaults to "only at end of test". */
static int verbose; /* Print more debug info. */
static int test_no_idle_hz; /* Test RCU's support for tickless idle CPUs. */
-static int shuffle_interval = 5; /* Interval between shuffles (in sec)*/
+static int shuffle_interval = 3; /* Interval between shuffles (in sec)*/
+static int stutter = 5; /* Start/stop testing interval (in sec) */
+static int irqreader = 1; /* RCU readers from irq (timers). */
static char *torture_type = "rcu"; /* What RCU implementation to torture. */
module_param(nreaders, int, 0444);
@@ -72,6 +74,10 @@ module_param(test_no_idle_hz, bool, 0444);
MODULE_PARM_DESC(test_no_idle_hz, "Test support for tickless idle CPUs");
module_param(shuffle_interval, int, 0444);
MODULE_PARM_DESC(shuffle_interval, "Number of seconds between shuffles");
+module_param(stutter, int, 0444);
+MODULE_PARM_DESC(stutter, "Number of seconds to run/halt test");
+module_param(irqreader, int, 0444);
+MODULE_PARM_DESC(irqreader, "Allow RCU readers from irq handlers");
module_param(torture_type, charp, 0444);
MODULE_PARM_DESC(torture_type, "Type of RCU to torture (rcu, rcu_bh, srcu)");
@@ -91,6 +97,7 @@ static struct task_struct **fakewriter_tasks;
static struct task_struct **reader_tasks;
static struct task_struct *stats_task;
static struct task_struct *shuffler_task;
+static struct task_struct *stutter_task;
#define RCU_TORTURE_PIPE_LEN 10
@@ -117,8 +124,18 @@ static atomic_t n_rcu_torture_alloc_fail;
static atomic_t n_rcu_torture_free;
static atomic_t n_rcu_torture_mberror;
static atomic_t n_rcu_torture_error;
+static long n_rcu_torture_timers = 0;
static struct list_head rcu_torture_removed;
+static int stutter_pause_test = 0;
+
+#if defined(MODULE) || defined(CONFIG_RCU_TORTURE_TEST_RUNNABLE)
+#define RCUTORTURE_RUNNABLE_INIT 1
+#else
+#define RCUTORTURE_RUNNABLE_INIT 0
+#endif
+int rcutorture_runnable = RCUTORTURE_RUNNABLE_INIT;
+
/*
* Allocate an element from the rcu_tortures pool.
*/
@@ -179,6 +196,16 @@ rcu_random(struct rcu_random_state *rrsp)
return swahw32(rrsp->rrs_state);
}
+static void
+rcu_stutter_wait(void)
+{
+ while (stutter_pause_test || !rcutorture_runnable)
+ if (rcutorture_runnable)
+ schedule_timeout_interruptible(1);
+ else
+ schedule_timeout_interruptible(round_jiffies_relative(HZ));
+}
+
/*
* Operations vector for selecting different types of tests.
*/
@@ -192,7 +219,9 @@ struct rcu_torture_ops {
int (*completed)(void);
void (*deferredfree)(struct rcu_torture *p);
void (*sync)(void);
+ void (*cb_barrier)(void);
int (*stats)(char *page);
+ int irqcapable;
char *name;
};
static struct rcu_torture_ops *cur_ops = NULL;
@@ -265,7 +294,9 @@ static struct rcu_torture_ops rcu_ops = {
.completed = rcu_torture_completed,
.deferredfree = rcu_torture_deferred_free,
.sync = synchronize_rcu,
+ .cb_barrier = rcu_barrier,
.stats = NULL,
+ .irqcapable = 1,
.name = "rcu"
};
@@ -304,7 +335,9 @@ static struct rcu_torture_ops rcu_sync_ops = {
.completed = rcu_torture_completed,
.deferredfree = rcu_sync_torture_deferred_free,
.sync = synchronize_rcu,
+ .cb_barrier = NULL,
.stats = NULL,
+ .irqcapable = 1,
.name = "rcu_sync"
};
@@ -364,7 +397,9 @@ static struct rcu_torture_ops rcu_bh_ops = {
.completed = rcu_bh_torture_completed,
.deferredfree = rcu_bh_torture_deferred_free,
.sync = rcu_bh_torture_synchronize,
+ .cb_barrier = rcu_barrier_bh,
.stats = NULL,
+ .irqcapable = 1,
.name = "rcu_bh"
};
@@ -377,7 +412,9 @@ static struct rcu_torture_ops rcu_bh_sync_ops = {
.completed = rcu_bh_torture_completed,
.deferredfree = rcu_sync_torture_deferred_free,
.sync = rcu_bh_torture_synchronize,
+ .cb_barrier = NULL,
.stats = NULL,
+ .irqcapable = 1,
.name = "rcu_bh_sync"
};
@@ -458,6 +495,7 @@ static struct rcu_torture_ops srcu_ops = {
.completed = srcu_torture_completed,
.deferredfree = rcu_sync_torture_deferred_free,
.sync = srcu_torture_synchronize,
+ .cb_barrier = NULL,
.stats = srcu_torture_stats,
.name = "srcu"
};
@@ -482,6 +520,11 @@ static int sched_torture_completed(void)
return 0;
}
+static void rcu_sched_torture_deferred_free(struct rcu_torture *p)
+{
+ call_rcu_sched(&p->rtort_rcu, rcu_torture_cb);
+}
+
static void sched_torture_synchronize(void)
{
synchronize_sched();
@@ -494,12 +537,28 @@ static struct rcu_torture_ops sched_ops = {
.readdelay = rcu_read_delay, /* just reuse rcu's version. */
.readunlock = sched_torture_read_unlock,
.completed = sched_torture_completed,
- .deferredfree = rcu_sync_torture_deferred_free,
+ .deferredfree = rcu_sched_torture_deferred_free,
.sync = sched_torture_synchronize,
+ .cb_barrier = rcu_barrier_sched,
.stats = NULL,
+ .irqcapable = 1,
.name = "sched"
};
+static struct rcu_torture_ops sched_ops_sync = {
+ .init = rcu_sync_torture_init,
+ .cleanup = NULL,
+ .readlock = sched_torture_read_lock,
+ .readdelay = rcu_read_delay, /* just reuse rcu's version. */
+ .readunlock = sched_torture_read_unlock,
+ .completed = sched_torture_completed,
+ .deferredfree = rcu_sync_torture_deferred_free,
+ .sync = sched_torture_synchronize,
+ .cb_barrier = NULL,
+ .stats = NULL,
+ .name = "sched_sync"
+};
+
/*
* RCU torture writer kthread. Repeatedly substitutes a new structure
* for that pointed to by rcu_torture_current, freeing the old structure
@@ -537,6 +596,7 @@ rcu_torture_writer(void *arg)
}
rcu_torture_current_version++;
oldbatch = cur_ops->completed();
+ rcu_stutter_wait();
} while (!kthread_should_stop() && !fullstop);
VERBOSE_PRINTK_STRING("rcu_torture_writer task stopping");
while (!kthread_should_stop())
@@ -560,6 +620,7 @@ rcu_torture_fakewriter(void *arg)
schedule_timeout_uninterruptible(1 + rcu_random(&rand)%10);
udelay(rcu_random(&rand) & 0x3ff);
cur_ops->sync();
+ rcu_stutter_wait();
} while (!kthread_should_stop() && !fullstop);
VERBOSE_PRINTK_STRING("rcu_torture_fakewriter task stopping");
@@ -569,6 +630,52 @@ rcu_torture_fakewriter(void *arg)
}
/*
+ * RCU torture reader from timer handler. Dereferences rcu_torture_current,
+ * incrementing the corresponding element of the pipeline array. The
+ * counter in the element should never be greater than 1, otherwise, the
+ * RCU implementation is broken.
+ */
+static void rcu_torture_timer(unsigned long unused)
+{
+ int idx;
+ int completed;
+ static DEFINE_RCU_RANDOM(rand);
+ static DEFINE_SPINLOCK(rand_lock);
+ struct rcu_torture *p;
+ int pipe_count;
+
+ idx = cur_ops->readlock();
+ completed = cur_ops->completed();
+ p = rcu_dereference(rcu_torture_current);
+ if (p == NULL) {
+ /* Leave because rcu_torture_writer is not yet underway */
+ cur_ops->readunlock(idx);
+ return;
+ }
+ if (p->rtort_mbtest == 0)
+ atomic_inc(&n_rcu_torture_mberror);
+ spin_lock(&rand_lock);
+ cur_ops->readdelay(&rand);
+ n_rcu_torture_timers++;
+ spin_unlock(&rand_lock);
+ preempt_disable();
+ pipe_count = p->rtort_pipe_count;
+ if (pipe_count > RCU_TORTURE_PIPE_LEN) {
+ /* Should not happen, but... */
+ pipe_count = RCU_TORTURE_PIPE_LEN;
+ }
+ ++__get_cpu_var(rcu_torture_count)[pipe_count];
+ completed = cur_ops->completed() - completed;
+ if (completed > RCU_TORTURE_PIPE_LEN) {
+ /* Should not happen, but... */
+ completed = RCU_TORTURE_PIPE_LEN;
+ }
+ ++__get_cpu_var(rcu_torture_batch)[completed];
+ preempt_enable();
+ cur_ops->readunlock(idx);
+}
+
+/*
* RCU torture reader kthread. Repeatedly dereferences rcu_torture_current,
* incrementing the corresponding element of the pipeline array. The
* counter in the element should never be greater than 1, otherwise, the
@@ -582,11 +689,18 @@ rcu_torture_reader(void *arg)
DEFINE_RCU_RANDOM(rand);
struct rcu_torture *p;
int pipe_count;
+ struct timer_list t;
VERBOSE_PRINTK_STRING("rcu_torture_reader task started");
set_user_nice(current, 19);
+ if (irqreader && cur_ops->irqcapable)
+ setup_timer_on_stack(&t, rcu_torture_timer, 0);
do {
+ if (irqreader && cur_ops->irqcapable) {
+ if (!timer_pending(&t))
+ mod_timer(&t, 1);
+ }
idx = cur_ops->readlock();
completed = cur_ops->completed();
p = rcu_dereference(rcu_torture_current);
@@ -615,8 +729,11 @@ rcu_torture_reader(void *arg)
preempt_enable();
cur_ops->readunlock(idx);
schedule();
+ rcu_stutter_wait();
} while (!kthread_should_stop() && !fullstop);
VERBOSE_PRINTK_STRING("rcu_torture_reader task stopping");
+ if (irqreader && cur_ops->irqcapable)
+ del_timer_sync(&t);
while (!kthread_should_stop())
schedule_timeout_uninterruptible(1);
return 0;
@@ -647,20 +764,22 @@ rcu_torture_printk(char *page)
cnt += sprintf(&page[cnt], "%s%s ", torture_type, TORTURE_FLAG);
cnt += sprintf(&page[cnt],
"rtc: %p ver: %ld tfle: %d rta: %d rtaf: %d rtf: %d "
- "rtmbe: %d",
+ "rtmbe: %d nt: %ld",
rcu_torture_current,
rcu_torture_current_version,
list_empty(&rcu_torture_freelist),
atomic_read(&n_rcu_torture_alloc),
atomic_read(&n_rcu_torture_alloc_fail),
atomic_read(&n_rcu_torture_free),
- atomic_read(&n_rcu_torture_mberror));
+ atomic_read(&n_rcu_torture_mberror),
+ n_rcu_torture_timers);
if (atomic_read(&n_rcu_torture_mberror) != 0)
cnt += sprintf(&page[cnt], " !!!");
cnt += sprintf(&page[cnt], "\n%s%s ", torture_type, TORTURE_FLAG);
if (i > 1) {
cnt += sprintf(&page[cnt], "!!! ");
atomic_inc(&n_rcu_torture_error);
+ WARN_ON_ONCE(1);
}
cnt += sprintf(&page[cnt], "Reader Pipe: ");
for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++)
@@ -785,15 +904,34 @@ rcu_torture_shuffle(void *arg)
return 0;
}
+/* Cause the rcutorture test to "stutter", starting and stopping all
+ * threads periodically.
+ */
+static int
+rcu_torture_stutter(void *arg)
+{
+ VERBOSE_PRINTK_STRING("rcu_torture_stutter task started");
+ do {
+ schedule_timeout_interruptible(stutter * HZ);
+ stutter_pause_test = 1;
+ if (!kthread_should_stop())
+ schedule_timeout_interruptible(stutter * HZ);
+ stutter_pause_test = 0;
+ } while (!kthread_should_stop());
+ VERBOSE_PRINTK_STRING("rcu_torture_stutter task stopping");
+ return 0;
+}
+
static inline void
rcu_torture_print_module_parms(char *tag)
{
printk(KERN_ALERT "%s" TORTURE_FLAG
"--- %s: nreaders=%d nfakewriters=%d "
"stat_interval=%d verbose=%d test_no_idle_hz=%d "
- "shuffle_interval = %d\n",
+ "shuffle_interval=%d stutter=%d irqreader=%d\n",
torture_type, tag, nrealreaders, nfakewriters,
- stat_interval, verbose, test_no_idle_hz, shuffle_interval);
+ stat_interval, verbose, test_no_idle_hz, shuffle_interval,
+ stutter, irqreader);
}
static void
@@ -802,6 +940,11 @@ rcu_torture_cleanup(void)
int i;
fullstop = 1;
+ if (stutter_task) {
+ VERBOSE_PRINTK_STRING("Stopping rcu_torture_stutter task");
+ kthread_stop(stutter_task);
+ }
+ stutter_task = NULL;
if (shuffler_task) {
VERBOSE_PRINTK_STRING("Stopping rcu_torture_shuffle task");
kthread_stop(shuffler_task);
@@ -848,7 +991,9 @@ rcu_torture_cleanup(void)
stats_task = NULL;
/* Wait for all RCU callbacks to fire. */
- rcu_barrier();
+
+ if (cur_ops->cb_barrier != NULL)
+ cur_ops->cb_barrier();
rcu_torture_stats_print(); /* -After- the stats thread is stopped! */
@@ -868,7 +1013,7 @@ rcu_torture_init(void)
int firsterr = 0;
static struct rcu_torture_ops *torture_ops[] =
{ &rcu_ops, &rcu_sync_ops, &rcu_bh_ops, &rcu_bh_sync_ops,
- &srcu_ops, &sched_ops, };
+ &srcu_ops, &sched_ops, &sched_ops_sync, };
/* Process args and tell the world that the torturer is on the job. */
for (i = 0; i < ARRAY_SIZE(torture_ops); i++) {
@@ -988,6 +1133,19 @@ rcu_torture_init(void)
goto unwind;
}
}
+ if (stutter < 0)
+ stutter = 0;
+ if (stutter) {
+ /* Create the stutter thread */
+ stutter_task = kthread_run(rcu_torture_stutter, NULL,
+ "rcu_torture_stutter");
+ if (IS_ERR(stutter_task)) {
+ firsterr = PTR_ERR(stutter_task);
+ VERBOSE_PRINTK_ERRSTRING("Failed to create stutter");
+ stutter_task = NULL;
+ goto unwind;
+ }
+ }
return 0;
unwind:
diff --git a/kernel/smp.c b/kernel/smp.c
new file mode 100644
index 000000000000..462c785ca1ee
--- /dev/null
+++ b/kernel/smp.c
@@ -0,0 +1,383 @@
+/*
+ * Generic helpers for smp ipi calls
+ *
+ * (C) Jens Axboe <jens.axboe@oracle.com> 2008
+ *
+ */
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/percpu.h>
+#include <linux/rcupdate.h>
+#include <linux/rculist.h>
+#include <linux/smp.h>
+
+static DEFINE_PER_CPU(struct call_single_queue, call_single_queue);
+static LIST_HEAD(call_function_queue);
+__cacheline_aligned_in_smp DEFINE_SPINLOCK(call_function_lock);
+
+enum {
+ CSD_FLAG_WAIT = 0x01,
+ CSD_FLAG_ALLOC = 0x02,
+};
+
+struct call_function_data {
+ struct call_single_data csd;
+ spinlock_t lock;
+ unsigned int refs;
+ cpumask_t cpumask;
+ struct rcu_head rcu_head;
+};
+
+struct call_single_queue {
+ struct list_head list;
+ spinlock_t lock;
+};
+
+void __cpuinit init_call_single_data(void)
+{
+ int i;
+
+ for_each_possible_cpu(i) {
+ struct call_single_queue *q = &per_cpu(call_single_queue, i);
+
+ spin_lock_init(&q->lock);
+ INIT_LIST_HEAD(&q->list);
+ }
+}
+
+static void csd_flag_wait(struct call_single_data *data)
+{
+ /* Wait for response */
+ do {
+ /*
+ * We need to see the flags store in the IPI handler
+ */
+ smp_mb();
+ if (!(data->flags & CSD_FLAG_WAIT))
+ break;
+ cpu_relax();
+ } while (1);
+}
+
+/*
+ * Insert a previously allocated call_single_data element for execution
+ * on the given CPU. data must already have ->func, ->info, and ->flags set.
+ */
+static void generic_exec_single(int cpu, struct call_single_data *data)
+{
+ struct call_single_queue *dst = &per_cpu(call_single_queue, cpu);
+ int wait = data->flags & CSD_FLAG_WAIT, ipi;
+ unsigned long flags;
+
+ spin_lock_irqsave(&dst->lock, flags);
+ ipi = list_empty(&dst->list);
+ list_add_tail(&data->list, &dst->list);
+ spin_unlock_irqrestore(&dst->lock, flags);
+
+ if (ipi)
+ arch_send_call_function_single_ipi(cpu);
+
+ if (wait)
+ csd_flag_wait(data);
+}
+
+static void rcu_free_call_data(struct rcu_head *head)
+{
+ struct call_function_data *data;
+
+ data = container_of(head, struct call_function_data, rcu_head);
+
+ kfree(data);
+}
+
+/*
+ * Invoked by arch to handle an IPI for call function. Must be called with
+ * interrupts disabled.
+ */
+void generic_smp_call_function_interrupt(void)
+{
+ struct call_function_data *data;
+ int cpu = get_cpu();
+
+ /*
+ * It's ok to use list_for_each_rcu() here even though we may delete
+ * 'pos', since list_del_rcu() doesn't clear ->next
+ */
+ rcu_read_lock();
+ list_for_each_entry_rcu(data, &call_function_queue, csd.list) {
+ int refs;
+
+ if (!cpu_isset(cpu, data->cpumask))
+ continue;
+
+ data->csd.func(data->csd.info);
+
+ spin_lock(&data->lock);
+ cpu_clear(cpu, data->cpumask);
+ WARN_ON(data->refs == 0);
+ data->refs--;
+ refs = data->refs;
+ spin_unlock(&data->lock);
+
+ if (refs)
+ continue;
+
+ spin_lock(&call_function_lock);
+ list_del_rcu(&data->csd.list);
+ spin_unlock(&call_function_lock);
+
+ if (data->csd.flags & CSD_FLAG_WAIT) {
+ /*
+ * serialize stores to data with the flag clear
+ * and wakeup
+ */
+ smp_wmb();
+ data->csd.flags &= ~CSD_FLAG_WAIT;
+ } else
+ call_rcu(&data->rcu_head, rcu_free_call_data);
+ }
+ rcu_read_unlock();
+
+ put_cpu();
+}
+
+/*
+ * Invoked by arch to handle an IPI for call function single. Must be called
+ * from the arch with interrupts disabled.
+ */
+void generic_smp_call_function_single_interrupt(void)
+{
+ struct call_single_queue *q = &__get_cpu_var(call_single_queue);
+ LIST_HEAD(list);
+
+ /*
+ * Need to see other stores to list head for checking whether
+ * list is empty without holding q->lock
+ */
+ smp_mb();
+ while (!list_empty(&q->list)) {
+ unsigned int data_flags;
+
+ spin_lock(&q->lock);
+ list_replace_init(&q->list, &list);
+ spin_unlock(&q->lock);
+
+ while (!list_empty(&list)) {
+ struct call_single_data *data;
+
+ data = list_entry(list.next, struct call_single_data,
+ list);
+ list_del(&data->list);
+
+ /*
+ * 'data' can be invalid after this call if
+ * flags == 0 (when called through
+ * generic_exec_single(), so save them away before
+ * making the call.
+ */
+ data_flags = data->flags;
+
+ data->func(data->info);
+
+ if (data_flags & CSD_FLAG_WAIT) {
+ smp_wmb();
+ data->flags &= ~CSD_FLAG_WAIT;
+ } else if (data_flags & CSD_FLAG_ALLOC)
+ kfree(data);
+ }
+ /*
+ * See comment on outer loop
+ */
+ smp_mb();
+ }
+}
+
+/*
+ * smp_call_function_single - Run a function on a specific CPU
+ * @func: The function to run. This must be fast and non-blocking.
+ * @info: An arbitrary pointer to pass to the function.
+ * @wait: If true, wait until function has completed on other CPUs.
+ *
+ * Returns 0 on success, else a negative status code. Note that @wait
+ * will be implicitly turned on in case of allocation failures, since
+ * we fall back to on-stack allocation.
+ */
+int smp_call_function_single(int cpu, void (*func) (void *info), void *info,
+ int wait)
+{
+ struct call_single_data d;
+ unsigned long flags;
+ /* prevent preemption and reschedule on another processor */
+ int me = get_cpu();
+
+ /* Can deadlock when called with interrupts disabled */
+ WARN_ON(irqs_disabled());
+
+ if (cpu == me) {
+ local_irq_save(flags);
+ func(info);
+ local_irq_restore(flags);
+ } else {
+ struct call_single_data *data = NULL;
+
+ if (!wait) {
+ data = kmalloc(sizeof(*data), GFP_ATOMIC);
+ if (data)
+ data->flags = CSD_FLAG_ALLOC;
+ }
+ if (!data) {
+ data = &d;
+ data->flags = CSD_FLAG_WAIT;
+ }
+
+ data->func = func;
+ data->info = info;
+ generic_exec_single(cpu, data);
+ }
+
+ put_cpu();
+ return 0;
+}
+EXPORT_SYMBOL(smp_call_function_single);
+
+/**
+ * __smp_call_function_single(): Run a function on another CPU
+ * @cpu: The CPU to run on.
+ * @data: Pre-allocated and setup data structure
+ *
+ * Like smp_call_function_single(), but allow caller to pass in a pre-allocated
+ * data structure. Useful for embedding @data inside other structures, for
+ * instance.
+ *
+ */
+void __smp_call_function_single(int cpu, struct call_single_data *data)
+{
+ /* Can deadlock when called with interrupts disabled */
+ WARN_ON((data->flags & CSD_FLAG_WAIT) && irqs_disabled());
+
+ generic_exec_single(cpu, data);
+}
+
+/**
+ * smp_call_function_mask(): Run a function on a set of other CPUs.
+ * @mask: The set of cpus to run on.
+ * @func: The function to run. This must be fast and non-blocking.
+ * @info: An arbitrary pointer to pass to the function.
+ * @wait: If true, wait (atomically) until function has completed on other CPUs.
+ *
+ * Returns 0 on success, else a negative status code.
+ *
+ * If @wait is true, then returns once @func has returned. Note that @wait
+ * will be implicitly turned on in case of allocation failures, since
+ * we fall back to on-stack allocation.
+ *
+ * You must not call this function with disabled interrupts or from a
+ * hardware interrupt handler or from a bottom half handler. Preemption
+ * must be disabled when calling this function.
+ */
+int smp_call_function_mask(cpumask_t mask, void (*func)(void *), void *info,
+ int wait)
+{
+ struct call_function_data d;
+ struct call_function_data *data = NULL;
+ cpumask_t allbutself;
+ unsigned long flags;
+ int cpu, num_cpus;
+
+ /* Can deadlock when called with interrupts disabled */
+ WARN_ON(irqs_disabled());
+
+ cpu = smp_processor_id();
+ allbutself = cpu_online_map;
+ cpu_clear(cpu, allbutself);
+ cpus_and(mask, mask, allbutself);
+ num_cpus = cpus_weight(mask);
+
+ /*
+ * If zero CPUs, return. If just a single CPU, turn this request
+ * into a targetted single call instead since it's faster.
+ */
+ if (!num_cpus)
+ return 0;
+ else if (num_cpus == 1) {
+ cpu = first_cpu(mask);
+ return smp_call_function_single(cpu, func, info, wait);
+ }
+
+ if (!wait) {
+ data = kmalloc(sizeof(*data), GFP_ATOMIC);
+ if (data)
+ data->csd.flags = CSD_FLAG_ALLOC;
+ }
+ if (!data) {
+ data = &d;
+ data->csd.flags = CSD_FLAG_WAIT;
+ wait = 1;
+ }
+
+ spin_lock_init(&data->lock);
+ data->csd.func = func;
+ data->csd.info = info;
+ data->refs = num_cpus;
+ data->cpumask = mask;
+
+ spin_lock_irqsave(&call_function_lock, flags);
+ list_add_tail_rcu(&data->csd.list, &call_function_queue);
+ spin_unlock_irqrestore(&call_function_lock, flags);
+
+ /* Send a message to all CPUs in the map */
+ arch_send_call_function_ipi(mask);
+
+ /* optionally wait for the CPUs to complete */
+ if (wait)
+ csd_flag_wait(&data->csd);
+
+ return 0;
+}
+EXPORT_SYMBOL(smp_call_function_mask);
+
+/**
+ * smp_call_function(): Run a function on all other CPUs.
+ * @func: The function to run. This must be fast and non-blocking.
+ * @info: An arbitrary pointer to pass to the function.
+ * @wait: If true, wait (atomically) until function has completed on other CPUs.
+ *
+ * Returns 0 on success, else a negative status code.
+ *
+ * If @wait is true, then returns once @func has returned; otherwise
+ * it returns just before the target cpu calls @func. In case of allocation
+ * failure, @wait will be implicitly turned on.
+ *
+ * You must not call this function with disabled interrupts or from a
+ * hardware interrupt handler or from a bottom half handler.
+ */
+int smp_call_function(void (*func)(void *), void *info, int wait)
+{
+ int ret;
+
+ preempt_disable();
+ ret = smp_call_function_mask(cpu_online_map, func, info, wait);
+ preempt_enable();
+ return ret;
+}
+EXPORT_SYMBOL(smp_call_function);
+
+void ipi_call_lock(void)
+{
+ spin_lock(&call_function_lock);
+}
+
+void ipi_call_unlock(void)
+{
+ spin_unlock(&call_function_lock);
+}
+
+void ipi_call_lock_irq(void)
+{
+ spin_lock_irq(&call_function_lock);
+}
+
+void ipi_call_unlock_irq(void)
+{
+ spin_unlock_irq(&call_function_lock);
+}
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 3e9e896fdc5b..81e2fe0f983a 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -645,12 +645,12 @@ __init int spawn_ksoftirqd(void)
/*
* Call a function on all processors
*/
-int on_each_cpu(void (*func) (void *info), void *info, int retry, int wait)
+int on_each_cpu(void (*func) (void *info), void *info, int wait)
{
int ret = 0;
preempt_disable();
- ret = smp_call_function(func, info, retry, wait);
+ ret = smp_call_function(func, info, wait);
local_irq_disable();
func(info);
local_irq_enable();
diff --git a/kernel/stacktrace.c b/kernel/stacktrace.c
index b71816e47a30..94b527ef1d1e 100644
--- a/kernel/stacktrace.c
+++ b/kernel/stacktrace.c
@@ -6,19 +6,21 @@
* Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
*/
#include <linux/sched.h>
+#include <linux/module.h>
#include <linux/kallsyms.h>
#include <linux/stacktrace.h>
void print_stack_trace(struct stack_trace *trace, int spaces)
{
- int i, j;
+ int i;
- for (i = 0; i < trace->nr_entries; i++) {
- unsigned long ip = trace->entries[i];
+ if (WARN_ON(!trace->entries))
+ return;
- for (j = 0; j < spaces + 1; j++)
- printk(" ");
- print_ip_sym(ip);
+ for (i = 0; i < trace->nr_entries; i++) {
+ printk("%*c", 1 + spaces, ' ');
+ print_ip_sym(trace->entries[i]);
}
}
+EXPORT_SYMBOL_GPL(print_stack_trace);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 0d562d6531eb..6b16e16428d8 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -83,6 +83,9 @@ extern int maps_protect;
extern int sysctl_stat_interval;
extern int latencytop_enabled;
extern int sysctl_nr_open_min, sysctl_nr_open_max;
+#ifdef CONFIG_RCU_TORTURE_TEST
+extern int rcutorture_runnable;
+#endif /* #ifdef CONFIG_RCU_TORTURE_TEST */
/* Constants used for minimum and maximum */
#if defined(CONFIG_DETECT_SOFTLOCKUP) || defined(CONFIG_HIGHMEM)
@@ -820,6 +823,16 @@ static struct ctl_table kern_table[] = {
.child = key_sysctls,
},
#endif
+#ifdef CONFIG_RCU_TORTURE_TEST
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "rcutorture_runnable",
+ .data = &rcutorture_runnable,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ },
+#endif
/*
* NOTE: do not add new entries to this table unless you have read
* Documentation/sysctl/ctl_unnumbered.txt
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index 67f80c261709..f48d0f09d32f 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -268,7 +268,7 @@ void tick_broadcast_on_off(unsigned long reason, int *oncpu)
"offline CPU #%d\n", *oncpu);
else
smp_call_function_single(*oncpu, tick_do_broadcast_on_off,
- &reason, 1, 1);
+ &reason, 1);
}
/*
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index d63008b09a4c..beef7ccdf842 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -48,6 +48,13 @@ static void tick_do_update_jiffies64(ktime_t now)
unsigned long ticks = 0;
ktime_t delta;
+ /*
+ * Do a quick check without holding xtime_lock:
+ */
+ delta = ktime_sub(now, last_jiffies_update);
+ if (delta.tv64 < tick_period.tv64)
+ return;
+
/* Reevalute with xtime_lock held */
write_seqlock(&xtime_lock);
@@ -228,6 +235,7 @@ void tick_nohz_stop_sched_tick(void)
local_softirq_pending());
ratelimit++;
}
+ goto end;
}
ts->idle_calls++;
OpenPOWER on IntegriCloud