From 82c65d60d64401aedc1006d6572469bbfdf148de Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Wed, 4 Jun 2014 10:31:16 -0700 Subject: sched/idle: Clear polling before descheduling the idle thread Currently, the only real guarantee provided by the polling bit is that, if you hold rq->lock and the polling bit is set, then you can set need_resched to force a reschedule. The only reason the lock is needed is that the idle thread might not be running at all when setting its need_resched bit, and rq->lock keeps it pinned. This is easy to fix: just clear the polling bit before scheduling. Now the idle thread's polling bit is only ever set when rq->curr == rq->idle. Signed-off-by: Andy Lutomirski Signed-off-by: Peter Zijlstra Cc: nicolas.pitre@linaro.org Cc: daniel.lezcano@linaro.org Cc: umgwanakikbuti@gmail.com Cc: Rafael J. Wysocki Cc: Linus Torvalds Cc: linux-kernel@vger.kernel.org Link: http://lkml.kernel.org/r/b2059fcb4c613d520cb503b6fad6e47033c7c203.1401902905.git.luto@amacapital.net Signed-off-by: Ingo Molnar --- kernel/sched/idle.c | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) (limited to 'kernel/sched/idle.c') diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c index 25b9423abce9..fe4b24bf33ca 100644 --- a/kernel/sched/idle.c +++ b/kernel/sched/idle.c @@ -67,6 +67,10 @@ void __weak arch_cpu_idle(void) * cpuidle_idle_call - the main idle function * * NOTE: no locks or semaphores should be used here + * + * On archs that support TIF_POLLING_NRFLAG, is called with polling + * set, and it returns with polling set. If it ever stops polling, it + * must clear the polling bit. */ static void cpuidle_idle_call(void) { @@ -175,10 +179,22 @@ exit_idle: /* * Generic idle loop implementation + * + * Called with polling cleared. */ static void cpu_idle_loop(void) { while (1) { + /* + * If the arch has a polling bit, we maintain an invariant: + * + * Our polling bit is clear if we're not scheduled (i.e. if + * rq->curr != rq->idle). This means that, if rq->idle has + * the polling bit set, then setting need_resched is + * guaranteed to cause the cpu to reschedule. + */ + + __current_set_polling(); tick_nohz_idle_enter(); while (!need_resched()) { @@ -218,6 +234,15 @@ static void cpu_idle_loop(void) */ preempt_set_need_resched(); tick_nohz_idle_exit(); + __current_clr_polling(); + + /* + * We promise to reschedule if need_resched is set while + * polling is set. That means that clearing polling + * needs to be visible before rescheduling. + */ + smp_mb__after_atomic(); + schedule_preempt_disabled(); } } @@ -239,7 +264,6 @@ void cpu_startup_entry(enum cpuhp_state state) */ boot_init_stack_canary(); #endif - __current_set_polling(); arch_cpu_idle_prepare(); cpu_idle_loop(); } -- cgit v1.2.1 From e3baac47f0e82c4be632f4f97215bb93bf16b342 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 4 Jun 2014 10:31:18 -0700 Subject: sched/idle: Optimize try-to-wake-up IPI [ This series reduces the number of IPIs on Andy's workload by something like 99%. It's down from many hundreds per second to very few. The basic idea behind this series is to make TIF_POLLING_NRFLAG be a reliable indication that the idle task is polling. Once that's done, the rest is reasonably straightforward. ] When enqueueing tasks on remote LLC domains, we send an IPI to do the work 'locally' and avoid bouncing all the cachelines over. However, when the remote CPU is idle (and polling, say x86 mwait), we don't need to send an IPI, we can simply kick the TIF word to wake it up and have the 'idle' loop do the work. So when _TIF_POLLING_NRFLAG is set, but _TIF_NEED_RESCHED is not (yet) set, set _TIF_NEED_RESCHED and avoid sending the IPI. Much-requested-by: Andy Lutomirski Signed-off-by: Peter Zijlstra [Edited by Andy Lutomirski, but this is mostly Peter Zijlstra's code.] Signed-off-by: Andy Lutomirski Cc: nicolas.pitre@linaro.org Cc: daniel.lezcano@linaro.org Cc: Mike Galbraith Cc: umgwanakikbuti@gmail.com Cc: Rafael J. Wysocki Cc: Linus Torvalds Cc: linux-kernel@vger.kernel.org Link: http://lkml.kernel.org/r/ce06f8b02e7e337be63e97597fc4b248d3aa6f9b.1401902905.git.luto@amacapital.net Signed-off-by: Ingo Molnar --- kernel/sched/idle.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'kernel/sched/idle.c') diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c index fe4b24bf33ca..cf009fb0bc25 100644 --- a/kernel/sched/idle.c +++ b/kernel/sched/idle.c @@ -12,6 +12,8 @@ #include +#include "sched.h" + static int __read_mostly cpu_idle_force_poll; void cpu_idle_poll_ctrl(bool enable) @@ -237,12 +239,14 @@ static void cpu_idle_loop(void) __current_clr_polling(); /* - * We promise to reschedule if need_resched is set while - * polling is set. That means that clearing polling - * needs to be visible before rescheduling. + * We promise to call sched_ttwu_pending and reschedule + * if need_resched is set while polling is set. That + * means that clearing polling needs to be visible + * before doing these things. */ smp_mb__after_atomic(); + sched_ttwu_pending(); schedule_preempt_disabled(); } } -- cgit v1.2.1