From 5fd7a09cfb8c6852f596c1f8c891c6158395250e Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 11 Aug 2015 18:03:23 +0200 Subject: atomic: Export fetch_or() Export fetch_or() that's implemented and used internally by the scheduler. We are going to use it for NO_HZ so make it generally available. Reviewed-by: Chris Metcalf Cc: Christoph Lameter Cc: Chris Metcalf Cc: Ingo Molnar Cc: Luiz Capitulino Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Cc: Viresh Kumar Signed-off-by: Frederic Weisbecker --- kernel/sched/core.c | 14 -------------- 1 file changed, 14 deletions(-) (limited to 'kernel/sched') diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 9503d590e5ef..7142feb4b92d 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -453,20 +453,6 @@ static inline void init_hrtick(void) } #endif /* CONFIG_SCHED_HRTICK */ -/* - * cmpxchg based fetch_or, macro so it works for different integer types - */ -#define fetch_or(ptr, val) \ -({ typeof(*(ptr)) __old, __val = *(ptr); \ - for (;;) { \ - __old = cmpxchg((ptr), __val, __val | (val)); \ - if (__old == __val) \ - break; \ - __val = __old; \ - } \ - __old; \ -}) - #if defined(CONFIG_SMP) && defined(TIF_POLLING_NRFLAG) /* * Atomically set TIF_NEED_RESCHED and test for TIF_POLLING_NRFLAG, -- cgit v1.2.1 From 01d36d0ac390895e719d0dd8ab91ebbbf506d28e Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 4 Nov 2015 18:17:10 +0100 Subject: sched: Account rr tasks In order to evaluate the scheduler tick dependency without probing context switches, we need to know how many SCHED_RR and SCHED_FIFO tasks are enqueued as those policies don't have the same preemption requirements. To prepare for that, let's account SCHED_RR tasks, we'll be able to deduce SCHED_FIFO tasks as well from it and the total RT tasks in the runqueue. 
Reviewed-by: Chris Metcalf Cc: Christoph Lameter Cc: Chris Metcalf Cc: Ingo Molnar Cc: Luiz Capitulino Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Cc: Viresh Kumar Signed-off-by: Frederic Weisbecker --- kernel/sched/rt.c | 16 ++++++++++++++++ kernel/sched/sched.h | 1 + 2 files changed, 17 insertions(+) (limited to 'kernel/sched') diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index 8ec86abe0ea1..3f1fcffbb18f 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -1141,6 +1141,20 @@ unsigned int rt_se_nr_running(struct sched_rt_entity *rt_se) return 1; } +static inline +unsigned int rt_se_rr_nr_running(struct sched_rt_entity *rt_se) +{ + struct rt_rq *group_rq = group_rt_rq(rt_se); + struct task_struct *tsk; + + if (group_rq) + return group_rq->rr_nr_running; + + tsk = rt_task_of(rt_se); + + return (tsk->policy == SCHED_RR) ? 1 : 0; +} + static inline void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) { @@ -1148,6 +1162,7 @@ void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) WARN_ON(!rt_prio(prio)); rt_rq->rt_nr_running += rt_se_nr_running(rt_se); + rt_rq->rr_nr_running += rt_se_rr_nr_running(rt_se); inc_rt_prio(rt_rq, prio); inc_rt_migration(rt_se, rt_rq); @@ -1160,6 +1175,7 @@ void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) WARN_ON(!rt_prio(rt_se_prio(rt_se))); WARN_ON(!rt_rq->rt_nr_running); rt_rq->rt_nr_running -= rt_se_nr_running(rt_se); + rt_rq->rr_nr_running -= rt_se_rr_nr_running(rt_se); dec_rt_prio(rt_rq, rt_se_prio(rt_se)); dec_rt_migration(rt_se, rt_rq); diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 10f16374df7f..f0abfce14044 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -450,6 +450,7 @@ static inline int rt_bandwidth_enabled(void) struct rt_rq { struct rt_prio_array active; unsigned int rt_nr_running; + unsigned int rr_nr_running; #if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED struct { int curr; /* highest queued rt task prio */ -- 
cgit v1.2.1 From 76d92ac305f23cada3a9b3c48a7ccea5f71019cb Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 17 Jul 2015 22:25:49 +0200 Subject: sched: Migrate sched to use new tick dependency mask model Instead of providing asynchronous checks for the nohz subsystem to verify sched tick dependency, migrate sched to the new mask. Every time a task is enqueued or dequeued, we evaluate the state of the tick dependency on top of the policy of the tasks in the runqueue, by order of priority: SCHED_DEADLINE: Need the tick in order to periodically check for runtime SCHED_FIFO : Don't need the tick (no round-robin) SCHED_RR : Need the tick if more than 1 task of the same priority for round robin (simplified with checking if more than one SCHED_RR task no matter what priority). SCHED_NORMAL : Need the tick if more than 1 task for round-robin. We could optimize that further with one flag per sched policy on the tick dependency mask and perform only the checks relevant to the policy concerned by an enqueue/dequeue operation. Since the checks aren't based on the current task anymore, we could get rid of the task switch hook but it's still needed for posix cpu timers. 
Reviewed-by: Chris Metcalf Cc: Christoph Lameter Cc: Chris Metcalf Cc: Ingo Molnar Cc: Luiz Capitulino Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Cc: Viresh Kumar Signed-off-by: Frederic Weisbecker --- kernel/sched/core.c | 35 ++++++++++++++++++++--------------- kernel/sched/sched.h | 47 +++++++++++++++++++++++++++++++++-------------- 2 files changed, 53 insertions(+), 29 deletions(-) (limited to 'kernel/sched') diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 7142feb4b92d..1fad82364ffe 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -701,31 +701,36 @@ static inline bool got_nohz_idle_kick(void) #endif /* CONFIG_NO_HZ_COMMON */ #ifdef CONFIG_NO_HZ_FULL -bool sched_can_stop_tick(void) +bool sched_can_stop_tick(struct rq *rq) { + int fifo_nr_running; + + /* Deadline tasks, even if single, need the tick */ + if (rq->dl.dl_nr_running) + return false; + /* - * FIFO realtime policy runs the highest priority task. Other runnable - * tasks are of a lower priority. The scheduler tick does nothing. + * FIFO realtime policy runs the highest priority task (after DEADLINE). + * Other runnable tasks are of a lower priority. The scheduler tick + * isn't needed. */ - if (current->policy == SCHED_FIFO) + fifo_nr_running = rq->rt.rt_nr_running - rq->rt.rr_nr_running; + if (fifo_nr_running) return true; /* * Round-robin realtime tasks time slice with other tasks at the same - * realtime priority. Is this task the only one at this priority? + * realtime priority. */ - if (current->policy == SCHED_RR) { - struct sched_rt_entity *rt_se = &current->rt; - - return list_is_singular(&rt_se->run_list); + if (rq->rt.rr_nr_running) { + if (rq->rt.rr_nr_running == 1) + return true; + else + return false; } - /* - * More than one running task need preemption. - * nr_running update is assumed to be visible - * after IPI is sent from wakers. 
- */ - if (this_rq()->nr_running > 1) + /* Normal multitasking need periodic preemption checks */ + if (rq->cfs.nr_running > 1) return false; return true; diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index f0abfce14044..4f0bca770108 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1279,6 +1279,35 @@ unsigned long to_ratio(u64 period, u64 runtime); extern void init_entity_runnable_average(struct sched_entity *se); +#ifdef CONFIG_NO_HZ_FULL +extern bool sched_can_stop_tick(struct rq *rq); + +/* + * Tick may be needed by tasks in the runqueue depending on their policy and + * requirements. If tick is needed, lets send the target an IPI to kick it out of + * nohz mode if necessary. + */ +static inline void sched_update_tick_dependency(struct rq *rq) +{ + int cpu; + + if (!tick_nohz_full_enabled()) + return; + + cpu = cpu_of(rq); + + if (!tick_nohz_full_cpu(cpu)) + return; + + if (sched_can_stop_tick(rq)) + tick_nohz_dep_clear_cpu(cpu, TICK_DEP_BIT_SCHED); + else + tick_nohz_dep_set_cpu(cpu, TICK_DEP_BIT_SCHED); +} +#else +static inline void sched_update_tick_dependency(struct rq *rq) { } +#endif + static inline void add_nr_running(struct rq *rq, unsigned count) { unsigned prev_nr = rq->nr_running; @@ -1290,26 +1319,16 @@ static inline void add_nr_running(struct rq *rq, unsigned count) if (!rq->rd->overload) rq->rd->overload = true; #endif - -#ifdef CONFIG_NO_HZ_FULL - if (tick_nohz_full_cpu(rq->cpu)) { - /* - * Tick is needed if more than one task runs on a CPU. - * Send the target an IPI to kick it out of nohz mode. - * - * We assume that IPI implies full memory barrier and the - * new value of rq->nr_running is visible on reception - * from the target. 
- */ - tick_nohz_full_kick_cpu(rq->cpu); - } -#endif } + + sched_update_tick_dependency(rq); } static inline void sub_nr_running(struct rq *rq, unsigned count) { rq->nr_running -= count; + /* Check if we still need preemption */ + sched_update_tick_dependency(rq); } static inline void rq_last_tick_reset(struct rq *rq) -- cgit v1.2.1 From 4f49b90abb4aca6fe677c95fc352fd0674d489bd Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 22 Jul 2015 17:03:52 +0200 Subject: sched-clock: Migrate to use new tick dependency mask model Instead of checking sched_clock_stable from the nohz subsystem to verify its tick dependency, migrate it to the new mask in order to include it to the all-in-one check. Reviewed-by: Chris Metcalf Cc: Christoph Lameter Cc: Chris Metcalf Cc: Ingo Molnar Cc: Luiz Capitulino Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Cc: Viresh Kumar Signed-off-by: Frederic Weisbecker --- kernel/sched/clock.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'kernel/sched') diff --git a/kernel/sched/clock.c b/kernel/sched/clock.c index bc54e84675da..fedb967a9841 100644 --- a/kernel/sched/clock.c +++ b/kernel/sched/clock.c @@ -61,6 +61,7 @@ #include #include #include +#include /* * Scheduler clock - returns current time in nanosec units. @@ -89,6 +90,8 @@ static void __set_sched_clock_stable(void) { if (!sched_clock_stable()) static_key_slow_inc(&__sched_clock_stable); + + tick_dep_clear(TICK_DEP_BIT_CLOCK_UNSTABLE); } void set_sched_clock_stable(void) @@ -108,6 +111,8 @@ static void __clear_sched_clock_stable(struct work_struct *work) /* XXX worry about clock continuity */ if (sched_clock_stable()) static_key_slow_dec(&__sched_clock_stable); + + tick_dep_set(TICK_DEP_BIT_CLOCK_UNSTABLE); } static DECLARE_WORK(sched_clock_work, __clear_sched_clock_stable); -- cgit v1.2.1