diff options
Diffstat (limited to 'kernel/sched')
-rw-r--r-- | kernel/sched/autogroup.h | 1 | ||||
-rw-r--r-- | kernel/sched/clock.c | 2 | ||||
-rw-r--r-- | kernel/sched/completion.c | 3 | ||||
-rw-r--r-- | kernel/sched/core.c | 86 | ||||
-rw-r--r-- | kernel/sched/cpudeadline.c | 4 | ||||
-rw-r--r-- | kernel/sched/cpufreq_schedutil.c | 40 | ||||
-rw-r--r-- | kernel/sched/cpupri.c | 4 | ||||
-rw-r--r-- | kernel/sched/cputime.c | 6 | ||||
-rw-r--r-- | kernel/sched/deadline.c | 98 | ||||
-rw-r--r-- | kernel/sched/debug.c | 3 | ||||
-rw-r--r-- | kernel/sched/fair.c | 31 | ||||
-rw-r--r-- | kernel/sched/features.h | 5 | ||||
-rw-r--r-- | kernel/sched/idle.c | 1 | ||||
-rw-r--r-- | kernel/sched/loadavg.c | 21 | ||||
-rw-r--r-- | kernel/sched/rt.c | 29 | ||||
-rw-r--r-- | kernel/sched/sched.h | 24 | ||||
-rw-r--r-- | kernel/sched/stats.h | 111 | ||||
-rw-r--r-- | kernel/sched/swait.c | 2 | ||||
-rw-r--r-- | kernel/sched/wait.c | 42 |
19 files changed, 274 insertions, 239 deletions
diff --git a/kernel/sched/autogroup.h b/kernel/sched/autogroup.h index 890c95f2587a..ce40c810cd5c 100644 --- a/kernel/sched/autogroup.h +++ b/kernel/sched/autogroup.h @@ -2,6 +2,7 @@ #include <linux/kref.h> #include <linux/rwsem.h> +#include <linux/sched/autogroup.h> struct autogroup { /* diff --git a/kernel/sched/clock.c b/kernel/sched/clock.c index ad64efe41722..a08795e21628 100644 --- a/kernel/sched/clock.c +++ b/kernel/sched/clock.c @@ -58,6 +58,8 @@ #include <linux/percpu.h> #include <linux/ktime.h> #include <linux/sched.h> +#include <linux/nmi.h> +#include <linux/sched/clock.h> #include <linux/static_key.h> #include <linux/workqueue.h> #include <linux/compiler.h> diff --git a/kernel/sched/completion.c b/kernel/sched/completion.c index f063a25d4449..53f9558fa925 100644 --- a/kernel/sched/completion.c +++ b/kernel/sched/completion.c @@ -11,7 +11,8 @@ * Waiting for completion is a typically sync point, but not an exclusion point. */ -#include <linux/sched.h> +#include <linux/sched/signal.h> +#include <linux/sched/debug.h> #include <linux/completion.h> /** diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 34e2291a9a6c..3b31fc05a0f1 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -6,10 +6,15 @@ * Copyright (C) 1991-2002 Linus Torvalds */ #include <linux/sched.h> +#include <linux/sched/clock.h> +#include <uapi/linux/sched/types.h> +#include <linux/sched/loadavg.h> +#include <linux/sched/hotplug.h> #include <linux/cpuset.h> #include <linux/delayacct.h> #include <linux/init_task.h> #include <linux/context_tracking.h> +#include <linux/rcupdate_wait.h> #include <linux/blkdev.h> #include <linux/kprobes.h> @@ -23,6 +28,9 @@ #include <asm/switch_to.h> #include <asm/tlb.h> +#ifdef CONFIG_PARAVIRT +#include <asm/paravirt.h> +#endif #include "sched.h" #include "../workqueue_internal.h" @@ -978,7 +986,7 @@ static struct rq *__migrate_task(struct rq *rq, struct task_struct *p, int dest_ return rq; /* Affinity changed (again). */ - if (!cpumask_test_cpu(dest_cpu, tsk_cpus_allowed(p))) + if (!cpumask_test_cpu(dest_cpu, &p->cpus_allowed)) return rq; rq = move_queued_task(rq, p, dest_cpu); @@ -1087,6 +1095,7 @@ static int __set_cpus_allowed_ptr(struct task_struct *p, int ret = 0; rq = task_rq_lock(p, &rf); + update_rq_clock(rq); if (p->flags & PF_KTHREAD) { /* @@ -1255,10 +1264,10 @@ static int migrate_swap_stop(void *data) if (task_cpu(arg->src_task) != arg->src_cpu) goto unlock; - if (!cpumask_test_cpu(arg->dst_cpu, tsk_cpus_allowed(arg->src_task))) + if (!cpumask_test_cpu(arg->dst_cpu, &arg->src_task->cpus_allowed)) goto unlock; - if (!cpumask_test_cpu(arg->src_cpu, tsk_cpus_allowed(arg->dst_task))) + if (!cpumask_test_cpu(arg->src_cpu, &arg->dst_task->cpus_allowed)) goto unlock; __migrate_swap_task(arg->src_task, arg->dst_cpu); @@ -1299,10 +1308,10 @@ int migrate_swap(struct task_struct *cur, struct task_struct *p) if (!cpu_active(arg.src_cpu) || !cpu_active(arg.dst_cpu)) goto out; - if (!cpumask_test_cpu(arg.dst_cpu, tsk_cpus_allowed(arg.src_task))) + if (!cpumask_test_cpu(arg.dst_cpu, &arg.src_task->cpus_allowed)) goto out; - if (!cpumask_test_cpu(arg.src_cpu, tsk_cpus_allowed(arg.dst_task))) + if (!cpumask_test_cpu(arg.src_cpu, &arg.dst_task->cpus_allowed)) goto out; trace_sched_swap_numa(cur, arg.src_cpu, p, arg.dst_cpu); @@ -1486,14 +1495,14 @@ static int select_fallback_rq(int cpu, struct task_struct *p) for_each_cpu(dest_cpu, nodemask) { if (!cpu_active(dest_cpu)) continue; - if (cpumask_test_cpu(dest_cpu, tsk_cpus_allowed(p))) + if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed)) return dest_cpu; } } for (;;) { /* Any allowed, online CPU? */ - for_each_cpu(dest_cpu, tsk_cpus_allowed(p)) { + for_each_cpu(dest_cpu, &p->cpus_allowed) { if (!(p->flags & PF_KTHREAD) && !cpu_active(dest_cpu)) continue; if (!cpu_online(dest_cpu)) @@ -1545,10 +1554,10 @@ int select_task_rq(struct task_struct *p, int cpu, int sd_flags, int wake_flags) { lockdep_assert_held(&p->pi_lock); - if (tsk_nr_cpus_allowed(p) > 1) + if (p->nr_cpus_allowed > 1) cpu = p->sched_class->select_task_rq(p, cpu, sd_flags, wake_flags); else - cpu = cpumask_any(tsk_cpus_allowed(p)); + cpu = cpumask_any(&p->cpus_allowed); /* * In order not to call set_task_cpu() on a blocking task we need @@ -1560,7 +1569,7 @@ int select_task_rq(struct task_struct *p, int cpu, int sd_flags, int wake_flags) * [ this allows ->select_task() to simply return task_cpu(p) and * not worry about this generic constraint ] */ - if (unlikely(!cpumask_test_cpu(cpu, tsk_cpus_allowed(p)) || + if (unlikely(!cpumask_test_cpu(cpu, &p->cpus_allowed) || !cpu_online(cpu))) cpu = select_fallback_rq(task_cpu(p), p); @@ -2844,7 +2853,7 @@ context_switch(struct rq *rq, struct task_struct *prev, if (!mm) { next->active_mm = oldmm; - atomic_inc(&oldmm->mm_count); + mmgrab(oldmm); enter_lazy_tlb(oldmm, next); } else switch_mm_irqs_off(oldmm, mm, next); @@ -3207,6 +3216,15 @@ static inline void preempt_latency_start(int val) { } static inline void preempt_latency_stop(int val) { } #endif +static inline unsigned long get_preempt_disable_ip(struct task_struct *p) +{ +#ifdef CONFIG_DEBUG_PREEMPT + return p->preempt_disable_ip; +#else + return 0; +#endif +} + /* * Print scheduling while atomic bug: */ @@ -3269,10 +3287,15 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf) struct task_struct *p; /* - * Optimization: we know that if all tasks are in - * the fair class we can call that function directly: + * Optimization: we know that if all tasks are in the fair class we can + * call that function directly, but only if the @prev task wasn't of a + * higher scheduling class, because otherwise those loose the + * opportunity to pull in more work from other CPUs. */ - if (likely(rq->nr_running == rq->cfs.h_nr_running)) { + if (likely((prev->sched_class == &idle_sched_class || + prev->sched_class == &fair_sched_class) && + rq->nr_running == rq->cfs.h_nr_running)) { + p = fair_sched_class.pick_next_task(rq, prev, rf); if (unlikely(p == RETRY_TASK)) goto again; @@ -5229,6 +5252,9 @@ void sched_show_task(struct task_struct *p) int ppid; unsigned long state = p->state; + /* Make sure the string lines up properly with the number of task states: */ + BUILD_BUG_ON(sizeof(TASK_STATE_TO_CHAR_STR)-1 != ilog2(TASK_STATE_MAX)+1); + if (!try_get_task_stack(p)) return; if (state) @@ -5457,7 +5483,7 @@ int migrate_task_to(struct task_struct *p, int target_cpu) if (curr_cpu == target_cpu) return 0; - if (!cpumask_test_cpu(target_cpu, tsk_cpus_allowed(p))) + if (!cpumask_test_cpu(target_cpu, &p->cpus_allowed)) return -EINVAL; /* TODO: This is not properly updating schedstats */ @@ -5557,7 +5583,7 @@ static void migrate_tasks(struct rq *dead_rq) { struct rq *rq = dead_rq; struct task_struct *next, *stop = rq->stop; - struct rq_flags rf, old_rf; + struct rq_flags rf; int dest_cpu; /* @@ -5576,7 +5602,9 @@ static void migrate_tasks(struct rq *dead_rq) * class method both need to have an up-to-date * value of rq->clock[_task] */ + rq_pin_lock(rq, &rf); update_rq_clock(rq); + rq_unpin_lock(rq, &rf); for (;;) { /* @@ -5589,7 +5617,7 @@ static void migrate_tasks(struct rq *dead_rq) /* * pick_next_task() assumes pinned rq->lock: */ - rq_pin_lock(rq, &rf); + rq_repin_lock(rq, &rf); next = pick_next_task(rq, &fake_task, &rf); BUG_ON(!next); next->sched_class->put_prev_task(rq, next); @@ -5618,13 +5646,6 @@ static void migrate_tasks(struct rq *dead_rq) continue; } - /* - * __migrate_task() may return with a different - * rq->lock held and a new cookie in 'rf', but we need - * to preserve rf::clock_update_flags for 'dead_rq'. - */ - old_rf = rf; - /* Find suitable destination for @next, with force if needed. */ dest_cpu = select_fallback_rq(dead_rq->cpu, next); @@ -5633,7 +5654,6 @@ static void migrate_tasks(struct rq *dead_rq) raw_spin_unlock(&rq->lock); rq = dead_rq; raw_spin_lock(&rq->lock); - rf = old_rf; } raw_spin_unlock(&next->pi_lock); } @@ -6095,7 +6115,7 @@ void __init sched_init(void) /* * The boot idle thread does lazy MMU switching as well: */ - atomic_inc(&init_mm.mm_count); + mmgrab(&init_mm); enter_lazy_tlb(&init_mm, current); /* @@ -6816,11 +6836,20 @@ cpu_cgroup_css_alloc(struct cgroup_subsys_state *parent_css) if (IS_ERR(tg)) return ERR_PTR(-ENOMEM); - sched_online_group(tg, parent); - return &tg->css; } +/* Expose task group only after completing cgroup initialization */ +static int cpu_cgroup_css_online(struct cgroup_subsys_state *css) +{ + struct task_group *tg = css_tg(css); + struct task_group *parent = css_tg(css->parent); + + if (parent) + sched_online_group(tg, parent); + return 0; +} + static void cpu_cgroup_css_released(struct cgroup_subsys_state *css) { struct task_group *tg = css_tg(css); @@ -7226,6 +7255,7 @@ static struct cftype cpu_files[] = { struct cgroup_subsys cpu_cgrp_subsys = { .css_alloc = cpu_cgroup_css_alloc, + .css_online = cpu_cgroup_css_online, .css_released = cpu_cgroup_css_released, .css_free = cpu_cgroup_css_free, .fork = cpu_cgroup_fork, diff --git a/kernel/sched/cpudeadline.c b/kernel/sched/cpudeadline.c index e73119013c53..fba235c7d026 100644 --- a/kernel/sched/cpudeadline.c +++ b/kernel/sched/cpudeadline.c @@ -128,10 +128,10 @@ int cpudl_find(struct cpudl *cp, struct task_struct *p, const struct sched_dl_entity *dl_se = &p->dl; if (later_mask && - cpumask_and(later_mask, cp->free_cpus, tsk_cpus_allowed(p))) { + cpumask_and(later_mask, cp->free_cpus, &p->cpus_allowed)) { best_cpu = cpumask_any(later_mask); goto out; - } else if (cpumask_test_cpu(cpudl_maximum(cp), tsk_cpus_allowed(p)) && + } else if (cpumask_test_cpu(cpudl_maximum(cp), &p->cpus_allowed) && dl_time_before(dl_se->deadline, cp->elements[0].dl)) { best_cpu = cpudl_maximum(cp); if (later_mask) diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index fd4659313640..54c577578da6 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -13,6 +13,7 @@ #include <linux/cpufreq.h> #include <linux/kthread.h> +#include <uapi/linux/sched/types.h> #include <linux/slab.h> #include <trace/events/power.h> @@ -35,6 +36,7 @@ struct sugov_policy { u64 last_freq_update_time; s64 freq_update_delay_ns; unsigned int next_freq; + unsigned int cached_raw_freq; /* The next fields are only needed if fast switch cannot be used. */ struct irq_work irq_work; @@ -51,7 +53,6 @@ struct sugov_cpu { struct update_util_data update_util; struct sugov_policy *sg_policy; - unsigned int cached_raw_freq; unsigned long iowait_boost; unsigned long iowait_boost_max; u64 last_update; @@ -115,7 +116,7 @@ static void sugov_update_commit(struct sugov_policy *sg_policy, u64 time, /** * get_next_freq - Compute a new frequency for a given cpufreq policy. - * @sg_cpu: schedutil cpu object to compute the new frequency for. + * @sg_policy: schedutil policy object to compute the new frequency for. * @util: Current CPU utilization. * @max: CPU capacity. * @@ -135,19 +136,18 @@ static void sugov_update_commit(struct sugov_policy *sg_policy, u64 time, * next_freq (as calculated above) is returned, subject to policy min/max and * cpufreq driver limitations. */ -static unsigned int get_next_freq(struct sugov_cpu *sg_cpu, unsigned long util, - unsigned long max) +static unsigned int get_next_freq(struct sugov_policy *sg_policy, + unsigned long util, unsigned long max) { - struct sugov_policy *sg_policy = sg_cpu->sg_policy; struct cpufreq_policy *policy = sg_policy->policy; unsigned int freq = arch_scale_freq_invariant() ? policy->cpuinfo.max_freq : policy->cur; freq = (freq + (freq >> 2)) * util / max; - if (freq == sg_cpu->cached_raw_freq && sg_policy->next_freq != UINT_MAX) + if (freq == sg_policy->cached_raw_freq && sg_policy->next_freq != UINT_MAX) return sg_policy->next_freq; - sg_cpu->cached_raw_freq = freq; + sg_policy->cached_raw_freq = freq; return cpufreq_driver_resolve_freq(policy, freq); } @@ -212,7 +212,7 @@ static void sugov_update_single(struct update_util_data *hook, u64 time, } else { sugov_get_util(&util, &max); sugov_iowait_boost(sg_cpu, &util, &max); - next_f = get_next_freq(sg_cpu, util, max); + next_f = get_next_freq(sg_policy, util, max); } sugov_update_commit(sg_policy, time, next_f); } @@ -266,7 +266,7 @@ static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, sugov_iowait_boost(j_sg_cpu, &util, &max); } - return get_next_freq(sg_cpu, util, max); + return get_next_freq(sg_policy, util, max); } static void sugov_update_shared(struct update_util_data *hook, u64 time, @@ -579,25 +579,19 @@ static int sugov_start(struct cpufreq_policy *policy) sg_policy->next_freq = UINT_MAX; sg_policy->work_in_progress = false; sg_policy->need_freq_update = false; + sg_policy->cached_raw_freq = 0; for_each_cpu(cpu, policy->cpus) { struct sugov_cpu *sg_cpu = &per_cpu(sugov_cpu, cpu); + memset(sg_cpu, 0, sizeof(*sg_cpu)); sg_cpu->sg_policy = sg_policy; - if (policy_is_shared(policy)) { - sg_cpu->util = 0; - sg_cpu->max = 0; - sg_cpu->flags = SCHED_CPUFREQ_RT; - sg_cpu->last_update = 0; - sg_cpu->cached_raw_freq = 0; - sg_cpu->iowait_boost = 0; - sg_cpu->iowait_boost_max = policy->cpuinfo.max_freq; - cpufreq_add_update_util_hook(cpu, &sg_cpu->update_util, - sugov_update_shared); - } else { - cpufreq_add_update_util_hook(cpu, &sg_cpu->update_util, - sugov_update_single); - } + sg_cpu->flags = SCHED_CPUFREQ_RT; + sg_cpu->iowait_boost_max = policy->cpuinfo.max_freq; + cpufreq_add_update_util_hook(cpu, &sg_cpu->update_util, + policy_is_shared(policy) ? + sugov_update_shared : + sugov_update_single); } return 0; } diff --git a/kernel/sched/cpupri.c b/kernel/sched/cpupri.c index 11e9705bf937..981fcd7dc394 100644 --- a/kernel/sched/cpupri.c +++ b/kernel/sched/cpupri.c @@ -103,11 +103,11 @@ int cpupri_find(struct cpupri *cp, struct task_struct *p, if (skip) continue; - if (cpumask_any_and(tsk_cpus_allowed(p), vec->mask) >= nr_cpu_ids) + if (cpumask_any_and(&p->cpus_allowed, vec->mask) >= nr_cpu_ids) continue; if (lowest_mask) { - cpumask_and(lowest_mask, tsk_cpus_allowed(p), vec->mask); + cpumask_and(lowest_mask, &p->cpus_allowed, vec->mask); /* * We have to ensure that we have at least one bit diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index 2ecec3a4f1ee..f3778e2b46c8 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c @@ -4,12 +4,8 @@ #include <linux/kernel_stat.h> #include <linux/static_key.h> #include <linux/context_tracking.h> -#include <linux/cputime.h> +#include <linux/sched/cputime.h> #include "sched.h" -#ifdef CONFIG_PARAVIRT -#include <asm/paravirt.h> -#endif - #ifdef CONFIG_IRQ_TIME_ACCOUNTING diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index 27737f34757d..a2ce59015642 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -134,7 +134,7 @@ static void inc_dl_migration(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq) { struct task_struct *p = dl_task_of(dl_se); - if (tsk_nr_cpus_allowed(p) > 1) + if (p->nr_cpus_allowed > 1) dl_rq->dl_nr_migratory++; update_dl_migration(dl_rq); @@ -144,7 +144,7 @@ static void dec_dl_migration(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq) { struct task_struct *p = dl_task_of(dl_se); - if (tsk_nr_cpus_allowed(p) > 1) + if (p->nr_cpus_allowed > 1) dl_rq->dl_nr_migratory--; update_dl_migration(dl_rq); @@ -252,7 +252,7 @@ static struct rq *dl_task_offline_migration(struct rq *rq, struct task_struct *p * If we cannot preempt any rq, fall back to pick any * online cpu. */ - cpu = cpumask_any_and(cpu_active_mask, tsk_cpus_allowed(p)); + cpu = cpumask_any_and(cpu_active_mask, &p->cpus_allowed); if (cpu >= nr_cpu_ids) { /* * Fail to find any suitable cpu. @@ -445,13 +445,13 @@ static void replenish_dl_entity(struct sched_dl_entity *dl_se, * * This function returns true if: * - * runtime / (deadline - t) > dl_runtime / dl_period , + * runtime / (deadline - t) > dl_runtime / dl_deadline , * * IOW we can't recycle current parameters. * - * Notice that the bandwidth check is done against the period. For + * Notice that the bandwidth check is done against the deadline. For * task with deadline equal to period this is the same of using - * dl_deadline instead of dl_period in the equation above. + * dl_period instead of dl_deadline in the equation above. */ static bool dl_entity_overflow(struct sched_dl_entity *dl_se, struct sched_dl_entity *pi_se, u64 t) @@ -476,7 +476,7 @@ static bool dl_entity_overflow(struct sched_dl_entity *dl_se, * of anything below microseconds resolution is actually fiction * (but still we want to give the user that illusion >;). */ - left = (pi_se->dl_period >> DL_SCALE) * (dl_se->runtime >> DL_SCALE); + left = (pi_se->dl_deadline >> DL_SCALE) * (dl_se->runtime >> DL_SCALE); right = ((dl_se->deadline - t) >> DL_SCALE) * (pi_se->dl_runtime >> DL_SCALE); @@ -505,10 +505,15 @@ static void update_dl_entity(struct sched_dl_entity *dl_se, } } +static inline u64 dl_next_period(struct sched_dl_entity *dl_se) +{ + return dl_se->deadline - dl_se->dl_deadline + dl_se->dl_period; +} + /* * If the entity depleted all its runtime, and if we want it to sleep * while waiting for some new execution time to become available, we - * set the bandwidth enforcement timer to the replenishment instant + * set the bandwidth replenishment timer to the replenishment instant * and try to activate it. * * Notice that it is important for the caller to know if the timer @@ -530,7 +535,7 @@ static int start_dl_timer(struct task_struct *p) * that it is actually coming from rq->clock and not from * hrtimer's time base reading. */ - act = ns_to_ktime(dl_se->deadline); + act = ns_to_ktime(dl_next_period(dl_se)); now = hrtimer_cb_get_time(timer); delta = ktime_to_ns(now) - rq_clock(rq); act = ktime_add_ns(act, delta); @@ -638,6 +643,7 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer) lockdep_unpin_lock(&rq->lock, rf.cookie); rq = dl_task_offline_migration(rq, p); rf.cookie = lockdep_pin_lock(&rq->lock); + update_rq_clock(rq); /* * Now that the task has been migrated to the new RQ and we @@ -689,6 +695,37 @@ void init_dl_task_timer(struct sched_dl_entity *dl_se) timer->function = dl_task_timer; } +/* + * During the activation, CBS checks if it can reuse the current task's + * runtime and period. If the deadline of the task is in the past, CBS + * cannot use the runtime, and so it replenishes the task. This rule + * works fine for implicit deadline tasks (deadline == period), and the + * CBS was designed for implicit deadline tasks. However, a task with + * constrained deadline (deadine < period) might be awakened after the + * deadline, but before the next period. In this case, replenishing the + * task would allow it to run for runtime / deadline. As in this case + * deadline < period, CBS enables a task to run for more than the + * runtime / period. In a very loaded system, this can cause a domino + * effect, making other tasks miss their deadlines. + * + * To avoid this problem, in the activation of a constrained deadline + * task after the deadline but before the next period, throttle the + * task and set the replenishing timer to the begin of the next period, + * unless it is boosted. + */ +static inline void dl_check_constrained_dl(struct sched_dl_entity *dl_se) +{ + struct task_struct *p = dl_task_of(dl_se); + struct rq *rq = rq_of_dl_rq(dl_rq_of_se(dl_se)); + + if (dl_time_before(dl_se->deadline, rq_clock(rq)) && + dl_time_before(rq_clock(rq), dl_next_period(dl_se))) { + if (unlikely(dl_se->dl_boosted || !start_dl_timer(p))) + return; + dl_se->dl_throttled = 1; + } +} + static int dl_runtime_exceeded(struct sched_dl_entity *dl_se) { @@ -922,6 +959,11 @@ static void dequeue_dl_entity(struct sched_dl_entity *dl_se) __dequeue_dl_entity(dl_se); } +static inline bool dl_is_constrained(struct sched_dl_entity *dl_se) +{ + return dl_se->dl_deadline < dl_se->dl_period; +} + static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags) { struct task_struct *pi_task = rt_mutex_get_top_task(p); @@ -948,6 +990,15 @@ static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags) } /* + * Check if a constrained deadline task was activated + * after the deadline but before the next period. + * If that is the case, the task will be throttled and + * the replenishment timer will be set to the next period. + */ + if (!p->dl.dl_throttled && dl_is_constrained(&p->dl)) + dl_check_constrained_dl(&p->dl); + + /* * If p is throttled, we do nothing. In fact, if it exhausted * its budget it needs a replenishment and, since it now is on * its rq, the bandwidth timer callback (which clearly has not @@ -958,7 +1009,7 @@ static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags) enqueue_dl_entity(&p->dl, pi_se, flags); - if (!task_current(rq, p) && tsk_nr_cpus_allowed(p) > 1) + if (!task_current(rq, p) && p->nr_cpus_allowed > 1) enqueue_pushable_dl_task(rq, p); } @@ -1032,9 +1083,9 @@ select_task_rq_dl(struct task_struct *p, int cpu, int sd_flag, int flags) * try to make it stay here, it might be important. */ if (unlikely(dl_task(curr)) && - (tsk_nr_cpus_allowed(curr) < 2 || + (curr->nr_cpus_allowed < 2 || !dl_entity_preempt(&p->dl, &curr->dl)) && - (tsk_nr_cpus_allowed(p) > 1)) { + (p->nr_cpus_allowed > 1)) { int target = find_later_rq(p); if (target != -1 && @@ -1055,7 +1106,7 @@ static void check_preempt_equal_dl(struct rq *rq, struct task_struct *p) * Current can't be migrated, useless to reschedule, * let's hope p can move out. */ - if (tsk_nr_cpus_allowed(rq->curr) == 1 || + if (rq->curr->nr_cpus_allowed == 1 || cpudl_find(&rq->rd->cpudl, rq->curr, NULL) == -1) return; @@ -1063,7 +1114,7 @@ static void check_preempt_equal_dl(struct rq *rq, struct task_struct *p) * p is migratable, so let's not schedule it and * see if it is pushed or pulled somewhere else. */ - if (tsk_nr_cpus_allowed(p) != 1 && + if (p->nr_cpus_allowed != 1 && cpudl_find(&rq->rd->cpudl, p, NULL) != -1) return; @@ -1178,7 +1229,7 @@ static void put_prev_task_dl(struct rq *rq, struct task_struct *p) { update_curr_dl(rq); - if (on_dl_rq(&p->dl) && tsk_nr_cpus_allowed(p) > 1) + if (on_dl_rq(&p->dl) && p->nr_cpus_allowed > 1) enqueue_pushable_dl_task(rq, p); } @@ -1235,7 +1286,7 @@ static void set_curr_task_dl(struct rq *rq) static int pick_dl_task(struct rq *rq, struct task_struct *p, int cpu) { if (!task_running(rq, p) && - cpumask_test_cpu(cpu, tsk_cpus_allowed(p))) + cpumask_test_cpu(cpu, &p->cpus_allowed)) return 1; return 0; } @@ -1279,7 +1330,7 @@ static int find_later_rq(struct task_struct *task) if (unlikely(!later_mask)) return -1; - if (tsk_nr_cpus_allowed(task) == 1) + if (task->nr_cpus_allowed == 1) return -1; /* @@ -1384,8 +1435,7 @@ static struct rq *find_lock_later_rq(struct task_struct *task, struct rq *rq) /* Retry if something changed. */ if (double_lock_balance(rq, later_rq)) { if (unlikely(task_rq(task) != rq || - !cpumask_test_cpu(later_rq->cpu, - tsk_cpus_allowed(task)) || + !cpumask_test_cpu(later_rq->cpu, &task->cpus_allowed) || task_running(rq, task) || !dl_task(task) || !task_on_rq_queued(task))) { @@ -1425,7 +1475,7 @@ static struct task_struct *pick_next_pushable_dl_task(struct rq *rq) BUG_ON(rq->cpu != task_cpu(p)); BUG_ON(task_current(rq, p)); - BUG_ON(tsk_nr_cpus_allowed(p) <= 1); + BUG_ON(p->nr_cpus_allowed <= 1); BUG_ON(!task_on_rq_queued(p)); BUG_ON(!dl_task(p)); @@ -1464,7 +1514,7 @@ retry: */ if (dl_task(rq->curr) && dl_time_before(next_task->dl.deadline, rq->curr->dl.deadline) && - tsk_nr_cpus_allowed(rq->curr) > 1) { + rq->curr->nr_cpus_allowed > 1) { resched_curr(rq); return 0; } @@ -1611,9 +1661,9 @@ static void task_woken_dl(struct rq *rq, struct task_struct *p) { if (!task_running(rq, p) && !test_tsk_need_resched(rq->curr) && - tsk_nr_cpus_allowed(p) > 1 && + p->nr_cpus_allowed > 1 && dl_task(rq->curr) && - (tsk_nr_cpus_allowed(rq->curr) < 2 || + (rq->curr->nr_cpus_allowed < 2 || !dl_entity_preempt(&p->dl, &rq->curr->dl))) { push_dl_tasks(rq); } @@ -1727,7 +1777,7 @@ static void switched_to_dl(struct rq *rq, struct task_struct *p) if (rq->curr != p) { #ifdef CONFIG_SMP - if (tsk_nr_cpus_allowed(p) > 1 && rq->dl.overloaded) + if (p->nr_cpus_allowed > 1 && rq->dl.overloaded) queue_push_tasks(rq); #endif if (dl_task(rq->curr)) diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c index 109adc0e9cb9..38f019324f1a 100644 --- a/kernel/sched/debug.c +++ b/kernel/sched/debug.c @@ -11,7 +11,8 @@ */ #include <linux/proc_fs.h> -#include <linux/sched.h> +#include <linux/sched/mm.h> +#include <linux/sched/task.h> #include <linux/seq_file.h> #include <linux/kallsyms.h> #include <linux/utsname.h> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 274c747a01ce..dea138964b91 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -20,7 +20,9 @@ * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra */ -#include <linux/sched.h> +#include <linux/sched/mm.h> +#include <linux/sched/topology.h> + #include <linux/latencytop.h> #include <linux/cpumask.h> #include <linux/cpuidle.h> @@ -1551,7 +1553,7 @@ static void task_numa_compare(struct task_numa_env *env, */ if (cur) { /* Skip this swap candidate if cannot move to the source cpu */ - if (!cpumask_test_cpu(env->src_cpu, tsk_cpus_allowed(cur))) + if (!cpumask_test_cpu(env->src_cpu, &cur->cpus_allowed)) goto unlock; /* @@ -1661,7 +1663,7 @@ static void task_numa_find_cpu(struct task_numa_env *env, for_each_cpu(cpu, cpumask_of_node(env->dst_nid)) { /* Skip this CPU if the source task cannot migrate */ - if (!cpumask_test_cpu(cpu, tsk_cpus_allowed(env->p))) + if (!cpumask_test_cpu(cpu, &env->p->cpus_allowed)) continue; env->dst_cpu = cpu; @@ -5458,7 +5460,7 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, /* Skip over this group if it has no CPUs allowed */ if (!cpumask_intersects(sched_group_cpus(group), - tsk_cpus_allowed(p))) + &p->cpus_allowed)) continue; local_group = cpumask_test_cpu(this_cpu, @@ -5578,7 +5580,7 @@ find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu) return cpumask_first(sched_group_cpus(group)); /* Traverse only the allowed CPUs */ - for_each_cpu_and(i, sched_group_cpus(group), tsk_cpus_allowed(p)) { + for_each_cpu_and(i, sched_group_cpus(group), &p->cpus_allowed) { if (idle_cpu(i)) { struct rq *rq = cpu_rq(i); struct cpuidle_state *idle = idle_get_state(rq); @@ -5717,7 +5719,7 @@ static int select_idle_core(struct task_struct *p, struct sched_domain *sd, int if (!test_idle_cores(target, false)) return -1; - cpumask_and(cpus, sched_domain_span(sd), tsk_cpus_allowed(p)); + cpumask_and(cpus, sched_domain_span(sd), &p->cpus_allowed); for_each_cpu_wrap(core, cpus, target, wrap) { bool idle = true; @@ -5751,7 +5753,7 @@ static int select_idle_smt(struct task_struct *p, struct sched_domain *sd, int t return -1; for_each_cpu(cpu, cpu_smt_mask(target)) { - if (!cpumask_test_cpu(cpu, tsk_cpus_allowed(p))) + if (!cpumask_test_cpu(cpu, &p->cpus_allowed)) continue; if (idle_cpu(cpu)) return cpu; @@ -5797,13 +5799,13 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t * Due to large variance we need a large fuzz factor; hackbench in * particularly is sensitive here. */ - if ((avg_idle / 512) < avg_cost) + if (sched_feat(SIS_AVG_CPU) && (avg_idle / 512) < avg_cost) return -1; time = local_clock(); for_each_cpu_wrap(cpu, sched_domain_span(sd), target, wrap) { - if (!cpumask_test_cpu(cpu, tsk_cpus_allowed(p))) + if (!cpumask_test_cpu(cpu, &p->cpus_allowed)) continue; if (idle_cpu(cpu)) break; @@ -5958,7 +5960,7 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f if (sd_flag & SD_BALANCE_WAKE) { record_wakee(p); want_affine = !wake_wide(p) && !wake_cap(p, cpu, prev_cpu) - && cpumask_test_cpu(cpu, tsk_cpus_allowed(p)); + && cpumask_test_cpu(cpu, &p->cpus_allowed); } rcu_read_lock(); @@ -6698,7 +6700,7 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env) if (throttled_lb_pair(task_group(p), env->src_cpu, env->dst_cpu)) return 0; - if (!cpumask_test_cpu(env->dst_cpu, tsk_cpus_allowed(p))) { + if (!cpumask_test_cpu(env->dst_cpu, &p->cpus_allowed)) { int cpu; schedstat_inc(p->se.statistics.nr_failed_migrations_affine); @@ -6718,7 +6720,7 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env) /* Prevent to re-select dst_cpu via env's cpus */ for_each_cpu_and(cpu, env->dst_grpmask, env->cpus) { - if (cpumask_test_cpu(cpu, tsk_cpus_allowed(p))) { + if (cpumask_test_cpu(cpu, &p->cpus_allowed)) { env->flags |= LBF_DST_PINNED; env->new_dst_cpu = cpu; break; @@ -7252,7 +7254,7 @@ check_cpu_capacity(struct rq *rq, struct sched_domain *sd) /* * Group imbalance indicates (and tries to solve) the problem where balancing - * groups is inadequate due to tsk_cpus_allowed() constraints. + * groups is inadequate due to ->cpus_allowed constraints. * * Imagine a situation of two groups of 4 cpus each and 4 tasks each with a * cpumask covering 1 cpu of the first group and 3 cpus of the second group. @@ -8211,8 +8213,7 @@ more_balance: * if the curr task on busiest cpu can't be * moved to this_cpu */ - if (!cpumask_test_cpu(this_cpu, - tsk_cpus_allowed(busiest->curr))) { + if (!cpumask_test_cpu(this_cpu, &busiest->curr->cpus_allowed)) { raw_spin_unlock_irqrestore(&busiest->lock, flags); env.flags |= LBF_ALL_PINNED; diff --git a/kernel/sched/features.h b/kernel/sched/features.h index 69631fa46c2f..1b3c8189b286 100644 --- a/kernel/sched/features.h +++ b/kernel/sched/features.h @@ -51,6 +51,11 @@ SCHED_FEAT(NONTASK_CAPACITY, true) */ SCHED_FEAT(TTWU_QUEUE, true) +/* + * When doing wakeups, attempt to limit superfluous scans of the LLC domain. + */ +SCHED_FEAT(SIS_AVG_CPU, false) + #ifdef HAVE_RT_PUSH_IPI /* * In order to avoid a thundering herd attack of CPUs that are diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c index 6a4bae0a649d..ac6d5176463d 100644 --- a/kernel/sched/idle.c +++ b/kernel/sched/idle.c @@ -2,6 +2,7 @@ * Generic entry point for the idle threads */ #include <linux/sched.h> +#include <linux/sched/idle.h> #include <linux/cpu.h> #include <linux/cpuidle.h> #include <linux/cpuhotplug.h> diff --git a/kernel/sched/loadavg.c b/kernel/sched/loadavg.c index a2d6eb71f06b..f15fb2bdbc0d 100644 --- a/kernel/sched/loadavg.c +++ b/kernel/sched/loadavg.c @@ -7,6 +7,7 @@ */ #include <linux/export.h> +#include <linux/sched/loadavg.h> #include "sched.h" @@ -168,7 +169,7 @@ static inline int calc_load_write_idx(void) * If the folding window started, make sure we start writing in the * next idle-delta. */ - if (!time_before(jiffies, calc_load_update)) + if (!time_before(jiffies, READ_ONCE(calc_load_update))) idx++; return idx & 1; @@ -201,8 +202,9 @@ void calc_load_exit_idle(void) struct rq *this_rq = this_rq(); /* - * If we're still before the sample window, we're done. + * If we're still before the pending sample window, we're done. */ + this_rq->calc_load_update = READ_ONCE(calc_load_update); if (time_before(jiffies, this_rq->calc_load_update)) return; @@ -211,7 +213,6 @@ void calc_load_exit_idle(void) * accounted through the nohz accounting, so skip the entire deal and * sync up for the next window. */ - this_rq->calc_load_update = calc_load_update; if (time_before(jiffies, this_rq->calc_load_update + 10)) this_rq->calc_load_update += LOAD_FREQ; } @@ -307,13 +308,15 @@ calc_load_n(unsigned long load, unsigned long exp, */ static void calc_global_nohz(void) { + unsigned long sample_window; long delta, active, n; - if (!time_before(jiffies, calc_load_update + 10)) { + sample_window = READ_ONCE(calc_load_update); + if (!time_before(jiffies, sample_window + 10)) { /* * Catch-up, fold however many we are behind still */ - delta = jiffies - calc_load_update - 10; + delta = jiffies - sample_window - 10; n = 1 + (delta / LOAD_FREQ); active = atomic_long_read(&calc_load_tasks); @@ -323,7 +326,7 @@ static void calc_global_nohz(void) avenrun[1] = calc_load_n(avenrun[1], EXP_5, active, n); avenrun[2] = calc_load_n(avenrun[2], EXP_15, active, n); - calc_load_update += n * LOAD_FREQ; + WRITE_ONCE(calc_load_update, sample_window + n * LOAD_FREQ); } /* @@ -351,9 +354,11 @@ static inline void calc_global_nohz(void) { } */ void calc_global_load(unsigned long ticks) { + unsigned long sample_window; long active, delta; - if (time_before(jiffies, calc_load_update + 10)) + sample_window = READ_ONCE(calc_load_update); + if (time_before(jiffies, sample_window + 10)) return; /* @@ -370,7 +375,7 @@ void calc_global_load(unsigned long ticks) avenrun[1] = calc_load(avenrun[1], EXP_5, active); avenrun[2] = calc_load(avenrun[2], EXP_15, active); - calc_load_update += LOAD_FREQ; + WRITE_ONCE(calc_load_update, sample_window + LOAD_FREQ); /* * In case we idled for multiple LOAD_FREQ intervals, catch up in bulk. diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index e8836cfc4cdb..9f3e40226dec 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -335,7 +335,7 @@ static void inc_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) rt_rq = &rq_of_rt_rq(rt_rq)->rt; rt_rq->rt_nr_total++; - if (tsk_nr_cpus_allowed(p) > 1) + if (p->nr_cpus_allowed > 1) rt_rq->rt_nr_migratory++; update_rt_migration(rt_rq); @@ -352,7 +352,7 @@ static void dec_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) rt_rq = &rq_of_rt_rq(rt_rq)->rt; rt_rq->rt_nr_total--; - if (tsk_nr_cpus_allowed(p) > 1) + if (p->nr_cpus_allowed > 1) rt_rq->rt_nr_migratory--; update_rt_migration(rt_rq); @@ -1324,7 +1324,7 @@ enqueue_task_rt(struct rq *rq, struct task_struct *p, int flags) enqueue_rt_entity(rt_se, flags); - if (!task_current(rq, p) && tsk_nr_cpus_allowed(p) > 1) + if (!task_current(rq, p) && p->nr_cpus_allowed > 1) enqueue_pushable_task(rq, p); } @@ -1413,7 +1413,7 @@ select_task_rq_rt(struct task_struct *p, int cpu, int sd_flag, int flags) * will have to sort it out. */ if (curr && unlikely(rt_task(curr)) && - (tsk_nr_cpus_allowed(curr) < 2 || + (curr->nr_cpus_allowed < 2 || curr->prio <= p->prio)) { int target = find_lowest_rq(p); @@ -1437,7 +1437,7 @@ static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p) * Current can't be migrated, useless to reschedule, * let's hope p can move out. */ - if (tsk_nr_cpus_allowed(rq->curr) == 1 || + if (rq->curr->nr_cpus_allowed == 1 || !cpupri_find(&rq->rd->cpupri, rq->curr, NULL)) return; @@ -1445,7 +1445,7 @@ static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p) * p is migratable, so let's not schedule it and * see if it is pushed or pulled somewhere else. */ - if (tsk_nr_cpus_allowed(p) != 1 + if (p->nr_cpus_allowed != 1 && cpupri_find(&rq->rd->cpupri, p, NULL)) return; @@ -1579,7 +1579,7 @@ static void put_prev_task_rt(struct rq *rq, struct task_struct *p) * The previous task needs to be made eligible for pushing * if it is still active */ - if (on_rt_rq(&p->rt) && tsk_nr_cpus_allowed(p) > 1) + if (on_rt_rq(&p->rt) && p->nr_cpus_allowed > 1) enqueue_pushable_task(rq, p); } @@ -1591,7 +1591,7 @@ static void put_prev_task_rt(struct rq *rq, struct task_struct *p) static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu) { if (!task_running(rq, p) && - cpumask_test_cpu(cpu, tsk_cpus_allowed(p))) + cpumask_test_cpu(cpu, &p->cpus_allowed)) return 1; return 0; } @@ -1629,7 +1629,7 @@ static int find_lowest_rq(struct task_struct *task) if (unlikely(!lowest_mask)) return -1; - if (tsk_nr_cpus_allowed(task) == 1) + if (task->nr_cpus_allowed == 1) return -1; /* No other targets possible */ if (!cpupri_find(&task_rq(task)->rd->cpupri, task, lowest_mask)) @@ -1726,8 +1726,7 @@ static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq) * Also make sure that it wasn't scheduled on its rq. */ if (unlikely(task_rq(task) != rq || - !cpumask_test_cpu(lowest_rq->cpu, - tsk_cpus_allowed(task)) || + !cpumask_test_cpu(lowest_rq->cpu, &task->cpus_allowed) || task_running(rq, task) || !rt_task(task) || !task_on_rq_queued(task))) { @@ -1762,7 +1761,7 @@ static struct task_struct *pick_next_pushable_task(struct rq *rq) BUG_ON(rq->cpu != task_cpu(p)); BUG_ON(task_current(rq, p)); - BUG_ON(tsk_nr_cpus_allowed(p) <= 1); + BUG_ON(p->nr_cpus_allowed <= 1); BUG_ON(!task_on_rq_queued(p)); BUG_ON(!rt_task(p)); @@ -2122,9 +2121,9 @@ static void task_woken_rt(struct rq *rq, struct task_struct *p) { if (!task_running(rq, p) && !test_tsk_need_resched(rq->curr) && - tsk_nr_cpus_allowed(p) > 1 && + p->nr_cpus_allowed > 1 && (dl_task(rq->curr) || rt_task(rq->curr)) && - (tsk_nr_cpus_allowed(rq->curr) < 2 || + (rq->curr->nr_cpus_allowed < 2 || rq->curr->prio <= p->prio)) push_rt_tasks(rq); } @@ -2197,7 +2196,7 @@ static void switched_to_rt(struct rq *rq, struct task_struct *p) */ if (task_on_rq_queued(p) && rq->curr != p) { #ifdef CONFIG_SMP - if (tsk_nr_cpus_allowed(p) > 1 && rq->rt.overloaded) + if (p->nr_cpus_allowed > 1 && rq->rt.overloaded) queue_push_tasks(rq); #endif /* CONFIG_SMP */ if (p->prio < rq->curr->prio) diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 71b10a9b73cf..5cbf92214ad8 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1,9 +1,26 @@ #include <linux/sched.h> +#include <linux/sched/autogroup.h> #include <linux/sched/sysctl.h> +#include <linux/sched/topology.h> #include <linux/sched/rt.h> -#include <linux/u64_stats_sync.h> #include <linux/sched/deadline.h> +#include <linux/sched/clock.h> +#include <linux/sched/wake_q.h> +#include <linux/sched/signal.h> +#include <linux/sched/numa_balancing.h> +#include <linux/sched/mm.h> +#include <linux/sched/cpufreq.h> +#include <linux/sched/stat.h> +#include <linux/sched/nohz.h> +#include <linux/sched/debug.h> +#include <linux/sched/hotplug.h> +#include <linux/sched/task.h> +#include <linux/sched/task_stack.h> +#include <linux/sched/cputime.h> +#include <linux/sched/init.h> + +#include <linux/u64_stats_sync.h> #include <linux/kernel_stat.h> #include <linux/binfmts.h> #include <linux/mutex.h> @@ -13,6 +30,10 @@ #include <linux/tick.h> #include <linux/slab.h> +#ifdef CONFIG_PARAVIRT +#include <asm/paravirt.h> +#endif + #include "cpupri.h" #include "cpudeadline.h" #include "cpuacct.h" @@ -1817,7 +1838,6 @@ extern void print_rt_stats(struct seq_file *m, int cpu); extern void print_dl_stats(struct seq_file *m, int cpu); extern void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq); - #ifdef CONFIG_NUMA_BALANCING extern void show_numa_stats(struct task_struct *p, struct seq_file *m); diff --git a/kernel/sched/stats.h b/kernel/sched/stats.h index bf0da0aa0a14..d5710651043b 100644 --- a/kernel/sched/stats.h +++ b/kernel/sched/stats.h @@ -164,114 +164,3 @@ sched_info_switch(struct rq *rq, #define sched_info_arrive(rq, next) do { } while (0) #define sched_info_switch(rq, t, next) do { } while (0) #endif /* CONFIG_SCHED_INFO */ - -/* - * The following are functions that support scheduler-internal time accounting. - * These functions are generally called at the timer tick. None of this depends - * on CONFIG_SCHEDSTATS. - */ - -/** - * get_running_cputimer - return &tsk->signal->cputimer if cputimer is running - * - * @tsk: Pointer to target task. - */ -#ifdef CONFIG_POSIX_TIMERS -static inline -struct thread_group_cputimer *get_running_cputimer(struct task_struct *tsk) -{ - struct thread_group_cputimer *cputimer = &tsk->signal->cputimer; - - /* Check if cputimer isn't running. This is accessed without locking. */ - if (!READ_ONCE(cputimer->running)) - return NULL; - - /* - * After we flush the task's sum_exec_runtime to sig->sum_sched_runtime - * in __exit_signal(), we won't account to the signal struct further - * cputime consumed by that task, even though the task can still be - * ticking after __exit_signal(). - * - * In order to keep a consistent behaviour between thread group cputime - * and thread group cputimer accounting, lets also ignore the cputime - * elapsing after __exit_signal() in any thread group timer running. - * - * This makes sure that POSIX CPU clocks and timers are synchronized, so - * that a POSIX CPU timer won't expire while the corresponding POSIX CPU - * clock delta is behind the expiring timer value. - */ - if (unlikely(!tsk->sighand)) - return NULL; - - return cputimer; -} -#else -static inline -struct thread_group_cputimer *get_running_cputimer(struct task_struct *tsk) -{ - return NULL; -} -#endif - -/** - * account_group_user_time - Maintain utime for a thread group. - * - * @tsk: Pointer to task structure. - * @cputime: Time value by which to increment the utime field of the - * thread_group_cputime structure. - * - * If thread group time is being maintained, get the structure for the - * running CPU and update the utime field there. - */ -static inline void account_group_user_time(struct task_struct *tsk, - u64 cputime) -{ - struct thread_group_cputimer *cputimer = get_running_cputimer(tsk); - - if (!cputimer) - return; - - atomic64_add(cputime, &cputimer->cputime_atomic.utime); -} - -/** - * account_group_system_time - Maintain stime for a thread group. - * - * @tsk: Pointer to task structure. - * @cputime: Time value by which to increment the stime field of the - * thread_group_cputime structure. - * - * If thread group time is being maintained, get the structure for the - * running CPU and update the stime field there. - */ -static inline void account_group_system_time(struct task_struct *tsk, - u64 cputime) -{ - struct thread_group_cputimer *cputimer = get_running_cputimer(tsk); - - if (!cputimer) - return; - - atomic64_add(cputime, &cputimer->cputime_atomic.stime); -} - -/** - * account_group_exec_runtime - Maintain exec runtime for a thread group. - * - * @tsk: Pointer to task structure. - * @ns: Time value by which to increment the sum_exec_runtime field - * of the thread_group_cputime structure. - * - * If thread group time is being maintained, get the structure for the - * running CPU and update the sum_exec_runtime field there. - */ -static inline void account_group_exec_runtime(struct task_struct *tsk, - unsigned long long ns) -{ - struct thread_group_cputimer *cputimer = get_running_cputimer(tsk); - - if (!cputimer) - return; - - atomic64_add(ns, &cputimer->cputime_atomic.sum_exec_runtime); -} diff --git a/kernel/sched/swait.c b/kernel/sched/swait.c index 82f0dff90030..3d5610dcce11 100644 --- a/kernel/sched/swait.c +++ b/kernel/sched/swait.c @@ -1,4 +1,4 @@ -#include <linux/sched.h> +#include <linux/sched/signal.h> #include <linux/swait.h> void __init_swait_queue_head(struct swait_queue_head *q, const char *name, diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c index 9453efe9b25a..b8c84c6dee64 100644 --- a/kernel/sched/wait.c +++ b/kernel/sched/wait.c @@ -5,7 +5,8 @@ */ #include <linux/init.h> #include <linux/export.h> -#include <linux/sched.h> +#include <linux/sched/signal.h> +#include <linux/sched/debug.h> #include <linux/mm.h> #include <linux/wait.h> #include <linux/hash.h> @@ -241,6 +242,45 @@ long prepare_to_wait_event(wait_queue_head_t *q, wait_queue_t *wait, int state) } EXPORT_SYMBOL(prepare_to_wait_event); +/* + * Note! These two wait functions are entered with the + * wait-queue lock held (and interrupts off in the _irq + * case), so there is no race with testing the wakeup + * condition in the caller before they add the wait + * entry to the wake queue. + */ +int do_wait_intr(wait_queue_head_t *wq, wait_queue_t *wait) +{ + if (likely(list_empty(&wait->task_list))) + __add_wait_queue_tail(wq, wait); + + set_current_state(TASK_INTERRUPTIBLE); + if (signal_pending(current)) + return -ERESTARTSYS; + + spin_unlock(&wq->lock); + schedule(); + spin_lock(&wq->lock); + return 0; +} +EXPORT_SYMBOL(do_wait_intr); + +int do_wait_intr_irq(wait_queue_head_t *wq, wait_queue_t *wait) +{ + if (likely(list_empty(&wait->task_list))) + __add_wait_queue_tail(wq, wait); + + set_current_state(TASK_INTERRUPTIBLE); + if (signal_pending(current)) + return -ERESTARTSYS; + + spin_unlock_irq(&wq->lock); + schedule(); + spin_lock_irq(&wq->lock); + return 0; +} +EXPORT_SYMBOL(do_wait_intr_irq); + /** * finish_wait - clean up after waiting in a queue * @q: waitqueue waited on |