diff options
Diffstat (limited to 'kernel/sched/core.c')
-rw-r--r-- | kernel/sched/core.c | 131 |
1 files changed, 66 insertions, 65 deletions
diff --git a/kernel/sched/core.c b/kernel/sched/core.c index f0f831e8a345..123673291ffb 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -306,6 +306,9 @@ __read_mostly int scheduler_running; */ int sysctl_sched_rt_runtime = 950000; +/* cpus with isolated domains */ +cpumask_var_t cpu_isolated_map; + /* * this_rq_lock - lock this runqueue and disable interrupts. */ @@ -690,6 +693,23 @@ static inline bool got_nohz_idle_kick(void) bool sched_can_stop_tick(void) { /* + * FIFO realtime policy runs the highest priority task. Other runnable + * tasks are of a lower priority. The scheduler tick does nothing. + */ + if (current->policy == SCHED_FIFO) + return true; + + /* + * Round-robin realtime tasks time slice with other tasks at the same + * realtime priority. Is this task the only one at this priority? + */ + if (current->policy == SCHED_RR) { + struct sched_rt_entity *rt_se = ¤t->rt; + + return rt_se->run_list.prev == rt_se->run_list.next; + } + + /* * More than one running task need preemption. * nr_running update is assumed to be visible * after IPI is sent from wakers. @@ -2818,7 +2838,7 @@ asmlinkage __visible void __sched schedule_user(void) * we find a better solution. * * NB: There are buggy callers of this function. Ideally we - * should warn if prev_state != IN_USER, but that will trigger + * should warn if prev_state != CONTEXT_USER, but that will trigger * too frequently to make sense yet. */ enum ctx_state prev_state = exception_enter(); @@ -3034,6 +3054,8 @@ void rt_mutex_setprio(struct task_struct *p, int prio) } else { if (dl_prio(oldprio)) p->dl.dl_boosted = 0; + if (rt_prio(oldprio)) + p->rt.timeout = 0; p->sched_class = &fair_sched_class; } @@ -3278,15 +3300,18 @@ static void __setscheduler_params(struct task_struct *p, /* Actually do priority change: must hold pi & rq lock. */ static void __setscheduler(struct rq *rq, struct task_struct *p, - const struct sched_attr *attr) + const struct sched_attr *attr, bool keep_boost) { __setscheduler_params(p, attr); /* - * If we get here, there was no pi waiters boosting the - * task. It is safe to use the normal prio. + * Keep a potential priority boosting if called from + * sched_setscheduler(). */ - p->prio = normal_prio(p); + if (keep_boost) + p->prio = rt_mutex_get_effective_prio(p, normal_prio(p)); + else + p->prio = normal_prio(p); if (dl_prio(p->prio)) p->sched_class = &dl_sched_class; @@ -3386,7 +3411,7 @@ static int __sched_setscheduler(struct task_struct *p, int newprio = dl_policy(attr->sched_policy) ? MAX_DL_PRIO - 1 : MAX_RT_PRIO - 1 - attr->sched_priority; int retval, oldprio, oldpolicy = -1, queued, running; - int policy = attr->sched_policy; + int new_effective_prio, policy = attr->sched_policy; unsigned long flags; const struct sched_class *prev_class; struct rq *rq; @@ -3568,15 +3593,14 @@ change: oldprio = p->prio; /* - * Special case for priority boosted tasks. - * - * If the new priority is lower or equal (user space view) - * than the current (boosted) priority, we just store the new + * Take priority boosted tasks into account. If the new + * effective priority is unchanged, we just store the new * normal parameters and do not touch the scheduler class and * the runqueue. This will be done when the task deboost * itself. */ - if (rt_mutex_check_prio(p, newprio)) { + new_effective_prio = rt_mutex_get_effective_prio(p, newprio); + if (new_effective_prio == oldprio) { __setscheduler_params(p, attr); task_rq_unlock(rq, p, &flags); return 0; @@ -3590,7 +3614,7 @@ change: put_prev_task(rq, p); prev_class = p->sched_class; - __setscheduler(rq, p, attr); + __setscheduler(rq, p, attr, true); if (running) p->sched_class->set_curr_task(rq); @@ -4365,10 +4389,7 @@ long __sched io_schedule_timeout(long timeout) long ret; current->in_iowait = 1; - if (old_iowait) - blk_schedule_flush_plug(current); - else - blk_flush_plug(current); + blk_schedule_flush_plug(current); delayacct_blkio_start(); rq = raw_rq(); @@ -5318,36 +5339,13 @@ static int sched_cpu_active(struct notifier_block *nfb, static int sched_cpu_inactive(struct notifier_block *nfb, unsigned long action, void *hcpu) { - unsigned long flags; - long cpu = (long)hcpu; - struct dl_bw *dl_b; - switch (action & ~CPU_TASKS_FROZEN) { case CPU_DOWN_PREPARE: - set_cpu_active(cpu, false); - - /* explicitly allow suspend */ - if (!(action & CPU_TASKS_FROZEN)) { - bool overflow; - int cpus; - - rcu_read_lock_sched(); - dl_b = dl_bw_of(cpu); - - raw_spin_lock_irqsave(&dl_b->lock, flags); - cpus = dl_bw_cpus(cpu); - overflow = __dl_overflow(dl_b, cpus, 0, 0); - raw_spin_unlock_irqrestore(&dl_b->lock, flags); - - rcu_read_unlock_sched(); - - if (overflow) - return notifier_from_errno(-EBUSY); - } + set_cpu_active((long)hcpu, false); return NOTIFY_OK; + default: + return NOTIFY_DONE; } - - return NOTIFY_DONE; } static int __init migration_init(void) @@ -5428,17 +5426,6 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level, break; } - /* - * Even though we initialize ->capacity to something semi-sane, - * we leave capacity_orig unset. This allows us to detect if - * domain iteration is still funny without causing /0 traps. - */ - if (!group->sgc->capacity_orig) { - printk(KERN_CONT "\n"); - printk(KERN_ERR "ERROR: domain->cpu_capacity not set\n"); - break; - } - if (!cpumask_weight(sched_group_cpus(group))) { printk(KERN_CONT "\n"); printk(KERN_ERR "ERROR: empty group\n"); @@ -5811,9 +5798,6 @@ cpu_attach_domain(struct sched_domain *sd, struct root_domain *rd, int cpu) update_top_cache_domain(cpu); } -/* cpus with isolated domains */ -static cpumask_var_t cpu_isolated_map; - /* Setup the mask of cpus configured for isolated domains */ static int __init isolated_cpu_setup(char *str) { @@ -5922,7 +5906,6 @@ build_overlap_sched_groups(struct sched_domain *sd, int cpu) * die on a /0 trap. */ sg->sgc->capacity = SCHED_CAPACITY_SCALE * cpumask_weight(sg_span); - sg->sgc->capacity_orig = sg->sgc->capacity; /* * Make sure the first group of this domain contains the @@ -6233,6 +6216,7 @@ sd_init(struct sched_domain_topology_level *tl, int cpu) */ if (sd->flags & SD_SHARE_CPUCAPACITY) { + sd->flags |= SD_PREFER_SIBLING; sd->imbalance_pct = 110; sd->smt_gain = 1178; /* ~15% */ @@ -6998,7 +6982,6 @@ static int cpuset_cpu_active(struct notifier_block *nfb, unsigned long action, */ case CPU_ONLINE: - case CPU_DOWN_FAILED: cpuset_update_active_cpus(true); break; default: @@ -7010,8 +6993,26 @@ static int cpuset_cpu_active(struct notifier_block *nfb, unsigned long action, static int cpuset_cpu_inactive(struct notifier_block *nfb, unsigned long action, void *hcpu) { + unsigned long flags; + long cpu = (long)hcpu; + struct dl_bw *dl_b; + bool overflow; + int cpus; + switch (action) { case CPU_DOWN_PREPARE: + rcu_read_lock_sched(); + dl_b = dl_bw_of(cpu); + + raw_spin_lock_irqsave(&dl_b->lock, flags); + cpus = dl_bw_cpus(cpu); + overflow = __dl_overflow(dl_b, cpus, 0, 0); + raw_spin_unlock_irqrestore(&dl_b->lock, flags); + + rcu_read_unlock_sched(); + + if (overflow) + return notifier_from_errno(-EBUSY); cpuset_update_active_cpus(false); break; case CPU_DOWN_PREPARE_FROZEN: @@ -7156,8 +7157,8 @@ void __init sched_init(void) rq->calc_load_active = 0; rq->calc_load_update = jiffies + LOAD_FREQ; init_cfs_rq(&rq->cfs); - init_rt_rq(&rq->rt, rq); - init_dl_rq(&rq->dl, rq); + init_rt_rq(&rq->rt); + init_dl_rq(&rq->dl); #ifdef CONFIG_FAIR_GROUP_SCHED root_task_group.shares = ROOT_TASK_GROUP_LOAD; INIT_LIST_HEAD(&rq->leaf_cfs_rq_list); @@ -7197,7 +7198,7 @@ void __init sched_init(void) #ifdef CONFIG_SMP rq->sd = NULL; rq->rd = NULL; - rq->cpu_capacity = SCHED_CAPACITY_SCALE; + rq->cpu_capacity = rq->cpu_capacity_orig = SCHED_CAPACITY_SCALE; rq->post_schedule = 0; rq->active_balance = 0; rq->next_balance = jiffies; @@ -7340,7 +7341,7 @@ static void normalize_task(struct rq *rq, struct task_struct *p) queued = task_on_rq_queued(p); if (queued) dequeue_task(rq, p, 0); - __setscheduler(rq, p, &attr); + __setscheduler(rq, p, &attr, false); if (queued) { enqueue_task(rq, p, 0); resched_curr(rq); @@ -7796,7 +7797,7 @@ static int sched_rt_global_constraints(void) } #endif /* CONFIG_RT_GROUP_SCHED */ -static int sched_dl_global_constraints(void) +static int sched_dl_global_validate(void) { u64 runtime = global_rt_runtime(); u64 period = global_rt_period(); @@ -7897,11 +7898,11 @@ int sched_rt_handler(struct ctl_table *table, int write, if (ret) goto undo; - ret = sched_rt_global_constraints(); + ret = sched_dl_global_validate(); if (ret) goto undo; - ret = sched_dl_global_constraints(); + ret = sched_rt_global_constraints(); if (ret) goto undo; |