diff options
Diffstat (limited to 'kernel/sched')
-rw-r--r-- | kernel/sched/core.c | 11 | ||||
-rw-r--r-- | kernel/sched/deadline.c | 50 | ||||
-rw-r--r-- | kernel/sched/debug.c | 7 | ||||
-rw-r--r-- | kernel/sched/fair.c | 56 | ||||
-rw-r--r-- | kernel/sched/sched.h | 11 | ||||
-rw-r--r-- | kernel/sched/topology.c | 6 | ||||
-rw-r--r-- | kernel/sched/wait.c | 85 |
7 files changed, 137 insertions, 89 deletions
diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 6d2c7ff9ba98..18a6966567da 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1173,6 +1173,10 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu) WARN_ON_ONCE(debug_locks && !(lockdep_is_held(&p->pi_lock) || lockdep_is_held(&task_rq(p)->lock))); #endif + /* + * Clearly, migrating tasks to offline CPUs is a fairly daft thing. + */ + WARN_ON_ONCE(!cpu_online(new_cpu)); #endif trace_sched_migrate_task(p, new_cpu); @@ -5556,16 +5560,15 @@ static void cpuset_cpu_active(void) * operation in the resume sequence, just build a single sched * domain, ignoring cpusets. */ - num_cpus_frozen--; - if (likely(num_cpus_frozen)) { - partition_sched_domains(1, NULL, NULL); + partition_sched_domains(1, NULL, NULL); + if (--num_cpus_frozen) return; - } /* * This is the last CPU online operation. So fall through and * restore the original sched domains by considering the * cpuset configurations. */ + cpuset_force_rebuild(); } cpuset_update_active_cpus(); } diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index 9e38df7649f4..0191ec7667c3 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -296,7 +296,7 @@ static inline int is_leftmost(struct task_struct *p, struct dl_rq *dl_rq) { struct sched_dl_entity *dl_se = &p->dl; - return dl_rq->rb_leftmost == &dl_se->rb_node; + return dl_rq->root.rb_leftmost == &dl_se->rb_node; } void init_dl_bandwidth(struct dl_bandwidth *dl_b, u64 period, u64 runtime) @@ -320,7 +320,7 @@ void init_dl_bw(struct dl_bw *dl_b) void init_dl_rq(struct dl_rq *dl_rq) { - dl_rq->rb_root = RB_ROOT; + dl_rq->root = RB_ROOT_CACHED; #ifdef CONFIG_SMP /* zero means no -deadline tasks */ @@ -328,7 +328,7 @@ void init_dl_rq(struct dl_rq *dl_rq) dl_rq->dl_nr_migratory = 0; dl_rq->overloaded = 0; - dl_rq->pushable_dl_tasks_root = RB_ROOT; + dl_rq->pushable_dl_tasks_root = RB_ROOT_CACHED; #else init_dl_bw(&dl_rq->dl_bw); #endif @@ -410,10 +410,10 @@ static void dec_dl_migration(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq) static void enqueue_pushable_dl_task(struct rq *rq, struct task_struct *p) { struct dl_rq *dl_rq = &rq->dl; - struct rb_node **link = &dl_rq->pushable_dl_tasks_root.rb_node; + struct rb_node **link = &dl_rq->pushable_dl_tasks_root.rb_root.rb_node; struct rb_node *parent = NULL; struct task_struct *entry; - int leftmost = 1; + bool leftmost = true; BUG_ON(!RB_EMPTY_NODE(&p->pushable_dl_tasks)); @@ -425,17 +425,16 @@ static void enqueue_pushable_dl_task(struct rq *rq, struct task_struct *p) link = &parent->rb_left; else { link = &parent->rb_right; - leftmost = 0; + leftmost = false; } } - if (leftmost) { - dl_rq->pushable_dl_tasks_leftmost = &p->pushable_dl_tasks; + if (leftmost) dl_rq->earliest_dl.next = p->dl.deadline; - } rb_link_node(&p->pushable_dl_tasks, parent, link); - rb_insert_color(&p->pushable_dl_tasks, &dl_rq->pushable_dl_tasks_root); + rb_insert_color_cached(&p->pushable_dl_tasks, + &dl_rq->pushable_dl_tasks_root, leftmost); } static void dequeue_pushable_dl_task(struct rq *rq, struct task_struct *p) @@ -445,24 +444,23 @@ static void dequeue_pushable_dl_task(struct rq *rq, struct task_struct *p) if (RB_EMPTY_NODE(&p->pushable_dl_tasks)) return; - if (dl_rq->pushable_dl_tasks_leftmost == &p->pushable_dl_tasks) { + if (dl_rq->pushable_dl_tasks_root.rb_leftmost == &p->pushable_dl_tasks) { struct rb_node *next_node; next_node = rb_next(&p->pushable_dl_tasks); - dl_rq->pushable_dl_tasks_leftmost = next_node; if (next_node) { dl_rq->earliest_dl.next = rb_entry(next_node, struct task_struct, pushable_dl_tasks)->dl.deadline; } } - rb_erase(&p->pushable_dl_tasks, &dl_rq->pushable_dl_tasks_root); + rb_erase_cached(&p->pushable_dl_tasks, &dl_rq->pushable_dl_tasks_root); RB_CLEAR_NODE(&p->pushable_dl_tasks); } static inline int has_pushable_dl_tasks(struct rq *rq) { - return !RB_EMPTY_ROOT(&rq->dl.pushable_dl_tasks_root); + return !RB_EMPTY_ROOT(&rq->dl.pushable_dl_tasks_root.rb_root); } static int push_dl_task(struct rq *rq); @@ -1266,7 +1264,7 @@ static void dec_dl_deadline(struct dl_rq *dl_rq, u64 deadline) dl_rq->earliest_dl.next = 0; cpudl_clear(&rq->rd->cpudl, rq->cpu); } else { - struct rb_node *leftmost = dl_rq->rb_leftmost; + struct rb_node *leftmost = dl_rq->root.rb_leftmost; struct sched_dl_entity *entry; entry = rb_entry(leftmost, struct sched_dl_entity, rb_node); @@ -1313,7 +1311,7 @@ void dec_dl_tasks(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq) static void __enqueue_dl_entity(struct sched_dl_entity *dl_se) { struct dl_rq *dl_rq = dl_rq_of_se(dl_se); - struct rb_node **link = &dl_rq->rb_root.rb_node; + struct rb_node **link = &dl_rq->root.rb_root.rb_node; struct rb_node *parent = NULL; struct sched_dl_entity *entry; int leftmost = 1; @@ -1331,11 +1329,8 @@ static void __enqueue_dl_entity(struct sched_dl_entity *dl_se) } } - if (leftmost) - dl_rq->rb_leftmost = &dl_se->rb_node; - rb_link_node(&dl_se->rb_node, parent, link); - rb_insert_color(&dl_se->rb_node, &dl_rq->rb_root); + rb_insert_color_cached(&dl_se->rb_node, &dl_rq->root, leftmost); inc_dl_tasks(dl_se, dl_rq); } @@ -1347,14 +1342,7 @@ static void __dequeue_dl_entity(struct sched_dl_entity *dl_se) if (RB_EMPTY_NODE(&dl_se->rb_node)) return; - if (dl_rq->rb_leftmost == &dl_se->rb_node) { - struct rb_node *next_node; - - next_node = rb_next(&dl_se->rb_node); - dl_rq->rb_leftmost = next_node; - } - - rb_erase(&dl_se->rb_node, &dl_rq->rb_root); + rb_erase_cached(&dl_se->rb_node, &dl_rq->root); RB_CLEAR_NODE(&dl_se->rb_node); dec_dl_tasks(dl_se, dl_rq); @@ -1647,7 +1635,7 @@ static void start_hrtick_dl(struct rq *rq, struct task_struct *p) static struct sched_dl_entity *pick_next_dl_entity(struct rq *rq, struct dl_rq *dl_rq) { - struct rb_node *left = dl_rq->rb_leftmost; + struct rb_node *left = rb_first_cached(&dl_rq->root); if (!left) return NULL; @@ -1771,7 +1759,7 @@ static int pick_dl_task(struct rq *rq, struct task_struct *p, int cpu) */ static struct task_struct *pick_earliest_pushable_dl_task(struct rq *rq, int cpu) { - struct rb_node *next_node = rq->dl.pushable_dl_tasks_leftmost; + struct rb_node *next_node = rq->dl.pushable_dl_tasks_root.rb_leftmost; struct task_struct *p = NULL; if (!has_pushable_dl_tasks(rq)) @@ -1945,7 +1933,7 @@ static struct task_struct *pick_next_pushable_dl_task(struct rq *rq) if (!has_pushable_dl_tasks(rq)) return NULL; - p = rb_entry(rq->dl.pushable_dl_tasks_leftmost, + p = rb_entry(rq->dl.pushable_dl_tasks_root.rb_leftmost, struct task_struct, pushable_dl_tasks); BUG_ON(rq->cpu != task_cpu(p)); diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c index 4a23bbc3111b..01217fb5a5de 100644 --- a/kernel/sched/debug.c +++ b/kernel/sched/debug.c @@ -181,11 +181,16 @@ static const struct file_operations sched_feat_fops = { .release = single_release, }; +__read_mostly bool sched_debug_enabled; + static __init int sched_init_debug(void) { debugfs_create_file("sched_features", 0644, NULL, NULL, &sched_feat_fops); + debugfs_create_bool("sched_debug", 0644, NULL, + &sched_debug_enabled); + return 0; } late_initcall(sched_init_debug); @@ -530,7 +535,7 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) SPLIT_NS(cfs_rq->exec_clock)); raw_spin_lock_irqsave(&rq->lock, flags); - if (cfs_rq->rb_leftmost) + if (rb_first_cached(&cfs_rq->tasks_timeline)) MIN_vruntime = (__pick_first_entity(cfs_rq))->vruntime; last = __pick_last_entity(cfs_rq); if (last) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 8bc0a883d190..70ba32e08a23 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -513,6 +513,7 @@ static inline int entity_before(struct sched_entity *a, static void update_min_vruntime(struct cfs_rq *cfs_rq) { struct sched_entity *curr = cfs_rq->curr; + struct rb_node *leftmost = rb_first_cached(&cfs_rq->tasks_timeline); u64 vruntime = cfs_rq->min_vruntime; @@ -523,10 +524,9 @@ static void update_min_vruntime(struct cfs_rq *cfs_rq) curr = NULL; } - if (cfs_rq->rb_leftmost) { - struct sched_entity *se = rb_entry(cfs_rq->rb_leftmost, - struct sched_entity, - run_node); + if (leftmost) { /* non-empty tree */ + struct sched_entity *se; + se = rb_entry(leftmost, struct sched_entity, run_node); if (!curr) vruntime = se->vruntime; @@ -547,10 +547,10 @@ static void update_min_vruntime(struct cfs_rq *cfs_rq) */ static void __enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se) { - struct rb_node **link = &cfs_rq->tasks_timeline.rb_node; + struct rb_node **link = &cfs_rq->tasks_timeline.rb_root.rb_node; struct rb_node *parent = NULL; struct sched_entity *entry; - int leftmost = 1; + bool leftmost = true; /* * Find the right place in the rbtree: @@ -566,36 +566,23 @@ static void __enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se) link = &parent->rb_left; } else { link = &parent->rb_right; - leftmost = 0; + leftmost = false; } } - /* - * Maintain a cache of leftmost tree entries (it is frequently - * used): - */ - if (leftmost) - cfs_rq->rb_leftmost = &se->run_node; - rb_link_node(&se->run_node, parent, link); - rb_insert_color(&se->run_node, &cfs_rq->tasks_timeline); + rb_insert_color_cached(&se->run_node, + &cfs_rq->tasks_timeline, leftmost); } static void __dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se) { - if (cfs_rq->rb_leftmost == &se->run_node) { - struct rb_node *next_node; - - next_node = rb_next(&se->run_node); - cfs_rq->rb_leftmost = next_node; - } - - rb_erase(&se->run_node, &cfs_rq->tasks_timeline); + rb_erase_cached(&se->run_node, &cfs_rq->tasks_timeline); } struct sched_entity *__pick_first_entity(struct cfs_rq *cfs_rq) { - struct rb_node *left = cfs_rq->rb_leftmost; + struct rb_node *left = rb_first_cached(&cfs_rq->tasks_timeline); if (!left) return NULL; @@ -616,7 +603,7 @@ static struct sched_entity *__pick_next_entity(struct sched_entity *se) #ifdef CONFIG_SCHED_DEBUG struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq) { - struct rb_node *last = rb_last(&cfs_rq->tasks_timeline); + struct rb_node *last = rb_last(&cfs_rq->tasks_timeline.rb_root); if (!last) return NULL; @@ -5437,7 +5424,7 @@ wake_affine_llc(struct sched_domain *sd, struct task_struct *p, return false; /* if this cache has capacity, come here */ - if (this_stats.has_capacity && this_stats.nr_running < prev_stats.nr_running+1) + if (this_stats.has_capacity && this_stats.nr_running+1 < prev_stats.nr_running) return true; /* @@ -7721,7 +7708,7 @@ next_group: * number. * * Return: 1 when packing is required and a task should be moved to - * this CPU. The amount of the imbalance is returned in *imbalance. + * this CPU. The amount of the imbalance is returned in env->imbalance. * * @env: The load balancing environment. * @sds: Statistics of the sched_domain which is to be packed @@ -8450,6 +8437,12 @@ static int idle_balance(struct rq *this_rq, struct rq_flags *rf) this_rq->idle_stamp = rq_clock(this_rq); /* + * Do not pull tasks towards !active CPUs... + */ + if (!cpu_active(this_cpu)) + return 0; + + /* * This is OK, because current is on_cpu, which avoids it being picked * for load-balance and preemption/IRQs are still disabled avoiding * further scheduler activity on it and we're being very careful to @@ -8556,6 +8549,13 @@ static int active_load_balance_cpu_stop(void *data) struct rq_flags rf; rq_lock_irq(busiest_rq, &rf); + /* + * Between queueing the stop-work and running it is a hole in which + * CPUs can become inactive. We should not move tasks from or to + * inactive CPUs. + */ + if (!cpu_active(busiest_cpu) || !cpu_active(target_cpu)) + goto out_unlock; /* make sure the requested cpu hasn't gone down in the meantime */ if (unlikely(busiest_cpu != smp_processor_id() || @@ -9312,7 +9312,7 @@ static void set_curr_task_fair(struct rq *rq) void init_cfs_rq(struct cfs_rq *cfs_rq) { - cfs_rq->tasks_timeline = RB_ROOT; + cfs_rq->tasks_timeline = RB_ROOT_CACHED; cfs_rq->min_vruntime = (u64)(-(1LL << 20)); #ifndef CONFIG_64BIT cfs_rq->min_vruntime_copy = cfs_rq->min_vruntime; diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 6ed7962dc896..14db76cd496f 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -426,8 +426,7 @@ struct cfs_rq { u64 min_vruntime_copy; #endif - struct rb_root tasks_timeline; - struct rb_node *rb_leftmost; + struct rb_root_cached tasks_timeline; /* * 'curr' points to currently running entity on this cfs_rq. @@ -550,8 +549,7 @@ struct rt_rq { /* Deadline class' related fields in a runqueue */ struct dl_rq { /* runqueue is an rbtree, ordered by deadline */ - struct rb_root rb_root; - struct rb_node *rb_leftmost; + struct rb_root_cached root; unsigned long dl_nr_running; @@ -575,8 +573,7 @@ struct dl_rq { * an rb-tree, ordered by tasks' deadlines, with caching * of the leftmost (earliest deadline) element. */ - struct rb_root pushable_dl_tasks_root; - struct rb_node *pushable_dl_tasks_leftmost; + struct rb_root_cached pushable_dl_tasks_root; #else struct dl_bw dl_bw; #endif @@ -1954,6 +1951,8 @@ extern struct sched_entity *__pick_first_entity(struct cfs_rq *cfs_rq); extern struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq); #ifdef CONFIG_SCHED_DEBUG +extern bool sched_debug_enabled; + extern void print_cfs_stats(struct seq_file *m, int cpu); extern void print_rt_stats(struct seq_file *m, int cpu); extern void print_dl_stats(struct seq_file *m, int cpu); diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c index 6f7b43982f73..f1cf4f306a82 100644 --- a/kernel/sched/topology.c +++ b/kernel/sched/topology.c @@ -14,11 +14,9 @@ cpumask_var_t sched_domains_tmpmask2; #ifdef CONFIG_SCHED_DEBUG -static __read_mostly int sched_debug_enabled; - static int __init sched_debug_setup(char *str) { - sched_debug_enabled = 1; + sched_debug_enabled = true; return 0; } @@ -473,7 +471,7 @@ static int __init isolated_cpu_setup(char *str) alloc_bootmem_cpumask_var(&cpu_isolated_map); ret = cpulist_parse(str, cpu_isolated_map); if (ret) { - pr_err("sched: Error, all isolcpus= values must be between 0 and %d\n", nr_cpu_ids); + pr_err("sched: Error, all isolcpus= values must be between 0 and %u\n", nr_cpu_ids); return 0; } return 1; diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c index d6afed6d0752..98feab7933c7 100644 --- a/kernel/sched/wait.c +++ b/kernel/sched/wait.c @@ -53,6 +53,12 @@ void remove_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry } EXPORT_SYMBOL(remove_wait_queue); +/* + * Scan threshold to break wait queue walk. + * This allows a waker to take a break from holding the + * wait queue lock during the wait queue walk. + */ +#define WAITQUEUE_WALK_BREAK_CNT 64 /* * The core wakeup function. Non-exclusive wakeups (nr_exclusive == 0) just @@ -63,18 +69,67 @@ EXPORT_SYMBOL(remove_wait_queue); * started to run but is not in state TASK_RUNNING. try_to_wake_up() returns * zero in this (rare) case, and we handle it by continuing to scan the queue. */ -static void __wake_up_common(struct wait_queue_head *wq_head, unsigned int mode, - int nr_exclusive, int wake_flags, void *key) +static int __wake_up_common(struct wait_queue_head *wq_head, unsigned int mode, + int nr_exclusive, int wake_flags, void *key, + wait_queue_entry_t *bookmark) { wait_queue_entry_t *curr, *next; + int cnt = 0; + + if (bookmark && (bookmark->flags & WQ_FLAG_BOOKMARK)) { + curr = list_next_entry(bookmark, entry); + + list_del(&bookmark->entry); + bookmark->flags = 0; + } else + curr = list_first_entry(&wq_head->head, wait_queue_entry_t, entry); - list_for_each_entry_safe(curr, next, &wq_head->head, entry) { + if (&curr->entry == &wq_head->head) + return nr_exclusive; + + list_for_each_entry_safe_from(curr, next, &wq_head->head, entry) { unsigned flags = curr->flags; - int ret = curr->func(curr, mode, wake_flags, key); + int ret; + + if (flags & WQ_FLAG_BOOKMARK) + continue; + + ret = curr->func(curr, mode, wake_flags, key); if (ret < 0) break; if (ret && (flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive) break; + + if (bookmark && (++cnt > WAITQUEUE_WALK_BREAK_CNT) && + (&next->entry != &wq_head->head)) { + bookmark->flags = WQ_FLAG_BOOKMARK; + list_add_tail(&bookmark->entry, &next->entry); + break; + } + } + return nr_exclusive; +} + +static void __wake_up_common_lock(struct wait_queue_head *wq_head, unsigned int mode, + int nr_exclusive, int wake_flags, void *key) +{ + unsigned long flags; + wait_queue_entry_t bookmark; + + bookmark.flags = 0; + bookmark.private = NULL; + bookmark.func = NULL; + INIT_LIST_HEAD(&bookmark.entry); + + spin_lock_irqsave(&wq_head->lock, flags); + nr_exclusive = __wake_up_common(wq_head, mode, nr_exclusive, wake_flags, key, &bookmark); + spin_unlock_irqrestore(&wq_head->lock, flags); + + while (bookmark.flags & WQ_FLAG_BOOKMARK) { + spin_lock_irqsave(&wq_head->lock, flags); + nr_exclusive = __wake_up_common(wq_head, mode, nr_exclusive, + wake_flags, key, &bookmark); + spin_unlock_irqrestore(&wq_head->lock, flags); } } @@ -91,11 +146,7 @@ static void __wake_up_common(struct wait_queue_head *wq_head, unsigned int mode, void __wake_up(struct wait_queue_head *wq_head, unsigned int mode, int nr_exclusive, void *key) { - unsigned long flags; - - spin_lock_irqsave(&wq_head->lock, flags); - __wake_up_common(wq_head, mode, nr_exclusive, 0, key); - spin_unlock_irqrestore(&wq_head->lock, flags); + __wake_up_common_lock(wq_head, mode, nr_exclusive, 0, key); } EXPORT_SYMBOL(__wake_up); @@ -104,16 +155,23 @@ EXPORT_SYMBOL(__wake_up); */ void __wake_up_locked(struct wait_queue_head *wq_head, unsigned int mode, int nr) { - __wake_up_common(wq_head, mode, nr, 0, NULL); + __wake_up_common(wq_head, mode, nr, 0, NULL, NULL); } EXPORT_SYMBOL_GPL(__wake_up_locked); void __wake_up_locked_key(struct wait_queue_head *wq_head, unsigned int mode, void *key) { - __wake_up_common(wq_head, mode, 1, 0, key); + __wake_up_common(wq_head, mode, 1, 0, key, NULL); } EXPORT_SYMBOL_GPL(__wake_up_locked_key); +void __wake_up_locked_key_bookmark(struct wait_queue_head *wq_head, + unsigned int mode, void *key, wait_queue_entry_t *bookmark) +{ + __wake_up_common(wq_head, mode, 1, 0, key, bookmark); +} +EXPORT_SYMBOL_GPL(__wake_up_locked_key_bookmark); + /** * __wake_up_sync_key - wake up threads blocked on a waitqueue. * @wq_head: the waitqueue @@ -134,7 +192,6 @@ EXPORT_SYMBOL_GPL(__wake_up_locked_key); void __wake_up_sync_key(struct wait_queue_head *wq_head, unsigned int mode, int nr_exclusive, void *key) { - unsigned long flags; int wake_flags = 1; /* XXX WF_SYNC */ if (unlikely(!wq_head)) @@ -143,9 +200,7 @@ void __wake_up_sync_key(struct wait_queue_head *wq_head, unsigned int mode, if (unlikely(nr_exclusive != 1)) wake_flags = 0; - spin_lock_irqsave(&wq_head->lock, flags); - __wake_up_common(wq_head, mode, nr_exclusive, wake_flags, key); - spin_unlock_irqrestore(&wq_head->lock, flags); + __wake_up_common_lock(wq_head, mode, nr_exclusive, wake_flags, key); } EXPORT_SYMBOL_GPL(__wake_up_sync_key); |