diff options
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/irq/spurious.c | 2 | ||||
-rw-r--r-- | kernel/kthread.c | 23 | ||||
-rw-r--r-- | kernel/rcutree.c | 16 | ||||
-rw-r--r-- | kernel/rcutree.h | 7 | ||||
-rw-r--r-- | kernel/sched.c | 43 | ||||
-rw-r--r-- | kernel/sched_fair.c | 73 | ||||
-rw-r--r-- | kernel/trace/ftrace.c | 6 | ||||
-rw-r--r-- | kernel/trace/ring_buffer.c | 2 | ||||
-rw-r--r-- | kernel/user.c | 2 | ||||
-rw-r--r-- | kernel/workqueue.c | 28 |
10 files changed, 121 insertions, 81 deletions
diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c index 114e704760fe..bd7273e6282e 100644 --- a/kernel/irq/spurious.c +++ b/kernel/irq/spurious.c @@ -121,7 +121,9 @@ static void poll_all_shared_irqs(void) if (!(status & IRQ_SPURIOUS_DISABLED)) continue; + local_irq_disable(); try_one_irq(i, desc); + local_irq_enable(); } } diff --git a/kernel/kthread.c b/kernel/kthread.c index 5fe709982caa..ab7ae57773e1 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -150,29 +150,6 @@ struct task_struct *kthread_create(int (*threadfn)(void *data), EXPORT_SYMBOL(kthread_create); /** - * kthread_bind - bind a just-created kthread to a cpu. - * @k: thread created by kthread_create(). - * @cpu: cpu (might not be online, must be possible) for @k to run on. - * - * Description: This function is equivalent to set_cpus_allowed(), - * except that @cpu doesn't need to be online, and the thread must be - * stopped (i.e., just returned from kthread_create()). - */ -void kthread_bind(struct task_struct *k, unsigned int cpu) -{ - /* Must have done schedule() in kthread() before we set_task_cpu */ - if (!wait_task_inactive(k, TASK_UNINTERRUPTIBLE)) { - WARN_ON(1); - return; - } - set_task_cpu(k, cpu); - k->cpus_allowed = cpumask_of_cpu(cpu); - k->rt.nr_cpus_allowed = 1; - k->flags |= PF_THREAD_BOUND; -} -EXPORT_SYMBOL(kthread_bind); - -/** * kthread_stop - stop a thread created by kthread_create(). * @k: thread created by kthread_create(). * diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 0536125b0497..f3077c0ab181 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -59,7 +59,7 @@ NUM_RCU_LVL_2, \ NUM_RCU_LVL_3, /* == MAX_RCU_LVLS */ \ }, \ - .signaled = RCU_SIGNAL_INIT, \ + .signaled = RCU_GP_IDLE, \ .gpnum = -300, \ .completed = -300, \ .onofflock = __SPIN_LOCK_UNLOCKED(&name.onofflock), \ @@ -657,14 +657,17 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags) * irqs disabled. */ rcu_for_each_node_breadth_first(rsp, rnp) { - spin_lock(&rnp->lock); /* irqs already disabled. */ + spin_lock(&rnp->lock); /* irqs already disabled. */ rcu_preempt_check_blocked_tasks(rnp); rnp->qsmask = rnp->qsmaskinit; rnp->gpnum = rsp->gpnum; - spin_unlock(&rnp->lock); /* irqs already disabled. */ + spin_unlock(&rnp->lock); /* irqs remain disabled. */ } + rnp = rcu_get_root(rsp); + spin_lock(&rnp->lock); /* irqs already disabled. */ rsp->signaled = RCU_SIGNAL_INIT; /* force_quiescent_state now OK. */ + spin_unlock(&rnp->lock); /* irqs remain disabled. */ spin_unlock_irqrestore(&rsp->onofflock, flags); } @@ -706,6 +709,7 @@ static void cpu_quiet_msk_finish(struct rcu_state *rsp, unsigned long flags) { WARN_ON_ONCE(!rcu_gp_in_progress(rsp)); rsp->completed = rsp->gpnum; + rsp->signaled = RCU_GP_IDLE; rcu_process_gp_end(rsp, rsp->rda[smp_processor_id()]); rcu_start_gp(rsp, flags); /* releases root node's rnp->lock. */ } @@ -1162,9 +1166,10 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed) } spin_unlock(&rnp->lock); switch (signaled) { + case RCU_GP_IDLE: case RCU_GP_INIT: - break; /* grace period still initializing, ignore. */ + break; /* grace period idle or initializing, ignore. */ case RCU_SAVE_DYNTICK: @@ -1178,7 +1183,8 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed) /* Update state, record completion counter. */ spin_lock(&rnp->lock); - if (lastcomp == rsp->completed) { + if (lastcomp == rsp->completed && + rsp->signaled == RCU_SAVE_DYNTICK) { rsp->signaled = RCU_FORCE_QS; dyntick_record_completed(rsp, lastcomp); } diff --git a/kernel/rcutree.h b/kernel/rcutree.h index 1823c6e20609..1899023b0962 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h @@ -201,9 +201,10 @@ struct rcu_data { }; /* Values for signaled field in struct rcu_state. */ -#define RCU_GP_INIT 0 /* Grace period being initialized. */ -#define RCU_SAVE_DYNTICK 1 /* Need to scan dyntick state. */ -#define RCU_FORCE_QS 2 /* Need to force quiescent state. */ +#define RCU_GP_IDLE 0 /* No grace period in progress. */ +#define RCU_GP_INIT 1 /* Grace period being initialized. */ +#define RCU_SAVE_DYNTICK 2 /* Need to scan dyntick state. */ +#define RCU_FORCE_QS 3 /* Need to force quiescent state. */ #ifdef CONFIG_NO_HZ #define RCU_SIGNAL_INIT RCU_SAVE_DYNTICK #else /* #ifdef CONFIG_NO_HZ */ diff --git a/kernel/sched.c b/kernel/sched.c index a455dca884a6..3c11ae0a948d 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -309,6 +309,8 @@ static DEFINE_PER_CPU_SHARED_ALIGNED(struct rt_rq, init_rt_rq); */ static DEFINE_SPINLOCK(task_group_lock); +#ifdef CONFIG_FAIR_GROUP_SCHED + #ifdef CONFIG_SMP static int root_task_group_empty(void) { @@ -316,7 +318,6 @@ static int root_task_group_empty(void) } #endif -#ifdef CONFIG_FAIR_GROUP_SCHED #ifdef CONFIG_USER_SCHED # define INIT_TASK_GROUP_LOAD (2*NICE_0_LOAD) #else /* !CONFIG_USER_SCHED */ @@ -1992,6 +1993,38 @@ static inline void check_class_changed(struct rq *rq, struct task_struct *p, p->sched_class->prio_changed(rq, p, oldprio, running); } +/** + * kthread_bind - bind a just-created kthread to a cpu. + * @p: thread created by kthread_create(). + * @cpu: cpu (might not be online, must be possible) for @k to run on. + * + * Description: This function is equivalent to set_cpus_allowed(), + * except that @cpu doesn't need to be online, and the thread must be + * stopped (i.e., just returned from kthread_create()). + * + * Function lives here instead of kthread.c because it messes with + * scheduler internals which require locking. + */ +void kthread_bind(struct task_struct *p, unsigned int cpu) +{ + struct rq *rq = cpu_rq(cpu); + unsigned long flags; + + /* Must have done schedule() in kthread() before we set_task_cpu */ + if (!wait_task_inactive(p, TASK_UNINTERRUPTIBLE)) { + WARN_ON(1); + return; + } + + spin_lock_irqsave(&rq->lock, flags); + set_task_cpu(p, cpu); + p->cpus_allowed = cpumask_of_cpu(cpu); + p->rt.nr_cpus_allowed = 1; + p->flags |= PF_THREAD_BOUND; + spin_unlock_irqrestore(&rq->lock, flags); +} +EXPORT_SYMBOL(kthread_bind); + #ifdef CONFIG_SMP /* * Is this task likely cache-hot: @@ -2004,7 +2037,7 @@ task_hot(struct task_struct *p, u64 now, struct sched_domain *sd) /* * Buddy candidates are cache hot: */ - if (sched_feat(CACHE_HOT_BUDDY) && + if (sched_feat(CACHE_HOT_BUDDY) && this_rq()->nr_running && (&p->se == cfs_rq_of(&p->se)->next || &p->se == cfs_rq_of(&p->se)->last)) return 1; @@ -9532,13 +9565,13 @@ void __init sched_init(void) current->sched_class = &fair_sched_class; /* Allocate the nohz_cpu_mask if CONFIG_CPUMASK_OFFSTACK */ - alloc_cpumask_var(&nohz_cpu_mask, GFP_NOWAIT); + zalloc_cpumask_var(&nohz_cpu_mask, GFP_NOWAIT); #ifdef CONFIG_SMP #ifdef CONFIG_NO_HZ - alloc_cpumask_var(&nohz.cpu_mask, GFP_NOWAIT); + zalloc_cpumask_var(&nohz.cpu_mask, GFP_NOWAIT); alloc_cpumask_var(&nohz.ilb_grp_nohz_mask, GFP_NOWAIT); #endif - alloc_cpumask_var(&cpu_isolated_map, GFP_NOWAIT); + zalloc_cpumask_var(&cpu_isolated_map, GFP_NOWAIT); #endif /* SMP */ perf_event_init(); diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index c32c3e643daa..37087a7fac22 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -822,6 +822,26 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr) * re-elected due to buddy favours. */ clear_buddies(cfs_rq, curr); + return; + } + + /* + * Ensure that a task that missed wakeup preemption by a + * narrow margin doesn't have to wait for a full slice. + * This also mitigates buddy induced latencies under load. + */ + if (!sched_feat(WAKEUP_PREEMPT)) + return; + + if (delta_exec < sysctl_sched_min_granularity) + return; + + if (cfs_rq->nr_running > 1) { + struct sched_entity *se = __pick_next_entity(cfs_rq); + s64 delta = curr->vruntime - se->vruntime; + + if (delta > ideal_runtime) + resched_task(rq_of(cfs_rq)->curr); } } @@ -861,21 +881,18 @@ wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se); static struct sched_entity *pick_next_entity(struct cfs_rq *cfs_rq) { struct sched_entity *se = __pick_next_entity(cfs_rq); - struct sched_entity *buddy; + struct sched_entity *left = se; - if (cfs_rq->next) { - buddy = cfs_rq->next; - cfs_rq->next = NULL; - if (wakeup_preempt_entity(buddy, se) < 1) - return buddy; - } + if (cfs_rq->next && wakeup_preempt_entity(cfs_rq->next, left) < 1) + se = cfs_rq->next; - if (cfs_rq->last) { - buddy = cfs_rq->last; - cfs_rq->last = NULL; - if (wakeup_preempt_entity(buddy, se) < 1) - return buddy; - } + /* + * Prefer last buddy, try to return the CPU to a preempted task. + */ + if (cfs_rq->last && wakeup_preempt_entity(cfs_rq->last, left) < 1) + se = cfs_rq->last; + + clear_buddies(cfs_rq, se); return se; } @@ -1577,6 +1594,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_ struct sched_entity *se = &curr->se, *pse = &p->se; struct cfs_rq *cfs_rq = task_cfs_rq(curr); int sync = wake_flags & WF_SYNC; + int scale = cfs_rq->nr_running >= sched_nr_latency; update_curr(cfs_rq); @@ -1591,18 +1609,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_ if (unlikely(se == pse)) return; - /* - * Only set the backward buddy when the current task is still on the - * rq. This can happen when a wakeup gets interleaved with schedule on - * the ->pre_schedule() or idle_balance() point, either of which can - * drop the rq lock. - * - * Also, during early boot the idle thread is in the fair class, for - * obvious reasons its a bad idea to schedule back to the idle thread. - */ - if (sched_feat(LAST_BUDDY) && likely(se->on_rq && curr != rq->idle)) - set_last_buddy(se); - if (sched_feat(NEXT_BUDDY) && !(wake_flags & WF_FORK)) + if (sched_feat(NEXT_BUDDY) && scale && !(wake_flags & WF_FORK)) set_next_buddy(pse); /* @@ -1648,8 +1655,22 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_ BUG_ON(!pse); - if (wakeup_preempt_entity(se, pse) == 1) + if (wakeup_preempt_entity(se, pse) == 1) { resched_task(curr); + /* + * Only set the backward buddy when the current task is still + * on the rq. This can happen when a wakeup gets interleaved + * with schedule on the ->pre_schedule() or idle_balance() + * point, either of which can * drop the rq lock. + * + * Also, during early boot the idle thread is in the fair class, + * for obvious reasons its a bad idea to schedule back to it. + */ + if (unlikely(!se->on_rq || curr == rq->idle)) + return; + if (sched_feat(LAST_BUDDY) && scale && entity_is_task(se)) + set_last_buddy(se); + } } static struct task_struct *pick_next_task_fair(struct rq *rq) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 9c451a1930b6..6dc4e5ef7a01 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -2222,15 +2222,15 @@ ftrace_regex_write(struct file *file, const char __user *ubuf, ret = ftrace_process_regex(parser->buffer, parser->idx, enable); if (ret) - goto out; + goto out_unlock; trace_parser_clear(parser); } ret = read; - +out_unlock: mutex_unlock(&ftrace_regex_lock); -out: + return ret; } diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 3ffa502fb243..5dd017fea6f5 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -1193,6 +1193,7 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages) atomic_inc(&cpu_buffer->record_disabled); synchronize_sched(); + spin_lock_irq(&cpu_buffer->reader_lock); rb_head_page_deactivate(cpu_buffer); for (i = 0; i < nr_pages; i++) { @@ -1207,6 +1208,7 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages) return; rb_reset_cpu(cpu_buffer); + spin_unlock_irq(&cpu_buffer->reader_lock); rb_check_pages(cpu_buffer); diff --git a/kernel/user.c b/kernel/user.c index 2c000e7132ac..46d0165ca70c 100644 --- a/kernel/user.c +++ b/kernel/user.c @@ -330,9 +330,9 @@ done: */ static void free_user(struct user_struct *up, unsigned long flags) { - spin_unlock_irqrestore(&uidhash_lock, flags); INIT_DELAYED_WORK(&up->work, cleanup_user_struct); schedule_delayed_work(&up->work, msecs_to_jiffies(1000)); + spin_unlock_irqrestore(&uidhash_lock, flags); } #else /* CONFIG_USER_SCHED && CONFIG_SYSFS */ diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 12328147132c..67e526b6ae81 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -692,31 +692,29 @@ int schedule_on_each_cpu(work_func_t func) if (!works) return -ENOMEM; + get_online_cpus(); + /* - * when running in keventd don't schedule a work item on itself. - * Can just call directly because the work queue is already bound. - * This also is faster. - * Make this a generic parameter for other workqueues? + * When running in keventd don't schedule a work item on + * itself. Can just call directly because the work queue is + * already bound. This also is faster. */ - if (current_is_keventd()) { + if (current_is_keventd()) orig = raw_smp_processor_id(); - INIT_WORK(per_cpu_ptr(works, orig), func); - func(per_cpu_ptr(works, orig)); - } - get_online_cpus(); for_each_online_cpu(cpu) { struct work_struct *work = per_cpu_ptr(works, cpu); - if (cpu == orig) - continue; INIT_WORK(work, func); - schedule_work_on(cpu, work); - } - for_each_online_cpu(cpu) { if (cpu != orig) - flush_work(per_cpu_ptr(works, cpu)); + schedule_work_on(cpu, work); } + if (orig >= 0) + func(per_cpu_ptr(works, orig)); + + for_each_online_cpu(cpu) + flush_work(per_cpu_ptr(works, cpu)); + put_online_cpus(); free_percpu(works); return 0; |