summaryrefslogtreecommitdiffstats
path: root/kernel/sched/core.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/sched/core.c')
-rw-r--r--kernel/sched/core.c216
1 files changed, 137 insertions, 79 deletions
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 4da0f4bb2ca4..b8e2162fc803 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -933,6 +933,8 @@ static int effective_prio(struct task_struct *p)
/**
* task_curr - is this task currently executing on a CPU?
* @p: the task in question.
+ *
+ * Return: 1 if the task is currently executing. 0 otherwise.
*/
inline int task_curr(const struct task_struct *p)
{
@@ -1482,7 +1484,7 @@ static void ttwu_queue(struct task_struct *p, int cpu)
* the simpler "current->state = TASK_RUNNING" to mark yourself
* runnable without the overhead of this.
*
- * Returns %true if @p was woken up, %false if it was already running
+ * Return: %true if @p was woken up, %false if it was already running.
* or @state didn't match @p's state.
*/
static int
@@ -1491,7 +1493,13 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
unsigned long flags;
int cpu, success = 0;
- smp_wmb();
+ /*
+ * If we are going to wake up a thread waiting for CONDITION we
+ * need to ensure that CONDITION=1 done by the caller can not be
+ * reordered with p->state check below. This pairs with mb() in
+ * set_current_state() the waiting thread does.
+ */
+ smp_mb__before_spinlock();
raw_spin_lock_irqsave(&p->pi_lock, flags);
if (!(p->state & state))
goto out;
@@ -1577,8 +1585,9 @@ out:
* @p: The process to be woken up.
*
* Attempt to wake up the nominated process and move it to the set of runnable
- * processes. Returns 1 if the process was woken up, 0 if it was already
- * running.
+ * processes.
+ *
+ * Return: 1 if the process was woken up, 0 if it was already running.
*
* It may be assumed that this function implies a write memory barrier before
* changing the task state if and only if any tasks are woken up.
@@ -2191,6 +2200,8 @@ void scheduler_tick(void)
* This makes sure that uptime, CFS vruntime, load
* balancing, etc... continue to move forward, even
* with a very low granularity.
+ *
+ * Return: Maximum deferment in nanoseconds.
*/
u64 scheduler_tick_max_deferment(void)
{
@@ -2394,6 +2405,12 @@ need_resched:
if (sched_feat(HRTICK))
hrtick_clear(rq);
+ /*
+ * Make sure that signal_pending_state()->signal_pending() below
+ * can't be reordered with __set_current_state(TASK_INTERRUPTIBLE)
+ * done by the caller to avoid the race with signal_wake_up().
+ */
+ smp_mb__before_spinlock();
raw_spin_lock_irq(&rq->lock);
switch_count = &prev->nivcsw;
@@ -2796,8 +2813,8 @@ EXPORT_SYMBOL(wait_for_completion);
* specified timeout to expire. The timeout is in jiffies. It is not
* interruptible.
*
- * The return value is 0 if timed out, and positive (at least 1, or number of
- * jiffies left till timeout) if completed.
+ * Return: 0 if timed out, and positive (at least 1, or number of jiffies left
+ * till timeout) if completed.
*/
unsigned long __sched
wait_for_completion_timeout(struct completion *x, unsigned long timeout)
@@ -2829,8 +2846,8 @@ EXPORT_SYMBOL(wait_for_completion_io);
* specified timeout to expire. The timeout is in jiffies. It is not
* interruptible. The caller is accounted as waiting for IO.
*
- * The return value is 0 if timed out, and positive (at least 1, or number of
- * jiffies left till timeout) if completed.
+ * Return: 0 if timed out, and positive (at least 1, or number of jiffies left
+ * till timeout) if completed.
*/
unsigned long __sched
wait_for_completion_io_timeout(struct completion *x, unsigned long timeout)
@@ -2846,7 +2863,7 @@ EXPORT_SYMBOL(wait_for_completion_io_timeout);
* This waits for completion of a specific task to be signaled. It is
* interruptible.
*
- * The return value is -ERESTARTSYS if interrupted, 0 if completed.
+ * Return: -ERESTARTSYS if interrupted, 0 if completed.
*/
int __sched wait_for_completion_interruptible(struct completion *x)
{
@@ -2865,8 +2882,8 @@ EXPORT_SYMBOL(wait_for_completion_interruptible);
* This waits for either a completion of a specific task to be signaled or for a
* specified timeout to expire. It is interruptible. The timeout is in jiffies.
*
- * The return value is -ERESTARTSYS if interrupted, 0 if timed out,
- * positive (at least 1, or number of jiffies left till timeout) if completed.
+ * Return: -ERESTARTSYS if interrupted, 0 if timed out, positive (at least 1,
+ * or number of jiffies left till timeout) if completed.
*/
long __sched
wait_for_completion_interruptible_timeout(struct completion *x,
@@ -2883,7 +2900,7 @@ EXPORT_SYMBOL(wait_for_completion_interruptible_timeout);
* This waits to be signaled for completion of a specific task. It can be
* interrupted by a kill signal.
*
- * The return value is -ERESTARTSYS if interrupted, 0 if completed.
+ * Return: -ERESTARTSYS if interrupted, 0 if completed.
*/
int __sched wait_for_completion_killable(struct completion *x)
{
@@ -2903,8 +2920,8 @@ EXPORT_SYMBOL(wait_for_completion_killable);
* signaled or for a specified timeout to expire. It can be
* interrupted by a kill signal. The timeout is in jiffies.
*
- * The return value is -ERESTARTSYS if interrupted, 0 if timed out,
- * positive (at least 1, or number of jiffies left till timeout) if completed.
+ * Return: -ERESTARTSYS if interrupted, 0 if timed out, positive (at least 1,
+ * or number of jiffies left till timeout) if completed.
*/
long __sched
wait_for_completion_killable_timeout(struct completion *x,
@@ -2918,7 +2935,7 @@ EXPORT_SYMBOL(wait_for_completion_killable_timeout);
* try_wait_for_completion - try to decrement a completion without blocking
* @x: completion structure
*
- * Returns: 0 if a decrement cannot be done without blocking
+ * Return: 0 if a decrement cannot be done without blocking
* 1 if a decrement succeeded.
*
* If a completion is being used as a counting completion,
@@ -2945,7 +2962,7 @@ EXPORT_SYMBOL(try_wait_for_completion);
* completion_done - Test to see if a completion has any waiters
* @x: completion structure
*
- * Returns: 0 if there are waiters (wait_for_completion() in progress)
+ * Return: 0 if there are waiters (wait_for_completion() in progress)
* 1 if there are no waiters.
*
*/
@@ -3182,7 +3199,7 @@ SYSCALL_DEFINE1(nice, int, increment)
* task_prio - return the priority value of a given task.
* @p: the task in question.
*
- * This is the priority value as seen by users in /proc.
+ * Return: The priority value as seen by users in /proc.
* RT tasks are offset by -200. Normal tasks are centered
* around 0, value goes from -16 to +15.
*/
@@ -3194,6 +3211,8 @@ int task_prio(const struct task_struct *p)
/**
* task_nice - return the nice value of a given task.
* @p: the task in question.
+ *
+ * Return: The nice value [ -20 ... 0 ... 19 ].
*/
int task_nice(const struct task_struct *p)
{
@@ -3204,6 +3223,8 @@ EXPORT_SYMBOL(task_nice);
/**
* idle_cpu - is a given cpu idle currently?
* @cpu: the processor in question.
+ *
+ * Return: 1 if the CPU is currently idle. 0 otherwise.
*/
int idle_cpu(int cpu)
{
@@ -3226,6 +3247,8 @@ int idle_cpu(int cpu)
/**
* idle_task - return the idle task for a given cpu.
* @cpu: the processor in question.
+ *
+ * Return: The idle task for the cpu @cpu.
*/
struct task_struct *idle_task(int cpu)
{
@@ -3235,6 +3258,8 @@ struct task_struct *idle_task(int cpu)
/**
* find_process_by_pid - find a process with a matching PID value.
* @pid: the pid in question.
+ *
+ * The task of @pid, if found. %NULL otherwise.
*/
static struct task_struct *find_process_by_pid(pid_t pid)
{
@@ -3432,6 +3457,8 @@ recheck:
* @policy: new policy.
* @param: structure containing the new RT priority.
*
+ * Return: 0 on success. An error code otherwise.
+ *
* NOTE that the task may be already dead.
*/
int sched_setscheduler(struct task_struct *p, int policy,
@@ -3451,6 +3478,8 @@ EXPORT_SYMBOL_GPL(sched_setscheduler);
* current context has permission. For example, this is needed in
* stop_machine(): we create temporary high priority worker threads,
* but our caller might not have that capability.
+ *
+ * Return: 0 on success. An error code otherwise.
*/
int sched_setscheduler_nocheck(struct task_struct *p, int policy,
const struct sched_param *param)
@@ -3485,6 +3514,8 @@ do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param)
* @pid: the pid in question.
* @policy: new policy.
* @param: structure containing the new RT priority.
+ *
+ * Return: 0 on success. An error code otherwise.
*/
SYSCALL_DEFINE3(sched_setscheduler, pid_t, pid, int, policy,
struct sched_param __user *, param)
@@ -3500,6 +3531,8 @@ SYSCALL_DEFINE3(sched_setscheduler, pid_t, pid, int, policy,
* sys_sched_setparam - set/change the RT priority of a thread
* @pid: the pid in question.
* @param: structure containing the new RT priority.
+ *
+ * Return: 0 on success. An error code otherwise.
*/
SYSCALL_DEFINE2(sched_setparam, pid_t, pid, struct sched_param __user *, param)
{
@@ -3509,6 +3542,9 @@ SYSCALL_DEFINE2(sched_setparam, pid_t, pid, struct sched_param __user *, param)
/**
* sys_sched_getscheduler - get the policy (scheduling class) of a thread
* @pid: the pid in question.
+ *
+ * Return: On success, the policy of the thread. Otherwise, a negative error
+ * code.
*/
SYSCALL_DEFINE1(sched_getscheduler, pid_t, pid)
{
@@ -3535,6 +3571,9 @@ SYSCALL_DEFINE1(sched_getscheduler, pid_t, pid)
* sys_sched_getparam - get the RT priority of a thread
* @pid: the pid in question.
* @param: structure containing the RT priority.
+ *
+ * Return: On success, 0 and the RT priority is in @param. Otherwise, an error
+ * code.
*/
SYSCALL_DEFINE2(sched_getparam, pid_t, pid, struct sched_param __user *, param)
{
@@ -3659,6 +3698,8 @@ static int get_user_cpu_mask(unsigned long __user *user_mask_ptr, unsigned len,
* @pid: pid of the process
* @len: length in bytes of the bitmask pointed to by user_mask_ptr
* @user_mask_ptr: user-space pointer to the new cpu mask
+ *
+ * Return: 0 on success. An error code otherwise.
*/
SYSCALL_DEFINE3(sched_setaffinity, pid_t, pid, unsigned int, len,
unsigned long __user *, user_mask_ptr)
@@ -3710,6 +3751,8 @@ out_unlock:
* @pid: pid of the process
* @len: length in bytes of the bitmask pointed to by user_mask_ptr
* @user_mask_ptr: user-space pointer to hold the current cpu mask
+ *
+ * Return: 0 on success. An error code otherwise.
*/
SYSCALL_DEFINE3(sched_getaffinity, pid_t, pid, unsigned int, len,
unsigned long __user *, user_mask_ptr)
@@ -3744,6 +3787,8 @@ SYSCALL_DEFINE3(sched_getaffinity, pid_t, pid, unsigned int, len,
*
* This function yields the current CPU to other tasks. If there are no
* other threads running on this CPU then this function will return.
+ *
+ * Return: 0.
*/
SYSCALL_DEFINE0(sched_yield)
{
@@ -3869,7 +3914,7 @@ EXPORT_SYMBOL(yield);
* It's the caller's job to ensure that the target task struct
* can't go away on us before we can do any checks.
*
- * Returns:
+ * Return:
* true (>0) if we indeed boosted the target task.
* false (0) if we failed to boost the target.
* -ESRCH if there's no task to yield to.
@@ -3972,8 +4017,9 @@ long __sched io_schedule_timeout(long timeout)
* sys_sched_get_priority_max - return maximum RT priority.
* @policy: scheduling class.
*
- * this syscall returns the maximum rt_priority that can be used
- * by a given scheduling class.
+ * Return: On success, this syscall returns the maximum
+ * rt_priority that can be used by a given scheduling class.
+ * On failure, a negative error code is returned.
*/
SYSCALL_DEFINE1(sched_get_priority_max, int, policy)
{
@@ -3997,8 +4043,9 @@ SYSCALL_DEFINE1(sched_get_priority_max, int, policy)
* sys_sched_get_priority_min - return minimum RT priority.
* @policy: scheduling class.
*
- * this syscall returns the minimum rt_priority that can be used
- * by a given scheduling class.
+ * Return: On success, this syscall returns the minimum
+ * rt_priority that can be used by a given scheduling class.
+ * On failure, a negative error code is returned.
*/
SYSCALL_DEFINE1(sched_get_priority_min, int, policy)
{
@@ -4024,6 +4071,9 @@ SYSCALL_DEFINE1(sched_get_priority_min, int, policy)
*
* this syscall writes the default timeslice value of a given process
* into the user-space timespec buffer. A value of '0' means infinity.
+ *
+ * Return: On success, 0 and the timeslice is in @interval. Otherwise,
+ * an error code.
*/
SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid,
struct timespec __user *, interval)
@@ -5084,18 +5134,23 @@ static void destroy_sched_domains(struct sched_domain *sd, int cpu)
* two cpus are in the same cache domain, see cpus_share_cache().
*/
DEFINE_PER_CPU(struct sched_domain *, sd_llc);
+DEFINE_PER_CPU(int, sd_llc_size);
DEFINE_PER_CPU(int, sd_llc_id);
static void update_top_cache_domain(int cpu)
{
struct sched_domain *sd;
int id = cpu;
+ int size = 1;
sd = highest_flag_domain(cpu, SD_SHARE_PKG_RESOURCES);
- if (sd)
+ if (sd) {
id = cpumask_first(sched_domain_span(sd));
+ size = cpumask_weight(sched_domain_span(sd));
+ }
rcu_assign_pointer(per_cpu(sd_llc, cpu), sd);
+ per_cpu(sd_llc_size, cpu) = size;
per_cpu(sd_llc_id, cpu) = id;
}
@@ -6641,6 +6696,8 @@ void normalize_rt_tasks(void)
* @cpu: the processor in question.
*
* ONLY VALID WHEN THE WHOLE SYSTEM IS STOPPED!
+ *
+ * Return: The current task for @cpu.
*/
struct task_struct *curr_task(int cpu)
{
@@ -6772,7 +6829,7 @@ void sched_move_task(struct task_struct *tsk)
if (unlikely(running))
tsk->sched_class->put_prev_task(rq, tsk);
- tg = container_of(task_subsys_state_check(tsk, cpu_cgroup_subsys_id,
+ tg = container_of(task_css_check(tsk, cpu_cgroup_subsys_id,
lockdep_is_held(&tsk->sighand->siglock)),
struct task_group, css);
tg = autogroup_task_group(tsk, tg);
@@ -7094,23 +7151,22 @@ int sched_rt_handler(struct ctl_table *table, int write,
#ifdef CONFIG_CGROUP_SCHED
-/* return corresponding task_group object of a cgroup */
-static inline struct task_group *cgroup_tg(struct cgroup *cgrp)
+static inline struct task_group *css_tg(struct cgroup_subsys_state *css)
{
- return container_of(cgroup_subsys_state(cgrp, cpu_cgroup_subsys_id),
- struct task_group, css);
+ return css ? container_of(css, struct task_group, css) : NULL;
}
-static struct cgroup_subsys_state *cpu_cgroup_css_alloc(struct cgroup *cgrp)
+static struct cgroup_subsys_state *
+cpu_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
{
- struct task_group *tg, *parent;
+ struct task_group *parent = css_tg(parent_css);
+ struct task_group *tg;
- if (!cgrp->parent) {
+ if (!parent) {
/* This is early initialization for the top cgroup */
return &root_task_group.css;
}
- parent = cgroup_tg(cgrp->parent);
tg = sched_create_group(parent);
if (IS_ERR(tg))
return ERR_PTR(-ENOMEM);
@@ -7118,41 +7174,38 @@ static struct cgroup_subsys_state *cpu_cgroup_css_alloc(struct cgroup *cgrp)
return &tg->css;
}
-static int cpu_cgroup_css_online(struct cgroup *cgrp)
+static int cpu_cgroup_css_online(struct cgroup_subsys_state *css)
{
- struct task_group *tg = cgroup_tg(cgrp);
- struct task_group *parent;
+ struct task_group *tg = css_tg(css);
+ struct task_group *parent = css_tg(css_parent(css));
- if (!cgrp->parent)
- return 0;
-
- parent = cgroup_tg(cgrp->parent);
- sched_online_group(tg, parent);
+ if (parent)
+ sched_online_group(tg, parent);
return 0;
}
-static void cpu_cgroup_css_free(struct cgroup *cgrp)
+static void cpu_cgroup_css_free(struct cgroup_subsys_state *css)
{
- struct task_group *tg = cgroup_tg(cgrp);
+ struct task_group *tg = css_tg(css);
sched_destroy_group(tg);
}
-static void cpu_cgroup_css_offline(struct cgroup *cgrp)
+static void cpu_cgroup_css_offline(struct cgroup_subsys_state *css)
{
- struct task_group *tg = cgroup_tg(cgrp);
+ struct task_group *tg = css_tg(css);
sched_offline_group(tg);
}
-static int cpu_cgroup_can_attach(struct cgroup *cgrp,
+static int cpu_cgroup_can_attach(struct cgroup_subsys_state *css,
struct cgroup_taskset *tset)
{
struct task_struct *task;
- cgroup_taskset_for_each(task, cgrp, tset) {
+ cgroup_taskset_for_each(task, css, tset) {
#ifdef CONFIG_RT_GROUP_SCHED
- if (!sched_rt_can_attach(cgroup_tg(cgrp), task))
+ if (!sched_rt_can_attach(css_tg(css), task))
return -EINVAL;
#else
/* We don't support RT-tasks being in separate groups */
@@ -7163,18 +7216,18 @@ static int cpu_cgroup_can_attach(struct cgroup *cgrp,
return 0;
}
-static void cpu_cgroup_attach(struct cgroup *cgrp,
+static void cpu_cgroup_attach(struct cgroup_subsys_state *css,
struct cgroup_taskset *tset)
{
struct task_struct *task;
- cgroup_taskset_for_each(task, cgrp, tset)
+ cgroup_taskset_for_each(task, css, tset)
sched_move_task(task);
}
-static void
-cpu_cgroup_exit(struct cgroup *cgrp, struct cgroup *old_cgrp,
- struct task_struct *task)
+static void cpu_cgroup_exit(struct cgroup_subsys_state *css,
+ struct cgroup_subsys_state *old_css,
+ struct task_struct *task)
{
/*
* cgroup_exit() is called in the copy_process() failure path.
@@ -7188,15 +7241,16 @@ cpu_cgroup_exit(struct cgroup *cgrp, struct cgroup *old_cgrp,
}
#ifdef CONFIG_FAIR_GROUP_SCHED
-static int cpu_shares_write_u64(struct cgroup *cgrp, struct cftype *cftype,
- u64 shareval)
+static int cpu_shares_write_u64(struct cgroup_subsys_state *css,
+ struct cftype *cftype, u64 shareval)
{
- return sched_group_set_shares(cgroup_tg(cgrp), scale_load(shareval));
+ return sched_group_set_shares(css_tg(css), scale_load(shareval));
}
-static u64 cpu_shares_read_u64(struct cgroup *cgrp, struct cftype *cft)
+static u64 cpu_shares_read_u64(struct cgroup_subsys_state *css,
+ struct cftype *cft)
{
- struct task_group *tg = cgroup_tg(cgrp);
+ struct task_group *tg = css_tg(css);
return (u64) scale_load_down(tg->shares);
}
@@ -7318,26 +7372,28 @@ long tg_get_cfs_period(struct task_group *tg)
return cfs_period_us;
}
-static s64 cpu_cfs_quota_read_s64(struct cgroup *cgrp, struct cftype *cft)
+static s64 cpu_cfs_quota_read_s64(struct cgroup_subsys_state *css,
+ struct cftype *cft)
{
- return tg_get_cfs_quota(cgroup_tg(cgrp));
+ return tg_get_cfs_quota(css_tg(css));
}
-static int cpu_cfs_quota_write_s64(struct cgroup *cgrp, struct cftype *cftype,
- s64 cfs_quota_us)
+static int cpu_cfs_quota_write_s64(struct cgroup_subsys_state *css,
+ struct cftype *cftype, s64 cfs_quota_us)
{
- return tg_set_cfs_quota(cgroup_tg(cgrp), cfs_quota_us);
+ return tg_set_cfs_quota(css_tg(css), cfs_quota_us);
}
-static u64 cpu_cfs_period_read_u64(struct cgroup *cgrp, struct cftype *cft)
+static u64 cpu_cfs_period_read_u64(struct cgroup_subsys_state *css,
+ struct cftype *cft)
{
- return tg_get_cfs_period(cgroup_tg(cgrp));
+ return tg_get_cfs_period(css_tg(css));
}
-static int cpu_cfs_period_write_u64(struct cgroup *cgrp, struct cftype *cftype,
- u64 cfs_period_us)
+static int cpu_cfs_period_write_u64(struct cgroup_subsys_state *css,
+ struct cftype *cftype, u64 cfs_period_us)
{
- return tg_set_cfs_period(cgroup_tg(cgrp), cfs_period_us);
+ return tg_set_cfs_period(css_tg(css), cfs_period_us);
}
struct cfs_schedulable_data {
@@ -7418,10 +7474,10 @@ static int __cfs_schedulable(struct task_group *tg, u64 period, u64 quota)
return ret;
}
-static int cpu_stats_show(struct cgroup *cgrp, struct cftype *cft,
+static int cpu_stats_show(struct cgroup_subsys_state *css, struct cftype *cft,
struct cgroup_map_cb *cb)
{
- struct task_group *tg = cgroup_tg(cgrp);
+ struct task_group *tg = css_tg(css);
struct cfs_bandwidth *cfs_b = &tg->cfs_bandwidth;
cb->fill(cb, "nr_periods", cfs_b->nr_periods);
@@ -7434,26 +7490,28 @@ static int cpu_stats_show(struct cgroup *cgrp, struct cftype *cft,
#endif /* CONFIG_FAIR_GROUP_SCHED */
#ifdef CONFIG_RT_GROUP_SCHED
-static int cpu_rt_runtime_write(struct cgroup *cgrp, struct cftype *cft,
- s64 val)
+static int cpu_rt_runtime_write(struct cgroup_subsys_state *css,
+ struct cftype *cft, s64 val)
{
- return sched_group_set_rt_runtime(cgroup_tg(cgrp), val);
+ return sched_group_set_rt_runtime(css_tg(css), val);
}
-static s64 cpu_rt_runtime_read(struct cgroup *cgrp, struct cftype *cft)
+static s64 cpu_rt_runtime_read(struct cgroup_subsys_state *css,
+ struct cftype *cft)
{
- return sched_group_rt_runtime(cgroup_tg(cgrp));
+ return sched_group_rt_runtime(css_tg(css));
}
-static int cpu_rt_period_write_uint(struct cgroup *cgrp, struct cftype *cftype,
- u64 rt_period_us)
+static int cpu_rt_period_write_uint(struct cgroup_subsys_state *css,
+ struct cftype *cftype, u64 rt_period_us)
{
- return sched_group_set_rt_period(cgroup_tg(cgrp), rt_period_us);
+ return sched_group_set_rt_period(css_tg(css), rt_period_us);
}
-static u64 cpu_rt_period_read_uint(struct cgroup *cgrp, struct cftype *cft)
+static u64 cpu_rt_period_read_uint(struct cgroup_subsys_state *css,
+ struct cftype *cft)
{
- return sched_group_rt_period(cgroup_tg(cgrp));
+ return sched_group_rt_period(css_tg(css));
}
#endif /* CONFIG_RT_GROUP_SCHED */
OpenPOWER on IntegriCloud