From efbbd05a595343a413964ad85a2ad359b7b7efbd Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Wed, 16 Dec 2009 18:04:40 +0100
Subject: sched: Add pre and post wakeup hooks

As will be apparent in the next patch, we need a pre wakeup hook
for sched_fair task migration, hence rename the post wakeup hook
and add a pre wakeup hook.

Signed-off-by: Peter Zijlstra
Cc: Mike Galbraith
LKML-Reference: <20091216170518.114746117@chello.nl>
Signed-off-by: Ingo Molnar
---
 include/linux/sched.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux/sched.h')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 5c858f38e81a..2c9fa1ccebff 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1091,7 +1091,8 @@ struct sched_class {
 			      enum cpu_idle_type idle);
 	void (*pre_schedule) (struct rq *this_rq, struct task_struct *task);
 	void (*post_schedule) (struct rq *this_rq);
-	void (*task_wake_up) (struct rq *this_rq, struct task_struct *task);
+	void (*task_waking) (struct rq *this_rq, struct task_struct *task);
+	void (*task_woken) (struct rq *this_rq, struct task_struct *task);
 
 	void (*set_cpus_allowed)(struct task_struct *p,
 				 const struct cpumask *newmask);
--
cgit v1.2.3
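The shape of the new hook pair is easy to model outside the kernel. Below is a minimal user-space sketch, not kernel code: the pared-down rq, task_struct, and toy try_to_wake_up() path are all hypothetical stand-ins, and only the calling order (task_waking on the old runqueue before migration, task_woken on the new runqueue after enqueue) mirrors the patch.

/*
 * Standalone illustration (not kernel code): a pared-down sched_class
 * with the renamed hook pair. struct rq and struct task_struct here
 * are hypothetical stand-ins, as is the toy wakeup path below.
 */
#include <stdio.h>

struct rq { int cpu; };
struct task_struct { const char *comm; };

struct sched_class {
	/* called on the old runqueue, before the task migrates */
	void (*task_waking)(struct rq *rq, struct task_struct *p);
	/* called on the new runqueue, after the task is enqueued */
	void (*task_woken)(struct rq *rq, struct task_struct *p);
};

static void demo_waking(struct rq *rq, struct task_struct *p)
{
	printf("%s: waking, leaving cpu%d (old rq->lock held)\n", p->comm, rq->cpu);
}

static void demo_woken(struct rq *rq, struct task_struct *p)
{
	printf("%s: woken on cpu%d\n", p->comm, rq->cpu);
}

static const struct sched_class demo_class = {
	.task_waking = demo_waking,
	.task_woken  = demo_woken,
};

/* toy wakeup path; the hook ordering is the point of the patch */
static void try_to_wake_up(struct task_struct *p,
			   struct rq *old_rq, struct rq *new_rq)
{
	if (demo_class.task_waking)
		demo_class.task_waking(old_rq, p);	/* pre-migration */
	/* ... select_task_rq(), set_task_cpu(), activate_task() ... */
	if (demo_class.task_woken)
		demo_class.task_woken(new_rq, p);	/* post-enqueue */
}

int main(void)
{
	struct rq rq0 = { .cpu = 0 }, rq1 = { .cpu = 1 };
	struct task_struct p = { .comm = "worker" };

	try_to_wake_up(&p, &rq0, &rq1);
	return 0;
}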
From 88ec22d3edb72b261f8628226cd543589a6d5e1b Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Wed, 16 Dec 2009 18:04:41 +0100
Subject: sched: Remove the cfs_rq dependency from set_task_cpu()

In order to remove the cfs_rq dependency from set_task_cpu() we need
to ensure the task is cfs_rq invariant for all callsites.

The simple approach is to subtract cfs_rq->min_vruntime from
se->vruntime on dequeue, and add cfs_rq->min_vruntime on enqueue.

However, this has the downside of breaking FAIR_SLEEPERS since we
lose the old vruntime as we only maintain the relative position.

To solve this, we observe that we only migrate runnable tasks; we do
this using deactivate_task(.sleep=0) and activate_task(.wakeup=0), so
we can restrict the min_vruntime invariance to that state.

The only other case is wakeup balancing: since we want to maintain the
old vruntime we cannot make it relative on dequeue, but since we don't
migrate inactive tasks, we can do so right before we activate the task
again.

This is where we need the new pre-wakeup hook: it must be called while
still holding the old rq->lock. We could fold it into
->select_task_rq(), but since that has multiple callsites and would
obfuscate the locking requirements, that seems like a fudge.

This leaves the fork() case; simply make sure that ->task_fork()
leaves ->vruntime in a relative state.

This covers all cases where set_task_cpu() gets called, and ensures it
sees a relative vruntime.

Signed-off-by: Peter Zijlstra
Cc: Mike Galbraith
LKML-Reference: <20091216170518.191697025@chello.nl>
Signed-off-by: Ingo Molnar
---
 include/linux/sched.h |  2 +-
 kernel/sched.c        |  6 +-----
 kernel/sched_fair.c   | 50 ++++++++++++++++++++++++++++++++++++++++++++------
 3 files changed, 46 insertions(+), 12 deletions(-)

(limited to 'include/linux/sched.h')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 2c9fa1ccebff..973b2b89f86d 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1116,7 +1116,7 @@ struct sched_class {
 			     struct task_struct *task);
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
-	void (*moved_group) (struct task_struct *p);
+	void (*moved_group) (struct task_struct *p, int on_rq);
 #endif
 };
 
diff --git a/kernel/sched.c b/kernel/sched.c
index 6c571bdd5658..f92ce63edfff 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2038,8 +2038,6 @@ task_hot(struct task_struct *p, u64 now, struct sched_domain *sd)
 void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
 {
 	int old_cpu = task_cpu(p);
-	struct cfs_rq *old_cfsrq = task_cfs_rq(p),
-		      *new_cfsrq = cpu_cfs_rq(old_cfsrq, new_cpu);
 
 #ifdef CONFIG_SCHED_DEBUG
 	/*
@@ -2056,8 +2054,6 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
 		perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS,
 				      1, 1, NULL, 0);
 	}
-	p->se.vruntime -= old_cfsrq->min_vruntime -
-					 new_cfsrq->min_vruntime;
 
 	__set_task_cpu(p, new_cpu);
 }
@@ -10102,7 +10098,7 @@ void sched_move_task(struct task_struct *tsk)
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
 	if (tsk->sched_class->moved_group)
-		tsk->sched_class->moved_group(tsk);
+		tsk->sched_class->moved_group(tsk, on_rq);
 #endif
 
 	if (unlikely(running))
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index ec1d2715620c..42ac3c9f66f6 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -510,6 +510,7 @@ __update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr,
 	curr->sum_exec_runtime += delta_exec;
 	schedstat_add(cfs_rq, exec_clock, delta_exec);
 	delta_exec_weighted = calc_delta_fair(delta_exec, curr);
+
 	curr->vruntime += delta_exec_weighted;
 	update_min_vruntime(cfs_rq);
 }
@@ -765,16 +766,26 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
 	se->vruntime = vruntime;
 }
 
+#define ENQUEUE_WAKEUP	1
+#define ENQUEUE_MIGRATE 2
+
 static void
-enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int wakeup)
+enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 {
+	/*
+	 * Update the normalized vruntime before updating min_vruntime
+	 * through calling update_curr().
+	 */
+	if (!(flags & ENQUEUE_WAKEUP) || (flags & ENQUEUE_MIGRATE))
+		se->vruntime += cfs_rq->min_vruntime;
+
 	/*
 	 * Update run-time statistics of the 'current'.
 	 */
 	update_curr(cfs_rq);
 	account_entity_enqueue(cfs_rq, se);
 
-	if (wakeup) {
+	if (flags & ENQUEUE_WAKEUP) {
 		place_entity(cfs_rq, se, 0);
 		enqueue_sleeper(cfs_rq, se);
 	}
@@ -828,6 +839,14 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep)
 		__dequeue_entity(cfs_rq, se);
 	account_entity_dequeue(cfs_rq, se);
 	update_min_vruntime(cfs_rq);
+
+	/*
+	 * Normalize the entity after updating the min_vruntime because the
+	 * update can refer to the ->curr item and we need to reflect this
+	 * movement in our normalized position.
+	 */
+	if (!sleep)
+		se->vruntime -= cfs_rq->min_vruntime;
 }
 
 /*
@@ -1038,13 +1057,19 @@ static void enqueue_task_fair(struct rq *rq, struct task_struct *p, int wakeup)
 {
 	struct cfs_rq *cfs_rq;
 	struct sched_entity *se = &p->se;
+	int flags = 0;
+
+	if (wakeup)
+		flags |= ENQUEUE_WAKEUP;
+	if (p->state == TASK_WAKING)
+		flags |= ENQUEUE_MIGRATE;
 
 	for_each_sched_entity(se) {
 		if (se->on_rq)
 			break;
 		cfs_rq = cfs_rq_of(se);
-		enqueue_entity(cfs_rq, se, wakeup);
-		wakeup = 1;
+		enqueue_entity(cfs_rq, se, flags);
+		flags = ENQUEUE_WAKEUP;
 	}
 
 	hrtick_update(rq);
@@ -1120,6 +1145,14 @@ static void yield_task_fair(struct rq *rq)
 
 #ifdef CONFIG_SMP
 
+static void task_waking_fair(struct rq *rq, struct task_struct *p)
+{
+	struct sched_entity *se = &p->se;
+	struct cfs_rq *cfs_rq = cfs_rq_of(se);
+
+	se->vruntime -= cfs_rq->min_vruntime;
+}
+
 #ifdef CONFIG_FAIR_GROUP_SCHED
 /*
  * effective_load() calculates the load change as seen from the root_task_group
@@ -1978,6 +2011,8 @@ static void task_fork_fair(struct task_struct *p)
 		resched_task(rq->curr);
 	}
 
+	se->vruntime -= cfs_rq->min_vruntime;
+
 	raw_spin_unlock_irqrestore(&rq->lock, flags);
 }
 
@@ -2031,12 +2066,13 @@ static void set_curr_task_fair(struct rq *rq)
 }
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
-static void moved_group_fair(struct task_struct *p)
+static void moved_group_fair(struct task_struct *p, int on_rq)
 {
 	struct cfs_rq *cfs_rq = task_cfs_rq(p);
 
 	update_curr(cfs_rq);
-	place_entity(cfs_rq, &p->se, 1);
+	if (!on_rq)
+		place_entity(cfs_rq, &p->se, 1);
 }
 #endif
 
@@ -2076,6 +2112,8 @@ static const struct sched_class fair_sched_class = {
 	.move_one_task		= move_one_task_fair,
 	.rq_online		= rq_online_fair,
 	.rq_offline		= rq_offline_fair,
+
+	.task_waking		= task_waking_fair,
 #endif
 
 	.set_curr_task          = set_curr_task_fair,
--
cgit v1.2.3
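The invariant this patch establishes is small enough to check in user space. The sketch below, with made-up types and numbers, demonstrates why subtracting min_vruntime on dequeue and adding the destination queue's min_vruntime on enqueue preserves a task's lag relative to its queue across a migration; the two helper names are hypothetical, not kernel functions.

/*
 * Standalone demo (not kernel code) of the min_vruntime-relative
 * invariant: dequeue makes vruntime relative to the old queue,
 * enqueue makes it absolute again on the new queue. The numbers
 * are made up; only the arithmetic mirrors the patch.
 */
#include <stdio.h>
#include <stdint.h>

struct cfs_rq { uint64_t min_vruntime; };
struct sched_entity { uint64_t vruntime; };

/* mirrors dequeue_entity(.sleep=0): normalize to the old queue */
static void dequeue_for_migration(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
	se->vruntime -= cfs_rq->min_vruntime;
}

/* mirrors enqueue with ENQUEUE_MIGRATE: rebase onto the new queue */
static void enqueue_after_migration(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
	se->vruntime += cfs_rq->min_vruntime;
}

int main(void)
{
	struct cfs_rq src = { .min_vruntime = 1000 };
	struct cfs_rq dst = { .min_vruntime = 5000 };
	struct sched_entity se = { .vruntime = 1200 };	/* 200 behind-of-min on src */

	dequeue_for_migration(&src, &se);	/* relative: 200 */
	enqueue_after_migration(&dst, &se);	/* absolute again: 5200 */

	printf("lag on dst: %llu\n",
	       (unsigned long long)(se.vruntime - dst.min_vruntime));
	return 0;
}

The printed lag is 200, the same distance the entity had from src's minimum, so the migration neither boosts nor penalizes the task despite the absolute min_vruntime gap between the two runqueues.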
From 733421516b42c44b9e21f1793c430cc801ef8324 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Thu, 17 Dec 2009 13:16:27 +0100
Subject: sched: Move TASK_STATE_TO_CHAR_STR near the TASK_state bits

So that we don't keep forgetting about it.

Signed-off-by: Peter Zijlstra
LKML-Reference: <20091217121829.815779372@chello.nl>
Signed-off-by: Ingo Molnar
---
 include/linux/sched.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux/sched.h')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 973b2b89f86d..c28ed1b1d7c2 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -193,6 +193,8 @@ print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
 #define TASK_WAKEKILL		128
 #define TASK_WAKING		256
 
+#define TASK_STATE_TO_CHAR_STR "RSDTtZX"
+
 /* Convenience macros for the sake of set_task_state */
 #define TASK_KILLABLE		(TASK_WAKEKILL | TASK_UNINTERRUPTIBLE)
 #define TASK_STOPPED		(TASK_WAKEKILL | __TASK_STOPPED)
@@ -2595,8 +2597,6 @@ static inline void mm_init_owner(struct mm_struct *mm, struct task_struct *p)
 }
 #endif /* CONFIG_MM_OWNER */
 
-#define TASK_STATE_TO_CHAR_STR "RSDTtZX"
-
 #endif /* __KERNEL__ */
 #endif
--
cgit v1.2.3

From 44d90df6b757c59651ddd55f1a84f28132b50d29 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Thu, 17 Dec 2009 13:16:28 +0100
Subject: sched: Add missing state chars to TASK_STATE_TO_CHAR_STR

We grew 3 new task states since the last time someone touched it.

Signed-off-by: Peter Zijlstra
LKML-Reference: <20091217121829.892737686@chello.nl>
Signed-off-by: Ingo Molnar
---
 include/linux/sched.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux/sched.h')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index c28ed1b1d7c2..94858df38a87 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -193,7 +193,7 @@ print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
 #define TASK_WAKEKILL		128
 #define TASK_WAKING		256
 
-#define TASK_STATE_TO_CHAR_STR "RSDTtZX"
+#define TASK_STATE_TO_CHAR_STR "RSDTtZXxKW"
 
 /* Convenience macros for the sake of set_task_state */
 #define TASK_KILLABLE		(TASK_WAKEKILL | TASK_UNINTERRUPTIBLE)
--
cgit v1.2.3
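These two patches treat TASK_STATE_TO_CHAR_STR as a lookup table indexed by bit position: state 0 maps to 'R', and a state whose bit is 1 << n maps to character n + 1, which is why the string must grow in lockstep with the state bits. Below is a user-space sketch of the same shift loop that fs/proc uses; the state_char() helper is hypothetical, while the constants are copied from the patches.

/*
 * Standalone sketch (not kernel code) of how TASK_STATE_TO_CHAR_STR
 * is indexed: state 0 is 'R', and a state whose set bit is 1 << n
 * maps to character n + 1 of the string.
 */
#include <stdio.h>

#define TASK_STATE_TO_CHAR_STR "RSDTtZXxKW"

#define TASK_INTERRUPTIBLE	1
#define TASK_UNINTERRUPTIBLE	2
#define TASK_STOPPED_BIT	4	/* __TASK_STOPPED in the kernel */
#define TASK_WAKING		256

/* mirrors the shift loop in fs/proc/array.c's get_task_state() */
static char state_char(unsigned int state)
{
	const char *p = TASK_STATE_TO_CHAR_STR;

	while (state) {
		p++;
		state >>= 1;
	}
	return *p;
}

int main(void)
{
	printf("%c %c %c %c %c\n",
	       state_char(0),			 /* R: running */
	       state_char(TASK_INTERRUPTIBLE),	 /* S */
	       state_char(TASK_UNINTERRUPTIBLE), /* D */
	       state_char(TASK_STOPPED_BIT),	 /* T */
	       state_char(TASK_WAKING));	 /* W */
	return 0;
}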
From e1781538cf5c870ab696e9b8f0a5c498d3900f2f Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Thu, 17 Dec 2009 13:16:30 +0100
Subject: sched: Assert task state bits at build time

Since everybody is lazy and prone to forgetting things, make the
compiler help us a bit.

Signed-off-by: Peter Zijlstra
LKML-Reference: <20091217121830.060186433@chello.nl>
Signed-off-by: Ingo Molnar
---
 fs/proc/array.c       | 18 ++++++++++--------
 include/linux/sched.h |  4 ++++
 2 files changed, 14 insertions(+), 8 deletions(-)

(limited to 'include/linux/sched.h')

diff --git a/fs/proc/array.c b/fs/proc/array.c
index 96361e8fa3a8..f560325c444f 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -134,14 +134,14 @@ static inline void task_name(struct seq_file *m, struct task_struct *p)
  * simple bit tests.
  */
 static const char *task_state_array[] = {
-	"R (running)",		/* 0 */
-	"S (sleeping)",		/* 1 */
-	"D (disk sleep)",	/* 2 */
-	"T (stopped)",		/* 4 */
-	"t (tracing stop)",	/* 8 */
-	"Z (zombie)",		/* 16 */
-	"X (dead)",		/* 32 */
-	"x (dead)",		/* 64 */
+	"R (running)",		/*   0 */
+	"S (sleeping)",		/*   1 */
+	"D (disk sleep)",	/*   2 */
+	"T (stopped)",		/*   4 */
+	"t (tracing stop)",	/*   8 */
+	"Z (zombie)",		/*  16 */
+	"X (dead)",		/*  32 */
+	"x (dead)",		/*  64 */
 	"K (wakekill)",		/* 128 */
 	"W (waking)",		/* 256 */
 };
@@ -151,6 +151,8 @@ static inline const char *get_task_state(struct task_struct *tsk)
 	unsigned int state = (tsk->state & TASK_REPORT) | tsk->exit_state;
 	const char **p = &task_state_array[0];
 
+	BUILD_BUG_ON(1 + ilog2(TASK_STATE_MAX) != ARRAY_SIZE(task_state_array));
+
 	while (state) {
 		p++;
 		state >>= 1;
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 94858df38a87..37543876ddf5 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -192,9 +192,13 @@ print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
 #define TASK_DEAD		64
 #define TASK_WAKEKILL		128
 #define TASK_WAKING		256
+#define TASK_STATE_MAX		512
 
 #define TASK_STATE_TO_CHAR_STR "RSDTtZXxKW"
 
+extern char ___assert_task_state[1 - 2*!!(
+		sizeof(TASK_STATE_TO_CHAR_STR)-1 != ilog2(TASK_STATE_MAX)+1)];
+
 /* Convenience macros for the sake of set_task_state */
 #define TASK_KILLABLE		(TASK_WAKEKILL | TASK_UNINTERRUPTIBLE)
 #define TASK_STOPPED		(TASK_WAKEKILL | __TASK_STOPPED)
--
cgit v1.2.3
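The ___assert_task_state declaration is the classic pre-C11 negative-array-size assertion: the array size expression evaluates to 1 when the string and the state bits agree, and to -1 when they don't, and a negative array size is a hard compile error. A standalone sketch of the pattern under the same arithmetic follows; the macro and constant names here are hypothetical, and ilog2() is replaced by a hand-written constant since it is a kernel-only helper.

/*
 * Demonstration of the build-time assert pattern (not kernel code).
 * The extern array is never defined, so it costs nothing at runtime;
 * its only job is to have an illegal (negative) size when the
 * asserted failure condition is true.
 */
#include <stdio.h>

#define BUILD_FAIL_IF(name, cond) \
	extern char name[1 - 2 * !!(cond)]

#define STATE_STR	"RSDTtZXxKW"
#define STATE_MAX	512
#define ILOG2_STATE_MAX	9	/* stand-in for ilog2(512) */

/* passes: sizeof(STATE_STR) - 1 == 10 == ILOG2_STATE_MAX + 1 */
BUILD_FAIL_IF(assert_state_str, sizeof(STATE_STR) - 1 != ILOG2_STATE_MAX + 1);

int main(void)
{
	/* Drop a character from STATE_STR above and this file stops compiling. */
	printf("state string has %zu chars\n", sizeof(STATE_STR) - 1);
	return 0;
}

Unlike BUILD_BUG_ON(), which needs function scope (as used in get_task_state() above), the extern-array form works at file scope, which is why the header uses it.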