From 1799e35d5baab6e06168b46cc78b968e728ea3d1 Mon Sep 17 00:00:00 2001
From: Ingo Molnar
Date: Wed, 19 Sep 2007 23:34:46 +0200
Subject: sched: add /proc/sys/kernel/sched_compat_yield

add /proc/sys/kernel/sched_compat_yield to make sys_sched_yield()
more aggressive, by moving the yielding task to the last position
in the rbtree.

with sched_compat_yield=0:

  PID USER      PR  NI  VIRT  RES  SHR S %CPU %MEM    TIME+  COMMAND
 2539 mingo     20   0  1576  252  204 R   50  0.0   0:02.03 loop_yield
 2541 mingo     20   0  1576  244  196 R   50  0.0   0:02.05 loop

with sched_compat_yield=1:

  PID USER      PR  NI  VIRT  RES  SHR S %CPU %MEM    TIME+  COMMAND
 2584 mingo     20   0  1576  248  196 R   99  0.0   0:52.45 loop
 2582 mingo     20   0  1576  256  204 R    0  0.0   0:00.00 loop_yield

Signed-off-by: Ingo Molnar
Signed-off-by: Peter Zijlstra
---
 kernel/sched.c      |  5 +----
 kernel/sched_fair.c | 63 ++++++++++++++++++++++++++++++++++++++++++++++++-----
 kernel/sysctl.c     |  8 +++++++
 3 files changed, 66 insertions(+), 10 deletions(-)

(limited to 'kernel')

diff --git a/kernel/sched.c b/kernel/sched.c
index deeb1f8e0c30..63e0971c8fbb 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4550,10 +4550,7 @@ asmlinkage long sys_sched_yield(void)
 	struct rq *rq = this_rq_lock();
 
 	schedstat_inc(rq, yld_cnt);
-	if (unlikely(rq->nr_running == 1))
-		schedstat_inc(rq, yld_act_empty);
-	else
-		current->sched_class->yield_task(rq, current);
+	current->sched_class->yield_task(rq, current);
 
 	/*
 	 * Since we are going to call schedule() anyway, there's
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 892616bf2c77..c9fbe8e73a45 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -42,6 +42,14 @@ unsigned int sysctl_sched_latency __read_mostly = 20000000ULL;
  */
 unsigned int sysctl_sched_min_granularity __read_mostly = 2000000ULL;
 
+/*
+ * sys_sched_yield() compat mode
+ *
+ * This option switches the aggressive yield implementation of the
+ * old scheduler back on.
+ */
+unsigned int __read_mostly sysctl_sched_compat_yield;
+
 /*
  * SCHED_BATCH wake-up granularity.
  * (default: 25 msec, units: nanoseconds)
@@ -897,19 +905,62 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int sleep)
 }
 
 /*
- * sched_yield() support is very simple - we dequeue and enqueue
+ * sched_yield() support is very simple - we dequeue and enqueue.
+ *
+ * If compat_yield is turned on then we requeue to the end of the tree.
  */
 static void yield_task_fair(struct rq *rq, struct task_struct *p)
 {
 	struct cfs_rq *cfs_rq = task_cfs_rq(p);
+	struct rb_node **link = &cfs_rq->tasks_timeline.rb_node;
+	struct sched_entity *rightmost, *se = &p->se;
+	struct rb_node *parent;
 
-	__update_rq_clock(rq);
 	/*
-	 * Dequeue and enqueue the task to update its
-	 * position within the tree:
+	 * Are we the only task in the tree?
+	 */
+	if (unlikely(cfs_rq->nr_running == 1))
+		return;
+
+	if (likely(!sysctl_sched_compat_yield)) {
+		__update_rq_clock(rq);
+		/*
+		 * Dequeue and enqueue the task to update its
+		 * position within the tree:
+		 */
+		dequeue_entity(cfs_rq, &p->se, 0);
+		enqueue_entity(cfs_rq, &p->se, 0);
+
+		return;
+	}
+	/*
+	 * Find the rightmost entry in the rbtree:
 	 */
-	dequeue_entity(cfs_rq, &p->se, 0);
-	enqueue_entity(cfs_rq, &p->se, 0);
+	do {
+		parent = *link;
+		link = &parent->rb_right;
+	} while (*link);
+
+	rightmost = rb_entry(parent, struct sched_entity, run_node);
+	/*
+	 * Already in the rightmost position?
+	 */
+	if (unlikely(rightmost == se))
+		return;
+
+	/*
+	 * Minimally necessary key value to be last in the tree:
+	 */
+	se->fair_key = rightmost->fair_key + 1;
+
+	if (cfs_rq->rb_leftmost == &se->run_node)
+		cfs_rq->rb_leftmost = rb_next(&se->run_node);
+	/*
+	 * Relink the task to the rightmost position:
+	 */
+	rb_erase(&se->run_node, &cfs_rq->tasks_timeline);
+	rb_link_node(&se->run_node, parent, link);
+	rb_insert_color(&se->run_node, &cfs_rq->tasks_timeline);
 }
 
 /*
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 6ace893c17c9..53a456ebf6d5 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -303,6 +303,14 @@ static ctl_table kern_table[] = {
 		.proc_handler	= &proc_dointvec,
 	},
 #endif
+	{
+		.ctl_name	= CTL_UNNUMBERED,
+		.procname	= "sched_compat_yield",
+		.data		= &sysctl_sched_compat_yield,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
#ifdef CONFIG_PROVE_LOCKING
 	{
 		.ctl_name	= CTL_UNNUMBERED,
-- cgit v1.2.1
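The loop/loop_yield pair from the top output is easy to recreate. A minimal
userspace sketch (not part of the patch; the file name and argv convention
are illustrative): run two instances pinned to one CPU, e.g. with taskset,
one of them with "yield" as its argument, and toggle the new sysctl via
echo 1 > /proc/sys/kernel/sched_compat_yield between runs.

/* loop.c - busy loop; with "yield" as argv[1] it calls sched_yield()
 * on every iteration, mirroring the loop_yield process above. */
#include <sched.h>
#include <string.h>

int main(int argc, char **argv)
{
	int yield = (argc > 1 && strcmp(argv[1], "yield") == 0);

	for (;;) {
		if (yield)
			sched_yield();	/* behaviour under test */
	}
	return 0;	/* never reached */
}

With sched_compat_yield=1 the yielding task is requeued behind the
rightmost entity, so it only runs once its competitor has nothing left
to consume, which matches the 99%/0% split in the top output above.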
From 9c95e7319ba98585ebb6d304eca2d56f401ed70c Mon Sep 17 00:00:00 2001
From: Hiroshi Shimamoto
Date: Wed, 19 Sep 2007 23:34:46 +0200
Subject: sched: fix invalid sched_class use

When using rt_mutex, a NULL pointer dereference occurs in
enqueue_task_rt. Here is the scenario:

1) There are two threads: thread A is fair_sched_class and
   thread B is rt_sched_class.
2) Thread A is boosted up to rt_sched_class, because it holds an
   rt_mutex lock that thread B is waiting on.
3) If thread A creates a new thread C at this point, thread C
   inherits rt_sched_class.
4) In wake_up_new_task() the priority of thread C is outside the
   RT priority range, because the normal priority of thread A is
   not an RT priority.

This corrupts data by overflowing the rt_prio_array. The new thread C
should be fair_sched_class: a new thread must have a valid scheduler
class before it is queued. This patch sets the suitable scheduler
class explicitly.

Signed-off-by: Hiroshi Shimamoto
Signed-off-by: Ingo Molnar
Signed-off-by: Peter Zijlstra
---
 kernel/sched.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'kernel')

diff --git a/kernel/sched.c b/kernel/sched.c
index 63e0971c8fbb..6107a0cd6325 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1682,6 +1682,11 @@ void fastcall wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
 
 	p->prio = effective_prio(p);
 
+	if (rt_prio(p->prio))
+		p->sched_class = &rt_sched_class;
+	else
+		p->sched_class = &fair_sched_class;
+
 	if (!p->sched_class->task_new || !sysctl_sched_child_runs_first ||
 			(clone_flags & CLONE_VM) || task_cpu(p) != this_cpu ||
 			!current->se.on_rq) {
-- cgit v1.2.1
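A hedged userspace sketch of the four-step scenario above, for reference
only: a SCHED_OTHER thread takes a PTHREAD_PRIO_INHERIT mutex (an rt_mutex
in the kernel), a SCHED_FIFO thread blocks on it so the owner gets
PI-boosted, and the owner then creates a new thread while still boosted.
The priority value and sleep-based timing are illustrative, it needs root
(or CAP_SYS_NICE) for SCHED_FIFO, and whether the pre-fix kernel actually
crashes is timing-dependent.

#include <pthread.h>
#include <sched.h>
#include <unistd.h>

static pthread_mutex_t pi_lock;

static void *thread_c(void *arg)	/* the newly created thread */
{
	return NULL;
}

static void *thread_b(void *arg)	/* SCHED_FIFO waiter */
{
	pthread_mutex_lock(&pi_lock);	/* blocks; PI-boosts the owner */
	pthread_mutex_unlock(&pi_lock);
	return NULL;
}

int main(void)				/* plays the role of thread A */
{
	pthread_mutexattr_t ma;
	pthread_attr_t ba;
	struct sched_param sp = { .sched_priority = 10 };
	pthread_t b, c;

	pthread_mutexattr_init(&ma);
	pthread_mutexattr_setprotocol(&ma, PTHREAD_PRIO_INHERIT);
	pthread_mutex_init(&pi_lock, &ma);

	pthread_mutex_lock(&pi_lock);		/* A owns the rt_mutex */

	pthread_attr_init(&ba);
	pthread_attr_setinheritsched(&ba, PTHREAD_EXPLICIT_SCHED);
	pthread_attr_setschedpolicy(&ba, SCHED_FIFO);
	pthread_attr_setschedparam(&ba, &sp);
	pthread_create(&b, &ba, thread_b, NULL);/* B blocks, A boosted */

	sleep(1);				/* let B block on the lock */
	pthread_create(&c, NULL, thread_c, NULL);/* C created while boosted */

	pthread_mutex_unlock(&pi_lock);
	pthread_join(b, NULL);
	pthread_join(c, NULL);
	return 0;
}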
From b8fceee17a310f189188599a8fa5e9beaff57eb0 Mon Sep 17 00:00:00 2001
From: Davide Libenzi
Date: Thu, 20 Sep 2007 12:40:16 -0700
Subject: signalfd simplification

This simplifies the signalfd code by no longer keeping the signalfd
attached to the sighand for its whole lifetime. This way, the signalfd
remains attached to the sighand only during poll(2) (and select and
epoll) and read(2). This also allows removing all the custom
"tsk == current" checks in kernel/signal.c, since dequeue_signal() will
only be called by "current". I think this is also what Ben was
suggesting some time ago.

The external effect of this is that a thread can extract only its own
private signals and the group ones. I think this is acceptable
behaviour, in that those are the signals the thread would be able to
fetch without signalfd.

Signed-off-by: Davide Libenzi
Signed-off-by: Linus Torvalds
---
 kernel/exit.c   | 9 ---------
 kernel/fork.c   | 2 +-
 kernel/signal.c | 8 +++-----
 3 files changed, 4 insertions(+), 15 deletions(-)

(limited to 'kernel')

diff --git a/kernel/exit.c b/kernel/exit.c
index 06b24b3aa370..993369ee94d1 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -24,7 +24,6 @@
 #include
 #include
 #include
-#include <linux/signalfd.h>
 #include
 #include
 #include
@@ -86,14 +85,6 @@ static void __exit_signal(struct task_struct *tsk)
 	sighand = rcu_dereference(tsk->sighand);
 	spin_lock(&sighand->siglock);
 
-	/*
-	 * Notify that this sighand has been detached. This must
-	 * be called with the tsk->sighand lock held. Also, this
-	 * access tsk->sighand internally, so it must be called
-	 * before tsk->sighand is reset.
-	 */
-	signalfd_detach_locked(tsk);
-
 	posix_cpu_timers_exit(tsk);
 	if (atomic_dec_and_test(&sig->count))
 		posix_cpu_timers_exit_group(tsk);
diff --git a/kernel/fork.c b/kernel/fork.c
index 7332e236d367..33f12f48684a 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1438,7 +1438,7 @@ static void sighand_ctor(void *data, struct kmem_cache *cachep,
 	struct sighand_struct *sighand = data;
 
 	spin_lock_init(&sighand->siglock);
-	INIT_LIST_HEAD(&sighand->signalfd_list);
+	init_waitqueue_head(&sighand->signalfd_wqh);
 }
 
 void __init proc_caches_init(void)
diff --git a/kernel/signal.c b/kernel/signal.c
index 3169bed0b4d0..9fb91a32edda 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -378,8 +378,7 @@ int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info)
 	/* We only dequeue private signals from ourselves, we don't let
 	 * signalfd steal them
 	 */
-	if (likely(tsk == current))
-		signr = __dequeue_signal(&tsk->pending, mask, info);
+	signr = __dequeue_signal(&tsk->pending, mask, info);
 	if (!signr) {
 		signr = __dequeue_signal(&tsk->signal->shared_pending,
 					 mask, info);
@@ -407,8 +406,7 @@ int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info)
 			}
 		}
 	}
-	if (likely(tsk == current))
-		recalc_sigpending();
+	recalc_sigpending();
 	if (signr && unlikely(sig_kernel_stop(signr))) {
 		/*
 		 * Set a marker that we have dequeued a stop signal. Our
@@ -425,7 +423,7 @@ int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info)
 		if (!(tsk->signal->flags & SIGNAL_GROUP_EXIT))
 			tsk->signal->flags |= SIGNAL_STOP_DEQUEUED;
 	}
-	if (signr && likely(tsk == current) &&
+	if (signr &&
 	    ((info->si_code & __SI_MASK) == __SI_TIMER) &&
 	    info->si_sys_private){
 		/*
-- cgit v1.2.1
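The resulting semantics are visible from userspace. A minimal sketch
(assuming a libc that wraps signalfd(2); error handling omitted): the
reading thread dequeues exactly what it could have fetched with
sigwaitinfo(), its own private signals plus the group-wide ones.

#include <sys/signalfd.h>
#include <signal.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	sigset_t mask;
	struct signalfd_siginfo ssi;
	int sfd;

	sigemptyset(&mask);
	sigaddset(&mask, SIGUSR1);
	sigprocmask(SIG_BLOCK, &mask, NULL);	/* must block what we read */

	sfd = signalfd(-1, &mask, 0);
	kill(getpid(), SIGUSR1);		/* a group-wide signal */

	if (read(sfd, &ssi, sizeof(ssi)) == sizeof(ssi))
		printf("got signal %d\n", ssi.ssi_signo);
	close(sfd);
	return 0;
}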
From b7e113dc9d52c4a37d2da6fafe77959f3a28eccf Mon Sep 17 00:00:00 2001
From: Thomas Gleixner
Date: Sat, 22 Sep 2007 22:29:06 +0000
Subject: clockevents: remove the suspend/resume workaround^Wthinko

In a desperate attempt to fix the suspend/resume problem on Andrew's
VAIO I added a workaround which enforced the broadcast of the oneshot
timer on resume. This actually resolved the problem on the VAIO, but
it was just a stupid workaround that did not tackle the root cause:
the assignment of lower idle C-states in the ACPI processor_idle code.
The cpuidle patches, which utilize the dynamic tick feature and enter
deeper C-states sooner, exposed the problem again.

The correct solution is the previous patch, which prevents lower
C-states across suspend/resume.

Remove the enforcement code, including the conditional broadcast timer
arming, which helped to paper over the real problem for quite a while.
The oneshot broadcast flag for the CPU which runs the resume code can
never be set at the time this code is executed. It only gets set when
the CPU enters a lower idle C-state.

Signed-off-by: Thomas Gleixner
Tested-by: Andrew Morton
Cc: Len Brown
Cc: Venkatesh Pallipadi
Cc: Rafael J. Wysocki
Signed-off-by: Linus Torvalds
---
 kernel/time/tick-broadcast.c | 17 +----------------
 1 file changed, 1 insertion(+), 16 deletions(-)

(limited to 'kernel')

diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index aab881c86a1a..0962e0577660 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -382,23 +382,8 @@ static int tick_broadcast_set_event(ktime_t expires, int force)
 
 int tick_resume_broadcast_oneshot(struct clock_event_device *bc)
 {
-	int cpu = smp_processor_id();
-
-	/*
-	 * If the CPU is marked for broadcast, enforce oneshot
-	 * broadcast mode. The jinxed VAIO does not resume otherwise.
-	 * No idea why it ends up in a lower C State during resume
-	 * without notifying the clock events layer.
-	 */
-	if (cpu_isset(cpu, tick_broadcast_mask))
-		cpu_set(cpu, tick_broadcast_oneshot_mask);
-
 	clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT);
-
-	if(!cpus_empty(tick_broadcast_oneshot_mask))
-		tick_broadcast_set_event(ktime_get(), 1);
-
-	return cpu_isset(cpu, tick_broadcast_oneshot_mask);
+	return 0;
 }
 
 /*
-- cgit v1.2.1
From 459685c75b82a0431da102365d507fdb72858b84 Mon Sep 17 00:00:00 2001
From: Al Viro
Date: Wed, 26 Sep 2007 01:54:12 +0100
Subject: hibernation doesn't even build on frv - tons of helpers are missing

Signed-off-by: Al Viro
Acked-By: David Howells
Signed-off-by: Linus Torvalds
---
 kernel/power/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index c8580a1e6873..14b0e10dc95c 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -110,7 +110,7 @@ config SUSPEND
 
 config HIBERNATION_UP_POSSIBLE
 	bool
-	depends on X86 || PPC64_SWSUSP || FRV || PPC32
+	depends on X86 || PPC64_SWSUSP || PPC32
 	depends on !SMP
 	default y
-- cgit v1.2.1
From 4047727e5ae33f9b8d2b7766d1994ea6e5ec2991 Mon Sep 17 00:00:00 2001
From: Mark Lord
Date: Mon, 1 Oct 2007 01:20:10 -0700
Subject: Fix SMP poweroff hangs

We need to disable all CPUs other than the boot CPU (usually 0) before
attempting to power off modern SMP machines. This fixes the
hang-on-poweroff issue on my MythTV SMP box, and also on Thomas
Gleixner's new toybox.

Signed-off-by: Mark Lord
Acked-by: Thomas Gleixner
Cc: "Rafael J. Wysocki"
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 kernel/sys.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'kernel')

diff --git a/kernel/sys.c b/kernel/sys.c
index 1b33b05d346b..8ae2e636eb1b 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -32,6 +32,7 @@
 #include
 #include
 #include
+#include <linux/cpu.h>
 #include
 #include
@@ -878,6 +879,7 @@ void kernel_power_off(void)
 	kernel_shutdown_prepare(SYSTEM_POWER_OFF);
 	if (pm_power_off_prepare)
 		pm_power_off_prepare();
+	disable_nonboot_cpus();
 	sysdev_shutdown();
 	printk(KERN_EMERG "Power down.\n");
 	machine_power_off();
-- cgit v1.2.1
From 9f96cb1e8bca179a92afa40dfc3c49990f1cfc71 Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky
Date: Mon, 1 Oct 2007 01:20:13 -0700
Subject: robust futex thread exit race

Calling handle_futex_death in exit_robust_list for the different robust
mutexes of a thread basically frees the mutex. Another thread might
grab the lock immediately, which updates the next pointer of the mutex.
fetch_robust_entry over the next pointer might therefore branch into
the robust mutex list of a different thread.

This can cause two problems: 1) some mutexes held by the dead thread
are never freed, and 2) some mutexes held by a different thread are
freed.

The next pointer therefore needs to be read before calling
handle_futex_death.

Signed-off-by: Martin Schwidefsky
Acked-by: Ingo Molnar
Acked-by: Thomas Gleixner
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 kernel/futex.c        | 26 ++++++++++++++++----------
 kernel/futex_compat.c | 28 ++++++++++++++++++----------
 2 files changed, 34 insertions(+), 20 deletions(-)

(limited to 'kernel')

diff --git a/kernel/futex.c b/kernel/futex.c
index e8935b195e88..fcc94e7b4086 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1943,9 +1943,10 @@ static inline int fetch_robust_entry(struct robust_list __user **entry,
 void exit_robust_list(struct task_struct *curr)
 {
 	struct robust_list_head __user *head = curr->robust_list;
-	struct robust_list __user *entry, *pending;
-	unsigned int limit = ROBUST_LIST_LIMIT, pi, pip;
+	struct robust_list __user *entry, *next_entry, *pending;
+	unsigned int limit = ROBUST_LIST_LIMIT, pi, next_pi, pip;
 	unsigned long futex_offset;
+	int rc;
 
 	/*
 	 * Fetch the list head (which was registered earlier, via
@@ -1965,11 +1966,13 @@ void exit_robust_list(struct task_struct *curr)
 	if (fetch_robust_entry(&pending, &head->list_op_pending, &pip))
 		return;
 
-	if (pending)
-		handle_futex_death((void __user *)pending + futex_offset,
-				   curr, pip);
-
+	next_entry = NULL;	/* avoid warning with gcc */
 	while (entry != &head->list) {
+		/*
+		 * Fetch the next entry in the list before calling
+		 * handle_futex_death:
+		 */
+		rc = fetch_robust_entry(&next_entry, &entry->next, &next_pi);
 		/*
 		 * A pending lock might already be on the list, so
 		 * don't process it twice:
@@ -1978,11 +1981,10 @@ void exit_robust_list(struct task_struct *curr)
 			if (handle_futex_death((void __user *)entry + futex_offset,
 						curr, pi))
 				return;
-		/*
-		 * Fetch the next entry in the list:
-		 */
-		if (fetch_robust_entry(&entry, &entry->next, &pi))
+		if (rc)
 			return;
+		entry = next_entry;
+		pi = next_pi;
 		/*
 		 * Avoid excessively long or circular lists:
 		 */
@@ -1991,6 +1993,10 @@ void exit_robust_list(struct task_struct *curr)
 
 		cond_resched();
 	}
+
+	if (pending)
+		handle_futex_death((void __user *)pending + futex_offset,
+				   curr, pip);
 }
 
 long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
diff --git a/kernel/futex_compat.c b/kernel/futex_compat.c
index 7e52eb051f22..2c2e2954b713 100644
--- a/kernel/futex_compat.c
+++ b/kernel/futex_compat.c
@@ -38,10 +38,11 @@ fetch_robust_entry(compat_uptr_t *uentry, struct robust_list __user **entry,
 void compat_exit_robust_list(struct task_struct *curr)
 {
 	struct compat_robust_list_head __user *head = curr->compat_robust_list;
-	struct robust_list __user *entry, *pending;
-	unsigned int limit = ROBUST_LIST_LIMIT, pi, pip;
-	compat_uptr_t uentry, upending;
+	struct robust_list __user *entry, *next_entry, *pending;
+	unsigned int limit = ROBUST_LIST_LIMIT, pi, next_pi, pip;
+	compat_uptr_t uentry, next_uentry, upending;
 	compat_long_t futex_offset;
+	int rc;
 
 	/*
 	 * Fetch the list head (which was registered earlier, via
@@ -61,10 +62,15 @@ void compat_exit_robust_list(struct task_struct *curr)
 	if (fetch_robust_entry(&upending, &pending,
 			       &head->list_op_pending, &pip))
 		return;
-	if (pending)
-		handle_futex_death((void __user *)pending + futex_offset, curr, pip);
 
+	next_entry = NULL;	/* avoid warning with gcc */
 	while (entry != (struct robust_list __user *) &head->list) {
+		/*
+		 * Fetch the next entry in the list before calling
+		 * handle_futex_death:
+		 */
+		rc = fetch_robust_entry(&next_uentry, &next_entry,
+			(compat_uptr_t __user *)&entry->next, &next_pi);
 		/*
 		 * A pending lock might already be on the list, so
 		 * don't process it twice:
@@ -74,12 +80,11 @@ void compat_exit_robust_list(struct task_struct *curr)
 				curr, pi))
 			return;
-		/*
-		 * Fetch the next entry in the list:
-		 */
-		if (fetch_robust_entry(&uentry, &entry,
-			(compat_uptr_t __user *)&entry->next, &pi))
+		if (rc)
 			return;
+		uentry = next_uentry;
+		entry = next_entry;
+		pi = next_pi;
 		/*
 		 * Avoid excessively long or circular lists:
 		 */
@@ -88,6 +93,9 @@ void compat_exit_robust_list(struct task_struct *curr)
 
 		cond_resched();
 	}
+	if (pending)
+		handle_futex_death((void __user *)pending + futex_offset,
+				   curr, pip);
 }
 
 asmlinkage long
-- cgit v1.2.1
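Abstracted away from futexes, the fix is the classic safe-traversal
pattern below. A self-contained sketch: the release() stub stands in for
handle_futex_death(); once it has been called, the node may be grabbed by
another thread and its next pointer rewritten, so the next pointer must
be loaded before the visit, never after it.

#include <stddef.h>

struct node {
	struct node *next;
};

/* Stand-in for handle_futex_death(): after this call another thread
 * may take the lock and rewrite node->next under us. */
static void release(struct node *node)
{
	(void)node;
}

static void walk_and_release(struct node *head)
{
	struct node *entry = head;
	struct node *next_entry;

	while (entry) {
		next_entry = entry->next;	/* fetch next first ...  */
		release(entry);			/* ... then hand it over */
		entry = next_entry;		/* ... then advance      */
	}
}

int main(void)
{
	struct node c = { NULL }, b = { &c }, a = { &b };

	walk_and_release(&a);
	return 0;
}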
From 30084fbd1caa4b2e1a336fcdef60b68129d1d8f8 Mon Sep 17 00:00:00 2001
From: Ingo Molnar
Date: Tue, 2 Oct 2007 14:13:08 +0200
Subject: sched: fix profile=sleep

Fix sleep profiling - we lost this chunk in the CFS merge.

Found-by: Mel Gorman
Signed-off-by: Ingo Molnar
---
 kernel/sched_fair.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'kernel')

diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index c9fbe8e73a45..67c67a87146e 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -639,6 +639,16 @@ static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
 
 		se->block_start = 0;
 		se->sum_sleep_runtime += delta;
+
+		/*
+		 * Blocking time is in units of nanosecs, so shift by 20 to
+		 * get a milliseconds-range estimation of the amount of
+		 * time that the task spent sleeping:
+		 */
+		if (unlikely(prof_on == SLEEP_PROFILING)) {
+			profile_hits(SLEEP_PROFILING, (void *)get_wchan(tsk),
+				     delta >> 20);
+		}
 	}
 #endif
 }
-- cgit v1.2.1
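One detail worth spelling out: the shift by 20 divides the nanosecond
blocking time by 2^20 = 1048576 rather than by the exact 10^6, trading
roughly 5% accuracy for avoiding a 64-bit division in a hot path. A quick
sketch of the error (the 250 ms sample value is illustrative):

#include <stdio.h>

int main(void)
{
	unsigned long long delta_ns = 250000000ULL;	/* 250 ms sleep */

	printf("exact : %llu ms\n", delta_ns / 1000000ULL);	/* 250 */
	printf("approx: %llu ms\n", delta_ns >> 20);		/* 238 */
	return 0;
}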