summaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
authorPaul Mackerras <paulus@samba.org>2007-10-03 15:33:17 +1000
committerPaul Mackerras <paulus@samba.org>2007-10-03 15:33:17 +1000
commit70f227d8846a8a9b1f36f71c42e11cc7c6e9408d (patch)
treefb4dd5c8240bdaada819fb569c01a392b52847b9 /kernel
parenta0c7ce9c877ceef8428798ac91fb794f83609aed (diff)
parentf778089cb2445dfc6dfd30a7a567925fd8589f1e (diff)
downloadblackbird-op-linux-70f227d8846a8a9b1f36f71c42e11cc7c6e9408d.tar.gz
blackbird-op-linux-70f227d8846a8a9b1f36f71c42e11cc7c6e9408d.zip
Merge branch 'linux-2.6' into for-2.6.24
Diffstat (limited to 'kernel')
-rw-r--r--kernel/exit.c9
-rw-r--r--kernel/fork.c2
-rw-r--r--kernel/futex.c26
-rw-r--r--kernel/futex_compat.c28
-rw-r--r--kernel/power/Kconfig2
-rw-r--r--kernel/sched.c10
-rw-r--r--kernel/sched_fair.c73
-rw-r--r--kernel/signal.c8
-rw-r--r--kernel/sys.c2
-rw-r--r--kernel/sysctl.c8
-rw-r--r--kernel/time/tick-broadcast.c17
11 files changed, 123 insertions, 62 deletions
diff --git a/kernel/exit.c b/kernel/exit.c
index 06b24b3aa370..993369ee94d1 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -24,7 +24,6 @@
#include <linux/pid_namespace.h>
#include <linux/ptrace.h>
#include <linux/profile.h>
-#include <linux/signalfd.h>
#include <linux/mount.h>
#include <linux/proc_fs.h>
#include <linux/kthread.h>
@@ -86,14 +85,6 @@ static void __exit_signal(struct task_struct *tsk)
sighand = rcu_dereference(tsk->sighand);
spin_lock(&sighand->siglock);
- /*
- * Notify that this sighand has been detached. This must
- * be called with the tsk->sighand lock held. Also, this
- * access tsk->sighand internally, so it must be called
- * before tsk->sighand is reset.
- */
- signalfd_detach_locked(tsk);
-
posix_cpu_timers_exit(tsk);
if (atomic_dec_and_test(&sig->count))
posix_cpu_timers_exit_group(tsk);
diff --git a/kernel/fork.c b/kernel/fork.c
index 7332e236d367..33f12f48684a 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1438,7 +1438,7 @@ static void sighand_ctor(void *data, struct kmem_cache *cachep,
struct sighand_struct *sighand = data;
spin_lock_init(&sighand->siglock);
- INIT_LIST_HEAD(&sighand->signalfd_list);
+ init_waitqueue_head(&sighand->signalfd_wqh);
}
void __init proc_caches_init(void)
diff --git a/kernel/futex.c b/kernel/futex.c
index e8935b195e88..fcc94e7b4086 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1943,9 +1943,10 @@ static inline int fetch_robust_entry(struct robust_list __user **entry,
void exit_robust_list(struct task_struct *curr)
{
struct robust_list_head __user *head = curr->robust_list;
- struct robust_list __user *entry, *pending;
- unsigned int limit = ROBUST_LIST_LIMIT, pi, pip;
+ struct robust_list __user *entry, *next_entry, *pending;
+ unsigned int limit = ROBUST_LIST_LIMIT, pi, next_pi, pip;
unsigned long futex_offset;
+ int rc;
/*
* Fetch the list head (which was registered earlier, via
@@ -1965,12 +1966,14 @@ void exit_robust_list(struct task_struct *curr)
if (fetch_robust_entry(&pending, &head->list_op_pending, &pip))
return;
- if (pending)
- handle_futex_death((void __user *)pending + futex_offset,
- curr, pip);
-
+ next_entry = NULL; /* avoid warning with gcc */
while (entry != &head->list) {
/*
+ * Fetch the next entry in the list before calling
+ * handle_futex_death:
+ */
+ rc = fetch_robust_entry(&next_entry, &entry->next, &next_pi);
+ /*
* A pending lock might already be on the list, so
* don't process it twice:
*/
@@ -1978,11 +1981,10 @@ void exit_robust_list(struct task_struct *curr)
if (handle_futex_death((void __user *)entry + futex_offset,
curr, pi))
return;
- /*
- * Fetch the next entry in the list:
- */
- if (fetch_robust_entry(&entry, &entry->next, &pi))
+ if (rc)
return;
+ entry = next_entry;
+ pi = next_pi;
/*
* Avoid excessively long or circular lists:
*/
@@ -1991,6 +1993,10 @@ void exit_robust_list(struct task_struct *curr)
cond_resched();
}
+
+ if (pending)
+ handle_futex_death((void __user *)pending + futex_offset,
+ curr, pip);
}
long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
diff --git a/kernel/futex_compat.c b/kernel/futex_compat.c
index 7e52eb051f22..2c2e2954b713 100644
--- a/kernel/futex_compat.c
+++ b/kernel/futex_compat.c
@@ -38,10 +38,11 @@ fetch_robust_entry(compat_uptr_t *uentry, struct robust_list __user **entry,
void compat_exit_robust_list(struct task_struct *curr)
{
struct compat_robust_list_head __user *head = curr->compat_robust_list;
- struct robust_list __user *entry, *pending;
- unsigned int limit = ROBUST_LIST_LIMIT, pi, pip;
- compat_uptr_t uentry, upending;
+ struct robust_list __user *entry, *next_entry, *pending;
+ unsigned int limit = ROBUST_LIST_LIMIT, pi, next_pi, pip;
+ compat_uptr_t uentry, next_uentry, upending;
compat_long_t futex_offset;
+ int rc;
/*
* Fetch the list head (which was registered earlier, via
@@ -61,11 +62,16 @@ void compat_exit_robust_list(struct task_struct *curr)
if (fetch_robust_entry(&upending, &pending,
&head->list_op_pending, &pip))
return;
- if (pending)
- handle_futex_death((void __user *)pending + futex_offset, curr, pip);
+ next_entry = NULL; /* avoid warning with gcc */
while (entry != (struct robust_list __user *) &head->list) {
/*
+ * Fetch the next entry in the list before calling
+ * handle_futex_death:
+ */
+ rc = fetch_robust_entry(&next_uentry, &next_entry,
+ (compat_uptr_t __user *)&entry->next, &next_pi);
+ /*
* A pending lock might already be on the list, so
* dont process it twice:
*/
@@ -74,12 +80,11 @@ void compat_exit_robust_list(struct task_struct *curr)
curr, pi))
return;
- /*
- * Fetch the next entry in the list:
- */
- if (fetch_robust_entry(&uentry, &entry,
- (compat_uptr_t __user *)&entry->next, &pi))
+ if (rc)
return;
+ uentry = next_uentry;
+ entry = next_entry;
+ pi = next_pi;
/*
* Avoid excessively long or circular lists:
*/
@@ -88,6 +93,9 @@ void compat_exit_robust_list(struct task_struct *curr)
cond_resched();
}
+ if (pending)
+ handle_futex_death((void __user *)pending + futex_offset,
+ curr, pip);
}
asmlinkage long
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index c8580a1e6873..14b0e10dc95c 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -110,7 +110,7 @@ config SUSPEND
config HIBERNATION_UP_POSSIBLE
bool
- depends on X86 || PPC64_SWSUSP || FRV || PPC32
+ depends on X86 || PPC64_SWSUSP || PPC32
depends on !SMP
default y
diff --git a/kernel/sched.c b/kernel/sched.c
index deeb1f8e0c30..6107a0cd6325 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1682,6 +1682,11 @@ void fastcall wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
p->prio = effective_prio(p);
+ if (rt_prio(p->prio))
+ p->sched_class = &rt_sched_class;
+ else
+ p->sched_class = &fair_sched_class;
+
if (!p->sched_class->task_new || !sysctl_sched_child_runs_first ||
(clone_flags & CLONE_VM) || task_cpu(p) != this_cpu ||
!current->se.on_rq) {
@@ -4550,10 +4555,7 @@ asmlinkage long sys_sched_yield(void)
struct rq *rq = this_rq_lock();
schedstat_inc(rq, yld_cnt);
- if (unlikely(rq->nr_running == 1))
- schedstat_inc(rq, yld_act_empty);
- else
- current->sched_class->yield_task(rq, current);
+ current->sched_class->yield_task(rq, current);
/*
* Since we are going to call schedule() anyway, there's
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 892616bf2c77..67c67a87146e 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -43,6 +43,14 @@ unsigned int sysctl_sched_latency __read_mostly = 20000000ULL;
unsigned int sysctl_sched_min_granularity __read_mostly = 2000000ULL;
/*
+ * sys_sched_yield() compat mode
+ *
+ * This option switches the agressive yield implementation of the
+ * old scheduler back on.
+ */
+unsigned int __read_mostly sysctl_sched_compat_yield;
+
+/*
* SCHED_BATCH wake-up granularity.
* (default: 25 msec, units: nanoseconds)
*
@@ -631,6 +639,16 @@ static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
se->block_start = 0;
se->sum_sleep_runtime += delta;
+
+ /*
+ * Blocking time is in units of nanosecs, so shift by 20 to
+ * get a milliseconds-range estimation of the amount of
+ * time that the task spent sleeping:
+ */
+ if (unlikely(prof_on == SLEEP_PROFILING)) {
+ profile_hits(SLEEP_PROFILING, (void *)get_wchan(tsk),
+ delta >> 20);
+ }
}
#endif
}
@@ -897,19 +915,62 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int sleep)
}
/*
- * sched_yield() support is very simple - we dequeue and enqueue
+ * sched_yield() support is very simple - we dequeue and enqueue.
+ *
+ * If compat_yield is turned on then we requeue to the end of the tree.
*/
static void yield_task_fair(struct rq *rq, struct task_struct *p)
{
struct cfs_rq *cfs_rq = task_cfs_rq(p);
+ struct rb_node **link = &cfs_rq->tasks_timeline.rb_node;
+ struct sched_entity *rightmost, *se = &p->se;
+ struct rb_node *parent;
- __update_rq_clock(rq);
/*
- * Dequeue and enqueue the task to update its
- * position within the tree:
+ * Are we the only task in the tree?
+ */
+ if (unlikely(cfs_rq->nr_running == 1))
+ return;
+
+ if (likely(!sysctl_sched_compat_yield)) {
+ __update_rq_clock(rq);
+ /*
+ * Dequeue and enqueue the task to update its
+ * position within the tree:
+ */
+ dequeue_entity(cfs_rq, &p->se, 0);
+ enqueue_entity(cfs_rq, &p->se, 0);
+
+ return;
+ }
+ /*
+ * Find the rightmost entry in the rbtree:
+ */
+ do {
+ parent = *link;
+ link = &parent->rb_right;
+ } while (*link);
+
+ rightmost = rb_entry(parent, struct sched_entity, run_node);
+ /*
+ * Already in the rightmost position?
*/
- dequeue_entity(cfs_rq, &p->se, 0);
- enqueue_entity(cfs_rq, &p->se, 0);
+ if (unlikely(rightmost == se))
+ return;
+
+ /*
+ * Minimally necessary key value to be last in the tree:
+ */
+ se->fair_key = rightmost->fair_key + 1;
+
+ if (cfs_rq->rb_leftmost == &se->run_node)
+ cfs_rq->rb_leftmost = rb_next(&se->run_node);
+ /*
+ * Relink the task to the rightmost position:
+ */
+ rb_erase(&se->run_node, &cfs_rq->tasks_timeline);
+ rb_link_node(&se->run_node, parent, link);
+ rb_insert_color(&se->run_node, &cfs_rq->tasks_timeline);
}
/*
diff --git a/kernel/signal.c b/kernel/signal.c
index 3169bed0b4d0..9fb91a32edda 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -378,8 +378,7 @@ int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info)
/* We only dequeue private signals from ourselves, we don't let
* signalfd steal them
*/
- if (likely(tsk == current))
- signr = __dequeue_signal(&tsk->pending, mask, info);
+ signr = __dequeue_signal(&tsk->pending, mask, info);
if (!signr) {
signr = __dequeue_signal(&tsk->signal->shared_pending,
mask, info);
@@ -407,8 +406,7 @@ int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info)
}
}
}
- if (likely(tsk == current))
- recalc_sigpending();
+ recalc_sigpending();
if (signr && unlikely(sig_kernel_stop(signr))) {
/*
* Set a marker that we have dequeued a stop signal. Our
@@ -425,7 +423,7 @@ int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info)
if (!(tsk->signal->flags & SIGNAL_GROUP_EXIT))
tsk->signal->flags |= SIGNAL_STOP_DEQUEUED;
}
- if (signr && likely(tsk == current) &&
+ if (signr &&
((info->si_code & __SI_MASK) == __SI_TIMER) &&
info->si_sys_private){
/*
diff --git a/kernel/sys.c b/kernel/sys.c
index 1b33b05d346b..8ae2e636eb1b 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -32,6 +32,7 @@
#include <linux/getcpu.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/seccomp.h>
+#include <linux/cpu.h>
#include <linux/compat.h>
#include <linux/syscalls.h>
@@ -878,6 +879,7 @@ void kernel_power_off(void)
kernel_shutdown_prepare(SYSTEM_POWER_OFF);
if (pm_power_off_prepare)
pm_power_off_prepare();
+ disable_nonboot_cpus();
sysdev_shutdown();
printk(KERN_EMERG "Power down.\n");
machine_power_off();
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 6ace893c17c9..53a456ebf6d5 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -303,6 +303,14 @@ static ctl_table kern_table[] = {
.proc_handler = &proc_dointvec,
},
#endif
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "sched_compat_yield",
+ .data = &sysctl_sched_compat_yield,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ },
#ifdef CONFIG_PROVE_LOCKING
{
.ctl_name = CTL_UNNUMBERED,
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index aab881c86a1a..0962e0577660 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -382,23 +382,8 @@ static int tick_broadcast_set_event(ktime_t expires, int force)
int tick_resume_broadcast_oneshot(struct clock_event_device *bc)
{
- int cpu = smp_processor_id();
-
- /*
- * If the CPU is marked for broadcast, enforce oneshot
- * broadcast mode. The jinxed VAIO does not resume otherwise.
- * No idea why it ends up in a lower C State during resume
- * without notifying the clock events layer.
- */
- if (cpu_isset(cpu, tick_broadcast_mask))
- cpu_set(cpu, tick_broadcast_oneshot_mask);
-
clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT);
-
- if(!cpus_empty(tick_broadcast_oneshot_mask))
- tick_broadcast_set_event(ktime_get(), 1);
-
- return cpu_isset(cpu, tick_broadcast_oneshot_mask);
+ return 0;
}
/*
OpenPOWER on IntegriCloud