Diffstat (limited to 'kernel')
-rw-r--r--  kernel/Makefile            |   4
-rw-r--r--  kernel/compat.c            |  20
-rw-r--r--  kernel/crash_dump.c        |  64
-rw-r--r--  kernel/exit.c              |  39
-rw-r--r--  kernel/fork.c              |   6
-rw-r--r--  kernel/hrtimer.c           | 821
-rw-r--r--  kernel/itimer.c            | 106
-rw-r--r--  kernel/kexec.c             |  20
-rw-r--r--  kernel/kprobes.c           |  97
-rw-r--r--  kernel/ksysfs.c            |  13
-rw-r--r--  kernel/posix-cpu-timers.c  |  63
-rw-r--r--  kernel/posix-timers.c      | 887
-rw-r--r--  kernel/resource.c          |   2
-rw-r--r--  kernel/stop_machine.c      |   6
-rw-r--r--  kernel/time.c              | 117
-rw-r--r--  kernel/timer.c             |  57
16 files changed, 1292 insertions, 1030 deletions
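A note on the new API before the patch body: kernel/hrtimer.c below replaces jiffies-based struct timer_list handling for itimers, POSIX timers and nanosleep. As a minimal sketch (not part of the patch), this is how a kernel-internal user would drive it, using only interfaces visible in this diff: hrtimer_init()/hrtimer_start()/hrtimer_forward()/hrtimer_cancel(), the function and data fields of struct hrtimer, the HRTIMER_REL mode and the HRTIMER_RESTART/HRTIMER_NORESTART return codes. The timer, callback, one-second interval and setup/teardown helpers are invented for illustration, and ktime_set() plus the include lines are assumed from the companion ktime changes this series depends on.

/*
 * Editor's sketch, not part of the patch: a hypothetical periodic
 * one-second wakeup built on the new hrtimer API.
 */
#include <linux/hrtimer.h>
#include <linux/ktime.h>
#include <linux/sched.h>

static struct hrtimer my_timer;
static ktime_t my_interval;

/* Called from the timer softirq; the return value decides requeueing. */
static int my_timer_fn(void *data)
{
        struct task_struct *tsk = data;

        wake_up_process(tsk);
        /* Push the expiry forward by one interval, then requeue: */
        hrtimer_forward(&my_timer, my_interval);
        return HRTIMER_RESTART; /* HRTIMER_NORESTART would make it one-shot */
}

static void my_timer_setup(void)
{
        my_interval = ktime_set(1, 0);  /* 1 sec, 0 nsec */
        hrtimer_init(&my_timer, CLOCK_MONOTONIC);
        my_timer.data = current;
        my_timer.function = my_timer_fn;
        hrtimer_start(&my_timer, my_interval, HRTIMER_REL);
}

static void my_timer_teardown(void)
{
        /* Unlike hrtimer_try_to_cancel(), this waits for a running callback: */
        hrtimer_cancel(&my_timer);
}

The same pattern appears in the reworked it_real_fn() in kernel/itimer.c further down: periodic behaviour comes from hrtimer_forward() plus HRTIMER_RESTART instead of the handler rearming itself with add_timer().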
diff --git a/kernel/Makefile b/kernel/Makefile index a940bac02837..355126606d1b 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -7,7 +7,8 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o profile.o \ sysctl.o capability.o ptrace.o timer.o user.o \ signal.o sys.o kmod.o workqueue.o pid.o \ rcupdate.o intermodule.o extable.o params.o posix-timers.o \ - kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o + kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \ + hrtimer.o obj-$(CONFIG_DEBUG_MUTEXES) += mutex-debug.o obj-$(CONFIG_FUTEX) += futex.o @@ -30,7 +31,6 @@ obj-$(CONFIG_KPROBES) += kprobes.o obj-$(CONFIG_SYSFS) += ksysfs.o obj-$(CONFIG_DETECT_SOFTLOCKUP) += softlockup.o obj-$(CONFIG_GENERIC_HARDIRQS) += irq/ -obj-$(CONFIG_CRASH_DUMP) += crash_dump.o obj-$(CONFIG_SECCOMP) += seccomp.o obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o diff --git a/kernel/compat.c b/kernel/compat.c index 102296e21ea8..256e5d9f0647 100644 --- a/kernel/compat.c +++ b/kernel/compat.c @@ -514,6 +514,24 @@ static int put_compat_itimerspec(struct compat_itimerspec __user *dst, return 0; } +long compat_sys_timer_create(clockid_t which_clock, + struct compat_sigevent __user *timer_event_spec, + timer_t __user *created_timer_id) +{ + struct sigevent __user *event = NULL; + + if (timer_event_spec) { + struct sigevent kevent; + + event = compat_alloc_user_space(sizeof(*event)); + if (get_compat_sigevent(&kevent, timer_event_spec) || + copy_to_user(event, &kevent, sizeof(*event))) + return -EFAULT; + } + + return sys_timer_create(which_clock, event, created_timer_id); +} + long compat_sys_timer_settime(timer_t timer_id, int flags, struct compat_itimerspec __user *new, struct compat_itimerspec __user *old) @@ -649,8 +667,6 @@ int get_compat_sigevent(struct sigevent *event, ? -EFAULT : 0; } -/* timer_create is architecture specific because it needs sigevent conversion */ - long compat_get_bitmap(unsigned long *mask, compat_ulong_t __user *umask, unsigned long bitmap_size) { diff --git a/kernel/crash_dump.c b/kernel/crash_dump.c deleted file mode 100644 index fccb27dbc623..000000000000 --- a/kernel/crash_dump.c +++ /dev/null @@ -1,64 +0,0 @@ -/* - * kernel/crash_dump.c - Memory preserving reboot related code. - * - * Created by: Hariprasad Nellitheertha (hari@in.ibm.com) - * Copyright (C) IBM Corporation, 2004. All rights reserved - */ - -#include <linux/smp_lock.h> -#include <linux/errno.h> -#include <linux/proc_fs.h> -#include <linux/bootmem.h> -#include <linux/highmem.h> -#include <linux/crash_dump.h> - -#include <asm/io.h> -#include <asm/uaccess.h> -#include <asm/kexec.h> - -/* Stores the physical address of elf header of crash image. */ -unsigned long long elfcorehdr_addr = ELFCORE_ADDR_MAX; - -#ifndef HAVE_ARCH_COPY_OLDMEM_PAGE -/** - * copy_oldmem_page - copy one page from "oldmem" - * @pfn: page frame number to be copied - * @buf: target memory address for the copy; this can be in kernel address - * space or user address space (see @userbuf) - * @csize: number of bytes to copy - * @offset: offset in bytes into the page (based on pfn) to begin the copy - * @userbuf: if set, @buf is in user address space, use copy_to_user(), - * otherwise @buf is in kernel address space, use memcpy(). - * - * Copy a page from "oldmem". For this page, there is no pte mapped - * in the current kernel. We stitch up a pte, similar to kmap_atomic. 
- */ -ssize_t copy_oldmem_page(unsigned long pfn, char *buf, - size_t csize, unsigned long offset, int userbuf) -{ - void *page, *vaddr; - - if (!csize) - return 0; - - page = kmalloc(PAGE_SIZE, GFP_KERNEL); - if (!page) - return -ENOMEM; - - vaddr = kmap_atomic_pfn(pfn, KM_PTE0); - copy_page(page, vaddr); - kunmap_atomic(vaddr, KM_PTE0); - - if (userbuf) { - if (copy_to_user(buf, (page + offset), csize)) { - kfree(page); - return -EFAULT; - } - } else { - memcpy(buf, (page + offset), csize); - } - - kfree(page); - return csize; -} -#endif diff --git a/kernel/exit.c b/kernel/exit.c index 309a46fa16f8..802722814925 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -842,7 +842,7 @@ fastcall NORET_TYPE void do_exit(long code) } group_dead = atomic_dec_and_test(&tsk->signal->live); if (group_dead) { - del_timer_sync(&tsk->signal->real_timer); + hrtimer_cancel(&tsk->signal->real_timer); exit_itimers(tsk->signal); acct_process(code); } @@ -1071,6 +1071,9 @@ static int wait_task_zombie(task_t *p, int noreap, } if (likely(p->real_parent == p->parent) && likely(p->signal)) { + struct signal_struct *psig; + struct signal_struct *sig; + /* * The resource counters for the group leader are in its * own task_struct. Those for dead threads in the group @@ -1087,24 +1090,26 @@ static int wait_task_zombie(task_t *p, int noreap, * here reaping other children at the same time. */ spin_lock_irq(&p->parent->sighand->siglock); - p->parent->signal->cutime = - cputime_add(p->parent->signal->cutime, + psig = p->parent->signal; + sig = p->signal; + psig->cutime = + cputime_add(psig->cutime, cputime_add(p->utime, - cputime_add(p->signal->utime, - p->signal->cutime))); - p->parent->signal->cstime = - cputime_add(p->parent->signal->cstime, + cputime_add(sig->utime, + sig->cutime))); + psig->cstime = + cputime_add(psig->cstime, cputime_add(p->stime, - cputime_add(p->signal->stime, - p->signal->cstime))); - p->parent->signal->cmin_flt += - p->min_flt + p->signal->min_flt + p->signal->cmin_flt; - p->parent->signal->cmaj_flt += - p->maj_flt + p->signal->maj_flt + p->signal->cmaj_flt; - p->parent->signal->cnvcsw += - p->nvcsw + p->signal->nvcsw + p->signal->cnvcsw; - p->parent->signal->cnivcsw += - p->nivcsw + p->signal->nivcsw + p->signal->cnivcsw; + cputime_add(sig->stime, + sig->cstime))); + psig->cmin_flt += + p->min_flt + sig->min_flt + sig->cmin_flt; + psig->cmaj_flt += + p->maj_flt + sig->maj_flt + sig->cmaj_flt; + psig->cnvcsw += + p->nvcsw + sig->nvcsw + sig->cnvcsw; + psig->cnivcsw += + p->nivcsw + sig->nivcsw + sig->cnivcsw; spin_unlock_irq(&p->parent->sighand->siglock); } diff --git a/kernel/fork.c b/kernel/fork.c index b18d64554feb..3bdcab49998d 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -801,10 +801,10 @@ static inline int copy_signal(unsigned long clone_flags, struct task_struct * ts init_sigpending(&sig->shared_pending); INIT_LIST_HEAD(&sig->posix_timers); - sig->it_real_value = sig->it_real_incr = 0; + hrtimer_init(&sig->real_timer, CLOCK_MONOTONIC); + sig->it_real_incr.tv64 = 0; sig->real_timer.function = it_real_fn; - sig->real_timer.data = (unsigned long) tsk; - init_timer(&sig->real_timer); + sig->real_timer.data = tsk; sig->it_virt_expires = cputime_zero; sig->it_virt_incr = cputime_zero; diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c new file mode 100644 index 000000000000..f073a2461faa --- /dev/null +++ b/kernel/hrtimer.c @@ -0,0 +1,821 @@ +/* + * linux/kernel/hrtimer.c + * + * Copyright(C) 2005, Thomas Gleixner <tglx@linutronix.de> + * Copyright(C) 2005, Red Hat, Inc., Ingo Molnar + * + * 
High-resolution kernel timers + * + * In contrast to the low-resolution timeout API implemented in + * kernel/timer.c, hrtimers provide finer resolution and accuracy + * depending on system configuration and capabilities. + * + * These timers are currently used for: + * - itimers + * - POSIX timers + * - nanosleep + * - precise in-kernel timing + * + * Started by: Thomas Gleixner and Ingo Molnar + * + * Credits: + * based on kernel/timer.c + * + * For licencing details see kernel-base/COPYING + */ + +#include <linux/cpu.h> +#include <linux/module.h> +#include <linux/percpu.h> +#include <linux/hrtimer.h> +#include <linux/notifier.h> +#include <linux/syscalls.h> +#include <linux/interrupt.h> + +#include <asm/uaccess.h> + +/** + * ktime_get - get the monotonic time in ktime_t format + * + * returns the time in ktime_t format + */ +static ktime_t ktime_get(void) +{ + struct timespec now; + + ktime_get_ts(&now); + + return timespec_to_ktime(now); +} + +/** + * ktime_get_real - get the real (wall-) time in ktime_t format + * + * returns the time in ktime_t format + */ +static ktime_t ktime_get_real(void) +{ + struct timespec now; + + getnstimeofday(&now); + + return timespec_to_ktime(now); +} + +EXPORT_SYMBOL_GPL(ktime_get_real); + +/* + * The timer bases: + */ + +#define MAX_HRTIMER_BASES 2 + +static DEFINE_PER_CPU(struct hrtimer_base, hrtimer_bases[MAX_HRTIMER_BASES]) = +{ + { + .index = CLOCK_REALTIME, + .get_time = &ktime_get_real, + .resolution = KTIME_REALTIME_RES, + }, + { + .index = CLOCK_MONOTONIC, + .get_time = &ktime_get, + .resolution = KTIME_MONOTONIC_RES, + }, +}; + +/** + * ktime_get_ts - get the monotonic clock in timespec format + * + * @ts: pointer to timespec variable + * + * The function calculates the monotonic clock from the realtime + * clock and the wall_to_monotonic offset and stores the result + * in normalized timespec format in the variable pointed to by ts. + */ +void ktime_get_ts(struct timespec *ts) +{ + struct timespec tomono; + unsigned long seq; + + do { + seq = read_seqbegin(&xtime_lock); + getnstimeofday(ts); + tomono = wall_to_monotonic; + + } while (read_seqretry(&xtime_lock, seq)); + + set_normalized_timespec(ts, ts->tv_sec + tomono.tv_sec, + ts->tv_nsec + tomono.tv_nsec); +} +EXPORT_SYMBOL_GPL(ktime_get_ts); + +/* + * Functions and macros which are different for UP/SMP systems are kept in a + * single place + */ +#ifdef CONFIG_SMP + +#define set_curr_timer(b, t) do { (b)->curr_timer = (t); } while (0) + +/* + * We are using hashed locking: holding per_cpu(hrtimer_bases)[n].lock + * means that all timers which are tied to this base via timer->base are + * locked, and the base itself is locked too. + * + * So __run_timers/migrate_timers can safely modify all timers which could + * be found on the lists/queues. + * + * When the timer's base is locked, and the timer removed from list, it is + * possible to set timer->base = NULL and drop the lock: the timer remains + * locked. + */ +static struct hrtimer_base *lock_hrtimer_base(const struct hrtimer *timer, + unsigned long *flags) +{ + struct hrtimer_base *base; + + for (;;) { + base = timer->base; + if (likely(base != NULL)) { + spin_lock_irqsave(&base->lock, *flags); + if (likely(base == timer->base)) + return base; + /* The timer has migrated to another CPU: */ + spin_unlock_irqrestore(&base->lock, *flags); + } + cpu_relax(); + } +} + +/* + * Switch the timer base to the current CPU when possible. 
+ */ +static inline struct hrtimer_base * +switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_base *base) +{ + struct hrtimer_base *new_base; + + new_base = &__get_cpu_var(hrtimer_bases[base->index]); + + if (base != new_base) { + /* + * We are trying to schedule the timer on the local CPU. + * However we can't change timer's base while it is running, + * so we keep it on the same CPU. No hassle vs. reprogramming + * the event source in the high resolution case. The softirq + * code will take care of this when the timer function has + * completed. There is no conflict as we hold the lock until + * the timer is enqueued. + */ + if (unlikely(base->curr_timer == timer)) + return base; + + /* See the comment in lock_timer_base() */ + timer->base = NULL; + spin_unlock(&base->lock); + spin_lock(&new_base->lock); + timer->base = new_base; + } + return new_base; +} + +#else /* CONFIG_SMP */ + +#define set_curr_timer(b, t) do { } while (0) + +static inline struct hrtimer_base * +lock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags) +{ + struct hrtimer_base *base = timer->base; + + spin_lock_irqsave(&base->lock, *flags); + + return base; +} + +#define switch_hrtimer_base(t, b) (b) + +#endif /* !CONFIG_SMP */ + +/* + * Functions for the union type storage format of ktime_t which are + * too large for inlining: + */ +#if BITS_PER_LONG < 64 +# ifndef CONFIG_KTIME_SCALAR +/** + * ktime_add_ns - Add a scalar nanoseconds value to a ktime_t variable + * + * @kt: addend + * @nsec: the scalar nsec value to add + * + * Returns the sum of kt and nsec in ktime_t format + */ +ktime_t ktime_add_ns(const ktime_t kt, u64 nsec) +{ + ktime_t tmp; + + if (likely(nsec < NSEC_PER_SEC)) { + tmp.tv64 = nsec; + } else { + unsigned long rem = do_div(nsec, NSEC_PER_SEC); + + tmp = ktime_set((long)nsec, rem); + } + + return ktime_add(kt, tmp); +} + +#else /* CONFIG_KTIME_SCALAR */ + +# endif /* !CONFIG_KTIME_SCALAR */ + +/* + * Divide a ktime value by a nanosecond value + */ +static unsigned long ktime_divns(const ktime_t kt, nsec_t div) +{ + u64 dclc, inc, dns; + int sft = 0; + + dclc = dns = ktime_to_ns(kt); + inc = div; + /* Make sure the divisor is less than 2^32: */ + while (div >> 32) { + sft++; + div >>= 1; + } + dclc >>= sft; + do_div(dclc, (unsigned long) div); + + return (unsigned long) dclc; +} + +#else /* BITS_PER_LONG < 64 */ +# define ktime_divns(kt, div) (unsigned long)((kt).tv64 / (div)) +#endif /* BITS_PER_LONG >= 64 */ + +/* + * Counterpart to lock_timer_base above: + */ +static inline +void unlock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags) +{ + spin_unlock_irqrestore(&timer->base->lock, *flags); +} + +/** + * hrtimer_forward - forward the timer expiry + * + * @timer: hrtimer to forward + * @interval: the interval to forward + * + * Forward the timer expiry so it will expire in the future. + * The number of overruns is added to the overrun field. 
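+ * + * A worked example with illustrative numbers: a timer whose expiry was t = 10ms with interval = 4ms, forwarded at now = 21ms, sees delta = 11ms, so orun = 2 and the expiry first moves to 18ms; that is still in the past, so the correction below bumps orun to 3 and the final ktime_add() lands the expiry at 22ms.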
+ */ +unsigned long +hrtimer_forward(struct hrtimer *timer, const ktime_t interval) +{ + unsigned long orun = 1; + ktime_t delta, now; + + now = timer->base->get_time(); + + delta = ktime_sub(now, timer->expires); + + if (delta.tv64 < 0) + return 0; + + if (unlikely(delta.tv64 >= interval.tv64)) { + nsec_t incr = ktime_to_ns(interval); + + orun = ktime_divns(delta, incr); + timer->expires = ktime_add_ns(timer->expires, incr * orun); + if (timer->expires.tv64 > now.tv64) + return orun; + /* + * This (and the ktime_add() below) is the + * correction for exact: + */ + orun++; + } + timer->expires = ktime_add(timer->expires, interval); + + return orun; +} + +/* + * enqueue_hrtimer - internal function to (re)start a timer + * + * The timer is inserted in expiry order. Insertion into the + * red black tree is O(log(n)). Must hold the base lock. + */ +static void enqueue_hrtimer(struct hrtimer *timer, struct hrtimer_base *base) +{ + struct rb_node **link = &base->active.rb_node; + struct list_head *prev = &base->pending; + struct rb_node *parent = NULL; + struct hrtimer *entry; + + /* + * Find the right place in the rbtree: + */ + while (*link) { + parent = *link; + entry = rb_entry(parent, struct hrtimer, node); + /* + * We dont care about collisions. Nodes with + * the same expiry time stay together. + */ + if (timer->expires.tv64 < entry->expires.tv64) + link = &(*link)->rb_left; + else { + link = &(*link)->rb_right; + prev = &entry->list; + } + } + + /* + * Insert the timer to the rbtree and to the sorted list: + */ + rb_link_node(&timer->node, parent, link); + rb_insert_color(&timer->node, &base->active); + list_add(&timer->list, prev); + + timer->state = HRTIMER_PENDING; +} + + +/* + * __remove_hrtimer - internal function to remove a timer + * + * Caller must hold the base lock. 
+ */ +static void __remove_hrtimer(struct hrtimer *timer, struct hrtimer_base *base) +{ + /* + * Remove the timer from the sorted list and from the rbtree: + */ + list_del(&timer->list); + rb_erase(&timer->node, &base->active); +} + +/* + * remove hrtimer, called with base lock held + */ +static inline int +remove_hrtimer(struct hrtimer *timer, struct hrtimer_base *base) +{ + if (hrtimer_active(timer)) { + __remove_hrtimer(timer, base); + timer->state = HRTIMER_INACTIVE; + return 1; + } + return 0; +} + +/** + * hrtimer_start - (re)start an relative timer on the current CPU + * + * @timer: the timer to be added + * @tim: expiry time + * @mode: expiry mode: absolute (HRTIMER_ABS) or relative (HRTIMER_REL) + * + * Returns: + * 0 on success + * 1 when the timer was active + */ +int +hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode) +{ + struct hrtimer_base *base, *new_base; + unsigned long flags; + int ret; + + base = lock_hrtimer_base(timer, &flags); + + /* Remove an active timer from the queue: */ + ret = remove_hrtimer(timer, base); + + /* Switch the timer base, if necessary: */ + new_base = switch_hrtimer_base(timer, base); + + if (mode == HRTIMER_REL) + tim = ktime_add(tim, new_base->get_time()); + timer->expires = tim; + + enqueue_hrtimer(timer, new_base); + + unlock_hrtimer_base(timer, &flags); + + return ret; +} + +/** + * hrtimer_try_to_cancel - try to deactivate a timer + * + * @timer: hrtimer to stop + * + * Returns: + * 0 when the timer was not active + * 1 when the timer was active + * -1 when the timer is currently excuting the callback function and + * can not be stopped + */ +int hrtimer_try_to_cancel(struct hrtimer *timer) +{ + struct hrtimer_base *base; + unsigned long flags; + int ret = -1; + + base = lock_hrtimer_base(timer, &flags); + + if (base->curr_timer != timer) + ret = remove_hrtimer(timer, base); + + unlock_hrtimer_base(timer, &flags); + + return ret; + +} + +/** + * hrtimer_cancel - cancel a timer and wait for the handler to finish. 
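+ * (done below by retrying hrtimer_try_to_cancel() until it stops returning -1, i.e. until a concurrently running callback has completed)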
+ * + * @timer: the timer to be cancelled + * + * Returns: + * 0 when the timer was not active + * 1 when the timer was active + */ +int hrtimer_cancel(struct hrtimer *timer) +{ + for (;;) { + int ret = hrtimer_try_to_cancel(timer); + + if (ret >= 0) + return ret; + } +} + +/** + * hrtimer_get_remaining - get remaining time for the timer + * + * @timer: the timer to read + */ +ktime_t hrtimer_get_remaining(const struct hrtimer *timer) +{ + struct hrtimer_base *base; + unsigned long flags; + ktime_t rem; + + base = lock_hrtimer_base(timer, &flags); + rem = ktime_sub(timer->expires, timer->base->get_time()); + unlock_hrtimer_base(timer, &flags); + + return rem; +} + +/** + * hrtimer_rebase - rebase an initialized hrtimer to a different base + * + * @timer: the timer to be rebased + * @clock_id: the clock to be used + */ +void hrtimer_rebase(struct hrtimer *timer, const clockid_t clock_id) +{ + struct hrtimer_base *bases; + + bases = per_cpu(hrtimer_bases, raw_smp_processor_id()); + timer->base = &bases[clock_id]; +} + +/** + * hrtimer_init - initialize a timer to the given clock + * + * @timer: the timer to be initialized + * @clock_id: the clock to be used + */ +void hrtimer_init(struct hrtimer *timer, const clockid_t clock_id) +{ + memset(timer, 0, sizeof(struct hrtimer)); + hrtimer_rebase(timer, clock_id); +} + +/** + * hrtimer_get_res - get the timer resolution for a clock + * + * @which_clock: which clock to query + * @tp: pointer to timespec variable to store the resolution + * + * Store the resolution of the clock selected by which_clock in the + * variable pointed to by tp. + */ +int hrtimer_get_res(const clockid_t which_clock, struct timespec *tp) +{ + struct hrtimer_base *bases; + + tp->tv_sec = 0; + bases = per_cpu(hrtimer_bases, raw_smp_processor_id()); + tp->tv_nsec = bases[which_clock].resolution; + + return 0; +} + +/* + * Expire the per base hrtimer-queue: + */ +static inline void run_hrtimer_queue(struct hrtimer_base *base) +{ + ktime_t now = base->get_time(); + + spin_lock_irq(&base->lock); + + while (!list_empty(&base->pending)) { + struct hrtimer *timer; + int (*fn)(void *); + int restart; + void *data; + + timer = list_entry(base->pending.next, struct hrtimer, list); + if (now.tv64 <= timer->expires.tv64) + break; + + fn = timer->function; + data = timer->data; + set_curr_timer(base, timer); + __remove_hrtimer(timer, base); + spin_unlock_irq(&base->lock); + + /* + * fn == NULL is special case for the simplest timer + * variant - wake up process and do not restart: + */ + if (!fn) { + wake_up_process(data); + restart = HRTIMER_NORESTART; + } else + restart = fn(data); + + spin_lock_irq(&base->lock); + + if (restart == HRTIMER_RESTART) + enqueue_hrtimer(timer, base); + else + timer->state = HRTIMER_EXPIRED; + } + set_curr_timer(base, NULL); + spin_unlock_irq(&base->lock); +} + +/* + * Called from timer softirq every jiffy, expire hrtimers: + */ +void hrtimer_run_queues(void) +{ + struct hrtimer_base *base = __get_cpu_var(hrtimer_bases); + int i; + + for (i = 0; i < MAX_HRTIMER_BASES; i++) + run_hrtimer_queue(&base[i]); +} + +/* + * Sleep related functions: + */ + +/** + * schedule_hrtimer - sleep until timeout + * + * @timer: hrtimer variable initialized with the correct clock base + * @mode: timeout value is abs/rel + * + * Make the current task sleep until @timeout is + * elapsed. + * + * You can set the task state as follows - + * + * %TASK_UNINTERRUPTIBLE - at least @timeout is guaranteed to + * pass before the routine returns. 
The routine will return 0 + * + * %TASK_INTERRUPTIBLE - the routine may return early if a signal is + * delivered to the current task. In this case the remaining time + * will be returned + * + * The current task state is guaranteed to be TASK_RUNNING when this + * routine returns. + */ +static ktime_t __sched +schedule_hrtimer(struct hrtimer *timer, const enum hrtimer_mode mode) +{ + /* fn stays NULL, meaning single-shot wakeup: */ + timer->data = current; + + hrtimer_start(timer, timer->expires, mode); + + schedule(); + hrtimer_cancel(timer); + + /* Return the remaining time: */ + if (timer->state != HRTIMER_EXPIRED) + return ktime_sub(timer->expires, timer->base->get_time()); + else + return (ktime_t) {.tv64 = 0 }; +} + +static inline ktime_t __sched +schedule_hrtimer_interruptible(struct hrtimer *timer, + const enum hrtimer_mode mode) +{ + set_current_state(TASK_INTERRUPTIBLE); + + return schedule_hrtimer(timer, mode); +} + +static long __sched +nanosleep_restart(struct restart_block *restart, clockid_t clockid) +{ + struct timespec __user *rmtp, tu; + void *rfn_save = restart->fn; + struct hrtimer timer; + ktime_t rem; + + restart->fn = do_no_restart_syscall; + + hrtimer_init(&timer, clockid); + + timer.expires.tv64 = ((u64)restart->arg1 << 32) | (u64) restart->arg0; + + rem = schedule_hrtimer_interruptible(&timer, HRTIMER_ABS); + + if (rem.tv64 <= 0) + return 0; + + rmtp = (struct timespec __user *) restart->arg2; + tu = ktime_to_timespec(rem); + if (rmtp && copy_to_user(rmtp, &tu, sizeof(tu))) + return -EFAULT; + + restart->fn = rfn_save; + + /* The other values in restart are already filled in */ + return -ERESTART_RESTARTBLOCK; +} + +static long __sched nanosleep_restart_mono(struct restart_block *restart) +{ + return nanosleep_restart(restart, CLOCK_MONOTONIC); +} + +static long __sched nanosleep_restart_real(struct restart_block *restart) +{ + return nanosleep_restart(restart, CLOCK_REALTIME); +} + +long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp, + const enum hrtimer_mode mode, const clockid_t clockid) +{ + struct restart_block *restart; + struct hrtimer timer; + struct timespec tu; + ktime_t rem; + + hrtimer_init(&timer, clockid); + + timer.expires = timespec_to_ktime(*rqtp); + + rem = schedule_hrtimer_interruptible(&timer, mode); + if (rem.tv64 <= 0) + return 0; + + /* Absolute timers do not update the rmtp value: */ + if (mode == HRTIMER_ABS) + return -ERESTARTNOHAND; + + tu = ktime_to_timespec(rem); + + if (rmtp && copy_to_user(rmtp, &tu, sizeof(tu))) + return -EFAULT; + + restart = ¤t_thread_info()->restart_block; + restart->fn = (clockid == CLOCK_MONOTONIC) ? 
+ nanosleep_restart_mono : nanosleep_restart_real; + restart->arg0 = timer.expires.tv64 & 0xFFFFFFFF; + restart->arg1 = timer.expires.tv64 >> 32; + restart->arg2 = (unsigned long) rmtp; + + return -ERESTART_RESTARTBLOCK; +} + +asmlinkage long +sys_nanosleep(struct timespec __user *rqtp, struct timespec __user *rmtp) +{ + struct timespec tu; + + if (copy_from_user(&tu, rqtp, sizeof(tu))) + return -EFAULT; + + if (!timespec_valid(&tu)) + return -EINVAL; + + return hrtimer_nanosleep(&tu, rmtp, HRTIMER_REL, CLOCK_MONOTONIC); +} + +/* + * Functions related to boot-time initialization: + */ +static void __devinit init_hrtimers_cpu(int cpu) +{ + struct hrtimer_base *base = per_cpu(hrtimer_bases, cpu); + int i; + + for (i = 0; i < MAX_HRTIMER_BASES; i++) { + spin_lock_init(&base->lock); + INIT_LIST_HEAD(&base->pending); + base++; + } +} + +#ifdef CONFIG_HOTPLUG_CPU + +static void migrate_hrtimer_list(struct hrtimer_base *old_base, + struct hrtimer_base *new_base) +{ + struct hrtimer *timer; + struct rb_node *node; + + while ((node = rb_first(&old_base->active))) { + timer = rb_entry(node, struct hrtimer, node); + __remove_hrtimer(timer, old_base); + timer->base = new_base; + enqueue_hrtimer(timer, new_base); + } +} + +static void migrate_hrtimers(int cpu) +{ + struct hrtimer_base *old_base, *new_base; + int i; + + BUG_ON(cpu_online(cpu)); + old_base = per_cpu(hrtimer_bases, cpu); + new_base = get_cpu_var(hrtimer_bases); + + local_irq_disable(); + + for (i = 0; i < MAX_HRTIMER_BASES; i++) { + + spin_lock(&new_base->lock); + spin_lock(&old_base->lock); + + BUG_ON(old_base->curr_timer); + + migrate_hrtimer_list(old_base, new_base); + + spin_unlock(&old_base->lock); + spin_unlock(&new_base->lock); + old_base++; + new_base++; + } + + local_irq_enable(); + put_cpu_var(hrtimer_bases); +} +#endif /* CONFIG_HOTPLUG_CPU */ + +static int __devinit hrtimer_cpu_notify(struct notifier_block *self, + unsigned long action, void *hcpu) +{ + long cpu = (long)hcpu; + + switch (action) { + + case CPU_UP_PREPARE: + init_hrtimers_cpu(cpu); + break; + +#ifdef CONFIG_HOTPLUG_CPU + case CPU_DEAD: + migrate_hrtimers(cpu); + break; +#endif + + default: + break; + } + + return NOTIFY_OK; +} + +static struct notifier_block __devinitdata hrtimers_nb = { + .notifier_call = hrtimer_cpu_notify, +}; + +void __init hrtimers_init(void) +{ + hrtimer_cpu_notify(&hrtimers_nb, (unsigned long)CPU_UP_PREPARE, + (void *)(long)smp_processor_id()); + register_cpu_notifier(&hrtimers_nb); +} + diff --git a/kernel/itimer.c b/kernel/itimer.c index 7c1b25e25e47..c2c05c4ff28d 100644 --- a/kernel/itimer.c +++ b/kernel/itimer.c @@ -12,36 +12,46 @@ #include <linux/syscalls.h> #include <linux/time.h> #include <linux/posix-timers.h> +#include <linux/hrtimer.h> #include <asm/uaccess.h> -static unsigned long it_real_value(struct signal_struct *sig) +/** + * itimer_get_remtime - get remaining time for the timer + * + * @timer: the timer to read + * + * Returns the delta between the expiry time and now, which can be + * less than zero or 1usec for an pending expired timer + */ +static struct timeval itimer_get_remtime(struct hrtimer *timer) { - unsigned long val = 0; - if (timer_pending(&sig->real_timer)) { - val = sig->real_timer.expires - jiffies; + ktime_t rem = hrtimer_get_remaining(timer); - /* look out for negative/zero itimer.. */ - if ((long) val <= 0) - val = 1; - } - return val; + /* + * Racy but safe: if the itimer expires after the above + * hrtimer_get_remtime() call but before this condition + * then we return 0 - which is correct. 
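+ * + * Note also that a pending timer which is already past its expiry reads back as 1 usec rather than 0, so callers can still tell an armed timer from a disarmed one.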
+ */ + if (hrtimer_active(timer)) { + if (rem.tv64 <= 0) + rem.tv64 = NSEC_PER_USEC; + } else + rem.tv64 = 0; + + return ktime_to_timeval(rem); } int do_getitimer(int which, struct itimerval *value) { struct task_struct *tsk = current; - unsigned long interval, val; cputime_t cinterval, cval; switch (which) { case ITIMER_REAL: - spin_lock_irq(&tsk->sighand->siglock); - interval = tsk->signal->it_real_incr; - val = it_real_value(tsk->signal); - spin_unlock_irq(&tsk->sighand->siglock); - jiffies_to_timeval(val, &value->it_value); - jiffies_to_timeval(interval, &value->it_interval); + value->it_value = itimer_get_remtime(&tsk->signal->real_timer); + value->it_interval = + ktime_to_timeval(tsk->signal->it_real_incr); break; case ITIMER_VIRTUAL: read_lock(&tasklist_lock); @@ -113,59 +123,45 @@ asmlinkage long sys_getitimer(int which, struct itimerval __user *value) } -void it_real_fn(unsigned long __data) +/* + * The timer is automagically restarted, when interval != 0 + */ +int it_real_fn(void *data) { - struct task_struct * p = (struct task_struct *) __data; - unsigned long inc = p->signal->it_real_incr; + struct task_struct *tsk = (struct task_struct *) data; - send_group_sig_info(SIGALRM, SEND_SIG_PRIV, p); + send_group_sig_info(SIGALRM, SEND_SIG_PRIV, tsk); - /* - * Now restart the timer if necessary. We don't need any locking - * here because do_setitimer makes sure we have finished running - * before it touches anything. - * Note, we KNOW we are (or should be) at a jiffie edge here so - * we don't need the +1 stuff. Also, we want to use the prior - * expire value so as to not "slip" a jiffie if we are late. - * Deal with requesting a time prior to "now" here rather than - * in add_timer. - */ - if (!inc) - return; - while (time_before_eq(p->signal->real_timer.expires, jiffies)) - p->signal->real_timer.expires += inc; - add_timer(&p->signal->real_timer); + if (tsk->signal->it_real_incr.tv64 != 0) { + hrtimer_forward(&tsk->signal->real_timer, + tsk->signal->it_real_incr); + + return HRTIMER_RESTART; + } + return HRTIMER_NORESTART; } int do_setitimer(int which, struct itimerval *value, struct itimerval *ovalue) { struct task_struct *tsk = current; - unsigned long val, interval, expires; + struct hrtimer *timer; + ktime_t expires; cputime_t cval, cinterval, nval, ninterval; switch (which) { case ITIMER_REAL: -again: - spin_lock_irq(&tsk->sighand->siglock); - interval = tsk->signal->it_real_incr; - val = it_real_value(tsk->signal); - /* We are sharing ->siglock with it_real_fn() */ - if (try_to_del_timer_sync(&tsk->signal->real_timer) < 0) { - spin_unlock_irq(&tsk->sighand->siglock); - goto again; - } - tsk->signal->it_real_incr = - timeval_to_jiffies(&value->it_interval); - expires = timeval_to_jiffies(&value->it_value); - if (expires) - mod_timer(&tsk->signal->real_timer, - jiffies + 1 + expires); - spin_unlock_irq(&tsk->sighand->siglock); + timer = &tsk->signal->real_timer; + hrtimer_cancel(timer); if (ovalue) { - jiffies_to_timeval(val, &ovalue->it_value); - jiffies_to_timeval(interval, - &ovalue->it_interval); + ovalue->it_value = itimer_get_remtime(timer); + ovalue->it_interval + = ktime_to_timeval(tsk->signal->it_real_incr); } + tsk->signal->it_real_incr = + timeval_to_ktime(value->it_interval); + expires = timeval_to_ktime(value->it_value); + if (expires.tv64 != 0) + hrtimer_start(timer, expires, HRTIMER_REL); break; case ITIMER_VIRTUAL: nval = timeval_to_cputime(&value->it_value); diff --git a/kernel/kexec.c b/kernel/kexec.c index 2c95848fbce8..de1441656efd 100644 --- 
a/kernel/kexec.c +++ b/kernel/kexec.c @@ -26,6 +26,9 @@ #include <asm/system.h> #include <asm/semaphore.h> +/* Per cpu memory for storing cpu states in case of system crash. */ +note_buf_t* crash_notes; + /* Location of the reserved area for the crash kernel */ struct resource crashk_res = { .name = "Crash kernel", @@ -1054,9 +1057,24 @@ void crash_kexec(struct pt_regs *regs) if (!locked) { image = xchg(&kexec_crash_image, NULL); if (image) { - machine_crash_shutdown(regs); + struct pt_regs fixed_regs; + crash_setup_regs(&fixed_regs, regs); + machine_crash_shutdown(&fixed_regs); machine_kexec(image); } xchg(&kexec_lock, 0); } } + +static int __init crash_notes_memory_init(void) +{ + /* Allocate memory for saving cpu registers. */ + crash_notes = alloc_percpu(note_buf_t); + if (!crash_notes) { + printk("Kexec: Memory allocation for saving cpu register" + " states failed\n"); + return -ENOMEM; + } + return 0; +} +module_init(crash_notes_memory_init) diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 3bb71e63a37e..34a885bb82e0 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -48,10 +48,11 @@ static struct hlist_head kprobe_table[KPROBE_TABLE_SIZE]; static struct hlist_head kretprobe_inst_table[KPROBE_TABLE_SIZE]; -static DEFINE_SPINLOCK(kprobe_lock); /* Protects kprobe_table */ +DECLARE_MUTEX(kprobe_mutex); /* Protects kprobe_table */ DEFINE_SPINLOCK(kretprobe_lock); /* Protects kretprobe_inst_table */ static DEFINE_PER_CPU(struct kprobe *, kprobe_instance) = NULL; +#ifdef __ARCH_WANT_KPROBES_INSN_SLOT /* * kprobe->ainsn.insn points to the copy of the instruction to be * single-stepped. x86_64, POWER4 and above have no-exec support and @@ -151,6 +152,7 @@ void __kprobes free_insn_slot(kprobe_opcode_t *slot) } } } +#endif /* We have preemption disabled.. 
so it is safe to use __ versions */ static inline void set_kprobe_instance(struct kprobe *kp) @@ -165,7 +167,7 @@ static inline void reset_kprobe_instance(void) /* * This routine is called either: - * - under the kprobe_lock spinlock - during kprobe_[un]register() + * - under the kprobe_mutex - during kprobe_[un]register() * OR * - with preemption disabled - from arch/xxx/kernel/kprobes.c */ @@ -418,7 +420,6 @@ static inline void add_aggr_kprobe(struct kprobe *ap, struct kprobe *p) /* * This is the second or subsequent kprobe at the address - handle * the intricacies - * TODO: Move kcalloc outside the spin_lock */ static int __kprobes register_aggr_kprobe(struct kprobe *old_p, struct kprobe *p) @@ -430,7 +431,7 @@ static int __kprobes register_aggr_kprobe(struct kprobe *old_p, copy_kprobe(old_p, p); ret = add_new_kprobe(old_p, p); } else { - ap = kcalloc(1, sizeof(struct kprobe), GFP_ATOMIC); + ap = kzalloc(sizeof(struct kprobe), GFP_KERNEL); if (!ap) return -ENOMEM; add_aggr_kprobe(ap, old_p); @@ -440,25 +441,6 @@ static int __kprobes register_aggr_kprobe(struct kprobe *old_p, return ret; } -/* kprobe removal house-keeping routines */ -static inline void cleanup_kprobe(struct kprobe *p, unsigned long flags) -{ - arch_disarm_kprobe(p); - hlist_del_rcu(&p->hlist); - spin_unlock_irqrestore(&kprobe_lock, flags); - arch_remove_kprobe(p); -} - -static inline void cleanup_aggr_kprobe(struct kprobe *old_p, - struct kprobe *p, unsigned long flags) -{ - list_del_rcu(&p->list); - if (list_empty(&old_p->list)) - cleanup_kprobe(old_p, flags); - else - spin_unlock_irqrestore(&kprobe_lock, flags); -} - static int __kprobes in_kprobes_functions(unsigned long addr) { if (addr >= (unsigned long)__kprobes_text_start @@ -470,7 +452,6 @@ static int __kprobes in_kprobes_functions(unsigned long addr) int __kprobes register_kprobe(struct kprobe *p) { int ret = 0; - unsigned long flags = 0; struct kprobe *old_p; struct module *mod; @@ -482,18 +463,17 @@ int __kprobes register_kprobe(struct kprobe *p) (unlikely(!try_module_get(mod)))) return -EINVAL; - if ((ret = arch_prepare_kprobe(p)) != 0) - goto rm_kprobe; - p->nmissed = 0; - spin_lock_irqsave(&kprobe_lock, flags); + down(&kprobe_mutex); old_p = get_kprobe(p->addr); if (old_p) { ret = register_aggr_kprobe(old_p, p); goto out; } - arch_copy_kprobe(p); + if ((ret = arch_prepare_kprobe(p)) != 0) + goto out; + INIT_HLIST_NODE(&p->hlist); hlist_add_head_rcu(&p->hlist, &kprobe_table[hash_ptr(p->addr, KPROBE_HASH_BITS)]); @@ -501,10 +481,8 @@ int __kprobes register_kprobe(struct kprobe *p) arch_arm_kprobe(p); out: - spin_unlock_irqrestore(&kprobe_lock, flags); -rm_kprobe: - if (ret == -EEXIST) - arch_remove_kprobe(p); + up(&kprobe_mutex); + if (ret && mod) module_put(mod); return ret; @@ -512,29 +490,50 @@ rm_kprobe: void __kprobes unregister_kprobe(struct kprobe *p) { - unsigned long flags; - struct kprobe *old_p; struct module *mod; + struct kprobe *old_p, *list_p; + int cleanup_p; - spin_lock_irqsave(&kprobe_lock, flags); + down(&kprobe_mutex); old_p = get_kprobe(p->addr); - if (old_p) { - /* cleanup_*_kprobe() does the spin_unlock_irqrestore */ - if (old_p->pre_handler == aggr_pre_handler) - cleanup_aggr_kprobe(old_p, p, flags); - else - cleanup_kprobe(p, flags); + if (unlikely(!old_p)) { + up(&kprobe_mutex); + return; + } + if (p != old_p) { + list_for_each_entry_rcu(list_p, &old_p->list, list) + if (list_p == p) + /* kprobe p is a valid probe */ + goto valid_p; + up(&kprobe_mutex); + return; + } +valid_p: + if ((old_p == p) || ((old_p->pre_handler == 
aggr_pre_handler) && + (p->list.next == &old_p->list) && + (p->list.prev == &old_p->list))) { + /* Only probe on the hash list */ + arch_disarm_kprobe(p); + hlist_del_rcu(&old_p->hlist); + cleanup_p = 1; + } else { + list_del_rcu(&p->list); + cleanup_p = 0; + } - synchronize_sched(); + up(&kprobe_mutex); - if ((mod = module_text_address((unsigned long)p->addr))) - module_put(mod); + synchronize_sched(); + if ((mod = module_text_address((unsigned long)p->addr))) + module_put(mod); - if (old_p->pre_handler == aggr_pre_handler && - list_empty(&old_p->list)) + if (cleanup_p) { + if (p != old_p) { + list_del_rcu(&p->list); kfree(old_p); - } else - spin_unlock_irqrestore(&kprobe_lock, flags); + } + arch_remove_kprobe(p); + } } static struct notifier_block kprobe_exceptions_nb = { diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c index 99af8b05eeaa..d5eeae0fa5bc 100644 --- a/kernel/ksysfs.c +++ b/kernel/ksysfs.c @@ -51,16 +51,6 @@ static ssize_t uevent_helper_store(struct subsystem *subsys, const char *page, s KERNEL_ATTR_RW(uevent_helper); #endif -#ifdef CONFIG_KEXEC -#include <asm/kexec.h> - -static ssize_t crash_notes_show(struct subsystem *subsys, char *page) -{ - return sprintf(page, "%p\n", (void *)crash_notes); -} -KERNEL_ATTR_RO(crash_notes); -#endif - decl_subsys(kernel, NULL, NULL); EXPORT_SYMBOL_GPL(kernel_subsys); @@ -69,9 +59,6 @@ static struct attribute * kernel_attrs[] = { &uevent_seqnum_attr.attr, &uevent_helper_attr.attr, #endif -#ifdef CONFIG_KEXEC - &crash_notes_attr.attr, -#endif NULL }; diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index 4c68edff900b..520f6c59948d 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c @@ -7,7 +7,7 @@ #include <asm/uaccess.h> #include <linux/errno.h> -static int check_clock(clockid_t which_clock) +static int check_clock(const clockid_t which_clock) { int error = 0; struct task_struct *p; @@ -31,7 +31,7 @@ static int check_clock(clockid_t which_clock) } static inline union cpu_time_count -timespec_to_sample(clockid_t which_clock, const struct timespec *tp) +timespec_to_sample(const clockid_t which_clock, const struct timespec *tp) { union cpu_time_count ret; ret.sched = 0; /* high half always zero when .cpu used */ @@ -43,7 +43,7 @@ timespec_to_sample(clockid_t which_clock, const struct timespec *tp) return ret; } -static void sample_to_timespec(clockid_t which_clock, +static void sample_to_timespec(const clockid_t which_clock, union cpu_time_count cpu, struct timespec *tp) { @@ -55,7 +55,7 @@ static void sample_to_timespec(clockid_t which_clock, } } -static inline int cpu_time_before(clockid_t which_clock, +static inline int cpu_time_before(const clockid_t which_clock, union cpu_time_count now, union cpu_time_count then) { @@ -65,7 +65,7 @@ static inline int cpu_time_before(clockid_t which_clock, return cputime_lt(now.cpu, then.cpu); } } -static inline void cpu_time_add(clockid_t which_clock, +static inline void cpu_time_add(const clockid_t which_clock, union cpu_time_count *acc, union cpu_time_count val) { @@ -75,7 +75,7 @@ static inline void cpu_time_add(clockid_t which_clock, acc->cpu = cputime_add(acc->cpu, val.cpu); } } -static inline union cpu_time_count cpu_time_sub(clockid_t which_clock, +static inline union cpu_time_count cpu_time_sub(const clockid_t which_clock, union cpu_time_count a, union cpu_time_count b) { @@ -151,7 +151,7 @@ static inline unsigned long long sched_ns(struct task_struct *p) return (p == current) ? 
current_sched_time(p) : p->sched_time; } -int posix_cpu_clock_getres(clockid_t which_clock, struct timespec *tp) +int posix_cpu_clock_getres(const clockid_t which_clock, struct timespec *tp) { int error = check_clock(which_clock); if (!error) { @@ -169,7 +169,7 @@ int posix_cpu_clock_getres(clockid_t which_clock, struct timespec *tp) return error; } -int posix_cpu_clock_set(clockid_t which_clock, const struct timespec *tp) +int posix_cpu_clock_set(const clockid_t which_clock, const struct timespec *tp) { /* * You can never reset a CPU clock, but we check for other errors @@ -186,7 +186,7 @@ int posix_cpu_clock_set(clockid_t which_clock, const struct timespec *tp) /* * Sample a per-thread clock for the given task. */ -static int cpu_clock_sample(clockid_t which_clock, struct task_struct *p, +static int cpu_clock_sample(const clockid_t which_clock, struct task_struct *p, union cpu_time_count *cpu) { switch (CPUCLOCK_WHICH(which_clock)) { @@ -248,7 +248,7 @@ static int cpu_clock_sample_group_locked(unsigned int clock_idx, * Sample a process (thread group) clock for the given group_leader task. * Must be called with tasklist_lock held for reading. */ -static int cpu_clock_sample_group(clockid_t which_clock, +static int cpu_clock_sample_group(const clockid_t which_clock, struct task_struct *p, union cpu_time_count *cpu) { @@ -262,7 +262,7 @@ static int cpu_clock_sample_group(clockid_t which_clock, } -int posix_cpu_clock_get(clockid_t which_clock, struct timespec *tp) +int posix_cpu_clock_get(const clockid_t which_clock, struct timespec *tp) { const pid_t pid = CPUCLOCK_PID(which_clock); int error = -EINVAL; @@ -1399,8 +1399,8 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx, static long posix_cpu_clock_nanosleep_restart(struct restart_block *); -int posix_cpu_nsleep(clockid_t which_clock, int flags, - struct timespec *rqtp) +int posix_cpu_nsleep(const clockid_t which_clock, int flags, + struct timespec *rqtp, struct timespec __user *rmtp) { struct restart_block *restart_block = ¤t_thread_info()->restart_block; @@ -1425,7 +1425,6 @@ int posix_cpu_nsleep(clockid_t which_clock, int flags, error = posix_cpu_timer_create(&timer); timer.it_process = current; if (!error) { - struct timespec __user *rmtp; static struct itimerspec zero_it; struct itimerspec it = { .it_value = *rqtp, .it_interval = {} }; @@ -1472,7 +1471,6 @@ int posix_cpu_nsleep(clockid_t which_clock, int flags, /* * Report back to the user the time still remaining. 
*/ - rmtp = (struct timespec __user *) restart_block->arg1; if (rmtp != NULL && !(flags & TIMER_ABSTIME) && copy_to_user(rmtp, &it.it_value, sizeof *rmtp)) return -EFAULT; @@ -1480,6 +1478,7 @@ int posix_cpu_nsleep(clockid_t which_clock, int flags, restart_block->fn = posix_cpu_clock_nanosleep_restart; /* Caller already set restart_block->arg1 */ restart_block->arg0 = which_clock; + restart_block->arg1 = (unsigned long) rmtp; restart_block->arg2 = rqtp->tv_sec; restart_block->arg3 = rqtp->tv_nsec; @@ -1493,21 +1492,28 @@ static long posix_cpu_clock_nanosleep_restart(struct restart_block *restart_block) { clockid_t which_clock = restart_block->arg0; - struct timespec t = { .tv_sec = restart_block->arg2, - .tv_nsec = restart_block->arg3 }; + struct timespec __user *rmtp; + struct timespec t; + + rmtp = (struct timespec __user *) restart_block->arg1; + t.tv_sec = restart_block->arg2; + t.tv_nsec = restart_block->arg3; + restart_block->fn = do_no_restart_syscall; - return posix_cpu_nsleep(which_clock, TIMER_ABSTIME, &t); + return posix_cpu_nsleep(which_clock, TIMER_ABSTIME, &t, rmtp); } #define PROCESS_CLOCK MAKE_PROCESS_CPUCLOCK(0, CPUCLOCK_SCHED) #define THREAD_CLOCK MAKE_THREAD_CPUCLOCK(0, CPUCLOCK_SCHED) -static int process_cpu_clock_getres(clockid_t which_clock, struct timespec *tp) +static int process_cpu_clock_getres(const clockid_t which_clock, + struct timespec *tp) { return posix_cpu_clock_getres(PROCESS_CLOCK, tp); } -static int process_cpu_clock_get(clockid_t which_clock, struct timespec *tp) +static int process_cpu_clock_get(const clockid_t which_clock, + struct timespec *tp) { return posix_cpu_clock_get(PROCESS_CLOCK, tp); } @@ -1516,16 +1522,19 @@ static int process_cpu_timer_create(struct k_itimer *timer) timer->it_clock = PROCESS_CLOCK; return posix_cpu_timer_create(timer); } -static int process_cpu_nsleep(clockid_t which_clock, int flags, - struct timespec *rqtp) +static int process_cpu_nsleep(const clockid_t which_clock, int flags, + struct timespec *rqtp, + struct timespec __user *rmtp) { - return posix_cpu_nsleep(PROCESS_CLOCK, flags, rqtp); + return posix_cpu_nsleep(PROCESS_CLOCK, flags, rqtp, rmtp); } -static int thread_cpu_clock_getres(clockid_t which_clock, struct timespec *tp) +static int thread_cpu_clock_getres(const clockid_t which_clock, + struct timespec *tp) { return posix_cpu_clock_getres(THREAD_CLOCK, tp); } -static int thread_cpu_clock_get(clockid_t which_clock, struct timespec *tp) +static int thread_cpu_clock_get(const clockid_t which_clock, + struct timespec *tp) { return posix_cpu_clock_get(THREAD_CLOCK, tp); } @@ -1534,8 +1543,8 @@ static int thread_cpu_timer_create(struct k_itimer *timer) timer->it_clock = THREAD_CLOCK; return posix_cpu_timer_create(timer); } -static int thread_cpu_nsleep(clockid_t which_clock, int flags, - struct timespec *rqtp) +static int thread_cpu_nsleep(const clockid_t which_clock, int flags, + struct timespec *rqtp, struct timespec __user *rmtp) { return -EINVAL; } diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c index 5870efb3e200..9e66e614862a 100644 --- a/kernel/posix-timers.c +++ b/kernel/posix-timers.c @@ -48,21 +48,6 @@ #include <linux/workqueue.h> #include <linux/module.h> -#ifndef div_long_long_rem -#include <asm/div64.h> - -#define div_long_long_rem(dividend,divisor,remainder) ({ \ - u64 result = dividend; \ - *remainder = do_div(result,divisor); \ - result; }) - -#endif -#define CLOCK_REALTIME_RES TICK_NSEC /* In nano seconds. 
*/ - -static inline u64 mpy_l_X_l_ll(unsigned long mpy1,unsigned long mpy2) -{ - return (u64)mpy1 * mpy2; -} /* * Management arrays for POSIX timers. Timers are kept in slab memory * Timer ids are allocated by an external routine that keeps track of the @@ -148,18 +133,18 @@ static DEFINE_SPINLOCK(idr_lock); */ static struct k_clock posix_clocks[MAX_CLOCKS]; + /* - * We only have one real clock that can be set so we need only one abs list, - * even if we should want to have several clocks with differing resolutions. + * These ones are defined below. */ -static struct k_clock_abs abs_list = {.list = LIST_HEAD_INIT(abs_list.list), - .lock = SPIN_LOCK_UNLOCKED}; +static int common_nsleep(const clockid_t, int flags, struct timespec *t, + struct timespec __user *rmtp); +static void common_timer_get(struct k_itimer *, struct itimerspec *); +static int common_timer_set(struct k_itimer *, int, + struct itimerspec *, struct itimerspec *); +static int common_timer_del(struct k_itimer *timer); -static void posix_timer_fn(unsigned long); -static u64 do_posix_clock_monotonic_gettime_parts( - struct timespec *tp, struct timespec *mo); -int do_posix_clock_monotonic_gettime(struct timespec *tp); -static int do_posix_clock_monotonic_get(clockid_t, struct timespec *tp); +static int posix_timer_fn(void *data); static struct k_itimer *lock_timer(timer_t timer_id, unsigned long *flags); @@ -184,7 +169,7 @@ static inline void unlock_timer(struct k_itimer *timr, unsigned long flags) * the function pointer CALL in struct k_clock. */ -static inline int common_clock_getres(clockid_t which_clock, +static inline int common_clock_getres(const clockid_t which_clock, struct timespec *tp) { tp->tv_sec = 0; @@ -192,39 +177,33 @@ static inline int common_clock_getres(clockid_t which_clock, return 0; } -static inline int common_clock_get(clockid_t which_clock, struct timespec *tp) +/* + * Get real time for posix timers + */ +static int common_clock_get(clockid_t which_clock, struct timespec *tp) { - getnstimeofday(tp); + ktime_get_real_ts(tp); return 0; } -static inline int common_clock_set(clockid_t which_clock, struct timespec *tp) +static inline int common_clock_set(const clockid_t which_clock, + struct timespec *tp) { return do_sys_settimeofday(tp, NULL); } static inline int common_timer_create(struct k_itimer *new_timer) { - INIT_LIST_HEAD(&new_timer->it.real.abs_timer_entry); - init_timer(&new_timer->it.real.timer); - new_timer->it.real.timer.data = (unsigned long) new_timer; + hrtimer_init(&new_timer->it.real.timer, new_timer->it_clock); + new_timer->it.real.timer.data = new_timer; new_timer->it.real.timer.function = posix_timer_fn; return 0; } /* - * These ones are defined below. - */ -static int common_nsleep(clockid_t, int flags, struct timespec *t); -static void common_timer_get(struct k_itimer *, struct itimerspec *); -static int common_timer_set(struct k_itimer *, int, - struct itimerspec *, struct itimerspec *); -static int common_timer_del(struct k_itimer *timer); - -/* - * Return nonzero iff we know a priori this clockid_t value is bogus. + * Return nonzero if we know a priori this clockid_t value is bogus. 
*/ -static inline int invalid_clockid(clockid_t which_clock) +static inline int invalid_clockid(const clockid_t which_clock) { if (which_clock < 0) /* CPU clock, posix_cpu_* will check it */ return 0; @@ -232,26 +211,32 @@ static inline int invalid_clockid(clockid_t which_clock) return 1; if (posix_clocks[which_clock].clock_getres != NULL) return 0; -#ifndef CLOCK_DISPATCH_DIRECT if (posix_clocks[which_clock].res != 0) return 0; -#endif return 1; } +/* + * Get monotonic time for posix timers + */ +static int posix_ktime_get_ts(clockid_t which_clock, struct timespec *tp) +{ + ktime_get_ts(tp); + return 0; +} /* * Initialize everything, well, just everything in Posix clocks/timers ;) */ static __init int init_posix_timers(void) { - struct k_clock clock_realtime = {.res = CLOCK_REALTIME_RES, - .abs_struct = &abs_list + struct k_clock clock_realtime = { + .clock_getres = hrtimer_get_res, }; - struct k_clock clock_monotonic = {.res = CLOCK_REALTIME_RES, - .abs_struct = NULL, - .clock_get = do_posix_clock_monotonic_get, - .clock_set = do_posix_clock_nosettime + struct k_clock clock_monotonic = { + .clock_getres = hrtimer_get_res, + .clock_get = posix_ktime_get_ts, + .clock_set = do_posix_clock_nosettime, }; register_posix_clock(CLOCK_REALTIME, &clock_realtime); @@ -265,117 +250,17 @@ static __init int init_posix_timers(void) __initcall(init_posix_timers); -static void tstojiffie(struct timespec *tp, int res, u64 *jiff) -{ - long sec = tp->tv_sec; - long nsec = tp->tv_nsec + res - 1; - - if (nsec >= NSEC_PER_SEC) { - sec++; - nsec -= NSEC_PER_SEC; - } - - /* - * The scaling constants are defined in <linux/time.h> - * The difference between there and here is that we do the - * res rounding and compute a 64-bit result (well so does that - * but it then throws away the high bits). - */ - *jiff = (mpy_l_X_l_ll(sec, SEC_CONVERSION) + - (mpy_l_X_l_ll(nsec, NSEC_CONVERSION) >> - (NSEC_JIFFIE_SC - SEC_JIFFIE_SC))) >> SEC_JIFFIE_SC; -} - -/* - * This function adjusts the timer as needed as a result of the clock - * being set. It should only be called for absolute timers, and then - * under the abs_list lock. It computes the time difference and sets - * the new jiffies value in the timer. It also updates the timers - * reference wall_to_monotonic value. It is complicated by the fact - * that tstojiffies() only handles positive times and it needs to work - * with both positive and negative times. Also, for negative offsets, - * we need to defeat the res round up. - * - * Return is true if there is a new time, else false. - */ -static long add_clockset_delta(struct k_itimer *timr, - struct timespec *new_wall_to) -{ - struct timespec delta; - int sign = 0; - u64 exp; - - set_normalized_timespec(&delta, - new_wall_to->tv_sec - - timr->it.real.wall_to_prev.tv_sec, - new_wall_to->tv_nsec - - timr->it.real.wall_to_prev.tv_nsec); - if (likely(!(delta.tv_sec | delta.tv_nsec))) - return 0; - if (delta.tv_sec < 0) { - set_normalized_timespec(&delta, - -delta.tv_sec, - 1 - delta.tv_nsec - - posix_clocks[timr->it_clock].res); - sign++; - } - tstojiffie(&delta, posix_clocks[timr->it_clock].res, &exp); - timr->it.real.wall_to_prev = *new_wall_to; - timr->it.real.timer.expires += (sign ? 
-exp : exp); - return 1; -} - -static void remove_from_abslist(struct k_itimer *timr) -{ - if (!list_empty(&timr->it.real.abs_timer_entry)) { - spin_lock(&abs_list.lock); - list_del_init(&timr->it.real.abs_timer_entry); - spin_unlock(&abs_list.lock); - } -} - static void schedule_next_timer(struct k_itimer *timr) { - struct timespec new_wall_to; - struct now_struct now; - unsigned long seq; - - /* - * Set up the timer for the next interval (if there is one). - * Note: this code uses the abs_timer_lock to protect - * it.real.wall_to_prev and must hold it until exp is set, not exactly - * obvious... - - * This function is used for CLOCK_REALTIME* and - * CLOCK_MONOTONIC* timers. If we ever want to handle other - * CLOCKs, the calling code (do_schedule_next_timer) would need - * to pull the "clock" info from the timer and dispatch the - * "other" CLOCKs "next timer" code (which, I suppose should - * also be added to the k_clock structure). - */ - if (!timr->it.real.incr) + if (timr->it.real.interval.tv64 == 0) return; - do { - seq = read_seqbegin(&xtime_lock); - new_wall_to = wall_to_monotonic; - posix_get_now(&now); - } while (read_seqretry(&xtime_lock, seq)); - - if (!list_empty(&timr->it.real.abs_timer_entry)) { - spin_lock(&abs_list.lock); - add_clockset_delta(timr, &new_wall_to); - - posix_bump_timer(timr, now); - - spin_unlock(&abs_list.lock); - } else { - posix_bump_timer(timr, now); - } + timr->it_overrun += hrtimer_forward(&timr->it.real.timer, + timr->it.real.interval); timr->it_overrun_last = timr->it_overrun; timr->it_overrun = -1; ++timr->it_requeue_pending; - add_timer(&timr->it.real.timer); + hrtimer_restart(&timr->it.real.timer); } /* @@ -396,31 +281,23 @@ void do_schedule_next_timer(struct siginfo *info) timr = lock_timer(info->si_tid, &flags); - if (!timr || timr->it_requeue_pending != info->si_sys_private) - goto exit; + if (timr && timr->it_requeue_pending == info->si_sys_private) { + if (timr->it_clock < 0) + posix_cpu_timer_schedule(timr); + else + schedule_next_timer(timr); - if (timr->it_clock < 0) /* CPU clock */ - posix_cpu_timer_schedule(timr); - else - schedule_next_timer(timr); - info->si_overrun = timr->it_overrun_last; -exit: - if (timr) - unlock_timer(timr, flags); + info->si_overrun = timr->it_overrun_last; + } + + unlock_timer(timr, flags); } int posix_timer_event(struct k_itimer *timr,int si_private) { memset(&timr->sigq->info, 0, sizeof(siginfo_t)); timr->sigq->info.si_sys_private = si_private; - /* - * Send signal to the process that owns this timer. - - * This code assumes that all the possible abs_lists share the - * same lock (there is only one list at this time). If this is - * not the case, the CLOCK info would need to be used to find - * the proper abs list lock. - */ + /* Send signal to the process that owns this timer.*/ timr->sigq->info.si_signo = timr->it_sigev_signo; timr->sigq->info.si_errno = 0; @@ -454,64 +331,35 @@ EXPORT_SYMBOL_GPL(posix_timer_event); * This code is for CLOCK_REALTIME* and CLOCK_MONOTONIC* timers. 
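 * With this rework the handler returns HRTIMER_RESTART to requeue itself, used below when signal delivery was ignored for an interval timer, or HRTIMER_NORESTART, instead of rearming the timer with add_timer().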
*/ -static void posix_timer_fn(unsigned long __data) +static int posix_timer_fn(void *data) { - struct k_itimer *timr = (struct k_itimer *) __data; + struct k_itimer *timr = data; unsigned long flags; - unsigned long seq; - struct timespec delta, new_wall_to; - u64 exp = 0; - int do_notify = 1; + int si_private = 0; + int ret = HRTIMER_NORESTART; spin_lock_irqsave(&timr->it_lock, flags); - if (!list_empty(&timr->it.real.abs_timer_entry)) { - spin_lock(&abs_list.lock); - do { - seq = read_seqbegin(&xtime_lock); - new_wall_to = wall_to_monotonic; - } while (read_seqretry(&xtime_lock, seq)); - set_normalized_timespec(&delta, - new_wall_to.tv_sec - - timr->it.real.wall_to_prev.tv_sec, - new_wall_to.tv_nsec - - timr->it.real.wall_to_prev.tv_nsec); - if (likely((delta.tv_sec | delta.tv_nsec ) == 0)) { - /* do nothing, timer is on time */ - } else if (delta.tv_sec < 0) { - /* do nothing, timer is already late */ - } else { - /* timer is early due to a clock set */ - tstojiffie(&delta, - posix_clocks[timr->it_clock].res, - &exp); - timr->it.real.wall_to_prev = new_wall_to; - timr->it.real.timer.expires += exp; - add_timer(&timr->it.real.timer); - do_notify = 0; - } - spin_unlock(&abs_list.lock); - } - if (do_notify) { - int si_private=0; + if (timr->it.real.interval.tv64 != 0) + si_private = ++timr->it_requeue_pending; - if (timr->it.real.incr) - si_private = ++timr->it_requeue_pending; - else { - remove_from_abslist(timr); + if (posix_timer_event(timr, si_private)) { + /* + * signal was not sent because of sig_ignor + * we will not get a call back to restart it AND + * it should be restarted. + */ + if (timr->it.real.interval.tv64 != 0) { + timr->it_overrun += + hrtimer_forward(&timr->it.real.timer, + timr->it.real.interval); + ret = HRTIMER_RESTART; } - - if (posix_timer_event(timr, si_private)) - /* - * signal was not sent because of sig_ignor - * we will not get a call back to restart it AND - * it should be restarted. - */ - schedule_next_timer(timr); } - unlock_timer(timr, flags); /* hold thru abs lock to keep irq off */ -} + unlock_timer(timr, flags); + return ret; +} static inline struct task_struct * good_sigevent(sigevent_t * event) { @@ -530,7 +378,7 @@ static inline struct task_struct * good_sigevent(sigevent_t * event) return rtn; } -void register_posix_clock(clockid_t clock_id, struct k_clock *new_clock) +void register_posix_clock(const clockid_t clock_id, struct k_clock *new_clock) { if ((unsigned) clock_id >= MAX_CLOCKS) { printk("POSIX clock register failed for clock_id %d\n", @@ -576,7 +424,7 @@ static void release_posix_timer(struct k_itimer *tmr, int it_id_set) /* Create a POSIX.1b interval timer. */ asmlinkage long -sys_timer_create(clockid_t which_clock, +sys_timer_create(const clockid_t which_clock, struct sigevent __user *timer_event_spec, timer_t __user * created_timer_id) { @@ -602,8 +450,7 @@ sys_timer_create(clockid_t which_clock, goto out; } spin_lock_irq(&idr_lock); - error = idr_get_new(&posix_timers_id, - (void *) new_timer, + error = idr_get_new(&posix_timers_id, (void *) new_timer, &new_timer_id); spin_unlock_irq(&idr_lock); if (error == -EAGAIN) @@ -704,27 +551,6 @@ out: } /* - * good_timespec - * - * This function checks the elements of a timespec structure. - * - * Arguments: - * ts : Pointer to the timespec structure to check - * - * Return value: - * If a NULL pointer was passed in, or the tv_nsec field was less than 0 - * or greater than NSEC_PER_SEC, or the tv_sec field was less than 0, - * this function returns 0. Otherwise it returns 1. 
- */ -static int good_timespec(const struct timespec *ts) -{ - if ((!ts) || (ts->tv_sec < 0) || - ((unsigned) ts->tv_nsec >= NSEC_PER_SEC)) - return 0; - return 1; -} - -/* * Locking issues: We need to protect the result of the id look up until * we get the timer locked down so it is not deleted under us. The * removal is done under the idr spinlock so we use that here to bridge @@ -776,39 +602,39 @@ static struct k_itimer * lock_timer(timer_t timer_id, unsigned long *flags) static void common_timer_get(struct k_itimer *timr, struct itimerspec *cur_setting) { - unsigned long expires; - struct now_struct now; - - do - expires = timr->it.real.timer.expires; - while ((volatile long) (timr->it.real.timer.expires) != expires); - - posix_get_now(&now); - - if (expires && - ((timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE) && - !timr->it.real.incr && - posix_time_before(&timr->it.real.timer, &now)) - timr->it.real.timer.expires = expires = 0; - if (expires) { - if (timr->it_requeue_pending & REQUEUE_PENDING || - (timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE) { - posix_bump_timer(timr, now); - expires = timr->it.real.timer.expires; - } - else - if (!timer_pending(&timr->it.real.timer)) - expires = 0; - if (expires) - expires -= now.jiffies; - } - jiffies_to_timespec(expires, &cur_setting->it_value); - jiffies_to_timespec(timr->it.real.incr, &cur_setting->it_interval); + ktime_t remaining; + struct hrtimer *timer = &timr->it.real.timer; + + memset(cur_setting, 0, sizeof(struct itimerspec)); + remaining = hrtimer_get_remaining(timer); - if (cur_setting->it_value.tv_sec < 0) { + /* Time left ? or timer pending */ + if (remaining.tv64 > 0 || hrtimer_active(timer)) + goto calci; + /* interval timer ? */ + if (timr->it.real.interval.tv64 == 0) + return; + /* + * When a requeue is pending or this is a SIGEV_NONE timer + * move the expiry time forward by intervals, so expiry is > + * now. + */ + if (timr->it_requeue_pending & REQUEUE_PENDING || + (timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE) { + timr->it_overrun += + hrtimer_forward(timer, timr->it.real.interval); + remaining = hrtimer_get_remaining(timer); + } + calci: + /* interval timer ? */ + if (timr->it.real.interval.tv64 != 0) + cur_setting->it_interval = + ktime_to_timespec(timr->it.real.interval); + /* Return 0 only, when the timer is expired and not pending */ + if (remaining.tv64 <= 0) cur_setting->it_value.tv_nsec = 1; - cur_setting->it_value.tv_sec = 0; - } + else + cur_setting->it_value = ktime_to_timespec(remaining); } /* Get the time remaining on a POSIX.1b interval timer. */ @@ -832,6 +658,7 @@ sys_timer_gettime(timer_t timer_id, struct itimerspec __user *setting) return 0; } + /* * Get the number of overruns of a POSIX.1b interval timer. This is to * be the overrun of the timer last delivered. At the same time we are @@ -841,7 +668,6 @@ sys_timer_gettime(timer_t timer_id, struct itimerspec __user *setting) * the call back to do_schedule_next_timer(). So all we need to do is * to pick up the frozen overrun. */ - asmlinkage long sys_timer_getoverrun(timer_t timer_id) { @@ -858,84 +684,6 @@ sys_timer_getoverrun(timer_t timer_id) return overrun; } -/* - * Adjust for absolute time - * - * If absolute time is given and it is not CLOCK_MONOTONIC, we need to - * adjust for the offset between the timer clock (CLOCK_MONOTONIC) and - * what ever clock he is using. - * - * If it is relative time, we need to add the current (CLOCK_MONOTONIC) - * time to it to get the proper time for the timer. 
- */ -static int adjust_abs_time(struct k_clock *clock, struct timespec *tp, - int abs, u64 *exp, struct timespec *wall_to) -{ - struct timespec now; - struct timespec oc = *tp; - u64 jiffies_64_f; - int rtn =0; - - if (abs) { - /* - * The mask pick up the 4 basic clocks - */ - if (!((clock - &posix_clocks[0]) & ~CLOCKS_MASK)) { - jiffies_64_f = do_posix_clock_monotonic_gettime_parts( - &now, wall_to); - /* - * If we are doing a MONOTONIC clock - */ - if((clock - &posix_clocks[0]) & CLOCKS_MONO){ - now.tv_sec += wall_to->tv_sec; - now.tv_nsec += wall_to->tv_nsec; - } - } else { - /* - * Not one of the basic clocks - */ - clock->clock_get(clock - posix_clocks, &now); - jiffies_64_f = get_jiffies_64(); - } - /* - * Take away now to get delta and normalize - */ - set_normalized_timespec(&oc, oc.tv_sec - now.tv_sec, - oc.tv_nsec - now.tv_nsec); - }else{ - jiffies_64_f = get_jiffies_64(); - } - /* - * Check if the requested time is prior to now (if so set now) - */ - if (oc.tv_sec < 0) - oc.tv_sec = oc.tv_nsec = 0; - - if (oc.tv_sec | oc.tv_nsec) - set_normalized_timespec(&oc, oc.tv_sec, - oc.tv_nsec + clock->res); - tstojiffie(&oc, clock->res, exp); - - /* - * Check if the requested time is more than the timer code - * can handle (if so we error out but return the value too). - */ - if (*exp > ((u64)MAX_JIFFY_OFFSET)) - /* - * This is a considered response, not exactly in - * line with the standard (in fact it is silent on - * possible overflows). We assume such a large - * value is ALMOST always a programming error and - * try not to compound it by setting a really dumb - * value. - */ - rtn = -EINVAL; - /* - * return the actual jiffies expire time, full 64 bits - */ - *exp += jiffies_64_f; - return rtn; -} /* Set a POSIX.1b interval timer. */ /* timr->it_lock is taken. */ @@ -943,68 +691,48 @@ static inline int common_timer_set(struct k_itimer *timr, int flags, struct itimerspec *new_setting, struct itimerspec *old_setting) { - struct k_clock *clock = &posix_clocks[timr->it_clock]; - u64 expire_64; + struct hrtimer *timer = &timr->it.real.timer; if (old_setting) common_timer_get(timr, old_setting); /* disable the timer */ - timr->it.real.incr = 0; + timr->it.real.interval.tv64 = 0; /* * careful here. If smp we could be in the "fire" routine which will * be spinning as we hold the lock. But this is ONLY an SMP issue. */ - if (try_to_del_timer_sync(&timr->it.real.timer) < 0) { -#ifdef CONFIG_SMP - /* - * It can only be active if on an other cpu. Since - * we have cleared the interval stuff above, it should - * clear once we release the spin lock. Of course once - * we do that anything could happen, including the - * complete melt down of the timer. So return with - * a "retry" exit status. 
-	 */
+	if (hrtimer_try_to_cancel(timer) < 0)
 		return TIMER_RETRY;
-#endif
-	}
-
-	remove_from_abslist(timr);

 	timr->it_requeue_pending = (timr->it_requeue_pending + 2) &
 		~REQUEUE_PENDING;
 	timr->it_overrun_last = 0;
-	timr->it_overrun = -1;
-	/*
-	 *switch off the timer when it_value is zero
-	 */
-	if (!new_setting->it_value.tv_sec && !new_setting->it_value.tv_nsec) {
-		timr->it.real.timer.expires = 0;
-		return 0;
-	}

-	if (adjust_abs_time(clock,
-			    &new_setting->it_value, flags & TIMER_ABSTIME,
-			    &expire_64, &(timr->it.real.wall_to_prev))) {
-		return -EINVAL;
-	}
-	timr->it.real.timer.expires = (unsigned long)expire_64;
-	tstojiffie(&new_setting->it_interval, clock->res, &expire_64);
-	timr->it.real.incr = (unsigned long)expire_64;
+	/* switch off the timer when it_value is zero */
+	if (!new_setting->it_value.tv_sec && !new_setting->it_value.tv_nsec)
+		return 0;

-	/*
-	 * We do not even queue SIGEV_NONE timers!  But we do put them
-	 * in the abs list so we can do that right.
+	/* Posix madness. Only absolute CLOCK_REALTIME timers
+	 * are affected by clock sets. So we must reinitialize
+	 * the timer.
 	 */
-	if (((timr->it_sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE))
-		add_timer(&timr->it.real.timer);
-
-	if (flags & TIMER_ABSTIME && clock->abs_struct) {
-		spin_lock(&clock->abs_struct->lock);
-		list_add_tail(&(timr->it.real.abs_timer_entry),
-			      &(clock->abs_struct->list));
-		spin_unlock(&clock->abs_struct->lock);
-	}
+	if (timr->it_clock == CLOCK_REALTIME && (flags & TIMER_ABSTIME))
+		hrtimer_rebase(timer, CLOCK_REALTIME);
+	else
+		hrtimer_rebase(timer, CLOCK_MONOTONIC);
+
+	timer->expires = timespec_to_ktime(new_setting->it_value);
+
+	/* Convert interval */
+	timr->it.real.interval = timespec_to_ktime(new_setting->it_interval);
+
+	/* SIGEV_NONE timers are not queued! See common_timer_get */
+	if (((timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE))
+		return 0;
+
+	hrtimer_start(timer, timer->expires, (flags & TIMER_ABSTIME) ?
+		      HRTIMER_ABS : HRTIMER_REL);
 	return 0;
 }

@@ -1026,8 +754,8 @@ sys_timer_settime(timer_t timer_id, int flags,
 	if (copy_from_user(&new_spec, new_setting, sizeof (new_spec)))
 		return -EFAULT;

-	if ((!good_timespec(&new_spec.it_interval)) ||
-	    (!good_timespec(&new_spec.it_value)))
+	if (!timespec_valid(&new_spec.it_interval) ||
+	    !timespec_valid(&new_spec.it_value))
 		return -EINVAL;
 retry:
 	timr = lock_timer(timer_id, &flag);
@@ -1043,8 +771,8 @@ retry:
 		goto retry;
 	}

-	if (old_setting && !error && copy_to_user(old_setting,
-						  &old_spec, sizeof (old_spec)))
+	if (old_setting && !error &&
+	    copy_to_user(old_setting, &old_spec, sizeof (old_spec)))
 		error = -EFAULT;

 	return error;
@@ -1052,24 +780,10 @@ retry:

 static inline int common_timer_del(struct k_itimer *timer)
 {
-	timer->it.real.incr = 0;
+	timer->it.real.interval.tv64 = 0;

-	if (try_to_del_timer_sync(&timer->it.real.timer) < 0) {
-#ifdef CONFIG_SMP
-		/*
-		 * It can only be active if on an other cpu.  Since
-		 * we have cleared the interval stuff above, it should
-		 * clear once we release the spin lock.  Of course once
-		 * we do that anything could happen, including the
-		 * complete melt down of the timer.  So return with
-		 * a "retry" exit status.
-	 */
+	if (hrtimer_try_to_cancel(&timer->it.real.timer) < 0)
 		return TIMER_RETRY;
-#endif
-	}
-
-	remove_from_abslist(timer);
-
 	return 0;
 }

@@ -1085,24 +799,16 @@ sys_timer_delete(timer_t timer_id)
 	struct k_itimer *timer;
 	long flags;

-#ifdef CONFIG_SMP
-	int error;
 retry_delete:
-#endif
 	timer = lock_timer(timer_id, &flags);
 	if (!timer)
 		return -EINVAL;

-#ifdef CONFIG_SMP
-	error = timer_delete_hook(timer);
-
-	if (error == TIMER_RETRY) {
+	if (timer_delete_hook(timer) == TIMER_RETRY) {
 		unlock_timer(timer, flags);
 		goto retry_delete;
 	}
-#else
-	timer_delete_hook(timer);
-#endif
+
 	spin_lock(&current->sighand->siglock);
 	list_del(&timer->list);
 	spin_unlock(&current->sighand->siglock);
@@ -1119,6 +825,7 @@ retry_delete:
 	release_posix_timer(timer, IT_ID_SET);
 	return 0;
 }
+
 /*
  * return timer owned by the process, used by exit_itimers
  */
@@ -1126,22 +833,13 @@ static inline void itimer_delete(struct k_itimer *timer)
 {
 	unsigned long flags;

-#ifdef CONFIG_SMP
-	int error;
 retry_delete:
-#endif
 	spin_lock_irqsave(&timer->it_lock, flags);

-#ifdef CONFIG_SMP
-	error = timer_delete_hook(timer);
-
-	if (error == TIMER_RETRY) {
+	if (timer_delete_hook(timer) == TIMER_RETRY) {
 		unlock_timer(timer, flags);
 		goto retry_delete;
 	}
-#else
-	timer_delete_hook(timer);
-#endif
 	list_del(&timer->list);
 	/*
 	 * This keeps any tasks waiting on the spin lock from thinking
@@ -1170,57 +868,8 @@ void exit_itimers(struct signal_struct *sig)
 	}
 }

-/*
- * And now for the "clock" calls
- *
- * These functions are called both from timer functions (with the timer
- * spin_lock_irq() held and from clock calls with no locking.   They must
- * use the save flags versions of locks.
- */
-
-/*
- * We do ticks here to avoid the irq lock ( they take sooo long).
- * The seqlock is great here.  Since we a reader, we don't really care
- * if we are interrupted since we don't take lock that will stall us or
- * any other cpu. Voila, no irq lock is needed.
- *
- */
-
-static u64 do_posix_clock_monotonic_gettime_parts(
-	struct timespec *tp, struct timespec *mo)
-{
-	u64 jiff;
-	unsigned int seq;
-
-	do {
-		seq = read_seqbegin(&xtime_lock);
-		getnstimeofday(tp);
-		*mo = wall_to_monotonic;
-		jiff = jiffies_64;
-
-	} while(read_seqretry(&xtime_lock, seq));
-
-	return jiff;
-}
-
-static int do_posix_clock_monotonic_get(clockid_t clock, struct timespec *tp)
-{
-	struct timespec wall_to_mono;
-
-	do_posix_clock_monotonic_gettime_parts(tp, &wall_to_mono);
-
-	set_normalized_timespec(tp, tp->tv_sec + wall_to_mono.tv_sec,
-				tp->tv_nsec + wall_to_mono.tv_nsec);
-
-	return 0;
-}
-
-int do_posix_clock_monotonic_gettime(struct timespec *tp)
-{
-	return do_posix_clock_monotonic_get(CLOCK_MONOTONIC, tp);
-}
-
-int do_posix_clock_nosettime(clockid_t clockid, struct timespec *tp)
+/* Not available / possible...
functions */ +int do_posix_clock_nosettime(const clockid_t clockid, struct timespec *tp) { return -EINVAL; } @@ -1232,7 +881,8 @@ int do_posix_clock_notimer_create(struct k_itimer *timer) } EXPORT_SYMBOL_GPL(do_posix_clock_notimer_create); -int do_posix_clock_nonanosleep(clockid_t clock, int flags, struct timespec *t) +int do_posix_clock_nonanosleep(const clockid_t clock, int flags, + struct timespec *t, struct timespec __user *r) { #ifndef ENOTSUP return -EOPNOTSUPP; /* aka ENOTSUP in userland for POSIX */ @@ -1242,8 +892,8 @@ int do_posix_clock_nonanosleep(clockid_t clock, int flags, struct timespec *t) } EXPORT_SYMBOL_GPL(do_posix_clock_nonanosleep); -asmlinkage long -sys_clock_settime(clockid_t which_clock, const struct timespec __user *tp) +asmlinkage long sys_clock_settime(const clockid_t which_clock, + const struct timespec __user *tp) { struct timespec new_tp; @@ -1256,7 +906,7 @@ sys_clock_settime(clockid_t which_clock, const struct timespec __user *tp) } asmlinkage long -sys_clock_gettime(clockid_t which_clock, struct timespec __user *tp) +sys_clock_gettime(const clockid_t which_clock, struct timespec __user *tp) { struct timespec kernel_tp; int error; @@ -1273,7 +923,7 @@ sys_clock_gettime(clockid_t which_clock, struct timespec __user *tp) } asmlinkage long -sys_clock_getres(clockid_t which_clock, struct timespec __user *tp) +sys_clock_getres(const clockid_t which_clock, struct timespec __user *tp) { struct timespec rtn_tp; int error; @@ -1292,117 +942,34 @@ sys_clock_getres(clockid_t which_clock, struct timespec __user *tp) } /* - * The standard says that an absolute nanosleep call MUST wake up at - * the requested time in spite of clock settings. Here is what we do: - * For each nanosleep call that needs it (only absolute and not on - * CLOCK_MONOTONIC* (as it can not be set)) we thread a little structure - * into the "nanosleep_abs_list". All we need is the task_struct pointer. - * When ever the clock is set we just wake up all those tasks. The rest - * is done by the while loop in clock_nanosleep(). - * - * On locking, clock_was_set() is called from update_wall_clock which - * holds (or has held for it) a write_lock_irq( xtime_lock) and is - * called from the timer bh code. Thus we need the irq save locks. - * - * Also, on the call from update_wall_clock, that is done as part of a - * softirq thing. We don't want to delay the system that much (possibly - * long list of timers to fix), so we defer that work to keventd. + * nanosleep for monotonic and realtime clocks */ - -static DECLARE_WAIT_QUEUE_HEAD(nanosleep_abs_wqueue); -static DECLARE_WORK(clock_was_set_work, (void(*)(void*))clock_was_set, NULL); - -static DECLARE_MUTEX(clock_was_set_lock); - -void clock_was_set(void) -{ - struct k_itimer *timr; - struct timespec new_wall_to; - LIST_HEAD(cws_list); - unsigned long seq; - - - if (unlikely(in_interrupt())) { - schedule_work(&clock_was_set_work); - return; +static int common_nsleep(const clockid_t which_clock, int flags, + struct timespec *tsave, struct timespec __user *rmtp) +{ + int mode = flags & TIMER_ABSTIME ? HRTIMER_ABS : HRTIMER_REL; + int clockid = which_clock; + + switch (which_clock) { + case CLOCK_REALTIME: + /* Posix madness. Only absolute timers on clock realtime + are affected by clock set. */ + if (mode != HRTIMER_ABS) + clockid = CLOCK_MONOTONIC; + case CLOCK_MONOTONIC: + break; + default: + return -EINVAL; } - wake_up_all(&nanosleep_abs_wqueue); - - /* - * Check if there exist TIMER_ABSTIME timers to correct. 
- * - * Notes on locking: This code is run in task context with irq - * on. We CAN be interrupted! All other usage of the abs list - * lock is under the timer lock which holds the irq lock as - * well. We REALLY don't want to scan the whole list with the - * interrupt system off, AND we would like a sequence lock on - * this code as well. Since we assume that the clock will not - * be set often, it seems ok to take and release the irq lock - * for each timer. In fact add_timer will do this, so this is - * not an issue. So we know when we are done, we will move the - * whole list to a new location. Then as we process each entry, - * we will move it to the actual list again. This way, when our - * copy is empty, we are done. We are not all that concerned - * about preemption so we will use a semaphore lock to protect - * aginst reentry. This way we will not stall another - * processor. It is possible that this may delay some timers - * that should have expired, given the new clock, but even this - * will be minimal as we will always update to the current time, - * even if it was set by a task that is waiting for entry to - * this code. Timers that expire too early will be caught by - * the expire code and restarted. - - * Absolute timers that repeat are left in the abs list while - * waiting for the task to pick up the signal. This means we - * may find timers that are not in the "add_timer" list, but are - * in the abs list. We do the same thing for these, save - * putting them back in the "add_timer" list. (Note, these are - * left in the abs list mainly to indicate that they are - * ABSOLUTE timers, a fact that is used by the re-arm code, and - * for which we have no other flag.) - - */ - - down(&clock_was_set_lock); - spin_lock_irq(&abs_list.lock); - list_splice_init(&abs_list.list, &cws_list); - spin_unlock_irq(&abs_list.lock); - do { - do { - seq = read_seqbegin(&xtime_lock); - new_wall_to = wall_to_monotonic; - } while (read_seqretry(&xtime_lock, seq)); - - spin_lock_irq(&abs_list.lock); - if (list_empty(&cws_list)) { - spin_unlock_irq(&abs_list.lock); - break; - } - timr = list_entry(cws_list.next, struct k_itimer, - it.real.abs_timer_entry); - - list_del_init(&timr->it.real.abs_timer_entry); - if (add_clockset_delta(timr, &new_wall_to) && - del_timer(&timr->it.real.timer)) /* timer run yet? */ - add_timer(&timr->it.real.timer); - list_add(&timr->it.real.abs_timer_entry, &abs_list.list); - spin_unlock_irq(&abs_list.lock); - } while (1); - - up(&clock_was_set_lock); + return hrtimer_nanosleep(tsave, rmtp, mode, clockid); } -long clock_nanosleep_restart(struct restart_block *restart_block); - asmlinkage long -sys_clock_nanosleep(clockid_t which_clock, int flags, +sys_clock_nanosleep(const clockid_t which_clock, int flags, const struct timespec __user *rqtp, struct timespec __user *rmtp) { struct timespec t; - struct restart_block *restart_block = - &(current_thread_info()->restart_block); - int ret; if (invalid_clockid(which_clock)) return -EINVAL; @@ -1410,125 +977,9 @@ sys_clock_nanosleep(clockid_t which_clock, int flags, if (copy_from_user(&t, rqtp, sizeof (struct timespec))) return -EFAULT; - if ((unsigned) t.tv_nsec >= NSEC_PER_SEC || t.tv_sec < 0) + if (!timespec_valid(&t)) return -EINVAL; - /* - * Do this here as nsleep function does not have the real address. 
-	 */
-	restart_block->arg1 = (unsigned long)rmtp;
-
-	ret = CLOCK_DISPATCH(which_clock, nsleep, (which_clock, flags, &t));
-
-	if ((ret == -ERESTART_RESTARTBLOCK) && rmtp &&
-	    copy_to_user(rmtp, &t, sizeof (t)))
-		return -EFAULT;
-	return ret;
-}
-
-
-static int common_nsleep(clockid_t which_clock,
-			 int flags, struct timespec *tsave)
-{
-	struct timespec t, dum;
-	DECLARE_WAITQUEUE(abs_wqueue, current);
-	u64 rq_time = (u64)0;
-	s64 left;
-	int abs;
-	struct restart_block *restart_block =
-	    &current_thread_info()->restart_block;
-
-	abs_wqueue.flags = 0;
-	abs = flags & TIMER_ABSTIME;
-
-	if (restart_block->fn == clock_nanosleep_restart) {
-		/*
-		 * Interrupted by a non-delivered signal, pick up remaining
-		 * time and continue.  Remaining time is in arg2 & 3.
-		 */
-		restart_block->fn = do_no_restart_syscall;
-
-		rq_time = restart_block->arg3;
-		rq_time = (rq_time << 32) + restart_block->arg2;
-		if (!rq_time)
-			return -EINTR;
-		left = rq_time - get_jiffies_64();
-		if (left <= (s64)0)
-			return 0;	/* Already passed */
-	}
-
-	if (abs && (posix_clocks[which_clock].clock_get !=
-		    posix_clocks[CLOCK_MONOTONIC].clock_get))
-		add_wait_queue(&nanosleep_abs_wqueue, &abs_wqueue);
-
-	do {
-		t = *tsave;
-		if (abs || !rq_time) {
-			adjust_abs_time(&posix_clocks[which_clock], &t, abs,
-					&rq_time, &dum);
-		}
-
-		left = rq_time - get_jiffies_64();
-		if (left >= (s64)MAX_JIFFY_OFFSET)
-			left = (s64)MAX_JIFFY_OFFSET;
-		if (left < (s64)0)
-			break;
-
-		schedule_timeout_interruptible(left);
-
-		left = rq_time - get_jiffies_64();
-	} while (left > (s64)0 && !test_thread_flag(TIF_SIGPENDING));
-
-	if (abs_wqueue.task_list.next)
-		finish_wait(&nanosleep_abs_wqueue, &abs_wqueue);
-
-	if (left > (s64)0) {
-
-		/*
-		 * Always restart abs calls from scratch to pick up any
-		 * clock shifting that happened while we are away.
-		 */
-		if (abs)
-			return -ERESTARTNOHAND;
-
-		left *= TICK_NSEC;
-		tsave->tv_sec = div_long_long_rem(left,
-						  NSEC_PER_SEC,
-						  &tsave->tv_nsec);
-		/*
-		 * Restart works by saving the time remaing in
-		 * arg2 & 3 (it is 64-bits of jiffies).  The other
-		 * info we need is the clock_id (saved in arg0).
-		 * The sys_call interface needs the users
-		 * timespec return address which _it_ saves in arg1.
-		 * Since we have cast the nanosleep call to a clock_nanosleep
-		 * both can be restarted with the same code.
-		 */
-		restart_block->fn = clock_nanosleep_restart;
-		restart_block->arg0 = which_clock;
-		/*
-		 * Caller sets arg1
-		 */
-		restart_block->arg2 = rq_time & 0xffffffffLL;
-		restart_block->arg3 = rq_time >> 32;
-
-		return -ERESTART_RESTARTBLOCK;
-	}
-
-	return 0;
-}
-/*
- * This will restart clock_nanosleep.
-	 */
-long
-clock_nanosleep_restart(struct restart_block *restart_block)
-{
-	struct timespec t;
-	int ret = common_nsleep(restart_block->arg0, 0, &t);
-
-	if ((ret == -ERESTART_RESTARTBLOCK) && restart_block->arg1 &&
-	    copy_to_user((struct timespec __user *)(restart_block->arg1), &t,
-			 sizeof (t)))
-		return -EFAULT;
-	return ret;
+	return CLOCK_DISPATCH(which_clock, nsleep,
+			      (which_clock, flags, &t, rmtp));
 }
diff --git a/kernel/resource.c b/kernel/resource.c
index 92285d822de6..e3080fcc66a3 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -464,7 +464,7 @@ struct resource * __request_region(struct resource *parent, unsigned long start,

 EXPORT_SYMBOL(__request_region);

-int __deprecated __check_region(struct resource *parent, unsigned long start, unsigned long n)
+int __check_region(struct resource *parent, unsigned long start, unsigned long n)
 {
 	struct resource * res;

diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index b3d4dc858e35..dcfb5d731466 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -87,13 +87,9 @@ static int stop_machine(void)
 {
 	int i, ret = 0;
 	struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
-	mm_segment_t old_fs = get_fs();

 	/* One high-prio thread per cpu.  We'll do this one. */
-	set_fs(KERNEL_DS);
-	sys_sched_setscheduler(current->pid, SCHED_FIFO,
-			       (struct sched_param __user *)&param);
-	set_fs(old_fs);
+	sched_setscheduler(current, SCHED_FIFO, &param);

 	atomic_set(&stopmachine_thread_ack, 0);
 	stopmachine_num_threads = 0;
diff --git a/kernel/time.c b/kernel/time.c
index b94bfa8c03e0..169e8329e0b6 100644
--- a/kernel/time.c
+++ b/kernel/time.c
@@ -154,6 +154,9 @@ int do_sys_settimeofday(struct timespec *tv, struct timezone *tz)
 	static int firsttime = 1;
 	int error = 0;

+	if (!timespec_valid(tv))
+		return -EINVAL;
+
 	error = security_settime(tv, tz);
 	if (error)
 		return error;
@@ -561,27 +564,107 @@ void getnstimeofday(struct timespec *tv)
 EXPORT_SYMBOL_GPL(getnstimeofday);
 #endif

-void getnstimestamp(struct timespec *ts)
+/* Converts Gregorian date to seconds since 1970-01-01 00:00:00.
+ * Assumes input in normal date format, i.e. 1980-12-31 23:59:59
+ * => year=1980, mon=12, day=31, hour=23, min=59, sec=59.
+ *
+ * [For the Julian calendar (which was used in Russia before 1917,
+ * Britain & colonies before 1752, anywhere else before 1582,
+ * and is still in use by some communities) leave out the
+ * -year/100+year/400 terms, and add 10.]
+ *
+ * This algorithm was first published by Gauss (I think).
+ *
+ * WARNING: this function will overflow on 2106-02-07 06:28:16 on
+ * machines where long is 32-bit!
(However, as time_t is signed, we + * will already get problems at other places on 2038-01-19 03:14:08) + */ +unsigned long +mktime(const unsigned int year0, const unsigned int mon0, + const unsigned int day, const unsigned int hour, + const unsigned int min, const unsigned int sec) { - unsigned int seq; - struct timespec wall2mono; + unsigned int mon = mon0, year = year0; - /* synchronize with settimeofday() changes */ - do { - seq = read_seqbegin(&xtime_lock); - getnstimeofday(ts); - wall2mono = wall_to_monotonic; - } while(unlikely(read_seqretry(&xtime_lock, seq))); - - /* adjust to monotonicaly-increasing values */ - ts->tv_sec += wall2mono.tv_sec; - ts->tv_nsec += wall2mono.tv_nsec; - while (unlikely(ts->tv_nsec >= NSEC_PER_SEC)) { - ts->tv_nsec -= NSEC_PER_SEC; - ts->tv_sec++; + /* 1..12 -> 11,12,1..10 */ + if (0 >= (int) (mon -= 2)) { + mon += 12; /* Puts Feb last since it has leap day */ + year -= 1; } + + return ((((unsigned long) + (year/4 - year/100 + year/400 + 367*mon/12 + day) + + year*365 - 719499 + )*24 + hour /* now have hours */ + )*60 + min /* now have minutes */ + )*60 + sec; /* finally seconds */ +} + +EXPORT_SYMBOL(mktime); + +/** + * set_normalized_timespec - set timespec sec and nsec parts and normalize + * + * @ts: pointer to timespec variable to be set + * @sec: seconds to set + * @nsec: nanoseconds to set + * + * Set seconds and nanoseconds field of a timespec variable and + * normalize to the timespec storage format + * + * Note: The tv_nsec part is always in the range of + * 0 <= tv_nsec < NSEC_PER_SEC + * For negative values only the tv_sec field is negative ! + */ +void set_normalized_timespec(struct timespec *ts, time_t sec, long nsec) +{ + while (nsec >= NSEC_PER_SEC) { + nsec -= NSEC_PER_SEC; + ++sec; + } + while (nsec < 0) { + nsec += NSEC_PER_SEC; + --sec; + } + ts->tv_sec = sec; + ts->tv_nsec = nsec; +} + +/** + * ns_to_timespec - Convert nanoseconds to timespec + * @nsec: the nanoseconds value to be converted + * + * Returns the timespec representation of the nsec parameter. + */ +inline struct timespec ns_to_timespec(const nsec_t nsec) +{ + struct timespec ts; + + if (nsec) + ts.tv_sec = div_long_long_rem_signed(nsec, NSEC_PER_SEC, + &ts.tv_nsec); + else + ts.tv_sec = ts.tv_nsec = 0; + + return ts; +} + +/** + * ns_to_timeval - Convert nanoseconds to timeval + * @nsec: the nanoseconds value to be converted + * + * Returns the timeval representation of the nsec parameter. + */ +struct timeval ns_to_timeval(const nsec_t nsec) +{ + struct timespec ts = ns_to_timespec(nsec); + struct timeval tv; + + tv.tv_sec = ts.tv_sec; + tv.tv_usec = (suseconds_t) ts.tv_nsec / 1000; + + return tv; } -EXPORT_SYMBOL_GPL(getnstimestamp); #if (BITS_PER_LONG < 64) u64 get_jiffies_64(void) diff --git a/kernel/timer.c b/kernel/timer.c index 074b4bd5cfd8..4f1cb0ab5251 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -858,6 +858,7 @@ static void run_timer_softirq(struct softirq_action *h) { tvec_base_t *base = &__get_cpu_var(tvec_bases); + hrtimer_run_queues(); if (time_after_eq(jiffies, base->timer_jiffies)) __run_timers(base); } @@ -1119,62 +1120,6 @@ asmlinkage long sys_gettid(void) return current->pid; } -static long __sched nanosleep_restart(struct restart_block *restart) -{ - unsigned long expire = restart->arg0, now = jiffies; - struct timespec __user *rmtp = (struct timespec __user *) restart->arg1; - long ret; - - /* Did it expire while we handled signals? 
 */
-	if (!time_after(expire, now))
-		return 0;
-
-	expire = schedule_timeout_interruptible(expire - now);
-
-	ret = 0;
-	if (expire) {
-		struct timespec t;
-		jiffies_to_timespec(expire, &t);
-
-		ret = -ERESTART_RESTARTBLOCK;
-		if (rmtp && copy_to_user(rmtp, &t, sizeof(t)))
-			ret = -EFAULT;
-		/* The 'restart' block is already filled in */
-	}
-	return ret;
-}
-
-asmlinkage long sys_nanosleep(struct timespec __user *rqtp, struct timespec __user *rmtp)
-{
-	struct timespec t;
-	unsigned long expire;
-	long ret;
-
-	if (copy_from_user(&t, rqtp, sizeof(t)))
-		return -EFAULT;
-
-	if ((t.tv_nsec >= 1000000000L) || (t.tv_nsec < 0) || (t.tv_sec < 0))
-		return -EINVAL;
-
-	expire = timespec_to_jiffies(&t) + (t.tv_sec || t.tv_nsec);
-	expire = schedule_timeout_interruptible(expire);
-
-	ret = 0;
-	if (expire) {
-		struct restart_block *restart;
-		jiffies_to_timespec(expire, &t);
-		if (rmtp && copy_to_user(rmtp, &t, sizeof(t)))
-			return -EFAULT;
-
-		restart = &current_thread_info()->restart_block;
-		restart->fn = nanosleep_restart;
-		restart->arg0 = jiffies + expire;
-		restart->arg1 = (unsigned long) rmtp;
-		ret = -ERESTART_RESTARTBLOCK;
-	}
-	return ret;
-}
-
 /*
  * sys_sysinfo - fill in sysinfo struct
  */
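
The posix-timers conversion above rearms periodic timers with hrtimer_forward() instead of recomputing a jiffies expiry: the helper pushes the timer's expiry forward in whole intervals until it lies in the future, and returns how many intervals were skipped, which is exactly the POSIX overrun count. A minimal sketch of that pattern, written against the callback interface as this series defines it (a void * argument and an HRTIMER_RESTART / HRTIMER_NORESTART return value; later kernels changed both, and sketch_timer_fn is a made-up name, not part of the patch):

/* Sketch only: mirrors the rearm logic of posix_timer_fn() above. */
static int sketch_timer_fn(void *data)
{
	struct k_itimer *timr = data;

	/* One-shot timer: let it die after this expiry. */
	if (timr->it.real.interval.tv64 == 0)
		return HRTIMER_NORESTART;

	/* Advance the expiry past "now" in whole intervals; the number
	 * of skipped intervals is the overrun count POSIX wants. */
	timr->it_overrun += hrtimer_forward(&timr->it.real.timer,
					    timr->it.real.interval);
	return HRTIMER_RESTART;		/* requeue at the new expiry */
}

Returning HRTIMER_RESTART from the callback replaces the old add_timer() requeue; the expiry has already been moved by hrtimer_forward().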
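For orientation, the syscalls in this file sit behind the standard userspace timer API. A self-contained example (plain POSIX, not from the patch; link with -lrt on older glibc) that exercises sys_timer_create(), sys_timer_settime() and sys_timer_getoverrun():

#include <signal.h>
#include <stdio.h>
#include <time.h>

int main(void)
{
	struct sigevent sev = {
		.sigev_notify = SIGEV_SIGNAL,
		.sigev_signo  = SIGALRM,
	};
	struct itimerspec its = {
		.it_value    = { 0, 100000000 },	/* first expiry: 100 ms */
		.it_interval = { 0, 100000000 },	/* period: 100 ms */
	};
	sigset_t set;
	timer_t id;
	int sig;

	/* Block SIGALRM and consume it synchronously via sigwait(). */
	sigemptyset(&set);
	sigaddset(&set, SIGALRM);
	sigprocmask(SIG_BLOCK, &set, NULL);

	if (timer_create(CLOCK_MONOTONIC, &sev, &id) ||
	    timer_settime(id, 0, &its, NULL))	/* 0 = relative start */
		return 1;

	sigwait(&set, &sig);
	/* Frozen per delivery, as the comment above sys_timer_getoverrun()
	 * explains: expirations between queueing and delivery. */
	printf("overruns: %d\n", timer_getoverrun(id));
	return timer_delete(id);
}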
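good_timespec() is dropped in favour of a shared timespec_valid() helper, which the diff also wires into do_sys_settimeofday() and sys_clock_nanosleep(). Its definition lives in a header outside these hunks; judging from the checks it replaces, the assumed contract is the following (timespec_valid_sketch is a reader's reconstruction, not the header's code):

/* Assumed semantics, matching the removed good_timespec() minus the
 * NULL check (callers validate a struct they copied in themselves):
 * non-negative seconds, tv_nsec within [0, NSEC_PER_SEC). */
static inline int timespec_valid_sketch(const struct timespec *ts)
{
	return ts->tv_sec >= 0 && (unsigned long) ts->tv_nsec < NSEC_PER_SEC;
}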
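The new common_nsleep() quietly retargets relative CLOCK_REALTIME sleeps to the CLOCK_MONOTONIC base, since POSIX only requires absolute realtime sleeps to track clock sets. Seen from userspace (standard API, illustrative only), the absolute case that must stay on the realtime base looks like this:

#include <time.h>

/* Sleep until the next whole CLOCK_REALTIME second. If the wall clock
 * is stepped meanwhile, an absolute sleep must honour the new time,
 * which is why the kernel may not silently rebase this one. */
int sleep_until_next_second(void)
{
	struct timespec t;

	clock_gettime(CLOCK_REALTIME, &t);
	t.tv_sec += 1;
	t.tv_nsec = 0;
	/* rmtp is NULL: absolute sleeps restart from the same target,
	 * so no remaining-time bookkeeping is needed. */
	return clock_nanosleep(CLOCK_REALTIME, TIMER_ABSTIME, &t, NULL);
}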
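The mktime() added to kernel/time.c computes the day number in closed form rather than looping over years. A standalone userspace check of the same arithmetic (the my_mktime body is copied from the hunk above; the asserted values are well-known epoch offsets):

#include <assert.h>

static unsigned long my_mktime(unsigned int year, unsigned int mon,
			       unsigned int day, unsigned int hour,
			       unsigned int min, unsigned int sec)
{
	/* 1..12 -> 11,12,1..10: put February last, it carries the leap day */
	if (0 >= (int) (mon -= 2)) {
		mon += 12;
		year -= 1;
	}
	return ((((unsigned long)
		  (year/4 - year/100 + year/400 + 367*mon/12 + day) +
		  year*365 - 719499
		 )*24 + hour
		)*60 + min
	       )*60 + sec;
}

int main(void)
{
	assert(my_mktime(1970, 1, 1, 0, 0, 0) == 0);		/* the epoch */
	assert(my_mktime(2000, 3, 1, 0, 0, 0) == 951868800);	/* day after a 400-year leap day */
	return 0;
}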
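set_normalized_timespec() is pure arithmetic, so its documented invariant (tv_nsec always lands in [0, NSEC_PER_SEC), with only tv_sec going negative) is easy to pin down by example. A userspace restatement of the loops with the contract checked (normalize is a local copy for testing, not the kernel symbol):

#include <assert.h>
#include <time.h>

#define NSEC_PER_SEC 1000000000L

static void normalize(struct timespec *ts, time_t sec, long nsec)
{
	while (nsec >= NSEC_PER_SEC) {
		nsec -= NSEC_PER_SEC;
		++sec;
	}
	while (nsec < 0) {
		nsec += NSEC_PER_SEC;
		--sec;
	}
	ts->tv_sec = sec;
	ts->tv_nsec = nsec;
}

int main(void)
{
	struct timespec ts;

	normalize(&ts, 5, 1500000000L);		/* 5 s + 1.5e9 ns */
	assert(ts.tv_sec == 6 && ts.tv_nsec == 500000000L);

	normalize(&ts, 5, -300000000L);		/* 5 s - 0.3e9 ns */
	assert(ts.tv_sec == 4 && ts.tv_nsec == 700000000L);
	return 0;
}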