From 1cc4fff0b360aeffeedb7d6db5089d88dd861700 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Mon, 22 Dec 2008 02:24:48 +0100 Subject: hrtimers: increase clock min delta threshold while interrupt hanging Impact: avoid timer IRQ hanging slow systems While using the function graph tracer on a virtualized system, the hrtimer_interrupt can hang the system in an infinite loop. This can be caused in several situations: - the hardware is very slow and HZ is set too high - something intrusive is slowing the system down (tracing under emulation) ... and the next clock events to program are always before the current time. This patch implements a reasonable compromise: if such a situation is detected, we let the CPUs spend 1/4 of their time processing the hrtimer interrupts. This is enough to keep the system running without serious starvation. It has been successfully tested under VirtualBox with 1000 HZ and 100 HZ with function graph tracer launched. In both cases, the clock events were increased until about 25 ms periodic ticks, which means 40 HZ. So we change a hard-to-debug hang into a warning message and a system that still manages to limp along. Signed-off-by: Frederic Weisbecker Signed-off-by: Ingo Molnar --- kernel/hrtimer.c | 30 +++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index bda9cb924276..c2a69b89ac61 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -1171,6 +1171,29 @@ static void __run_hrtimer(struct hrtimer *timer) #ifdef CONFIG_HIGH_RES_TIMERS +static int force_clock_reprogram; + +/* + * After 5 iteration's attempts, we consider that hrtimer_interrupt() + * is hanging, which could happen with something that slows the interrupt + * such as the tracing. Then we force the clock reprogramming for each future + * hrtimer interrupts to avoid infinite loops and use the min_delta_ns + * threshold that we will overwrite. 
+ * The next tick event will be scheduled to 3 times we currently spend on + * hrtimer_interrupt(). This gives a good compromise, the cpus will spend + * 1/4 of their time to process the hrtimer interrupts. This is enough to + * let it running without serious starvation. + */ + +static inline void +hrtimer_interrupt_hanging(struct clock_event_device *dev, + ktime_t try_time) +{ + force_clock_reprogram = 1; + dev->min_delta_ns = (unsigned long)try_time.tv64 * 3; + printk(KERN_WARNING "hrtimer: interrupt too slow, " + "forcing clock min delta to %lu ns\n", dev->min_delta_ns); +} /* * High resolution timer interrupt * Called with interrupts disabled @@ -1180,6 +1203,7 @@ void hrtimer_interrupt(struct clock_event_device *dev) struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases); struct hrtimer_clock_base *base; ktime_t expires_next, now; + int nr_retries = 0; int i; BUG_ON(!cpu_base->hres_active); @@ -1187,6 +1211,10 @@ void hrtimer_interrupt(struct clock_event_device *dev) dev->next_event.tv64 = KTIME_MAX; retry: + /* 5 retries is enough to notice a hang */ + if (!(++nr_retries % 5)) + hrtimer_interrupt_hanging(dev, ktime_sub(ktime_get(), now)); + now = ktime_get(); expires_next.tv64 = KTIME_MAX; @@ -1239,7 +1267,7 @@ void hrtimer_interrupt(struct clock_event_device *dev) /* Reprogramming necessary ? */ if (expires_next.tv64 != KTIME_MAX) { - if (tick_program_event(expires_next, 0)) + if (tick_program_event(expires_next, force_clock_reprogram)) goto retry; } } -- cgit v1.2.1 From 5762ba1873b0bb9faa631aaa02f533c2b9837f82 Mon Sep 17 00:00:00 2001 From: Sebastien Dugue Date: Mon, 1 Dec 2008 14:09:07 +0100 Subject: hrtimers: allow the hot-unplugging of all cpus Impact: fix CPU hotplug hang on Power6 testbox On architectures that support offlining all cpus (at least powerpc/pseries), hot-unplugging the tick_do_timer_cpu can result in a system hang. 
This comes from the fact that if the cpu going down happens to be the cpu doing the tick, then as the tick_do_timer_cpu handover happens after the cpu is dead (via the CPU_DEAD notification), we're left without ticks, jiffies are frozen and any task relying on timers (msleep, ...) is stuck. That's particularly the case for the cpu looping in __cpu_die() waiting for the dying cpu to be dead. This patch addresses this by having the tick_do_timer_cpu handover happen earlier during the CPU_DYING notification. For this, a new clockevent notification type is introduced (CLOCK_EVT_NOTIFY_CPU_DYING) which is triggered in hrtimer_cpu_notify(). Signed-off-by: Sebastien Dugue Cc: Signed-off-by: Ingo Molnar --- kernel/hrtimer.c | 4 ++++ kernel/time/tick-common.c | 26 +++++++++++++++++++------- 2 files changed, 23 insertions(+), 7 deletions(-) (limited to 'kernel') diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index c2a69b89ac61..61cb933395ba 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -1609,6 +1609,10 @@ static int __cpuinit hrtimer_cpu_notify(struct notifier_block *self, break; #ifdef CONFIG_HOTPLUG_CPU + case CPU_DYING: + case CPU_DYING_FROZEN: + clockevents_notify(CLOCK_EVT_NOTIFY_CPU_DYING, &scpu); + break; case CPU_DEAD: case CPU_DEAD_FROZEN: { diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index df12434b43ca..457d281258ee 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c @@ -273,6 +273,21 @@ out_bc: return ret; } +/* + * Transfer the do_timer job away from a dying cpu. + * + * Called with interrupts disabled. + */ +static void tick_handover_do_timer(int *cpup) +{ + if (*cpup == tick_do_timer_cpu) { + int cpu = first_cpu(cpu_online_map); + + tick_do_timer_cpu = (cpu != NR_CPUS) ? 
cpu : + TICK_DO_TIMER_NONE; + } +} + /* * Shutdown an event device on a given cpu: * @@ -297,13 +312,6 @@ static void tick_shutdown(unsigned int *cpup) clockevents_exchange_device(dev, NULL); td->evtdev = NULL; } - /* Transfer the do_timer job away from this cpu */ - if (*cpup == tick_do_timer_cpu) { - int cpu = first_cpu(cpu_online_map); - - tick_do_timer_cpu = (cpu != NR_CPUS) ? cpu : - TICK_DO_TIMER_NONE; - } spin_unlock_irqrestore(&tick_device_lock, flags); } @@ -357,6 +365,10 @@ static int tick_notify(struct notifier_block *nb, unsigned long reason, tick_broadcast_oneshot_control(reason); break; + case CLOCK_EVT_NOTIFY_CPU_DYING: + tick_handover_do_timer(dev); + break; + case CLOCK_EVT_NOTIFY_CPU_DEAD: tick_shutdown_broadcast_oneshot(dev); tick_shutdown_broadcast(dev); -- cgit v1.2.1 From 2d68259db26ad57fd9643f1c69b5181ec9836ca9 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Fri, 16 Jan 2009 17:14:38 +0900 Subject: clockevents: let set_mode() setup delta information Allow the set_mode() clockevent callback to decide and fill in delta details such as shift, mult, max_delta_ns and min_delta_ns. With this change the clockevent can be registered without delta details which allows us to keep the parent clock disabled until the clockevent gets set up using set_mode(). Letting set_mode() fill in or update delta details allows us to save power by disabling the parent clock while the clockevent is unused. This may however make the parent clock rate change, so next time the clockevent gets enabled we need to let set_mode() update the delta details accordingly. Doing it at registration time is not enough. Furthermore, the delta details seem unused in the case of periodic-only clockevent drivers, so this change also allows registration of such drivers without the delta details filled in. 
Signed-off-by: Magnus Damm Signed-off-by: Thomas Gleixner --- kernel/time/clockevents.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) (limited to 'kernel') diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index ea2f48af83cf..d13be216a790 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -68,6 +68,17 @@ void clockevents_set_mode(struct clock_event_device *dev, if (dev->mode != mode) { dev->set_mode(mode, dev); dev->mode = mode; + + /* + * A nsec2cyc multiplicator of 0 is invalid and we'd crash + * on it, so fix it up and emit a warning: + */ + if (mode == CLOCK_EVT_MODE_ONESHOT) { + if (unlikely(!dev->mult)) { + dev->mult = 1; + WARN_ON(1); + } + } } } @@ -168,15 +179,6 @@ void clockevents_register_device(struct clock_event_device *dev) BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED); BUG_ON(!dev->cpumask); - /* - * A nsec2cyc multiplicator of 0 is invalid and we'd crash - * on it, so fix it up and emit a warning: - */ - if (unlikely(!dev->mult)) { - dev->mult = 1; - WARN_ON(1); - } - spin_lock(&clockevents_lock); list_add(&dev->list, &clockevent_devices); -- cgit v1.2.1 From 6626bff24578753808c8b5bd4f1619e14e980f0f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 25 Jan 2009 11:31:36 +0100 Subject: hrtimer: prevent negative expiry value after clock_was_set() Impact: prevent false positive WARN_ON() in clockevents_program_event() clock_was_set() changes the base->offset of CLOCK_REALTIME and enforces the reprogramming of the clockevent device to expire timers which are based on CLOCK_REALTIME. If the clock change is large enough then the subtraction of the timer expiry value and base->offset can become negative which triggers the warning in clockevents_program_event(). Check the subtraction result and set a negative value to 0. 
Signed-off-by: Thomas Gleixner --- kernel/hrtimer.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'kernel') diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 2c40ee8f44bd..d71cef25954b 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -501,6 +501,13 @@ static void hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base) continue; timer = rb_entry(base->first, struct hrtimer, node); expires = ktime_sub(hrtimer_get_expires(timer), base->offset); + /* + * clock_was_set() has changed base->offset so the + * result might be negative. Fix it up to prevent a + * false positive in clockevents_program_event() + */ + if (expires.tv64 < 0) + expires.tv64 = 0; if (expires.tv64 < cpu_base->expires_next.tv64) cpu_base->expires_next = expires; } -- cgit v1.2.1 From 74019224ac34b044b44a31dd89a54e3477db4896 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 18 Feb 2009 12:23:29 +0100 Subject: timers: add mod_timer_pending() Impact: new timer API Based on an idea from Martin Josefsson with the help of Patrick McHardy and Stephen Hemminger: introduce the mod_timer_pending() API which is a mod_timer() offspring that is an invariant on already removed timers. (regular mod_timer() re-activates non-pending timers.) This is useful for the networking code in that it can allow unserialized mod_timer_pending() timer-forwarding calls, but a single del_timer*() will stop the timer from being reactivated again. Also while at it: - optimize the regular mod_timer() path some more, the timer-stat and a debug check was needlessly duplicated in __mod_timer(). - make the exports come straight after the function, as most other exports in timer.c already did. - eliminate __mod_timer() as an external API, change the users to mod_timer(). The regular mod_timer() code path is not impacted significantly, due to inlining optimizations and due to the simplifications. Based-on-patch-from: Stephen Hemminger Acked-by: Stephen Hemminger Cc: "David S. 
Miller" Cc: Patrick McHardy Cc: netdev@vger.kernel.org Cc: Oleg Nesterov Cc: Andrew Morton Signed-off-by: Ingo Molnar --- kernel/relay.c | 2 +- kernel/timer.c | 110 ++++++++++++++++++++++++++++++++++++++------------------- 2 files changed, 74 insertions(+), 38 deletions(-) (limited to 'kernel') diff --git a/kernel/relay.c b/kernel/relay.c index 9d79b7854fa6..8f2179c8056f 100644 --- a/kernel/relay.c +++ b/kernel/relay.c @@ -750,7 +750,7 @@ size_t relay_switch_subbuf(struct rchan_buf *buf, size_t length) * from the scheduler (trying to re-grab * rq->lock), so defer it. */ - __mod_timer(&buf->timer, jiffies + 1); + mod_timer(&buf->timer, jiffies + 1); } old = buf->data; diff --git a/kernel/timer.c b/kernel/timer.c index 13dd64fe143d..9b77fc9a9ac8 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -589,11 +589,14 @@ static struct tvec_base *lock_timer_base(struct timer_list *timer, } } -int __mod_timer(struct timer_list *timer, unsigned long expires) +static inline int +__mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only) { struct tvec_base *base, *new_base; unsigned long flags; - int ret = 0; + int ret; + + ret = 0; timer_stats_timer_set_start_info(timer); BUG_ON(!timer->function); @@ -603,6 +606,9 @@ int __mod_timer(struct timer_list *timer, unsigned long expires) if (timer_pending(timer)) { detach_timer(timer, 0); ret = 1; + } else { + if (pending_only) + goto out_unlock; } debug_timer_activate(timer); @@ -629,42 +635,28 @@ int __mod_timer(struct timer_list *timer, unsigned long expires) timer->expires = expires; internal_add_timer(base, timer); + +out_unlock: spin_unlock_irqrestore(&base->lock, flags); return ret; } -EXPORT_SYMBOL(__mod_timer); - /** - * add_timer_on - start a timer on a particular CPU - * @timer: the timer to be added - * @cpu: the CPU to start it on + * mod_timer_pending - modify a pending timer's timeout + * @timer: the pending timer to be modified + * @expires: new timeout in jiffies * - * This is not very scalable 
on SMP. Double adds are not possible. + * mod_timer_pending() is the same for pending timers as mod_timer(), + * but will not re-activate and modify already deleted timers. + * + * It is useful for unserialized use of timers. */ -void add_timer_on(struct timer_list *timer, int cpu) +int mod_timer_pending(struct timer_list *timer, unsigned long expires) { - struct tvec_base *base = per_cpu(tvec_bases, cpu); - unsigned long flags; - - timer_stats_timer_set_start_info(timer); - BUG_ON(timer_pending(timer) || !timer->function); - spin_lock_irqsave(&base->lock, flags); - timer_set_base(timer, base); - debug_timer_activate(timer); - internal_add_timer(base, timer); - /* - * Check whether the other CPU is idle and needs to be - * triggered to reevaluate the timer wheel when nohz is - * active. We are protected against the other CPU fiddling - * with the timer by holding the timer base lock. This also - * makes sure that a CPU on the way to idle can not evaluate - * the timer wheel. - */ - wake_up_idle_cpu(cpu); - spin_unlock_irqrestore(&base->lock, flags); + return __mod_timer(timer, expires, true); } +EXPORT_SYMBOL(mod_timer_pending); /** * mod_timer - modify a timer's timeout @@ -688,9 +680,6 @@ void add_timer_on(struct timer_list *timer, int cpu) */ int mod_timer(struct timer_list *timer, unsigned long expires) { - BUG_ON(!timer->function); - - timer_stats_timer_set_start_info(timer); /* * This is a common optimization triggered by the * networking code - if the timer is re-modified @@ -699,11 +688,61 @@ int mod_timer(struct timer_list *timer, unsigned long expires) if (timer->expires == expires && timer_pending(timer)) return 1; - return __mod_timer(timer, expires); + return __mod_timer(timer, expires, false); } - EXPORT_SYMBOL(mod_timer); +/** + * add_timer - start a timer + * @timer: the timer to be added + * + * The kernel will do a ->function(->data) callback from the + * timer interrupt at the ->expires point in the future. The + * current time is 'jiffies'. 
+ * + * The timer's ->expires, ->function (and if the handler uses it, ->data) + * fields must be set prior calling this function. + * + * Timers with an ->expires field in the past will be executed in the next + * timer tick. + */ +void add_timer(struct timer_list *timer) +{ + BUG_ON(timer_pending(timer)); + mod_timer(timer, timer->expires); +} +EXPORT_SYMBOL(add_timer); + +/** + * add_timer_on - start a timer on a particular CPU + * @timer: the timer to be added + * @cpu: the CPU to start it on + * + * This is not very scalable on SMP. Double adds are not possible. + */ +void add_timer_on(struct timer_list *timer, int cpu) +{ + struct tvec_base *base = per_cpu(tvec_bases, cpu); + unsigned long flags; + + timer_stats_timer_set_start_info(timer); + BUG_ON(timer_pending(timer) || !timer->function); + spin_lock_irqsave(&base->lock, flags); + timer_set_base(timer, base); + debug_timer_activate(timer); + internal_add_timer(base, timer); + /* + * Check whether the other CPU is idle and needs to be + * triggered to reevaluate the timer wheel when nohz is + * active. We are protected against the other CPU fiddling + * with the timer by holding the timer base lock. This also + * makes sure that a CPU on the way to idle can not evaluate + * the timer wheel. + */ + wake_up_idle_cpu(cpu); + spin_unlock_irqrestore(&base->lock, flags); +} + /** * del_timer - deactive a timer. 
* @timer: the timer to be deactivated @@ -733,7 +772,6 @@ int del_timer(struct timer_list *timer) return ret; } - EXPORT_SYMBOL(del_timer); #ifdef CONFIG_SMP @@ -767,7 +805,6 @@ out: return ret; } - EXPORT_SYMBOL(try_to_del_timer_sync); /** @@ -796,7 +833,6 @@ int del_timer_sync(struct timer_list *timer) cpu_relax(); } } - EXPORT_SYMBOL(del_timer_sync); #endif @@ -1268,7 +1304,7 @@ signed long __sched schedule_timeout(signed long timeout) expire = timeout + jiffies; setup_timer_on_stack(&timer, process_timeout, (unsigned long)current); - __mod_timer(&timer, expire); + __mod_timer(&timer, expire, false); schedule(); del_singleshot_timer_sync(&timer); -- cgit v1.2.1 From fdcedf7b75808dd72c3cc0b931be11b04d75c60a Mon Sep 17 00:00:00 2001 From: john stultz Date: Wed, 18 Feb 2009 16:02:22 -0800 Subject: time: apply NTP frequency/tick changes immediately Since the GENERIC_TIME changes landed, the adjtimex behavior changed for struct timex.tick and .freq changed. When the tick or freq value is set, we adjust the tick_length_base in ntp_update_frequency(). However, this new value doesn't get applied to tick_length until the next second (via second_overflow). This means some applications that do quick time tweaking do not see the requested change made as quickly as expected. I've run a few tests with this change, and ntpd still functions fine. 
Signed-off-by: John Stultz Signed-off-by: Ingo Molnar --- kernel/time/ntp.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'kernel') diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index f5f793d92415..e1fa3689a903 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -51,6 +51,7 @@ static long ntp_tick_adj; static void ntp_update_frequency(void) { + u64 old_tick_length_base = tick_length_base; u64 second_length = (u64)(tick_usec * NSEC_PER_USEC * USER_HZ) << NTP_SCALE_SHIFT; second_length += (s64)ntp_tick_adj << NTP_SCALE_SHIFT; @@ -60,6 +61,12 @@ static void ntp_update_frequency(void) tick_nsec = div_u64(second_length, HZ) >> NTP_SCALE_SHIFT; tick_length_base = div_u64(tick_length_base, NTP_INTERVAL_FREQ); + + /* + * Don't wait for the next second_overflow, apply + * the change to the tick length immediately + */ + tick_length += tick_length_base - old_tick_length_base; } static void ntp_update_offset(long offset) -- cgit v1.2.1 From 53bbfa9e9437e70b322368e82c723112d690e304 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 20 Feb 2008 07:58:42 +0100 Subject: time: ntp: clean up kernel/time/ntp.c Impact: cleanup, no functionality changed Make this file a bit more readable by applying a consistent coding style. No code changed: kernel/time/ntp.o: text data bss dec hex filename 2552 170 168 2890 b4a ntp.o.before 2552 170 168 2890 b4a ntp.o.after md5: eae1275df0b7d6290c13f6f6f8f05c8c ntp.o.before.asm eae1275df0b7d6290c13f6f6f8f05c8c ntp.o.after.asm Signed-off-by: Ingo Molnar --- kernel/time/ntp.c | 129 ++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 81 insertions(+), 48 deletions(-) (limited to 'kernel') diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index e1fa3689a903..3479ec48e604 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -1,53 +1,81 @@ /* - * linux/kernel/time/ntp.c - * * NTP state machine interfaces and logic. 
* * This code was mainly moved from kernel/timer.c and kernel/time.c * Please see those files for relevant copyright info and historical * changelogs. */ - -#include -#include -#include -#include -#include #include -#include #include #include -#include +#include +#include +#include +#include +#include +#include /* - * Timekeeping variables + * NTP timekeeping variables: */ -unsigned long tick_usec = TICK_USEC; /* USER_HZ period (usec) */ -unsigned long tick_nsec; /* ACTHZ period (nsec) */ -u64 tick_length; -static u64 tick_length_base; -static struct hrtimer leap_timer; +/* USER_HZ period (usecs): */ +unsigned long tick_usec = TICK_USEC; + +/* ACTHZ period (nsecs): */ +unsigned long tick_nsec; -#define MAX_TICKADJ 500 /* microsecs */ -#define MAX_TICKADJ_SCALED (((u64)(MAX_TICKADJ * NSEC_PER_USEC) << \ - NTP_SCALE_SHIFT) / NTP_INTERVAL_FREQ) +u64 tick_length; +static u64 tick_length_base; + +static struct hrtimer leap_timer; + +#define MAX_TICKADJ 500 /* usecs */ +#define MAX_TICKADJ_SCALED \ + (((u64)(MAX_TICKADJ * NSEC_PER_USEC) << NTP_SCALE_SHIFT) / NTP_INTERVAL_FREQ) /* * phase-lock loop variables */ -/* TIME_ERROR prevents overwriting the CMOS clock */ -static int time_state = TIME_OK; /* clock synchronization status */ -int time_status = STA_UNSYNC; /* clock status bits */ -static long time_tai; /* TAI offset (s) */ -static s64 time_offset; /* time adjustment (ns) */ -static long time_constant = 2; /* pll time constant */ -long time_maxerror = NTP_PHASE_LIMIT; /* maximum error (us) */ -long time_esterror = NTP_PHASE_LIMIT; /* estimated error (us) */ -static s64 time_freq; /* frequency offset (scaled ns/s)*/ -static long time_reftime; /* time at last adjustment (s) */ -long time_adjust; -static long ntp_tick_adj; + +/* + * clock synchronization status + * + * (TIME_ERROR prevents overwriting the CMOS clock) + */ +static int time_state = TIME_OK; + +/* clock status bits: */ +int time_status = STA_UNSYNC; + +/* TAI offset (secs): */ +static long time_tai; + +/* 
time adjustment (nsecs): */ +static s64 time_offset; + +/* pll time constant: */ +static long time_constant = 2; + +/* maximum error (usecs): */ +long time_maxerror = NTP_PHASE_LIMIT; + +/* estimated error (usecs): */ +long time_esterror = NTP_PHASE_LIMIT; + +/* frequency offset (scaled nsecs/secs): */ +static s64 time_freq; + +/* time at last adjustment (secs): */ +static long time_reftime; + +long time_adjust; + +static long ntp_tick_adj; + +/* + * NTP methods: + */ static void ntp_update_frequency(void) { @@ -118,15 +146,15 @@ static void ntp_update_offset(long offset) */ void ntp_clear(void) { - time_adjust = 0; /* stop active adjtime() */ - time_status |= STA_UNSYNC; - time_maxerror = NTP_PHASE_LIMIT; - time_esterror = NTP_PHASE_LIMIT; + time_adjust = 0; /* stop active adjtime() */ + time_status |= STA_UNSYNC; + time_maxerror = NTP_PHASE_LIMIT; + time_esterror = NTP_PHASE_LIMIT; ntp_update_frequency(); - tick_length = tick_length_base; - time_offset = 0; + tick_length = tick_length_base; + time_offset = 0; } /* @@ -147,8 +175,8 @@ static enum hrtimer_restart ntp_leap_second(struct hrtimer *timer) xtime.tv_sec--; wall_to_monotonic.tv_sec++; time_state = TIME_OOP; - printk(KERN_NOTICE "Clock: " - "inserting leap second 23:59:60 UTC\n"); + printk(KERN_NOTICE + "Clock: inserting leap second 23:59:60 UTC\n"); hrtimer_add_expires_ns(&leap_timer, NSEC_PER_SEC); res = HRTIMER_RESTART; break; @@ -157,8 +185,8 @@ static enum hrtimer_restart ntp_leap_second(struct hrtimer *timer) time_tai--; wall_to_monotonic.tv_sec--; time_state = TIME_WAIT; - printk(KERN_NOTICE "Clock: " - "deleting leap second 23:59:59 UTC\n"); + printk(KERN_NOTICE + "Clock: deleting leap second 23:59:59 UTC\n"); break; case TIME_OOP: time_tai++; @@ -199,10 +227,10 @@ void second_overflow(void) * Compute the phase adjustment for the next second. The offset is * reduced by a fixed factor times the time constant. 
*/ - tick_length = tick_length_base; - time_adj = shift_right(time_offset, SHIFT_PLL + time_constant); - time_offset -= time_adj; - tick_length += time_adj; + tick_length = tick_length_base; + time_adj = shift_right(time_offset, SHIFT_PLL + time_constant); + time_offset -= time_adj; + tick_length += time_adj; if (unlikely(time_adjust)) { if (time_adjust > MAX_TICKADJ) { @@ -240,12 +268,13 @@ static void sync_cmos_clock(struct work_struct *work) * This code is run on a timer. If the clock is set, that timer * may not expire at the correct time. Thus, we adjust... */ - if (!ntp_synced()) + if (!ntp_synced()) { /* * Not synced, exit, do not restart a timer (if one is * running, let it run out). */ return; + } getnstimeofday(&now); if (abs(now.tv_nsec - (NSEC_PER_SEC / 2)) <= tick_nsec / 2) @@ -277,7 +306,8 @@ static void notify_cmos_timer(void) static inline void notify_cmos_timer(void) { } #endif -/* adjtimex mainly allows reading (and writing, if superuser) of +/* + * adjtimex mainly allows reading (and writing, if superuser) of * kernel time-keeping variables. used by xntpd. */ int do_adjtimex(struct timex *txc) @@ -298,7 +328,10 @@ int do_adjtimex(struct timex *txc) if (txc->modes && !capable(CAP_SYS_TIME)) return -EPERM; - /* if the quartz is off by more than 10% something is VERY wrong! */ + /* + * if the quartz is off by more than 10% then + * something is VERY wrong! + */ if (txc->modes & ADJ_TICK && (txc->tick < 900000/USER_HZ || txc->tick > 1100000/USER_HZ)) -- cgit v1.2.1 From 3c972c2444dcb7088999c32b8c5a7ab3b8a6c0b6 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 22 Feb 2009 12:06:57 +0100 Subject: time: ntp: simplify the second_overflow() code flow Impact: cleanup, no functionality changed Instead of a hierarchy of conditions, transform them to clean gradual conditions and return's. This makes the flow easier to read and makes the purpose of the function easier to understand. 
kernel/time/ntp.o: text data bss dec hex filename 2552 170 168 2890 b4a ntp.o.before 2552 170 168 2890 b4a ntp.o.after md5: eae1275df0b7d6290c13f6f6f8f05c8c ntp.o.before.asm eae1275df0b7d6290c13f6f6f8f05c8c ntp.o.after.asm Signed-off-by: Ingo Molnar --- kernel/time/ntp.c | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) (limited to 'kernel') diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index 3479ec48e604..1fa6615b317a 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -232,19 +232,24 @@ void second_overflow(void) time_offset -= time_adj; tick_length += time_adj; - if (unlikely(time_adjust)) { - if (time_adjust > MAX_TICKADJ) { - time_adjust -= MAX_TICKADJ; - tick_length += MAX_TICKADJ_SCALED; - } else if (time_adjust < -MAX_TICKADJ) { - time_adjust += MAX_TICKADJ; - tick_length -= MAX_TICKADJ_SCALED; - } else { - tick_length += (s64)(time_adjust * NSEC_PER_USEC / - NTP_INTERVAL_FREQ) << NTP_SCALE_SHIFT; - time_adjust = 0; - } + if (!time_adjust) + return; + + if (time_adjust > MAX_TICKADJ) { + time_adjust -= MAX_TICKADJ; + tick_length += MAX_TICKADJ_SCALED; + return; } + + if (time_adjust < -MAX_TICKADJ) { + time_adjust += MAX_TICKADJ; + tick_length -= MAX_TICKADJ_SCALED; + return; + } + + tick_length += (s64)(time_adjust * NSEC_PER_USEC / NTP_INTERVAL_FREQ) + << NTP_SCALE_SHIFT; + time_adjust = 0; } #ifdef CONFIG_GENERIC_CMOS_UPDATE -- cgit v1.2.1 From bbd1267690bb6940d0722dd33e929442c0409c01 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 22 Feb 2009 12:11:11 +0100 Subject: time: ntp: simplify the MAX_TICKADJ_SCALED definition Impact: cleanup, no functionality changed There's an ugly u64 typecast in the MAX_TICKADJ_SCALED definition; this can be eliminated by making the MAX_TICKADJ constant's type 64-bit (signed). 
kernel/time/ntp.o: text data bss dec hex filename 2504 114 136 2754 ac2 ntp.o.before 2504 114 136 2754 ac2 ntp.o.after md5: 41f3009debc9b397d7394dd77d912f0a ntp.o.before.asm 41f3009debc9b397d7394dd77d912f0a ntp.o.after.asm Signed-off-by: Ingo Molnar --- kernel/time/ntp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index 1fa6615b317a..2b758c935c65 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -30,9 +30,9 @@ static u64 tick_length_base; static struct hrtimer leap_timer; -#define MAX_TICKADJ 500 /* usecs */ +#define MAX_TICKADJ 500LL /* usecs */ #define MAX_TICKADJ_SCALED \ - (((u64)(MAX_TICKADJ * NSEC_PER_USEC) << NTP_SCALE_SHIFT) / NTP_INTERVAL_FREQ) + (((MAX_TICKADJ * NSEC_PER_USEC) << NTP_SCALE_SHIFT) / NTP_INTERVAL_FREQ) /* * phase-lock loop variables -- cgit v1.2.1 From 9ce616aaefcb9309cb9c49a36310ebda6061b98b Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 22 Feb 2009 12:42:59 +0100 Subject: time: ntp: clean up ntp_update_frequency() Impact: cleanup, no functionality changed Prepare a refactoring of ntp_update_frequency(). 
kernel/time/ntp.o: text data bss dec hex filename 2504 114 136 2754 ac2 ntp.o.before 2504 114 136 2754 ac2 ntp.o.after md5: 41f3009debc9b397d7394dd77d912f0a ntp.o.before.asm 41f3009debc9b397d7394dd77d912f0a ntp.o.after.asm Signed-off-by: Ingo Molnar --- kernel/time/ntp.c | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) (limited to 'kernel') diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index 2b758c935c65..7d281d9fbe30 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -77,24 +77,33 @@ static long ntp_tick_adj; * NTP methods: */ +/* + * Update (tick_length, tick_length_base, tick_nsec), based + * on (tick_usec, ntp_tick_adj, time_freq): + */ static void ntp_update_frequency(void) { - u64 old_tick_length_base = tick_length_base; - u64 second_length = (u64)(tick_usec * NSEC_PER_USEC * USER_HZ) - << NTP_SCALE_SHIFT; - second_length += (s64)ntp_tick_adj << NTP_SCALE_SHIFT; - second_length += time_freq; + u64 prev_base; + u64 second_length; + + prev_base = tick_length_base; + + second_length = (u64)(tick_usec * NSEC_PER_USEC * USER_HZ) + << NTP_SCALE_SHIFT; + + second_length += (s64)ntp_tick_adj << NTP_SCALE_SHIFT; + second_length += time_freq; - tick_length_base = second_length; + tick_length_base = second_length; - tick_nsec = div_u64(second_length, HZ) >> NTP_SCALE_SHIFT; - tick_length_base = div_u64(tick_length_base, NTP_INTERVAL_FREQ); + tick_nsec = div_u64(second_length, HZ) >> NTP_SCALE_SHIFT; + tick_length_base = div_u64(tick_length_base, NTP_INTERVAL_FREQ); /* * Don't wait for the next second_overflow, apply * the change to the tick length immediately */ - tick_length += tick_length_base - old_tick_length_base; + tick_length += tick_length_base - prev_base; } static void ntp_update_offset(long offset) -- cgit v1.2.1 From bc26c31d446bc9c24cd6f7003777a05fe268ae48 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 22 Feb 2009 12:17:36 +0100 Subject: time: ntp: refactor up ntp_update_frequency() Impact: cleanup, no 
functionality changed Change ntp_update_frequency() from a hard to follow code flow that uses global variables as temporaries, to a clean input+output flow. Signed-off-by: Ingo Molnar --- kernel/time/ntp.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) (limited to 'kernel') diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index 7d281d9fbe30..f1abad738579 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -83,10 +83,8 @@ static long ntp_tick_adj; */ static void ntp_update_frequency(void) { - u64 prev_base; u64 second_length; - - prev_base = tick_length_base; + u64 new_base; second_length = (u64)(tick_usec * NSEC_PER_USEC * USER_HZ) << NTP_SCALE_SHIFT; @@ -94,16 +92,15 @@ static void ntp_update_frequency(void) second_length += (s64)ntp_tick_adj << NTP_SCALE_SHIFT; second_length += time_freq; - tick_length_base = second_length; - tick_nsec = div_u64(second_length, HZ) >> NTP_SCALE_SHIFT; - tick_length_base = div_u64(tick_length_base, NTP_INTERVAL_FREQ); + new_base = div_u64(second_length, NTP_INTERVAL_FREQ); /* * Don't wait for the next second_overflow, apply - * the change to the tick length immediately + * the change to the tick length immediately: */ - tick_length += tick_length_base - prev_base; + tick_length += new_base - tick_length_base; + tick_length_base = new_base; } static void ntp_update_offset(long offset) -- cgit v1.2.1 From f939890b6687e05c42361655fb6610fa08f5a601 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 22 Feb 2009 12:57:49 +0100 Subject: time: ntp: refactor and clean up ntp_update_offset() Impact: cleanup, no functionality changed - introduce the ntp_update_offset_fll() helper - clean up the flow and variable naming kernel/time/ntp.o: text data bss dec hex filename 2504 114 136 2754 ac2 ntp.o.before 2504 114 136 2754 ac2 ntp.o.after md5: 01f7b8e1a5472a3056f9e4ae84d46315 ntp.o.before.asm 01f7b8e1a5472a3056f9e4ae84d46315 ntp.o.after.asm Signed-off-by: Ingo Molnar --- kernel/time/ntp.c | 44 
++++++++++++++++++++++++++++++-------------- 1 file changed, 30 insertions(+), 14 deletions(-) (limited to 'kernel') diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index f1abad738579..ee437e1445d1 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -103,10 +103,27 @@ static void ntp_update_frequency(void) tick_length_base = new_base; } +static inline s64 ntp_update_offset_fll(s64 freq_adj, s64 offset64, long secs) +{ + time_status &= ~STA_MODE; + + if (secs < MINSEC) + return freq_adj; + + if (!(time_status & STA_FLL) && (secs <= MAXSEC)) + return freq_adj; + + freq_adj += div_s64(offset64 << (NTP_SCALE_SHIFT - SHIFT_FLL), secs); + time_status |= STA_MODE; + + return freq_adj; +} + static void ntp_update_offset(long offset) { - long mtemp; s64 freq_adj; + s64 offset64; + long secs; if (!(time_status & STA_PLL)) return; @@ -127,22 +144,21 @@ static void ntp_update_offset(long offset) */ if (time_status & STA_FREQHOLD || time_reftime == 0) time_reftime = xtime.tv_sec; - mtemp = xtime.tv_sec - time_reftime; + + secs = xtime.tv_sec - time_reftime; time_reftime = xtime.tv_sec; - freq_adj = (s64)offset * mtemp; - freq_adj <<= NTP_SCALE_SHIFT - 2 * (SHIFT_PLL + 2 + time_constant); - time_status &= ~STA_MODE; - if (mtemp >= MINSEC && (time_status & STA_FLL || mtemp > MAXSEC)) { - freq_adj += div_s64((s64)offset << (NTP_SCALE_SHIFT - SHIFT_FLL), - mtemp); - time_status |= STA_MODE; - } - freq_adj += time_freq; - freq_adj = min(freq_adj, MAXFREQ_SCALED); - time_freq = max(freq_adj, -MAXFREQ_SCALED); + offset64 = offset; + freq_adj = (offset64 * secs) << + (NTP_SCALE_SHIFT - 2 * (SHIFT_PLL + 2 + time_constant)); + + freq_adj = ntp_update_offset_fll(freq_adj, offset64, secs); + + freq_adj = min(freq_adj + time_freq, MAXFREQ_SCALED); + + time_freq = max(freq_adj, -MAXFREQ_SCALED); - time_offset = div_s64((s64)offset << NTP_SCALE_SHIFT, NTP_INTERVAL_FREQ); + time_offset = div_s64(offset64 << NTP_SCALE_SHIFT, NTP_INTERVAL_FREQ); } /** -- cgit v1.2.1 From 
478b7aab1682246a3d1e76e27a0aecb2f0013379 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 22 Feb 2009 13:22:23 +0100 Subject: time: ntp: simplify ntp_update_offset_fll() Impact: cleanup, no functionality changed Change ntp_update_offset_fll() to delta logic instead of absolute value logic. This eliminates 'freq_adj' from the function. Signed-off-by: Ingo Molnar --- kernel/time/ntp.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'kernel') diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index ee437e1445d1..5202dde2f0af 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -103,20 +103,19 @@ static void ntp_update_frequency(void) tick_length_base = new_base; } -static inline s64 ntp_update_offset_fll(s64 freq_adj, s64 offset64, long secs) +static inline s64 ntp_update_offset_fll(s64 offset64, long secs) { time_status &= ~STA_MODE; if (secs < MINSEC) - return freq_adj; + return 0; if (!(time_status & STA_FLL) && (secs <= MAXSEC)) - return freq_adj; + return 0; - freq_adj += div_s64(offset64 << (NTP_SCALE_SHIFT - SHIFT_FLL), secs); time_status |= STA_MODE; - return freq_adj; + return div_s64(offset64 << (NTP_SCALE_SHIFT - SHIFT_FLL), secs); } static void ntp_update_offset(long offset) @@ -152,7 +151,7 @@ static void ntp_update_offset(long offset) freq_adj = (offset64 * secs) << (NTP_SCALE_SHIFT - 2 * (SHIFT_PLL + 2 + time_constant)); - freq_adj = ntp_update_offset_fll(freq_adj, offset64, secs); + freq_adj += ntp_update_offset_fll(offset64, secs); freq_adj = min(freq_adj + time_freq, MAXFREQ_SCALED); -- cgit v1.2.1 From c7986acba211e8285e14c9603fb89e6f4ea0b9f8 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 22 Feb 2009 13:29:09 +0100 Subject: time: ntp: micro-optimize ntp_update_offset() Impact: cleanup, no functionality changed The time_reftime update in ntp_update_offset() to xtime.tv_sec is a convoluted way of saying that we want to freeze the frequency and want the 'secs' delta to be 0. Also make this branch unlikely. 
This shaves off 8 bytes from the code size: text data bss dec hex filename 2504 114 136 2754 ac2 ntp.o.before 2496 114 136 2746 aba ntp.o.after Signed-off-by: Ingo Molnar --- kernel/time/ntp.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index 5202dde2f0af..580a35028693 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -141,10 +141,10 @@ static void ntp_update_offset(long offset) * Select how the frequency is to be controlled * and in which mode (PLL or FLL). */ - if (time_status & STA_FREQHOLD || time_reftime == 0) - time_reftime = xtime.tv_sec; - secs = xtime.tv_sec - time_reftime; + if (unlikely(time_status & STA_FREQHOLD || time_reftime == 0)) + secs = 0; + time_reftime = xtime.tv_sec; offset64 = offset; -- cgit v1.2.1 From 10dd31a7a17254d6ba793305fc590455393e610e Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 22 Feb 2009 13:38:40 +0100 Subject: time: ntp: fix bug in ntp_update_offset() & do_adjtimex() Impact: change (fix) the way the NTP PLL seconds offset is initialized/tracked Fix a bug and do a micro-optimization: When PLL is enabled we do not reset time_reftime. If the PLL was off for a long time (for example after bootup), this is arguably the wrong thing to do. We already had a hack for the common boot-time case in ntp_update_offset(), in form of: if (unlikely(time_status & STA_FREQHOLD || time_reftime == 0)) secs = 0; But the update delta should be reset later on too - not just when the PLL is enabled for the first time after bootup. So do it on !STA_PLL -> STA_PLL transitions. This changes behavior, as previously if ntpd was disabled for a long time and we restarted it, we'd run from that last update, with a very large delta. 
Signed-off-by: Ingo Molnar --- kernel/time/ntp.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index 580a35028693..fc08eb10ced4 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -142,7 +142,7 @@ static void ntp_update_offset(long offset) * and in which mode (PLL or FLL). */ secs = xtime.tv_sec - time_reftime; - if (unlikely(time_status & STA_FREQHOLD || time_reftime == 0)) + if (unlikely(time_status & STA_FREQHOLD)) secs = 0; time_reftime = xtime.tv_sec; @@ -394,6 +394,13 @@ int do_adjtimex(struct timex *txc) } /* only set allowed bits */ time_status &= STA_RONLY; + /* + * If we turn on PLL adjustments then reset the + * reference time to current time. + */ + if (!(time_status & STA_PLL) && (txc->status & STA_PLL)) + time_reftime = xtime.tv_sec; + time_status |= txc->status & ~STA_RONLY; switch (time_state) { -- cgit v1.2.1 From 80f2257116474ceed5fccab510b4f7245c0f49d7 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 22 Feb 2009 15:15:32 +0100 Subject: time: ntp: refactor do_adjtimex() Impact: cleanup, no functionality changed do_adjtimex() is currently a monster function with a maze of branches. 
Refactor the txc->modes setting aspects of it into two new helper functions: process_adj_status() process_adjtimex_modes() kernel/time/ntp.o: text data bss dec hex filename 2512 114 136 2762 aca ntp.o.before 2512 114 136 2762 aca ntp.o.after Signed-off-by: Ingo Molnar --- kernel/time/ntp.c | 182 +++++++++++++++++++++++++++++------------------------- 1 file changed, 99 insertions(+), 83 deletions(-) (limited to 'kernel') diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index fc08eb10ced4..aded09be98cc 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -332,6 +332,102 @@ static void notify_cmos_timer(void) static inline void notify_cmos_timer(void) { } #endif + +/* + * Propagate a new txc->status value into the NTP state: + */ +static inline void process_adj_status(struct timex *txc, struct timespec *ts) +{ + long now; + + if ((time_status & STA_PLL) && !(txc->status & STA_PLL)) { + time_state = TIME_OK; + time_status = STA_UNSYNC; + } + /* only set allowed bits */ + time_status &= STA_RONLY; + + /* + * If we turn on PLL adjustments then reset the + * reference time to current time. 
+ */ + if (!(time_status & STA_PLL) && (txc->status & STA_PLL)) + time_reftime = xtime.tv_sec; + + time_status |= txc->status & ~STA_RONLY; + + switch (time_state) { + case TIME_OK: + start_timer: + now = ts->tv_sec; + if (time_status & STA_INS) { + time_state = TIME_INS; + now += 86400 - now % 86400; + hrtimer_start(&leap_timer, ktime_set(now, 0), HRTIMER_MODE_ABS); + } else if (time_status & STA_DEL) { + time_state = TIME_DEL; + now += 86400 - (now + 1) % 86400; + hrtimer_start(&leap_timer, ktime_set(now, 0), HRTIMER_MODE_ABS); + } + break; + case TIME_INS: + case TIME_DEL: + time_state = TIME_OK; + goto start_timer; + case TIME_WAIT: + if (!(time_status & (STA_INS | STA_DEL))) + time_state = TIME_OK; + break; + case TIME_OOP: + hrtimer_restart(&leap_timer); + break; + } +} +/* + * Called with the xtime lock held, so we can access and modify + * all the global NTP state: + */ +static inline void process_adjtimex_modes(struct timex *txc, struct timespec *ts) +{ + if (txc->modes & ADJ_STATUS) + process_adj_status(txc, ts); + + if (txc->modes & ADJ_NANO) + time_status |= STA_NANO; + if (txc->modes & ADJ_MICRO) + time_status &= ~STA_NANO; + + if (txc->modes & ADJ_FREQUENCY) { + time_freq = (s64)txc->freq * PPM_SCALE; + time_freq = min(time_freq, MAXFREQ_SCALED); + time_freq = max(time_freq, -MAXFREQ_SCALED); + } + + if (txc->modes & ADJ_MAXERROR) + time_maxerror = txc->maxerror; + if (txc->modes & ADJ_ESTERROR) + time_esterror = txc->esterror; + + if (txc->modes & ADJ_TIMECONST) { + time_constant = txc->constant; + if (!(time_status & STA_NANO)) + time_constant += 4; + time_constant = min(time_constant, (long)MAXTC); + time_constant = max(time_constant, 0l); + } + + if (txc->modes & ADJ_TAI && txc->constant > 0) + time_tai = txc->constant; + + if (txc->modes & ADJ_OFFSET) + ntp_update_offset(txc->offset); + if (txc->modes & ADJ_TICK) + tick_usec = txc->tick; + + if (txc->modes & (ADJ_TICK|ADJ_FREQUENCY|ADJ_OFFSET)) + ntp_update_frequency(); +} + /* * adjtimex mainly 
allows reading (and writing, if superuser) of * kernel time-keeping variables. used by xntpd. @@ -383,90 +479,10 @@ int do_adjtimex(struct timex *txc) txc->offset = save_adjust; goto adj_done; } - if (txc->modes) { - long sec; - - if (txc->modes & ADJ_STATUS) { - if ((time_status & STA_PLL) && - !(txc->status & STA_PLL)) { - time_state = TIME_OK; - time_status = STA_UNSYNC; - } - /* only set allowed bits */ - time_status &= STA_RONLY; - /* - * If we turn on PLL adjustments then reset the - * reference time to current time. - */ - if (!(time_status & STA_PLL) && (txc->status & STA_PLL)) - time_reftime = xtime.tv_sec; - - time_status |= txc->status & ~STA_RONLY; - - switch (time_state) { - case TIME_OK: - start_timer: - sec = ts.tv_sec; - if (time_status & STA_INS) { - time_state = TIME_INS; - sec += 86400 - sec % 86400; - hrtimer_start(&leap_timer, ktime_set(sec, 0), HRTIMER_MODE_ABS); - } else if (time_status & STA_DEL) { - time_state = TIME_DEL; - sec += 86400 - (sec + 1) % 86400; - hrtimer_start(&leap_timer, ktime_set(sec, 0), HRTIMER_MODE_ABS); - } - break; - case TIME_INS: - case TIME_DEL: - time_state = TIME_OK; - goto start_timer; - break; - case TIME_WAIT: - if (!(time_status & (STA_INS | STA_DEL))) - time_state = TIME_OK; - break; - case TIME_OOP: - hrtimer_restart(&leap_timer); - break; - } - } - - if (txc->modes & ADJ_NANO) - time_status |= STA_NANO; - if (txc->modes & ADJ_MICRO) - time_status &= ~STA_NANO; - if (txc->modes & ADJ_FREQUENCY) { - time_freq = (s64)txc->freq * PPM_SCALE; - time_freq = min(time_freq, MAXFREQ_SCALED); - time_freq = max(time_freq, -MAXFREQ_SCALED); - } - - if (txc->modes & ADJ_MAXERROR) - time_maxerror = txc->maxerror; - if (txc->modes & ADJ_ESTERROR) - time_esterror = txc->esterror; - - if (txc->modes & ADJ_TIMECONST) { - time_constant = txc->constant; - if (!(time_status & STA_NANO)) - time_constant += 4; - time_constant = min(time_constant, (long)MAXTC); - time_constant = max(time_constant, 0l); - } - - if (txc->modes & 
ADJ_TAI && txc->constant > 0) - time_tai = txc->constant; - - if (txc->modes & ADJ_OFFSET) - ntp_update_offset(txc->offset); - if (txc->modes & ADJ_TICK) - tick_usec = txc->tick; - - if (txc->modes & (ADJ_TICK|ADJ_FREQUENCY|ADJ_OFFSET)) - ntp_update_frequency(); - } + /* If there are input parameters, then process them: */ + if (txc->modes) + process_adjtimex_modes(txc, &ts); txc->offset = shift_right(time_offset * NTP_INTERVAL_FREQ, NTP_SCALE_SHIFT); -- cgit v1.2.1 From e96291653b2e4df02f160b574070f6e632868e5e Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 22 Feb 2009 15:35:18 +0100 Subject: time: ntp: refactor do_adjtimex() some more Impact: cleanup, no functionality changed Further simplify do_adjtimex(): - introduce the ntp_start_leap_timer() helper function - eliminate the goto adj_done complication Signed-off-by: Ingo Molnar --- kernel/time/ntp.c | 61 ++++++++++++++++++++++++++++++++----------------------- 1 file changed, 36 insertions(+), 25 deletions(-) (limited to 'kernel') diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index aded09be98cc..4346ed6e623f 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -332,14 +332,33 @@ static void notify_cmos_timer(void) static inline void notify_cmos_timer(void) { } #endif +/* + * Start the leap seconds timer: + */ +static inline void ntp_start_leap_timer(struct timespec *ts) +{ + long now = ts->tv_sec; + + if (time_status & STA_INS) { + time_state = TIME_INS; + now += 86400 - now % 86400; + hrtimer_start(&leap_timer, ktime_set(now, 0), HRTIMER_MODE_ABS); + + return; + } + + if (time_status & STA_DEL) { + time_state = TIME_DEL; + now += 86400 - (now + 1) % 86400; + hrtimer_start(&leap_timer, ktime_set(now, 0), HRTIMER_MODE_ABS); + } +} /* * Propagate a new txc->status value into the NTP state: */ static inline void process_adj_status(struct timex *txc, struct timespec *ts) { - long now; - if ((time_status & STA_PLL) && !(txc->status & STA_PLL)) { time_state = TIME_OK; time_status = STA_UNSYNC; @@ 
-358,22 +377,12 @@ static inline void process_adj_status(struct timex *txc, struct timespec *ts) switch (time_state) { case TIME_OK: - start_timer: - now = ts->tv_sec; - if (time_status & STA_INS) { - time_state = TIME_INS; - now += 86400 - now % 86400; - hrtimer_start(&leap_timer, ktime_set(now, 0), HRTIMER_MODE_ABS); - } else if (time_status & STA_DEL) { - time_state = TIME_DEL; - now += 86400 - (now + 1) % 86400; - hrtimer_start(&leap_timer, ktime_set(now, 0), HRTIMER_MODE_ABS); - } + ntp_start_leap_timer(ts); break; case TIME_INS: case TIME_DEL: time_state = TIME_OK; - goto start_timer; + ntp_start_leap_timer(ts); case TIME_WAIT: if (!(time_status & (STA_INS | STA_DEL))) time_state = TIME_OK; @@ -394,6 +403,7 @@ static inline void process_adjtimex_modes(struct timex *txc, struct timespec *ts if (txc->modes & ADJ_NANO) time_status |= STA_NANO; + if (txc->modes & ADJ_MICRO) time_status &= ~STA_NANO; @@ -405,6 +415,7 @@ static inline void process_adjtimex_modes(struct timex *txc, struct timespec *ts if (txc->modes & ADJ_MAXERROR) time_maxerror = txc->maxerror; + if (txc->modes & ADJ_ESTERROR) time_esterror = txc->esterror; @@ -421,6 +432,7 @@ static inline void process_adjtimex_modes(struct timex *txc, struct timespec *ts if (txc->modes & ADJ_OFFSET) ntp_update_offset(txc->offset); + if (txc->modes & ADJ_TICK) tick_usec = txc->tick; @@ -457,7 +469,7 @@ int do_adjtimex(struct timex *txc) if (txc->modes & ADJ_TICK && (txc->tick < 900000/USER_HZ || txc->tick > 1100000/USER_HZ)) - return -EINVAL; + return -EINVAL; if (txc->modes & ADJ_STATUS && time_state != TIME_OK) hrtimer_cancel(&leap_timer); @@ -467,7 +479,6 @@ int do_adjtimex(struct timex *txc) write_seqlock_irq(&xtime_lock); - /* If there are input parameters, then process them */ if (txc->modes & ADJ_ADJTIME) { long save_adjust = time_adjust; @@ -477,19 +488,18 @@ int do_adjtimex(struct timex *txc) ntp_update_frequency(); } txc->offset = save_adjust; - goto adj_done; - } + } else { - /* If there are input 
parameters, then process them: */ - if (txc->modes) - process_adjtimex_modes(txc, &ts); + /* If there are input parameters, then process them: */ + if (txc->modes) + process_adjtimex_modes(txc, &ts); - txc->offset = shift_right(time_offset * NTP_INTERVAL_FREQ, + txc->offset = shift_right(time_offset * NTP_INTERVAL_FREQ, NTP_SCALE_SHIFT); - if (!(time_status & STA_NANO)) - txc->offset /= NSEC_PER_USEC; + if (!(time_status & STA_NANO)) + txc->offset /= NSEC_PER_USEC; + } -adj_done: result = time_state; /* mostly `TIME_OK' */ if (time_status & (STA_UNSYNC|STA_CLOCKERR)) result = TIME_ERROR; @@ -514,6 +524,7 @@ adj_done: txc->calcnt = 0; txc->errcnt = 0; txc->stbcnt = 0; + write_sequnlock_irq(&xtime_lock); txc->time.tv_sec = ts.tv_sec; -- cgit v1.2.1 From 2b9d1496e7835a603c340e8f0dd81f4b74d5f248 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 22 Feb 2009 15:48:43 +0100 Subject: time: ntp: make 64-bit constants more robust Impact: cleanup, no functionality changed - make PPM_SCALE an explicit s64 constant, to remove (s64) casts from usage sites. 
kernel/time/ntp.o: text data bss dec hex filename 2536 114 136 2786 ae2 ntp.o.before 2536 114 136 2786 ae2 ntp.o.after md5: 40a7728d1188aa18e83e21a81fa7b150 ntp.o.before.asm 40a7728d1188aa18e83e21a81fa7b150 ntp.o.after.asm Signed-off-by: Ingo Molnar --- kernel/time/ntp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index 4346ed6e623f..7447d57e021a 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -408,7 +408,7 @@ static inline void process_adjtimex_modes(struct timex *txc, struct timespec *ts time_status &= ~STA_NANO; if (txc->modes & ADJ_FREQUENCY) { - time_freq = (s64)txc->freq * PPM_SCALE; + time_freq = txc->freq * PPM_SCALE; time_freq = min(time_freq, MAXFREQ_SCALED); time_freq = max(time_freq, -MAXFREQ_SCALED); } @@ -505,7 +505,7 @@ int do_adjtimex(struct timex *txc) result = TIME_ERROR; txc->freq = shift_right((time_freq >> PPM_SCALE_INV_SHIFT) * - (s64)PPM_SCALE_INV, NTP_SCALE_SHIFT); + PPM_SCALE_INV, NTP_SCALE_SHIFT); txc->maxerror = time_maxerror; txc->esterror = time_esterror; txc->status = time_status; -- cgit v1.2.1 From 069569e025706f27f939785f86a94d5d8ce55dce Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 22 Feb 2009 16:03:37 +0100 Subject: time: ntp: simplify ntp_tick_adj calculations Impact: micro-optimization Convert the (internal) ntp_tick_adj value we store from unscaled units to scaled units. This is a constant that we never modify, so scaling it up once during bootup is enough - we don't have to do it for every adjustment step.
Signed-off-by: Ingo Molnar --- kernel/time/ntp.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index 7447d57e021a..a3fe7ef2d83b 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -71,7 +71,8 @@ static long time_reftime; long time_adjust; -static long ntp_tick_adj; +/* constant (boot-param configurable) NTP tick adjustment (upscaled) */ +static s64 ntp_tick_adj; /* * NTP methods: @@ -89,7 +90,7 @@ static void ntp_update_frequency(void) second_length = (u64)(tick_usec * NSEC_PER_USEC * USER_HZ) << NTP_SCALE_SHIFT; - second_length += (s64)ntp_tick_adj << NTP_SCALE_SHIFT; + second_length += ntp_tick_adj; second_length += time_freq; tick_nsec = div_u64(second_length, HZ) >> NTP_SCALE_SHIFT; @@ -540,6 +541,8 @@ int do_adjtimex(struct timex *txc) static int __init ntp_tick_adj_setup(char *str) { ntp_tick_adj = simple_strtol(str, NULL, 0); + ntp_tick_adj <<= NTP_SCALE_SHIFT; + return 1; } -- cgit v1.2.1 From 39854fe8c165872d743f6a0c4860ca2de8e45ac9 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 22 Feb 2009 16:06:58 +0100 Subject: time: ntp: clean up second_overflow() Impact: cleanup, no functionality changed The 'time_adj' local variable is named in a very confusing way because it almost shadows the 'time_adjust' global variable - which is used in this same function. Rename it to 'delta' - to make them stand apart more clearly. 
kernel/time/ntp.o: text data bss dec hex filename 2545 114 144 2803 af3 ntp.o.before 2545 114 144 2803 af3 ntp.o.after md5: 1bf0b3be564512279ba7cee299d1d2be ntp.o.before.asm 1bf0b3be564512279ba7cee299d1d2be ntp.o.after.asm Signed-off-by: Ingo Molnar --- kernel/time/ntp.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'kernel') diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index a3fe7ef2d83b..c74eb7d9d854 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -236,7 +236,7 @@ static enum hrtimer_restart ntp_leap_second(struct hrtimer *timer) */ void second_overflow(void) { - s64 time_adj; + s64 delta; /* Bump the maxerror field */ time_maxerror += MAXFREQ / NSEC_PER_USEC; @@ -249,10 +249,11 @@ void second_overflow(void) * Compute the phase adjustment for the next second. The offset is * reduced by a fixed factor times the time constant. */ - tick_length = tick_length_base; - time_adj = shift_right(time_offset, SHIFT_PLL + time_constant); - time_offset -= time_adj; - tick_length += time_adj; + tick_length = tick_length_base; + + delta = shift_right(time_offset, SHIFT_PLL + time_constant); + time_offset -= delta; + tick_length += delta; if (!time_adjust) return; -- cgit v1.2.1 From a2a5ac8650b570bea3cb3614f77739dcd07d6632 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Thu, 26 Feb 2009 09:46:14 -0800 Subject: time: ntp: fix bug in ntp_update_offset() & do_adjtimex(), fix The time_status conditional was accidentally placed right after we clear the checked time_status bits, which causes us to take the conditional every time through. This fixes it by moving the conditional to before we clear the time_status bits. 
Signed-off-by: John Stultz Cc: Clark Williams Signed-off-by: Ingo Molnar --- kernel/time/ntp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index c74eb7d9d854..7fc64375ff43 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -365,8 +365,6 @@ static inline void process_adj_status(struct timex *txc, struct timespec *ts) time_state = TIME_OK; time_status = STA_UNSYNC; } - /* only set allowed bits */ - time_status &= STA_RONLY; /* * If we turn on PLL adjustments then reset the @@ -375,6 +373,8 @@ static inline void process_adj_status(struct timex *txc, struct timespec *ts) if (!(time_status & STA_PLL) && (txc->status & STA_PLL)) time_reftime = xtime.tv_sec; + /* only set allowed bits */ + time_status &= STA_RONLY; time_status |= txc->status & ~STA_RONLY; switch (time_state) { -- cgit v1.2.1 From 37bebc70d7ad4144c571d74500db3bb26ec0c0eb Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 23 Mar 2009 20:34:11 +0100 Subject: posix timers: fix RLIMIT_CPU && fork() See http://bugzilla.kernel.org/show_bug.cgi?id=12911 copy_signal() copies signal->rlim, but RLIMIT_CPU is "lost", because posix_cpu_timers_init_group() sets cputime_expires.prof_exp = 0 and thus fastpath_timer_check() returns false unless we have other cpu timers. This is the minimal fix for 2.6.29 (tested) and 2.6.28. The patch is not optimal; we need further cleanups here. With this patch update_rlimit_cpu() is not really needed, but I don't think it should be removed.
The proper fix (I think) is: - set_process_cpu_timer() should just start the cputimer->running logic (it does), no need to change cputime_expires.xxx_exp - posix_cpu_timers_init_group() should set ->running when needed - fastpath_timer_check() can check ->running instead of task_cputime_zero(signal->cputime_expires) Reported-by: Peter Lojkin Signed-off-by: Oleg Nesterov Cc: Peter Zijlstra Cc: Roland McGrath Cc: [for 2.6.29.x] LKML-Reference: <20090323193411.GA17514@redhat.com> Signed-off-by: Ingo Molnar --- kernel/posix-cpu-timers.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index e976e505648d..8e5d9a68b022 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c @@ -1370,7 +1370,8 @@ static inline int fastpath_timer_check(struct task_struct *tsk) if (task_cputime_expired(&group_sample, &sig->cputime_expires)) return 1; } - return 0; + + return sig->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY; } /* -- cgit v1.2.1