From d3b8f889a220aed825accc28eb64ce283a0d51ac Mon Sep 17 00:00:00 2001
From: John Stultz
Date: Mon, 17 Aug 2009 16:40:47 -0700
Subject: x86: Make tsc=reliable override boot time stability checks

This patch makes the tsc=reliable option disable the boot time
stability checks. Currently the option only disables the runtime
watchdog checks. This change allows folks to override the boot time
TSC stability checks and use the TSC when the system would otherwise
disqualify it.

There are still some situations in which the TSC will be disqualified,
such as cpufreq scaling, but those are cases where the box would hang
if the TSC were used anyway.

The patch also includes a fix for an issue found by Thomas Gleixner,
where the TSC disqualification message wouldn't be printed after a
call to unsynchronized_tsc().

Signed-off-by: John Stultz
Cc: Andrew Morton
Cc: akataria@vmware.com
Cc: Stephen Hemminger
LKML-Reference: <1250552447.7212.92.camel@localhost.localdomain>
Signed-off-by: Thomas Gleixner
---
 arch/x86/kernel/tsc.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 71f4368b357e..648fb269e5d1 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -825,6 +825,9 @@ __cpuinit int unsynchronized_tsc(void)
 	if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
 		return 0;
+
+	if (tsc_clocksource_reliable)
+		return 0;
 	/*
 	 * Intel systems are normally all synchronized.
 	 * Exceptions must mark TSC as unstable:
@@ -832,10 +835,10 @@ __cpuinit int unsynchronized_tsc(void)
 	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) {
 		/* assume multi socket systems are not synchronized: */
 		if (num_possible_cpus() > 1)
-			tsc_unstable = 1;
+			return 1;
 	}
-	return tsc_unstable;
+	return 0;
 }
 
 static void __init init_tsc_clocksource(void)
--
cgit v1.2.1
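For context, tsc_clocksource_reliable is set when the user boots with
tsc=reliable. Below is a minimal sketch of the option handler, modeled
on the tsc_setup() parser in arch/x86/kernel/tsc.c of this era
(simplified; the real handler understands other tsc= values as well):

#include <linux/init.h>
#include <linux/string.h>

int tsc_clocksource_reliable;

static int __init tsc_setup(char *str)
{
	/*
	 * "tsc=reliable" marks the TSC as trustworthy: it already
	 * suppressed the runtime clocksource watchdog, and with the
	 * patch above it now bypasses the boot time stability checks
	 * in unsynchronized_tsc() as well.
	 */
	if (!strcmp(str, "reliable"))
		tsc_clocksource_reliable = 1;
	return 1;
}
__setup("tsc=", tsc_setup);
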
From 08ec0c58fb8a05d3191d5cb6f5d6f81adb419798 Mon Sep 17 00:00:00 2001
From: John Stultz
Date: Tue, 27 Jul 2010 17:00:00 -0700
Subject: x86: Improve TSC calibration using a delayed workqueue

Boot to boot, the TSC calibration may vary by quite a large amount.
While a normal variance of 50-100ppm can easily be seen, the quick
calibration code only requires 500ppm accuracy, which is the limit of
what NTP can correct for. This can cause problems for systems used as
NTP servers, as every time they reboot it can take hours for them to
calculate the new drift error caused by the calibration.

The classic trade-off here is calibration accuracy vs. slow boot
times, as during the calibration nothing else can run.

This patch uses a delayed workqueue to calibrate the TSC over the
period of a second. This allows very accurate calibration (in my tests
varying by only 1 kHz, or 0.4ppm, boot to boot). Additionally, this
refined calibration step does not block the boot process, and only
delays the TSC clocksource registration by a few seconds in early
boot. If the refined calibration strays more than 1% from the early
boot calibration value, the system falls back to the already
calculated early boot calibration.

Credit to Andi Kleen, who suggested using a timer quite a while back,
but I dismissed it thinking the timer calibration would be done after
the clocksource was registered (which would break things). Forgive me
for my short-sightedness.

This patch has worked very well in my testing, but TSC hardware is
quite varied, so it would probably be good to get some extended
testing, possibly pushing inclusion out to 2.6.39.

Signed-off-by: John Stultz
LKML-Reference: <1289003985-29060-1-git-send-email-johnstul@us.ibm.com>
Reviewed-by: Thomas Gleixner
CC: Thomas Gleixner
CC: Ingo Molnar
CC: Martin Schwidefsky
CC: Clark Williams
CC: Andi Kleen
---
 arch/x86/kernel/tsc.c | 86 +++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 83 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index bb64beb301d9..dc1393e7cbfb 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -888,7 +888,82 @@ __cpuinit int unsynchronized_tsc(void)
 	return 0;
 }
 
-static void __init init_tsc_clocksource(void)
+
+static void tsc_refine_calibration_work(struct work_struct *work);
+static DECLARE_DELAYED_WORK(tsc_irqwork, tsc_refine_calibration_work);
+/**
+ * tsc_refine_calibration_work - Further refine tsc freq calibration
+ * @work - ignored.
+ *
+ * This function uses delayed work over a period of a
+ * second to further refine the TSC freq value. Since this is
+ * timer based, instead of loop based, we don't block the boot
+ * process while this longer calibration is done.
+ *
+ * If there are any calibration anomalies (too many SMIs, etc),
+ * or the refined calibration is off by 1% of the fast early
+ * calibration, we throw out the new calibration and use the
+ * early calibration.
+ */
+static void tsc_refine_calibration_work(struct work_struct *work)
+{
+	static u64 tsc_start = -1, ref_start;
+	static int hpet;
+	u64 tsc_stop, ref_stop, delta;
+	unsigned long freq;
+
+	/* Don't bother refining TSC on unstable systems */
+	if (check_tsc_unstable())
+		goto out;
+
+	/*
+	 * Since the work is started early in boot, we may be
+	 * delayed the first time we expire. So set the workqueue
+	 * again once we know timers are working.
+	 */
+	if (tsc_start == -1) {
+		/*
+		 * Only set hpet once, to avoid mixing hardware
+		 * if the hpet becomes enabled later.
+		 */
+		hpet = is_hpet_enabled();
+		schedule_delayed_work(&tsc_irqwork, HZ);
+		tsc_start = tsc_read_refs(&ref_start, hpet);
+		return;
+	}
+
+	tsc_stop = tsc_read_refs(&ref_stop, hpet);
+
+	/* hpet or pmtimer available ? */
+	if (!hpet && !ref_start && !ref_stop)
+		goto out;
+
+	/* Check, whether the sampling was disturbed by an SMI */
+	if (tsc_start == ULLONG_MAX || tsc_stop == ULLONG_MAX)
+		goto out;
+
+	delta = tsc_stop - tsc_start;
+	delta *= 1000000LL;
+	if (hpet)
+		freq = calc_hpet_ref(delta, ref_start, ref_stop);
+	else
+		freq = calc_pmtimer_ref(delta, ref_start, ref_stop);
+
+	/* Make sure we're within 1% */
+	if (abs(tsc_khz - freq) > tsc_khz/100)
+		goto out;
+
+	tsc_khz = freq;
+	printk(KERN_INFO "Refined TSC clocksource calibration: "
+	       "%lu.%03lu MHz.\n", (unsigned long)tsc_khz / 1000,
+	       (unsigned long)tsc_khz % 1000);
+
+out:
+	clocksource_register_khz(&clocksource_tsc, tsc_khz);
+}
+
+
+static int __init init_tsc_clocksource(void)
 {
 	if (tsc_clocksource_reliable)
 		clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY;
@@ -897,8 +972,14 @@ static void __init init_tsc_clocksource(void)
 		clocksource_tsc.rating = 0;
 		clocksource_tsc.flags &= ~CLOCK_SOURCE_IS_CONTINUOUS;
 	}
-	clocksource_register_khz(&clocksource_tsc, tsc_khz);
+	schedule_delayed_work(&tsc_irqwork, 0);
+	return 0;
 }
+/*
+ * We use device_initcall here, to ensure we run after the hpet
+ * is fully initialized, which may occur at fs_initcall time.
+ */
+device_initcall(init_tsc_clocksource);
 
 void __init tsc_init(void)
 {
@@ -952,6 +1033,5 @@ void __init tsc_init(void)
 		mark_tsc_unstable("TSCs unsynchronized");
 
 	check_system_tsc_reliable();
-	init_tsc_clocksource();
 }
--
cgit v1.2.1
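To make the changelog's tolerances concrete, here is a small
standalone sketch of the 1% acceptance test from
tsc_refine_calibration_work(), using hypothetical numbers for a
nominal 2.4 GHz part (the values are illustrative, not measurements):

#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	unsigned long tsc_khz = 2400000;  /* quick boot-time calibration */
	unsigned long freq    = 2400978;  /* hypothetical refined result */
	long diff = (long)freq - (long)tsc_khz;

	/*
	 * Same policy as the patch: throw the refined value away if
	 * it strays more than 1% from the early calibration.
	 */
	if (labs(diff) > (long)(tsc_khz / 100))
		printf("refined calibration rejected, keeping %lu kHz\n",
		       tsc_khz);
	else
		printf("Refined TSC calibration: %lu.%03lu MHz\n",
		       freq / 1000, freq % 1000);

	/*
	 * Scale of the errors involved for a 2.4 GHz TSC:
	 *   500 ppm (quick calibration bound)    ->  1200 kHz
	 *   0.4 ppm (refined, per the changelog) ->    ~1 kHz
	 *   1% rejection window                  -> 24000 kHz
	 */
	return 0;
}
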
From a8760eca6cf60ed303ad494ef45901f63165d2c8 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner
Date: Mon, 13 Dec 2010 11:28:02 +0100
Subject: x86: Check tsc available/disabled in the delayed init function

The delayed TSC init function does not check whether the system has no
TSC or whether the TSC is disabled on the kernel command line. In that
case the basic calibration never happened, so the workqueue-based
extended calibration crashes with a division by zero.

Add the missing checks and do not touch the TSC when it is not
available or disabled.

Signed-off-by: Thomas Gleixner
Cc: John Stultz
---
 arch/x86/kernel/tsc.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index dc1393e7cbfb..356a0d455cf9 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -965,6 +965,9 @@ out:
 
 static int __init init_tsc_clocksource(void)
 {
+	if (!cpu_has_tsc || tsc_disabled > 0)
+		return 0;
+
 	if (tsc_clocksource_reliable)
 		clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY;
 	/* lower the rating if we already know its unstable: */
--
cgit v1.2.1
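The crash this fixes is worth spelling out: clocksource registration
derives a cycles-to-nanoseconds mult/shift pair from the supplied
frequency, and with the basic calibration skipped tsc_khz is still 0.
Below is a simplified standalone illustration of that failure mode
(only the shape of the kernel's mult calculation, not its exact
clocks_calc_mult_shift() code):

#include <stdint.h>
#include <stdio.h>

/*
 * Conceptually what registering a clocksource does: compute mult so
 * that ns = (cycles * mult) >> shift.
 */
static uint32_t cycles_to_ns_mult(uint64_t freq_khz, unsigned int shift)
{
	uint64_t freq_hz = freq_khz * 1000;

	/*
	 * freq_hz == 0 is exactly the case the new guards prevent:
	 * with no TSC, or with the TSC disabled on the command line,
	 * no calibration ever ran, and this division faults.
	 */
	return (uint32_t)((UINT64_C(1000000000) << shift) / freq_hz);
}

int main(void)
{
	printf("mult for 2.4 GHz at shift 8: %u\n",
	       (unsigned int)cycles_to_ns_mult(2400000, 8));
	/* cycles_to_ns_mult(0, 8) would die with SIGFPE */
	return 0;
}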