diff options
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r-- | arch/x86/kernel/acpi/wakeup_64.S | 30 | ||||
-rw-r--r-- | arch/x86/kernel/apic.c | 2 | ||||
-rw-r--r-- | arch/x86/kernel/apm_32.c | 4 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/cpufreq/p4-clockmod.c | 1 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/cpufreq/powernow-k8.c | 12 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce_64.c | 7 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce_amd_64.c | 2 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce_intel_64.c | 4 | ||||
-rw-r--r-- | arch/x86/kernel/ds.c | 3 | ||||
-rw-r--r-- | arch/x86/kernel/efi.c | 7 | ||||
-rw-r--r-- | arch/x86/kernel/efi_64.c | 21 | ||||
-rw-r--r-- | arch/x86/kernel/hpet.c | 2 | ||||
-rw-r--r-- | arch/x86/kernel/i387.c | 2 | ||||
-rw-r--r-- | arch/x86/kernel/kprobes.c | 3 | ||||
-rw-r--r-- | arch/x86/kernel/olpc.c | 2 | ||||
-rw-r--r-- | arch/x86/kernel/paravirt.c | 26 | ||||
-rw-r--r-- | arch/x86/kernel/process_32.c | 3 | ||||
-rw-r--r-- | arch/x86/kernel/ptrace.c | 18 | ||||
-rw-r--r-- | arch/x86/kernel/reboot.c | 8 | ||||
-rw-r--r-- | arch/x86/kernel/setup.c | 5 | ||||
-rw-r--r-- | arch/x86/kernel/time_64.c | 2 | ||||
-rw-r--r-- | arch/x86/kernel/traps.c | 10 | ||||
-rw-r--r-- | arch/x86/kernel/tsc.c | 110 | ||||
-rw-r--r-- | arch/x86/kernel/vmiclock_32.c | 7 |
24 files changed, 174 insertions, 117 deletions
diff --git a/arch/x86/kernel/acpi/wakeup_64.S b/arch/x86/kernel/acpi/wakeup_64.S index bcc293423a70..96258d9dc974 100644 --- a/arch/x86/kernel/acpi/wakeup_64.S +++ b/arch/x86/kernel/acpi/wakeup_64.S @@ -13,7 +13,6 @@ * Hooray, we are in Long 64-bit mode (but still running in low memory) */ ENTRY(wakeup_long64) -wakeup_long64: movq saved_magic, %rax movq $0x123456789abcdef0, %rdx cmpq %rdx, %rax @@ -34,16 +33,12 @@ wakeup_long64: movq saved_rip, %rax jmp *%rax +ENDPROC(wakeup_long64) bogus_64_magic: jmp bogus_64_magic - .align 2 - .p2align 4,,15 -.globl do_suspend_lowlevel - .type do_suspend_lowlevel,@function -do_suspend_lowlevel: -.LFB5: +ENTRY(do_suspend_lowlevel) subq $8, %rsp xorl %eax, %eax call save_processor_state @@ -67,7 +62,7 @@ do_suspend_lowlevel: pushfq popq pt_regs_flags(%rax) - movq $.L97, saved_rip(%rip) + movq $resume_point, saved_rip(%rip) movq %rsp, saved_rsp movq %rbp, saved_rbp @@ -78,14 +73,12 @@ do_suspend_lowlevel: addq $8, %rsp movl $3, %edi xorl %eax, %eax - jmp acpi_enter_sleep_state -.L97: - .p2align 4,,7 -.L99: - .align 4 - movl $24, %eax - movw %ax, %ds + call acpi_enter_sleep_state + /* in case something went wrong, restore the machine status and go on */ + jmp resume_point + .align 4 +resume_point: /* We don't restore %rax, it must be 0 anyway */ movq $saved_context, %rax movq saved_context_cr4(%rax), %rbx @@ -117,12 +110,9 @@ do_suspend_lowlevel: xorl %eax, %eax addq $8, %rsp jmp restore_processor_state -.LFE5: -.Lfe5: - .size do_suspend_lowlevel, .Lfe5-do_suspend_lowlevel - +ENDPROC(do_suspend_lowlevel) + .data -ALIGN ENTRY(saved_rbp) .quad 0 ENTRY(saved_rsi) .quad 0 ENTRY(saved_rdi) .quad 0 diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index 115449f869ee..570f36e44e59 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -862,7 +862,7 @@ void clear_local_APIC(void) } /* lets not touch this if we didn't frob it */ -#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(X86_MCE_INTEL) +#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL) if (maxlvt >= 5) { v = apic_read(APIC_LVTTHMR); apic_write(APIC_LVTTHMR, v | APIC_LVT_MASKED); diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index 98807bb095ad..266ec6c18b6c 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c @@ -1192,6 +1192,7 @@ static int suspend(int vetoable) device_suspend(PMSG_SUSPEND); local_irq_disable(); device_power_down(PMSG_SUSPEND); + sysdev_suspend(PMSG_SUSPEND); local_irq_enable(); @@ -1208,6 +1209,7 @@ static int suspend(int vetoable) if (err != APM_SUCCESS) apm_error("suspend", err); err = (err == APM_SUCCESS) ? 0 : -EIO; + sysdev_resume(); device_power_up(PMSG_RESUME); local_irq_enable(); device_resume(PMSG_RESUME); @@ -1228,6 +1230,7 @@ static void standby(void) local_irq_disable(); device_power_down(PMSG_SUSPEND); + sysdev_suspend(PMSG_SUSPEND); local_irq_enable(); err = set_system_power_state(APM_STATE_STANDBY); @@ -1235,6 +1238,7 @@ static void standby(void) apm_error("standby", err); local_irq_disable(); + sysdev_resume(); device_power_up(PMSG_RESUME); local_irq_enable(); } diff --git a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c index b585e04cbc9e..3178c3acd97e 100644 --- a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c +++ b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c @@ -277,7 +277,6 @@ static struct cpufreq_driver p4clockmod_driver = { .name = "p4-clockmod", .owner = THIS_MODULE, .attr = p4clockmod_attr, - .hide_interface = 1, }; diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index fb039cd345d8..6428aa17b40e 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c @@ -1157,8 +1157,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) data->cpu = pol->cpu; data->currpstate = HW_PSTATE_INVALID; - rc = powernow_k8_cpu_init_acpi(data); - if (rc) { + if (powernow_k8_cpu_init_acpi(data)) { /* * Use the PSB BIOS structure. This is only availabe on * an UP version, and is deprecated by AMD. @@ -1176,17 +1175,20 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) "ACPI maintainers and complain to your BIOS " "vendor.\n"); #endif - goto err_out; + kfree(data); + return -ENODEV; } if (pol->cpu != 0) { printk(KERN_ERR FW_BUG PFX "No ACPI _PSS objects for " "CPU other than CPU0. Complain to your BIOS " "vendor.\n"); - goto err_out; + kfree(data); + return -ENODEV; } rc = find_psb_table(data); if (rc) { - goto err_out; + kfree(data); + return -ENODEV; } /* Take a crude guess here. * That guess was in microseconds, so multiply with 1000 */ diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c index 1c838032fd37..fe79985ce0f2 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_64.c @@ -295,11 +295,11 @@ void do_machine_check(struct pt_regs * regs, long error_code) * If we know that the error was in user space, send a * SIGBUS. Otherwise, panic if tolerance is low. * - * do_exit() takes an awful lot of locks and has a slight + * force_sig() takes an awful lot of locks and has a slight * risk of deadlocking. */ if (user_space) { - do_exit(SIGBUS); + force_sig(SIGBUS, current); } else if (panic_on_oops || tolerant < 2) { mce_panic("Uncorrected machine check", &panicm, mcestart); @@ -490,7 +490,7 @@ static void __cpuinit mce_cpu_quirks(struct cpuinfo_x86 *c) } -static void __cpuinit mce_cpu_features(struct cpuinfo_x86 *c) +static void mce_cpu_features(struct cpuinfo_x86 *c) { switch (c->x86_vendor) { case X86_VENDOR_INTEL: @@ -734,6 +734,7 @@ __setup("mce=", mcheck_enable); static int mce_resume(struct sys_device *dev) { mce_init(NULL); + mce_cpu_features(¤t_cpu_data); return 0; } diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c index 8ae8c4ff094d..f2ee0ae29bd6 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c @@ -121,7 +121,7 @@ static long threshold_restart_bank(void *_tr) } /* cpu init entry point, called from mce.c with preempt off */ -void __cpuinit mce_amd_feature_init(struct cpuinfo_x86 *c) +void mce_amd_feature_init(struct cpuinfo_x86 *c) { unsigned int bank, block; unsigned int cpu = smp_processor_id(); diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c index 4b48f251fd39..f44c36624360 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c @@ -30,7 +30,7 @@ asmlinkage void smp_thermal_interrupt(void) irq_exit(); } -static void __cpuinit intel_init_thermal(struct cpuinfo_x86 *c) +static void intel_init_thermal(struct cpuinfo_x86 *c) { u32 l, h; int tm2 = 0; @@ -84,7 +84,7 @@ static void __cpuinit intel_init_thermal(struct cpuinfo_x86 *c) return; } -void __cpuinit mce_intel_feature_init(struct cpuinfo_x86 *c) +void mce_intel_feature_init(struct cpuinfo_x86 *c) { intel_init_thermal(c); } diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c index 169a120587be..87b67e3a765a 100644 --- a/arch/x86/kernel/ds.c +++ b/arch/x86/kernel/ds.c @@ -729,7 +729,7 @@ struct pebs_tracer *ds_request_pebs(struct task_struct *task, spin_unlock_irqrestore(&ds_lock, irq); - ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_bts); + ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_pebs); ds_resume_pebs(tracer); return tracer; @@ -1029,5 +1029,4 @@ void ds_copy_thread(struct task_struct *tsk, struct task_struct *father) void ds_exit_thread(struct task_struct *tsk) { - WARN_ON(tsk->thread.ds_ctx); } diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c index 1119d247fe11..eb1ef3b67dd5 100644 --- a/arch/x86/kernel/efi.c +++ b/arch/x86/kernel/efi.c @@ -467,7 +467,7 @@ void __init efi_enter_virtual_mode(void) efi_memory_desc_t *md; efi_status_t status; unsigned long size; - u64 end, systab, addr, npages; + u64 end, systab, addr, npages, end_pfn; void *p, *va; efi.systab = NULL; @@ -479,7 +479,10 @@ void __init efi_enter_virtual_mode(void) size = md->num_pages << EFI_PAGE_SHIFT; end = md->phys_addr + size; - if (PFN_UP(end) <= max_low_pfn_mapped) + end_pfn = PFN_UP(end); + if (end_pfn <= max_low_pfn_mapped + || (end_pfn > (1UL << (32 - PAGE_SHIFT)) + && end_pfn <= max_pfn_mapped)) va = __va(md->phys_addr); else va = efi_ioremap(md->phys_addr, size); diff --git a/arch/x86/kernel/efi_64.c b/arch/x86/kernel/efi_64.c index 652c5287215f..cb783b92c50c 100644 --- a/arch/x86/kernel/efi_64.c +++ b/arch/x86/kernel/efi_64.c @@ -99,24 +99,11 @@ void __init efi_call_phys_epilog(void) void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size) { - static unsigned pages_mapped __initdata; - unsigned i, pages; - unsigned long offset; + unsigned long last_map_pfn; - pages = PFN_UP(phys_addr + size) - PFN_DOWN(phys_addr); - offset = phys_addr & ~PAGE_MASK; - phys_addr &= PAGE_MASK; - - if (pages_mapped + pages > MAX_EFI_IO_PAGES) + last_map_pfn = init_memory_mapping(phys_addr, phys_addr + size); + if ((last_map_pfn << PAGE_SHIFT) < phys_addr + size) return NULL; - for (i = 0; i < pages; i++) { - __set_fixmap(FIX_EFI_IO_MAP_FIRST_PAGE - pages_mapped, - phys_addr, PAGE_KERNEL); - phys_addr += PAGE_SIZE; - pages_mapped++; - } - - return (void __iomem *)__fix_to_virt(FIX_EFI_IO_MAP_FIRST_PAGE - \ - (pages_mapped - pages)) + offset; + return (void __iomem *)__va(phys_addr); } diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 388254f69a2a..a00545fe5cdd 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -269,6 +269,8 @@ static void hpet_set_mode(enum clock_event_mode mode, now = hpet_readl(HPET_COUNTER); cmp = now + (unsigned long) delta; cfg = hpet_readl(HPET_Tn_CFG(timer)); + /* Make sure we use edge triggered interrupts */ + cfg &= ~HPET_TN_LEVEL; cfg |= HPET_TN_ENABLE | HPET_TN_PERIODIC | HPET_TN_SETVAL | HPET_TN_32BIT; hpet_writel(cfg, HPET_Tn_CFG(timer)); diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index b0f61f0dcd0a..f2f8540a7f3d 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c @@ -136,7 +136,7 @@ int init_fpu(struct task_struct *tsk) #ifdef CONFIG_X86_32 if (!HAVE_HWFP) { memset(tsk->thread.xstate, 0, xstate_size); - finit(); + finit_task(tsk); set_stopped_child_used_math(tsk); return 0; } diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index e948b28a5a9a..4558dd3918cf 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c @@ -193,6 +193,9 @@ static int __kprobes can_boost(kprobe_opcode_t *opcodes) kprobe_opcode_t opcode; kprobe_opcode_t *orig_opcodes = opcodes; + if (search_exception_tables(opcodes)) + return 0; /* Page fault may occur on this address. */ + retry: if (opcodes - orig_opcodes > MAX_INSN_SIZE - 1) return 0; diff --git a/arch/x86/kernel/olpc.c b/arch/x86/kernel/olpc.c index 7a13fac63a1f..4006c522adc7 100644 --- a/arch/x86/kernel/olpc.c +++ b/arch/x86/kernel/olpc.c @@ -203,7 +203,7 @@ static void __init platform_detect(void) static void __init platform_detect(void) { /* stopgap until OFW support is added to the kernel */ - olpc_platform_info.boardrev = 0xc2; + olpc_platform_info.boardrev = olpc_board(0xc2); } #endif diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index e4c8fb608873..c6520a4e85d4 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -268,6 +268,32 @@ enum paravirt_lazy_mode paravirt_get_lazy_mode(void) return __get_cpu_var(paravirt_lazy_mode); } +void arch_flush_lazy_mmu_mode(void) +{ + preempt_disable(); + + if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU) { + WARN_ON(preempt_count() == 1); + arch_leave_lazy_mmu_mode(); + arch_enter_lazy_mmu_mode(); + } + + preempt_enable(); +} + +void arch_flush_lazy_cpu_mode(void) +{ + preempt_disable(); + + if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU) { + WARN_ON(preempt_count() == 1); + arch_leave_lazy_cpu_mode(); + arch_enter_lazy_cpu_mode(); + } + + preempt_enable(); +} + struct pv_info pv_info = { .name = "bare hardware", .paravirt_enabled = 0, diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index a546f55c77b4..bd4da2af08ae 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -104,9 +104,6 @@ void cpu_idle(void) check_pgt_cache(); rmb(); - if (rcu_pending(cpu)) - rcu_check_callbacks(cpu, 0); - if (cpu_is_offline(cpu)) play_dead(); diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 0a5df5f82fb9..06ca07f6ad86 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -810,12 +810,16 @@ static void ptrace_bts_untrace(struct task_struct *child) static void ptrace_bts_detach(struct task_struct *child) { - if (unlikely(child->bts)) { - ds_release_bts(child->bts); - child->bts = NULL; - - ptrace_bts_free_buffer(child); - } + /* + * Ptrace_detach() races with ptrace_untrace() in case + * the child dies and is reaped by another thread. + * + * We only do the memory accounting at this point and + * leave the buffer deallocation and the bts tracer + * release to ptrace_bts_untrace() which will be called + * later on with tasklist_lock held. + */ + release_locked_buffer(child->bts_buffer, child->bts_size); } #else static inline void ptrace_bts_fork(struct task_struct *tsk) {} @@ -1384,7 +1388,7 @@ void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, #ifdef CONFIG_X86_32 # define IS_IA32 1 #elif defined CONFIG_IA32_EMULATION -# define IS_IA32 test_thread_flag(TIF_IA32) +# define IS_IA32 is_compat_task() #else # define IS_IA32 0 #endif diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 2b46eb41643b..4526b3a75ed2 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -217,6 +217,14 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "HP Compaq"), }, }, + { /* Handle problems with rebooting on Dell XPS710 */ + .callback = set_bios_reboot, + .ident = "Dell XPS710", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "Dell XPS710"), + }, + }, { } }; diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index c461f6d69074..6a8811a69324 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -770,6 +770,9 @@ void __init setup_arch(char **cmdline_p) finish_e820_parsing(); + if (efi_enabled) + efi_init(); + dmi_scan_machine(); dmi_check_system(bad_bios_dmi_table); @@ -789,8 +792,6 @@ void __init setup_arch(char **cmdline_p) insert_resource(&iomem_resource, &data_resource); insert_resource(&iomem_resource, &bss_resource); - if (efi_enabled) - efi_init(); #ifdef CONFIG_X86_32 if (ppro_with_ram_bug()) { diff --git a/arch/x86/kernel/time_64.c b/arch/x86/kernel/time_64.c index e6e695acd725..241ec3923f61 100644 --- a/arch/x86/kernel/time_64.c +++ b/arch/x86/kernel/time_64.c @@ -115,7 +115,7 @@ unsigned long __init calibrate_cpu(void) static struct irqaction irq0 = { .handler = timer_interrupt, - .flags = IRQF_DISABLED | IRQF_IRQPOLL | IRQF_NOBALANCING, + .flags = IRQF_DISABLED | IRQF_IRQPOLL | IRQF_NOBALANCING | IRQF_TIMER, .mask = CPU_MASK_NONE, .name = "timer" }; diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 7932338d7cb3..a9e7548e1790 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -99,6 +99,12 @@ static inline void preempt_conditional_sti(struct pt_regs *regs) local_irq_enable(); } +static inline void conditional_cli(struct pt_regs *regs) +{ + if (regs->flags & X86_EFLAGS_IF) + local_irq_disable(); +} + static inline void preempt_conditional_cli(struct pt_regs *regs) { if (regs->flags & X86_EFLAGS_IF) @@ -626,8 +632,10 @@ clear_dr7: #ifdef CONFIG_X86_32 debug_vm86: + /* reenable preemption: handle_vm86_trap() might sleep */ + dec_preempt_count(); handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, 1); - preempt_conditional_cli(regs); + conditional_cli(regs); return; #endif diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 599e58168631..d5cebb52d45b 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -273,30 +273,43 @@ static unsigned long pit_calibrate_tsc(u32 latch, unsigned long ms, int loopmin) * use the TSC value at the transitions to calculate a pretty * good value for the TSC frequencty. */ -static inline int pit_expect_msb(unsigned char val) +static inline int pit_expect_msb(unsigned char val, u64 *tscp, unsigned long *deltap) { - int count = 0; + int count; + u64 tsc = 0; for (count = 0; count < 50000; count++) { /* Ignore LSB */ inb(0x42); if (inb(0x42) != val) break; + tsc = get_cycles(); } - return count > 50; + *deltap = get_cycles() - tsc; + *tscp = tsc; + + /* + * We require _some_ success, but the quality control + * will be based on the error terms on the TSC values. + */ + return count > 5; } /* - * How many MSB values do we want to see? We aim for a - * 15ms calibration, which assuming a 2us counter read - * error should give us roughly 150 ppm precision for - * the calibration. + * How many MSB values do we want to see? We aim for + * a maximum error rate of 500ppm (in practice the + * real error is much smaller), but refuse to spend + * more than 25ms on it. */ -#define QUICK_PIT_MS 15 -#define QUICK_PIT_ITERATIONS (QUICK_PIT_MS * PIT_TICK_RATE / 1000 / 256) +#define MAX_QUICK_PIT_MS 25 +#define MAX_QUICK_PIT_ITERATIONS (MAX_QUICK_PIT_MS * PIT_TICK_RATE / 1000 / 256) static unsigned long quick_pit_calibrate(void) { + int i; + u64 tsc, delta; + unsigned long d1, d2; + /* Set the Gate high, disable speaker */ outb((inb(0x61) & ~0x02) | 0x01, 0x61); @@ -315,45 +328,52 @@ static unsigned long quick_pit_calibrate(void) outb(0xff, 0x42); outb(0xff, 0x42); - if (pit_expect_msb(0xff)) { - int i; - u64 t1, t2, delta; - unsigned char expect = 0xfe; - - t1 = get_cycles(); - for (i = 0; i < QUICK_PIT_ITERATIONS; i++, expect--) { - if (!pit_expect_msb(expect)) - goto failed; + /* + * The PIT starts counting at the next edge, so we + * need to delay for a microsecond. The easiest way + * to do that is to just read back the 16-bit counter + * once from the PIT. + */ + inb(0x42); + inb(0x42); + + if (pit_expect_msb(0xff, &tsc, &d1)) { + for (i = 1; i <= MAX_QUICK_PIT_ITERATIONS; i++) { + if (!pit_expect_msb(0xff-i, &delta, &d2)) + break; + + /* + * Iterate until the error is less than 500 ppm + */ + delta -= tsc; + if (d1+d2 < delta >> 11) + goto success; } - t2 = get_cycles(); - - /* - * Make sure we can rely on the second TSC timestamp: - */ - if (!pit_expect_msb(expect)) - goto failed; - - /* - * Ok, if we get here, then we've seen the - * MSB of the PIT decrement QUICK_PIT_ITERATIONS - * times, and each MSB had many hits, so we never - * had any sudden jumps. - * - * As a result, we can depend on there not being - * any odd delays anywhere, and the TSC reads are - * reliable. - * - * kHz = ticks / time-in-seconds / 1000; - * kHz = (t2 - t1) / (QPI * 256 / PIT_TICK_RATE) / 1000 - * kHz = ((t2 - t1) * PIT_TICK_RATE) / (QPI * 256 * 1000) - */ - delta = (t2 - t1)*PIT_TICK_RATE; - do_div(delta, QUICK_PIT_ITERATIONS*256*1000); - printk("Fast TSC calibration using PIT\n"); - return delta; } -failed: + printk("Fast TSC calibration failed\n"); return 0; + +success: + /* + * Ok, if we get here, then we've seen the + * MSB of the PIT decrement 'i' times, and the + * error has shrunk to less than 500 ppm. + * + * As a result, we can depend on there not being + * any odd delays anywhere, and the TSC reads are + * reliable (within the error). We also adjust the + * delta to the middle of the error bars, just + * because it looks nicer. + * + * kHz = ticks / time-in-seconds / 1000; + * kHz = (t2 - t1) / (I * 256 / PIT_TICK_RATE) / 1000 + * kHz = ((t2 - t1) * PIT_TICK_RATE) / (I * 256 * 1000) + */ + delta += (long)(d2 - d1)/2; + delta *= PIT_TICK_RATE; + do_div(delta, i*256*1000); + printk("Fast TSC calibration using PIT\n"); + return delta; } /** diff --git a/arch/x86/kernel/vmiclock_32.c b/arch/x86/kernel/vmiclock_32.c index c4c1f9e09402..e5b088fffa40 100644 --- a/arch/x86/kernel/vmiclock_32.c +++ b/arch/x86/kernel/vmiclock_32.c @@ -202,7 +202,7 @@ static irqreturn_t vmi_timer_interrupt(int irq, void *dev_id) static struct irqaction vmi_clock_action = { .name = "vmi-timer", .handler = vmi_timer_interrupt, - .flags = IRQF_DISABLED | IRQF_NOBALANCING, + .flags = IRQF_DISABLED | IRQF_NOBALANCING | IRQF_TIMER, .mask = CPU_MASK_ALL, }; @@ -283,10 +283,13 @@ void __devinit vmi_time_ap_init(void) #endif /** vmi clocksource */ +static struct clocksource clocksource_vmi; static cycle_t read_real_cycles(void) { - return vmi_timer_ops.get_cycle_counter(VMI_CYCLES_REAL); + cycle_t ret = (cycle_t)vmi_timer_ops.get_cycle_counter(VMI_CYCLES_REAL); + return ret >= clocksource_vmi.cycle_last ? + ret : clocksource_vmi.cycle_last; } static struct clocksource clocksource_vmi = { |