diff options
Diffstat (limited to 'arch/i386/kernel/paravirt.c')
-rw-r--r-- | arch/i386/kernel/paravirt.c | 523 |
1 files changed, 141 insertions, 382 deletions
diff --git a/arch/i386/kernel/paravirt.c b/arch/i386/kernel/paravirt.c index 2ec331e03fa9..faab09abca5e 100644 --- a/arch/i386/kernel/paravirt.c +++ b/arch/i386/kernel/paravirt.c @@ -19,7 +19,7 @@ #include <linux/module.h> #include <linux/efi.h> #include <linux/bcd.h> -#include <linux/start_kernel.h> +#include <linux/highmem.h> #include <asm/bug.h> #include <asm/paravirt.h> @@ -35,7 +35,7 @@ #include <asm/timer.h> /* nop stub */ -static void native_nop(void) +void _paravirt_nop(void) { } @@ -54,331 +54,148 @@ char *memory_setup(void) #define DEF_NATIVE(name, code) \ extern const char start_##name[], end_##name[]; \ asm("start_" #name ": " code "; end_" #name ":") -DEF_NATIVE(cli, "cli"); -DEF_NATIVE(sti, "sti"); -DEF_NATIVE(popf, "push %eax; popf"); -DEF_NATIVE(pushf, "pushf; pop %eax"); -DEF_NATIVE(pushf_cli, "pushf; pop %eax; cli"); + +DEF_NATIVE(irq_disable, "cli"); +DEF_NATIVE(irq_enable, "sti"); +DEF_NATIVE(restore_fl, "push %eax; popf"); +DEF_NATIVE(save_fl, "pushf; pop %eax"); DEF_NATIVE(iret, "iret"); -DEF_NATIVE(sti_sysexit, "sti; sysexit"); +DEF_NATIVE(irq_enable_sysexit, "sti; sysexit"); +DEF_NATIVE(read_cr2, "mov %cr2, %eax"); +DEF_NATIVE(write_cr3, "mov %eax, %cr3"); +DEF_NATIVE(read_cr3, "mov %cr3, %eax"); +DEF_NATIVE(clts, "clts"); +DEF_NATIVE(read_tsc, "rdtsc"); -static const struct native_insns -{ - const char *start, *end; -} native_insns[] = { - [PARAVIRT_IRQ_DISABLE] = { start_cli, end_cli }, - [PARAVIRT_IRQ_ENABLE] = { start_sti, end_sti }, - [PARAVIRT_RESTORE_FLAGS] = { start_popf, end_popf }, - [PARAVIRT_SAVE_FLAGS] = { start_pushf, end_pushf }, - [PARAVIRT_SAVE_FLAGS_IRQ_DISABLE] = { start_pushf_cli, end_pushf_cli }, - [PARAVIRT_INTERRUPT_RETURN] = { start_iret, end_iret }, - [PARAVIRT_STI_SYSEXIT] = { start_sti_sysexit, end_sti_sysexit }, -}; +DEF_NATIVE(ud2a, "ud2a"); static unsigned native_patch(u8 type, u16 clobbers, void *insns, unsigned len) { - unsigned int insn_len; - - /* Don't touch it if we don't have a replacement */ - if (type >= ARRAY_SIZE(native_insns) || !native_insns[type].start) - return len; - - insn_len = native_insns[type].end - native_insns[type].start; - - /* Similarly if we can't fit replacement. */ - if (len < insn_len) - return len; + const unsigned char *start, *end; + unsigned ret; + + switch(type) { +#define SITE(x) case PARAVIRT_PATCH(x): start = start_##x; end = end_##x; goto patch_site + SITE(irq_disable); + SITE(irq_enable); + SITE(restore_fl); + SITE(save_fl); + SITE(iret); + SITE(irq_enable_sysexit); + SITE(read_cr2); + SITE(read_cr3); + SITE(write_cr3); + SITE(clts); + SITE(read_tsc); +#undef SITE + + patch_site: + ret = paravirt_patch_insns(insns, len, start, end); + break; - memcpy(insns, native_insns[type].start, insn_len); - return insn_len; -} + case PARAVIRT_PATCH(make_pgd): + case PARAVIRT_PATCH(make_pte): + case PARAVIRT_PATCH(pgd_val): + case PARAVIRT_PATCH(pte_val): +#ifdef CONFIG_X86_PAE + case PARAVIRT_PATCH(make_pmd): + case PARAVIRT_PATCH(pmd_val): +#endif + /* These functions end up returning exactly what + they're passed, in the same registers. */ + ret = paravirt_patch_nop(); + break; -static unsigned long native_get_debugreg(int regno) -{ - unsigned long val = 0; /* Damn you, gcc! */ - - switch (regno) { - case 0: - asm("movl %%db0, %0" :"=r" (val)); break; - case 1: - asm("movl %%db1, %0" :"=r" (val)); break; - case 2: - asm("movl %%db2, %0" :"=r" (val)); break; - case 3: - asm("movl %%db3, %0" :"=r" (val)); break; - case 6: - asm("movl %%db6, %0" :"=r" (val)); break; - case 7: - asm("movl %%db7, %0" :"=r" (val)); break; default: - BUG(); - } - return val; -} - -static void native_set_debugreg(int regno, unsigned long value) -{ - switch (regno) { - case 0: - asm("movl %0,%%db0" : /* no output */ :"r" (value)); - break; - case 1: - asm("movl %0,%%db1" : /* no output */ :"r" (value)); - break; - case 2: - asm("movl %0,%%db2" : /* no output */ :"r" (value)); + ret = paravirt_patch_default(type, clobbers, insns, len); break; - case 3: - asm("movl %0,%%db3" : /* no output */ :"r" (value)); - break; - case 6: - asm("movl %0,%%db6" : /* no output */ :"r" (value)); - break; - case 7: - asm("movl %0,%%db7" : /* no output */ :"r" (value)); - break; - default: - BUG(); } -} - -void init_IRQ(void) -{ - paravirt_ops.init_IRQ(); -} - -static void native_clts(void) -{ - asm volatile ("clts"); -} - -static unsigned long native_read_cr0(void) -{ - unsigned long val; - asm volatile("movl %%cr0,%0\n\t" :"=r" (val)); - return val; -} - -static void native_write_cr0(unsigned long val) -{ - asm volatile("movl %0,%%cr0": :"r" (val)); -} - -static unsigned long native_read_cr2(void) -{ - unsigned long val; - asm volatile("movl %%cr2,%0\n\t" :"=r" (val)); - return val; -} - -static void native_write_cr2(unsigned long val) -{ - asm volatile("movl %0,%%cr2": :"r" (val)); -} - -static unsigned long native_read_cr3(void) -{ - unsigned long val; - asm volatile("movl %%cr3,%0\n\t" :"=r" (val)); - return val; -} - -static void native_write_cr3(unsigned long val) -{ - asm volatile("movl %0,%%cr3": :"r" (val)); -} - -static unsigned long native_read_cr4(void) -{ - unsigned long val; - asm volatile("movl %%cr4,%0\n\t" :"=r" (val)); - return val; -} - -static unsigned long native_read_cr4_safe(void) -{ - unsigned long val; - /* This could fault if %cr4 does not exist */ - asm("1: movl %%cr4, %0 \n" - "2: \n" - ".section __ex_table,\"a\" \n" - ".long 1b,2b \n" - ".previous \n" - : "=r" (val): "0" (0)); - return val; -} - -static void native_write_cr4(unsigned long val) -{ - asm volatile("movl %0,%%cr4": :"r" (val)); -} - -static unsigned long native_save_fl(void) -{ - unsigned long f; - asm volatile("pushfl ; popl %0":"=g" (f): /* no input */); - return f; -} - -static void native_restore_fl(unsigned long f) -{ - asm volatile("pushl %0 ; popfl": /* no output */ - :"g" (f) - :"memory", "cc"); -} - -static void native_irq_disable(void) -{ - asm volatile("cli": : :"memory"); -} - -static void native_irq_enable(void) -{ - asm volatile("sti": : :"memory"); -} - -static void native_safe_halt(void) -{ - asm volatile("sti; hlt": : :"memory"); -} -static void native_halt(void) -{ - asm volatile("hlt": : :"memory"); + return ret; } -static void native_wbinvd(void) +unsigned paravirt_patch_nop(void) { - asm volatile("wbinvd": : :"memory"); + return 0; } -static unsigned long long native_read_msr(unsigned int msr, int *err) +unsigned paravirt_patch_ignore(unsigned len) { - unsigned long long val; - - asm volatile("2: rdmsr ; xorl %0,%0\n" - "1:\n\t" - ".section .fixup,\"ax\"\n\t" - "3: movl %3,%0 ; jmp 1b\n\t" - ".previous\n\t" - ".section __ex_table,\"a\"\n" - " .align 4\n\t" - " .long 2b,3b\n\t" - ".previous" - : "=r" (*err), "=A" (val) - : "c" (msr), "i" (-EFAULT)); - - return val; + return len; } -static int native_write_msr(unsigned int msr, unsigned long long val) +unsigned paravirt_patch_call(void *target, u16 tgt_clobbers, + void *site, u16 site_clobbers, + unsigned len) { - int err; - asm volatile("2: wrmsr ; xorl %0,%0\n" - "1:\n\t" - ".section .fixup,\"ax\"\n\t" - "3: movl %4,%0 ; jmp 1b\n\t" - ".previous\n\t" - ".section __ex_table,\"a\"\n" - " .align 4\n\t" - " .long 2b,3b\n\t" - ".previous" - : "=a" (err) - : "c" (msr), "0" ((u32)val), "d" ((u32)(val>>32)), - "i" (-EFAULT)); - return err; -} + unsigned char *call = site; + unsigned long delta = (unsigned long)target - (unsigned long)(call+5); -static unsigned long long native_read_tsc(void) -{ - unsigned long long val; - asm volatile("rdtsc" : "=A" (val)); - return val; -} + if (tgt_clobbers & ~site_clobbers) + return len; /* target would clobber too much for this site */ + if (len < 5) + return len; /* call too long for patch site */ -static unsigned long long native_read_pmc(void) -{ - unsigned long long val; - asm volatile("rdpmc" : "=A" (val)); - return val; -} + *call++ = 0xe8; /* call */ + *(unsigned long *)call = delta; -static void native_load_tr_desc(void) -{ - asm volatile("ltr %w0"::"q" (GDT_ENTRY_TSS*8)); + return 5; } -static void native_load_gdt(const struct Xgt_desc_struct *dtr) +unsigned paravirt_patch_jmp(void *target, void *site, unsigned len) { - asm volatile("lgdt %0"::"m" (*dtr)); -} + unsigned char *jmp = site; + unsigned long delta = (unsigned long)target - (unsigned long)(jmp+5); -static void native_load_idt(const struct Xgt_desc_struct *dtr) -{ - asm volatile("lidt %0"::"m" (*dtr)); -} + if (len < 5) + return len; /* call too long for patch site */ -static void native_store_gdt(struct Xgt_desc_struct *dtr) -{ - asm ("sgdt %0":"=m" (*dtr)); -} + *jmp++ = 0xe9; /* jmp */ + *(unsigned long *)jmp = delta; -static void native_store_idt(struct Xgt_desc_struct *dtr) -{ - asm ("sidt %0":"=m" (*dtr)); + return 5; } -static unsigned long native_store_tr(void) +unsigned paravirt_patch_default(u8 type, u16 clobbers, void *site, unsigned len) { - unsigned long tr; - asm ("str %0":"=r" (tr)); - return tr; -} + void *opfunc = *((void **)¶virt_ops + type); + unsigned ret; -static void native_load_tls(struct thread_struct *t, unsigned int cpu) -{ -#define C(i) get_cpu_gdt_table(cpu)[GDT_ENTRY_TLS_MIN + i] = t->tls_array[i] - C(0); C(1); C(2); -#undef C -} + if (opfunc == NULL) + /* If there's no function, patch it with a ud2a (BUG) */ + ret = paravirt_patch_insns(site, len, start_ud2a, end_ud2a); + else if (opfunc == paravirt_nop) + /* If the operation is a nop, then nop the callsite */ + ret = paravirt_patch_nop(); + else if (type == PARAVIRT_PATCH(iret) || + type == PARAVIRT_PATCH(irq_enable_sysexit)) + /* If operation requires a jmp, then jmp */ + ret = paravirt_patch_jmp(opfunc, site, len); + else + /* Otherwise call the function; assume target could + clobber any caller-save reg */ + ret = paravirt_patch_call(opfunc, CLBR_ANY, + site, clobbers, len); -static inline void native_write_dt_entry(void *dt, int entry, u32 entry_low, u32 entry_high) -{ - u32 *lp = (u32 *)((char *)dt + entry*8); - lp[0] = entry_low; - lp[1] = entry_high; + return ret; } -static void native_write_ldt_entry(void *dt, int entrynum, u32 low, u32 high) +unsigned paravirt_patch_insns(void *site, unsigned len, + const char *start, const char *end) { - native_write_dt_entry(dt, entrynum, low, high); -} + unsigned insn_len = end - start; -static void native_write_gdt_entry(void *dt, int entrynum, u32 low, u32 high) -{ - native_write_dt_entry(dt, entrynum, low, high); -} - -static void native_write_idt_entry(void *dt, int entrynum, u32 low, u32 high) -{ - native_write_dt_entry(dt, entrynum, low, high); -} + if (insn_len > len || start == NULL) + insn_len = len; + else + memcpy(site, start, insn_len); -static void native_load_esp0(struct tss_struct *tss, - struct thread_struct *thread) -{ - tss->esp0 = thread->esp0; - - /* This can only happen when SEP is enabled, no need to test "SEP"arately */ - if (unlikely(tss->ss1 != thread->sysenter_cs)) { - tss->ss1 = thread->sysenter_cs; - wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0); - } + return insn_len; } -static void native_io_delay(void) +void init_IRQ(void) { - asm volatile("outb %al,$0x80"); + paravirt_ops.init_IRQ(); } static void native_flush_tlb(void) @@ -395,83 +212,11 @@ static void native_flush_tlb_global(void) __native_flush_tlb_global(); } -static void native_flush_tlb_single(u32 addr) +static void native_flush_tlb_single(unsigned long addr) { __native_flush_tlb_single(addr); } -#ifndef CONFIG_X86_PAE -static void native_set_pte(pte_t *ptep, pte_t pteval) -{ - *ptep = pteval; -} - -static void native_set_pte_at(struct mm_struct *mm, u32 addr, pte_t *ptep, pte_t pteval) -{ - *ptep = pteval; -} - -static void native_set_pmd(pmd_t *pmdp, pmd_t pmdval) -{ - *pmdp = pmdval; -} - -#else /* CONFIG_X86_PAE */ - -static void native_set_pte(pte_t *ptep, pte_t pte) -{ - ptep->pte_high = pte.pte_high; - smp_wmb(); - ptep->pte_low = pte.pte_low; -} - -static void native_set_pte_at(struct mm_struct *mm, u32 addr, pte_t *ptep, pte_t pte) -{ - ptep->pte_high = pte.pte_high; - smp_wmb(); - ptep->pte_low = pte.pte_low; -} - -static void native_set_pte_present(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte) -{ - ptep->pte_low = 0; - smp_wmb(); - ptep->pte_high = pte.pte_high; - smp_wmb(); - ptep->pte_low = pte.pte_low; -} - -static void native_set_pte_atomic(pte_t *ptep, pte_t pteval) -{ - set_64bit((unsigned long long *)ptep,pte_val(pteval)); -} - -static void native_set_pmd(pmd_t *pmdp, pmd_t pmdval) -{ - set_64bit((unsigned long long *)pmdp,pmd_val(pmdval)); -} - -static void native_set_pud(pud_t *pudp, pud_t pudval) -{ - *pudp = pudval; -} - -static void native_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) -{ - ptep->pte_low = 0; - smp_wmb(); - ptep->pte_high = 0; -} - -static void native_pmd_clear(pmd_t *pmd) -{ - u32 *tmp = (u32 *)pmd; - *tmp = 0; - smp_wmb(); - *(tmp + 1) = 0; -} -#endif /* CONFIG_X86_PAE */ - /* These are in entry.S */ extern void native_iret(void); extern void native_irq_enable_sysexit(void); @@ -487,10 +232,11 @@ struct paravirt_ops paravirt_ops = { .name = "bare hardware", .paravirt_enabled = 0, .kernel_rpl = 0, + .shared_kernel_pmd = 1, /* Only used when CONFIG_X86_PAE is set */ .patch = native_patch, .banner = default_banner, - .arch_setup = native_nop, + .arch_setup = paravirt_nop, .memory_setup = machine_specific_memory_setup, .get_wallclock = native_get_wallclock, .set_wallclock = native_set_wallclock, @@ -517,8 +263,8 @@ struct paravirt_ops paravirt_ops = { .safe_halt = native_safe_halt, .halt = native_halt, .wbinvd = native_wbinvd, - .read_msr = native_read_msr, - .write_msr = native_write_msr, + .read_msr = native_read_msr_safe, + .write_msr = native_write_msr_safe, .read_tsc = native_read_tsc, .read_pmc = native_read_pmc, .get_scheduled_cycles = native_read_tsc, @@ -531,9 +277,9 @@ struct paravirt_ops paravirt_ops = { .store_idt = native_store_idt, .store_tr = native_store_tr, .load_tls = native_load_tls, - .write_ldt_entry = native_write_ldt_entry, - .write_gdt_entry = native_write_gdt_entry, - .write_idt_entry = native_write_idt_entry, + .write_ldt_entry = write_dt_entry, + .write_gdt_entry = write_dt_entry, + .write_idt_entry = write_dt_entry, .load_esp0 = native_load_esp0, .set_iopl_mask = native_set_iopl_mask, @@ -545,44 +291,57 @@ struct paravirt_ops paravirt_ops = { .apic_read = native_apic_read, .setup_boot_clock = setup_boot_APIC_clock, .setup_secondary_clock = setup_secondary_APIC_clock, + .startup_ipi_hook = paravirt_nop, #endif - .set_lazy_mode = (void *)native_nop, + .set_lazy_mode = paravirt_nop, + + .pagetable_setup_start = native_pagetable_setup_start, + .pagetable_setup_done = native_pagetable_setup_done, .flush_tlb_user = native_flush_tlb, .flush_tlb_kernel = native_flush_tlb_global, .flush_tlb_single = native_flush_tlb_single, + .flush_tlb_others = native_flush_tlb_others, - .map_pt_hook = (void *)native_nop, - - .alloc_pt = (void *)native_nop, - .alloc_pd = (void *)native_nop, - .alloc_pd_clone = (void *)native_nop, - .release_pt = (void *)native_nop, - .release_pd = (void *)native_nop, + .alloc_pt = paravirt_nop, + .alloc_pd = paravirt_nop, + .alloc_pd_clone = paravirt_nop, + .release_pt = paravirt_nop, + .release_pd = paravirt_nop, .set_pte = native_set_pte, .set_pte_at = native_set_pte_at, .set_pmd = native_set_pmd, - .pte_update = (void *)native_nop, - .pte_update_defer = (void *)native_nop, + .pte_update = paravirt_nop, + .pte_update_defer = paravirt_nop, + +#ifdef CONFIG_HIGHPTE + .kmap_atomic_pte = kmap_atomic, +#endif + #ifdef CONFIG_X86_PAE .set_pte_atomic = native_set_pte_atomic, .set_pte_present = native_set_pte_present, .set_pud = native_set_pud, .pte_clear = native_pte_clear, .pmd_clear = native_pmd_clear, + + .pmd_val = native_pmd_val, + .make_pmd = native_make_pmd, #endif + .pte_val = native_pte_val, + .pgd_val = native_pgd_val, + + .make_pte = native_make_pte, + .make_pgd = native_make_pgd, + .irq_enable_sysexit = native_irq_enable_sysexit, .iret = native_iret, - .startup_ipi_hook = (void *)native_nop, + .dup_mmap = paravirt_nop, + .exit_mmap = paravirt_nop, + .activate_mm = paravirt_nop, }; -/* - * NOTE: CONFIG_PARAVIRT is experimental and the paravirt_ops - * semantics are subject to change. Hence we only do this - * internal-only export of this, until it gets sorted out and - * all lowlevel CPU ops used by modules are separately exported. - */ -EXPORT_SYMBOL_GPL(paravirt_ops); +EXPORT_SYMBOL(paravirt_ops); |