From 90a0a06aa81692028864c21f981905fda46b1208 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Wed, 2 May 2007 19:27:10 +0200 Subject: [PATCH] i386: rationalize paravirt wrappers paravirt.c used to implement native versions of all low-level functions. Far cleaner is to have the native versions exposed in the headers and as inline native_XXX, and if !CONFIG_PARAVIRT, then simply #define XXX native_XXX. There are several nice side effects: 1) write_dt_entry() now takes the correct "struct Xgt_desc_struct *" not "void *". 2) load_TLS is reintroduced to the for loop, not manually unrolled with a #error in case the bounds ever change. 3) Macros become inlines, with type checking. 4) Access to the native versions is trivial for KVM, lguest, Xen and others who might want it. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Rusty Russell Signed-off-by: Andi Kleen Cc: Andi Kleen Cc: Avi Kivity Signed-off-by: Andrew Morton --- include/asm-i386/paravirt.h | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'include/asm-i386/paravirt.h') diff --git a/include/asm-i386/paravirt.h b/include/asm-i386/paravirt.h index e63f1e444fcf..32acebce9ae2 100644 --- a/include/asm-i386/paravirt.h +++ b/include/asm-i386/paravirt.h @@ -29,6 +29,7 @@ struct thread_struct; struct Xgt_desc_struct; struct tss_struct; struct mm_struct; +struct desc_struct; struct paravirt_ops { unsigned int kernel_rpl; @@ -105,14 +106,13 @@ struct paravirt_ops void (*set_ldt)(const void *desc, unsigned entries); unsigned long (*store_tr)(void); void (*load_tls)(struct thread_struct *t, unsigned int cpu); - void (*write_ldt_entry)(void *dt, int entrynum, - u32 low, u32 high); - void (*write_gdt_entry)(void *dt, int entrynum, - u32 low, u32 high); - void (*write_idt_entry)(void *dt, int entrynum, - u32 low, u32 high); - void (*load_esp0)(struct tss_struct *tss, - struct thread_struct *thread); + void (*write_ldt_entry)(struct desc_struct *, + int entrynum, u32 low, u32 high); + void (*write_gdt_entry)(struct desc_struct *, + int entrynum, u32 low, u32 high); + void (*write_idt_entry)(struct desc_struct *, + int entrynum, u32 low, u32 high); + void (*load_esp0)(struct tss_struct *tss, struct thread_struct *t); void (*set_iopl_mask)(unsigned mask); @@ -232,6 +232,7 @@ static inline void halt(void) #define get_kernel_rpl() (paravirt_ops.kernel_rpl) +/* These should all do BUG_ON(_err), but our headers are too tangled. */ #define rdmsr(msr,val1,val2) do { \ int _err; \ u64 _l = paravirt_ops.read_msr(msr,&_err); \ -- cgit v1.2.1 From 45876233605c268e929a7875081e129debe34bdc Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 2 May 2007 19:27:13 +0200 Subject: [PATCH] i386: PARAVIRT: use paravirt_nop to consistently mark no-op operations Add a _paravirt_nop function for use as a stub for no-op operations, and paravirt_nop #defined void * version to make using it easier (since all its uses are as a void *). This is useful to allow the patcher to automatically identify noop operations so it can simply nop out the callsite. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Andi Kleen Acked-by: Ingo Molnar [mingo] but only as a cleanup of the current open-coded (void *) casts. My problem with this is that it loses the types. Not that there is much to check for, but still, this adds some assumptions about how function calls look like --- include/asm-i386/paravirt.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/asm-i386/paravirt.h') diff --git a/include/asm-i386/paravirt.h b/include/asm-i386/paravirt.h index 32acebce9ae2..f0bdaea6235d 100644 --- a/include/asm-i386/paravirt.h +++ b/include/asm-i386/paravirt.h @@ -434,6 +434,9 @@ static inline void pmd_clear(pmd_t *pmdp) #define arch_leave_lazy_mmu_mode() paravirt_ops.set_lazy_mode(PARAVIRT_LAZY_NONE) #define arch_flush_lazy_mmu_mode() paravirt_ops.set_lazy_mode(PARAVIRT_LAZY_FLUSH) +void _paravirt_nop(void); +#define paravirt_nop ((void *)_paravirt_nop) + /* These all sit in the .parainstructions section to tell us what to patch. */ struct paravirt_patch { u8 *instr; /* original instructions */ -- cgit v1.2.1 From 3dc494e86d1c93afd4c66385f270899dbfae483d Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 2 May 2007 19:27:13 +0200 Subject: [PATCH] i386: PARAVIRT: Add pagetable accessors to pack and unpack pagetable entries Add a set of accessors to pack, unpack and modify page table entries (at all levels). This allows a paravirt implementation to control the contents of pgd/pmd/pte entries. For example, Xen uses this to convert the (pseudo-)physical address into a machine address when populating a pagetable entry, and converting back to pphys address when an entry is read. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Andi Kleen Acked-by: Ingo Molnar --- include/asm-i386/paravirt.h | 52 ++++++++++++++++++++++++++++++++++----------- 1 file changed, 40 insertions(+), 12 deletions(-) (limited to 'include/asm-i386/paravirt.h') diff --git a/include/asm-i386/paravirt.h b/include/asm-i386/paravirt.h index f0bdaea6235d..0aacb13bb929 100644 --- a/include/asm-i386/paravirt.h +++ b/include/asm-i386/paravirt.h @@ -2,7 +2,6 @@ #define __ASM_PARAVIRT_H /* Various instructions on x86 need to be replaced for * para-virtualization: those hooks are defined here. */ -#include #include #include @@ -25,6 +24,8 @@ #define CLBR_ANY 0x7 #ifndef __ASSEMBLY__ +#include + struct thread_struct; struct Xgt_desc_struct; struct tss_struct; @@ -55,11 +56,6 @@ struct paravirt_ops int (*set_wallclock)(unsigned long); void (*time_init)(void); - /* All the function pointers here are declared as "fastcall" - so that we get a specific register-based calling - convention. This makes it easier to implement inline - assembler replacements. */ - void (*cpuid)(unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx); @@ -139,16 +135,33 @@ struct paravirt_ops void (*release_pd)(u32 pfn); void (*set_pte)(pte_t *ptep, pte_t pteval); - void (*set_pte_at)(struct mm_struct *mm, u32 addr, pte_t *ptep, pte_t pteval); + void (*set_pte_at)(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pteval); void (*set_pmd)(pmd_t *pmdp, pmd_t pmdval); - void (*pte_update)(struct mm_struct *mm, u32 addr, pte_t *ptep); - void (*pte_update_defer)(struct mm_struct *mm, u32 addr, pte_t *ptep); + void (*pte_update)(struct mm_struct *mm, unsigned long addr, pte_t *ptep); + void (*pte_update_defer)(struct mm_struct *mm, unsigned long addr, pte_t *ptep); + + pte_t (*ptep_get_and_clear)(pte_t *ptep); + #ifdef CONFIG_X86_PAE void (*set_pte_atomic)(pte_t *ptep, pte_t pteval); - void (*set_pte_present)(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte); + void (*set_pte_present)(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte); void (*set_pud)(pud_t *pudp, pud_t pudval); - void (*pte_clear)(struct mm_struct *mm, unsigned long addr, pte_t *ptep); + void (*pte_clear)(struct mm_struct *mm, unsigned long addr, pte_t *ptep); void (*pmd_clear)(pmd_t *pmdp); + + unsigned long long (*pte_val)(pte_t); + unsigned long long (*pmd_val)(pmd_t); + unsigned long long (*pgd_val)(pgd_t); + + pte_t (*make_pte)(unsigned long long pte); + pmd_t (*make_pmd)(unsigned long long pmd); + pgd_t (*make_pgd)(unsigned long long pgd); +#else + unsigned long (*pte_val)(pte_t); + unsigned long (*pgd_val)(pgd_t); + + pte_t (*make_pte)(unsigned long pte); + pgd_t (*make_pgd)(unsigned long pgd); #endif void (*set_lazy_mode)(int mode); @@ -219,6 +232,8 @@ static inline void __cpuid(unsigned int *eax, unsigned int *ebx, #define read_cr4_safe(x) paravirt_ops.read_cr4_safe() #define write_cr4(x) paravirt_ops.write_cr4(x) +#define raw_ptep_get_and_clear(xp) (paravirt_ops.ptep_get_and_clear(xp)) + static inline void raw_safe_halt(void) { paravirt_ops.safe_halt(); @@ -304,6 +319,17 @@ static inline void halt(void) (paravirt_ops.write_idt_entry((dt), (entry), (low), (high))) #define set_iopl_mask(mask) (paravirt_ops.set_iopl_mask(mask)) +#define __pte(x) paravirt_ops.make_pte(x) +#define __pgd(x) paravirt_ops.make_pgd(x) + +#define pte_val(x) paravirt_ops.pte_val(x) +#define pgd_val(x) paravirt_ops.pgd_val(x) + +#ifdef CONFIG_X86_PAE +#define __pmd(x) paravirt_ops.make_pmd(x) +#define pmd_val(x) paravirt_ops.pmd_val(x) +#endif + /* The paravirtualized I/O functions */ static inline void slow_down_io(void) { paravirt_ops.io_delay(); @@ -344,6 +370,7 @@ static inline void setup_secondary_clock(void) } #endif + #ifdef CONFIG_SMP static inline void startup_ipi_hook(int phys_apicid, unsigned long start_eip, unsigned long start_esp) @@ -371,7 +398,8 @@ static inline void set_pte(pte_t *ptep, pte_t pteval) paravirt_ops.set_pte(ptep, pteval); } -static inline void set_pte_at(struct mm_struct *mm, u32 addr, pte_t *ptep, pte_t pteval) +static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pteval) { paravirt_ops.set_pte_at(mm, addr, ptep, pteval); } -- cgit v1.2.1 From b239fb2501117bf3aeb4dd6926edd855be92333d Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 2 May 2007 19:27:13 +0200 Subject: [PATCH] i386: PARAVIRT: Hooks to set up initial pagetable This patch introduces paravirt_ops hooks to control how the kernel's initial pagetable is set up. In the case of a native boot, the very early bootstrap code creates a simple non-PAE pagetable to map the kernel and physical memory. When the VM subsystem is initialized, it creates a proper pagetable which respects the PAE mode, large pages, etc. When booting under a hypervisor, there are many possibilities for what paging environment the hypervisor establishes for the guest kernel, so the constructon of the kernel's pagetable depends on the hypervisor. In the case of Xen, the hypervisor boots the kernel with a fully constructed pagetable, which is already using PAE if necessary. Also, Xen requires particular care when constructing pagetables to make sure all pagetables are always mapped read-only. In order to make this easier, kernel's initial pagetable construction has been changed to only allocate and initialize a pagetable page if there's no page already present in the pagetable. This allows the Xen paravirt backend to make a copy of the hypervisor-provided pagetable, allowing the kernel to establish any more mappings it needs while keeping the existing ones. A slightly subtle point which is worth highlighting here is that Xen requires all kernel mappings to share the same pte_t pages between all pagetables, so that updating a kernel page's mapping in one pagetable is reflected in all other pagetables. This makes it possible to allocate a page and attach it to a pagetable without having to explicitly enumerate that page's mapping in all pagetables. And: +From: "Eric W. Biederman" If we don't set the leaf page table entries it is quite possible that will inherit and incorrect page table entry from the initial boot page table setup in head.S. So we need to redo the effort here, so we pick up PSE, PGE and the like. Hypervisors like Xen require that their page tables be read-only, which is slightly incompatible with our low identity mappings, however I discussed this with Jeremy he has modified the Xen early set_pte function to avoid problems in this area. Signed-off-by: Eric W. Biederman Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Andi Kleen Acked-by: William Irwin Cc: Ingo Molnar --- include/asm-i386/paravirt.h | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) (limited to 'include/asm-i386/paravirt.h') diff --git a/include/asm-i386/paravirt.h b/include/asm-i386/paravirt.h index 0aacb13bb929..c49b44cdd8ee 100644 --- a/include/asm-i386/paravirt.h +++ b/include/asm-i386/paravirt.h @@ -2,10 +2,11 @@ #define __ASM_PARAVIRT_H /* Various instructions on x86 need to be replaced for * para-virtualization: those hooks are defined here. */ + +#ifdef CONFIG_PARAVIRT #include #include -#ifdef CONFIG_PARAVIRT /* These are the most performance critical ops, so we want to be able to patch * callers */ #define PARAVIRT_IRQ_DISABLE 0 @@ -50,6 +51,9 @@ struct paravirt_ops char *(*memory_setup)(void); void (*init_IRQ)(void); + void (*pagetable_setup_start)(pgd_t *pgd_base); + void (*pagetable_setup_done)(pgd_t *pgd_base); + void (*banner)(void); unsigned long (*get_wallclock)(void); @@ -370,6 +374,17 @@ static inline void setup_secondary_clock(void) } #endif +static inline void paravirt_pagetable_setup_start(pgd_t *base) +{ + if (paravirt_ops.pagetable_setup_start) + (*paravirt_ops.pagetable_setup_start)(base); +} + +static inline void paravirt_pagetable_setup_done(pgd_t *base) +{ + if (paravirt_ops.pagetable_setup_done) + (*paravirt_ops.pagetable_setup_done)(base); +} #ifdef CONFIG_SMP static inline void startup_ipi_hook(int phys_apicid, unsigned long start_eip, -- cgit v1.2.1 From 5311ab62cdc7788784971ed816ce85e926f3e994 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 2 May 2007 19:27:13 +0200 Subject: [PATCH] i386: PARAVIRT: Allow paravirt backend to choose kernel PMD sharing Normally when running in PAE mode, the 4th PMD maps the kernel address space, which can be shared among all processes (since they all need the same kernel mappings). Xen, however, does not allow guests to have the kernel pmd shared between page tables, so parameterize pgtable.c to allow both modes of operation. There are several side-effects of this. One is that vmalloc will update the kernel address space mappings, and those updates need to be propagated into all processes if the kernel mappings are not intrinsically shared. In the non-PAE case, this is done by maintaining a pgd_list of all processes; this list is used when all process pagetables must be updated. pgd_list is threaded via otherwise unused entries in the page structure for the pgd, which means that the pgd must be page-sized for this to work. Normally the PAE pgd is only 4x64 byte entries large, but Xen requires the PAE pgd to page aligned anyway, so this patch forces the pgd to be page aligned+sized when the kernel pmd is unshared, to accomodate both these requirements. Also, since there may be several distinct kernel pmds (if the user/kernel split is below 3G), there's no point in allocating them from a slab cache; they're just allocated with get_free_page and initialized appropriately. (Of course the could be cached if there is just a single kernel pmd - which is the default with a 3G user/kernel split - but it doesn't seem worthwhile to add yet another case into this code). [ Many thanks to wli for review comments. ] Signed-off-by: Jeremy Fitzhardinge Signed-off-by: William Lee Irwin III Signed-off-by: Andi Kleen Cc: Zachary Amsden Cc: Christoph Lameter Acked-by: Ingo Molnar Signed-off-by: Andrew Morton --- include/asm-i386/paravirt.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/asm-i386/paravirt.h') diff --git a/include/asm-i386/paravirt.h b/include/asm-i386/paravirt.h index c49b44cdd8ee..f93599dc7756 100644 --- a/include/asm-i386/paravirt.h +++ b/include/asm-i386/paravirt.h @@ -35,6 +35,7 @@ struct desc_struct; struct paravirt_ops { unsigned int kernel_rpl; + int shared_kernel_pmd; int paravirt_enabled; const char *name; -- cgit v1.2.1 From d6dd61c831226f9cd7750885da04d360d6455101 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 2 May 2007 19:27:14 +0200 Subject: [PATCH] x86: PARAVIRT: add hooks to intercept mm creation and destruction Add hooks to allow a paravirt implementation to track the lifetime of an mm. Paravirtualization requires three hooks, but only two are needed in common code. They are: arch_dup_mmap, which is called when a new mmap is created at fork arch_exit_mmap, which is called when the last process reference to an mm is dropped, which typically happens on exit and exec. The third hook is activate_mm, which is called from the arch-specific activate_mm() macro/function, and so doesn't need stub versions for other architectures. It's called when an mm is first used. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Andi Kleen Cc: linux-arch@vger.kernel.org Cc: James Bottomley Acked-by: Ingo Molnar --- include/asm-i386/paravirt.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'include/asm-i386/paravirt.h') diff --git a/include/asm-i386/paravirt.h b/include/asm-i386/paravirt.h index f93599dc7756..61c03f1e0c29 100644 --- a/include/asm-i386/paravirt.h +++ b/include/asm-i386/paravirt.h @@ -119,6 +119,12 @@ struct paravirt_ops void (*io_delay)(void); + void (*activate_mm)(struct mm_struct *prev, + struct mm_struct *next); + void (*dup_mmap)(struct mm_struct *oldmm, + struct mm_struct *mm); + void (*exit_mmap)(struct mm_struct *mm); + #ifdef CONFIG_X86_LOCAL_APIC void (*apic_write)(unsigned long reg, unsigned long v); void (*apic_write_atomic)(unsigned long reg, unsigned long v); @@ -395,6 +401,23 @@ static inline void startup_ipi_hook(int phys_apicid, unsigned long start_eip, } #endif +static inline void paravirt_activate_mm(struct mm_struct *prev, + struct mm_struct *next) +{ + paravirt_ops.activate_mm(prev, next); +} + +static inline void arch_dup_mmap(struct mm_struct *oldmm, + struct mm_struct *mm) +{ + paravirt_ops.dup_mmap(oldmm, mm); +} + +static inline void arch_exit_mmap(struct mm_struct *mm) +{ + paravirt_ops.exit_mmap(mm); +} + #define __flush_tlb() paravirt_ops.flush_tlb_user() #define __flush_tlb_global() paravirt_ops.flush_tlb_kernel() #define __flush_tlb_single(addr) paravirt_ops.flush_tlb_single(addr) -- cgit v1.2.1 From 98de032b681d8a7532d44dfc66aa5c0c1c755a9d Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 2 May 2007 19:27:14 +0200 Subject: [PATCH] i386: PARAVIRT: rename struct paravirt_patch to paravirt_patch_site for clarity Rename struct paravirt_patch to paravirt_patch_site, so that it clearly refers to a callsite, and not the patch which may be applied to that callsite. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Andi Kleen Cc: Rusty Russell Cc: Zachary Amsden --- include/asm-i386/paravirt.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/asm-i386/paravirt.h') diff --git a/include/asm-i386/paravirt.h b/include/asm-i386/paravirt.h index 61c03f1e0c29..b4cc2fc4031e 100644 --- a/include/asm-i386/paravirt.h +++ b/include/asm-i386/paravirt.h @@ -505,13 +505,16 @@ void _paravirt_nop(void); #define paravirt_nop ((void *)_paravirt_nop) /* These all sit in the .parainstructions section to tell us what to patch. */ -struct paravirt_patch { +struct paravirt_patch_site { u8 *instr; /* original instructions */ u8 instrtype; /* type of this instruction */ u8 len; /* length of original instruction */ u16 clobbers; /* what registers you may clobber */ }; +extern struct paravirt_patch_site __parainstructions[], + __parainstructions_end[]; + #define paravirt_alt(insn_string, typenum, clobber) \ "771:\n\t" insn_string "\n" "772:\n" \ ".pushsection .parainstructions,\"a\"\n" \ -- cgit v1.2.1 From d582203578a1f3d408e27bb9042e8635954cd320 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 2 May 2007 19:27:14 +0200 Subject: [PATCH] i386: PARAVIRT: Use patch site IDs computed from offset in paravirt_ops structure Use patch type identifiers derived from the offset of the operation in the paravirt_ops structure. This avoids having to maintain a separate enum for patch site types. Also, since the identifier is derived from the offset into paravirt_ops, the offset can be derived from the identifier. This is used to remove replicated information in the various callsite macros, which has been a source of bugs in the past. This patch also drops the fused save_fl+cli operation, which doesn't really add much and makes things more complex - specifically because it breaks the 1:1 relationship between identifiers and offsets. If this operation turns out to be particularly beneficial, then the right answer is to define a new entrypoint for it. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Andi Kleen Cc: Rusty Russell Cc: Zachary Amsden --- include/asm-i386/paravirt.h | 177 +++++++++++++++++++++++--------------------- 1 file changed, 92 insertions(+), 85 deletions(-) (limited to 'include/asm-i386/paravirt.h') diff --git a/include/asm-i386/paravirt.h b/include/asm-i386/paravirt.h index b4cc2fc4031e..1dbc01f4ed4d 100644 --- a/include/asm-i386/paravirt.h +++ b/include/asm-i386/paravirt.h @@ -4,19 +4,8 @@ * para-virtualization: those hooks are defined here. */ #ifdef CONFIG_PARAVIRT -#include #include -/* These are the most performance critical ops, so we want to be able to patch - * callers */ -#define PARAVIRT_IRQ_DISABLE 0 -#define PARAVIRT_IRQ_ENABLE 1 -#define PARAVIRT_RESTORE_FLAGS 2 -#define PARAVIRT_SAVE_FLAGS 3 -#define PARAVIRT_SAVE_FLAGS_IRQ_DISABLE 4 -#define PARAVIRT_INTERRUPT_RETURN 5 -#define PARAVIRT_STI_SYSEXIT 6 - /* Bitmask of what can be clobbered: usually at least eax. */ #define CLBR_NONE 0x0 #define CLBR_EAX 0x1 @@ -191,6 +180,28 @@ struct paravirt_ops extern struct paravirt_ops paravirt_ops; +#define PARAVIRT_PATCH(x) \ + (offsetof(struct paravirt_ops, x) / sizeof(void *)) + +#define paravirt_type(type) \ + [paravirt_typenum] "i" (PARAVIRT_PATCH(type)) +#define paravirt_clobber(clobber) \ + [paravirt_clobber] "i" (clobber) + +#define PARAVIRT_CALL "call *paravirt_ops+%c[paravirt_typenum]*4;" + +#define _paravirt_alt(insn_string, type, clobber) \ + "771:\n\t" insn_string "\n" "772:\n" \ + ".pushsection .parainstructions,\"a\"\n" \ + " .long 771b\n" \ + " .byte " type "\n" \ + " .byte 772b-771b\n" \ + " .short " clobber "\n" \ + ".popsection\n" + +#define paravirt_alt(insn_string) \ + _paravirt_alt(insn_string, "%c[paravirt_typenum]", "%c[paravirt_clobber]") + #define paravirt_enabled() (paravirt_ops.paravirt_enabled) static inline void load_esp0(struct tss_struct *tss, @@ -515,93 +526,89 @@ struct paravirt_patch_site { extern struct paravirt_patch_site __parainstructions[], __parainstructions_end[]; -#define paravirt_alt(insn_string, typenum, clobber) \ - "771:\n\t" insn_string "\n" "772:\n" \ - ".pushsection .parainstructions,\"a\"\n" \ - " .long 771b\n" \ - " .byte " __stringify(typenum) "\n" \ - " .byte 772b-771b\n" \ - " .short " __stringify(clobber) "\n" \ - ".popsection" - static inline unsigned long __raw_local_save_flags(void) { unsigned long f; - __asm__ __volatile__(paravirt_alt( "pushl %%ecx; pushl %%edx;" - "call *%1;" - "popl %%edx; popl %%ecx", - PARAVIRT_SAVE_FLAGS, CLBR_NONE) - : "=a"(f): "m"(paravirt_ops.save_fl) - : "memory", "cc"); + asm volatile(paravirt_alt("pushl %%ecx; pushl %%edx;" + PARAVIRT_CALL + "popl %%edx; popl %%ecx") + : "=a"(f) + : paravirt_type(save_fl), + paravirt_clobber(CLBR_NONE) + : "memory", "cc"); return f; } static inline void raw_local_irq_restore(unsigned long f) { - __asm__ __volatile__(paravirt_alt( "pushl %%ecx; pushl %%edx;" - "call *%1;" - "popl %%edx; popl %%ecx", - PARAVIRT_RESTORE_FLAGS, CLBR_EAX) - : "=a"(f) : "m" (paravirt_ops.restore_fl), "0"(f) - : "memory", "cc"); + asm volatile(paravirt_alt("pushl %%ecx; pushl %%edx;" + PARAVIRT_CALL + "popl %%edx; popl %%ecx") + : "=a"(f) + : "0"(f), + paravirt_type(restore_fl), + paravirt_clobber(CLBR_EAX) + : "memory", "cc"); } static inline void raw_local_irq_disable(void) { - __asm__ __volatile__(paravirt_alt( "pushl %%ecx; pushl %%edx;" - "call *%0;" - "popl %%edx; popl %%ecx", - PARAVIRT_IRQ_DISABLE, CLBR_EAX) - : : "m" (paravirt_ops.irq_disable) - : "memory", "eax", "cc"); + asm volatile(paravirt_alt("pushl %%ecx; pushl %%edx;" + PARAVIRT_CALL + "popl %%edx; popl %%ecx") + : + : paravirt_type(irq_disable), + paravirt_clobber(CLBR_EAX) + : "memory", "eax", "cc"); } static inline void raw_local_irq_enable(void) { - __asm__ __volatile__(paravirt_alt( "pushl %%ecx; pushl %%edx;" - "call *%0;" - "popl %%edx; popl %%ecx", - PARAVIRT_IRQ_ENABLE, CLBR_EAX) - : : "m" (paravirt_ops.irq_enable) - : "memory", "eax", "cc"); + asm volatile(paravirt_alt("pushl %%ecx; pushl %%edx;" + PARAVIRT_CALL + "popl %%edx; popl %%ecx") + : + : paravirt_type(irq_enable), + paravirt_clobber(CLBR_EAX) + : "memory", "eax", "cc"); } static inline unsigned long __raw_local_irq_save(void) { unsigned long f; - __asm__ __volatile__(paravirt_alt( "pushl %%ecx; pushl %%edx;" - "call *%1; pushl %%eax;" - "call *%2; popl %%eax;" - "popl %%edx; popl %%ecx", - PARAVIRT_SAVE_FLAGS_IRQ_DISABLE, - CLBR_NONE) - : "=a"(f) - : "m" (paravirt_ops.save_fl), - "m" (paravirt_ops.irq_disable) - : "memory", "cc"); + f = __raw_local_save_flags(); + raw_local_irq_disable(); return f; } -#define CLI_STRING paravirt_alt("pushl %%ecx; pushl %%edx;" \ - "call *paravirt_ops+%c[irq_disable];" \ - "popl %%edx; popl %%ecx", \ - PARAVIRT_IRQ_DISABLE, CLBR_EAX) +#define CLI_STRING \ + _paravirt_alt("pushl %%ecx; pushl %%edx;" \ + "call *paravirt_ops+%c[paravirt_cli_type]*4;" \ + "popl %%edx; popl %%ecx", \ + "%c[paravirt_cli_type]", "%c[paravirt_clobber]") + +#define STI_STRING \ + _paravirt_alt("pushl %%ecx; pushl %%edx;" \ + "call *paravirt_ops+%c[paravirt_sti_type]*4;" \ + "popl %%edx; popl %%ecx", \ + "%c[paravirt_sti_type]", "%c[paravirt_clobber]") -#define STI_STRING paravirt_alt("pushl %%ecx; pushl %%edx;" \ - "call *paravirt_ops+%c[irq_enable];" \ - "popl %%edx; popl %%ecx", \ - PARAVIRT_IRQ_ENABLE, CLBR_EAX) #define CLI_STI_CLOBBERS , "%eax" -#define CLI_STI_INPUT_ARGS \ +#define CLI_STI_INPUT_ARGS \ , \ - [irq_disable] "i" (offsetof(struct paravirt_ops, irq_disable)), \ - [irq_enable] "i" (offsetof(struct paravirt_ops, irq_enable)) + [paravirt_cli_type] "i" (PARAVIRT_PATCH(irq_disable)), \ + [paravirt_sti_type] "i" (PARAVIRT_PATCH(irq_enable)), \ + paravirt_clobber(CLBR_EAX) + +#undef PARAVIRT_CALL #else /* __ASSEMBLY__ */ -#define PARA_PATCH(ptype, clobbers, ops) \ +#define PARA_PATCH(off) ((off) / 4) + +#define PARA_SITE(ptype, clobbers, ops) \ 771:; \ ops; \ 772:; \ @@ -612,25 +619,25 @@ static inline unsigned long __raw_local_irq_save(void) .short clobbers; \ .popsection -#define INTERRUPT_RETURN \ - PARA_PATCH(PARAVIRT_INTERRUPT_RETURN, CLBR_ANY, \ - jmp *%cs:paravirt_ops+PARAVIRT_iret) - -#define DISABLE_INTERRUPTS(clobbers) \ - PARA_PATCH(PARAVIRT_IRQ_DISABLE, clobbers, \ - pushl %ecx; pushl %edx; \ - call *paravirt_ops+PARAVIRT_irq_disable; \ - popl %edx; popl %ecx) \ - -#define ENABLE_INTERRUPTS(clobbers) \ - PARA_PATCH(PARAVIRT_IRQ_ENABLE, clobbers, \ - pushl %ecx; pushl %edx; \ - call *%cs:paravirt_ops+PARAVIRT_irq_enable; \ - popl %edx; popl %ecx) - -#define ENABLE_INTERRUPTS_SYSEXIT \ - PARA_PATCH(PARAVIRT_STI_SYSEXIT, CLBR_ANY, \ - jmp *%cs:paravirt_ops+PARAVIRT_irq_enable_sysexit) +#define INTERRUPT_RETURN \ + PARA_SITE(PARA_PATCH(PARAVIRT_iret), CLBR_ANY, \ + jmp *%cs:paravirt_ops+PARAVIRT_iret) + +#define DISABLE_INTERRUPTS(clobbers) \ + PARA_SITE(PARA_PATCH(PARAVIRT_irq_disable), clobbers, \ + pushl %ecx; pushl %edx; \ + call *%cs:paravirt_ops+PARAVIRT_irq_disable; \ + popl %edx; popl %ecx) \ + +#define ENABLE_INTERRUPTS(clobbers) \ + PARA_SITE(PARA_PATCH(PARAVIRT_irq_enable), clobbers, \ + pushl %ecx; pushl %edx; \ + call *%cs:paravirt_ops+PARAVIRT_irq_enable; \ + popl %edx; popl %ecx) + +#define ENABLE_INTERRUPTS_SYSEXIT \ + PARA_SITE(PARA_PATCH(PARAVIRT_irq_enable_sysexit), CLBR_ANY, \ + jmp *%cs:paravirt_ops+PARAVIRT_irq_enable_sysexit) #define GET_CR0_INTO_EAX \ call *paravirt_ops+PARAVIRT_read_cr0 -- cgit v1.2.1 From 42c24fa22e86365055fc931d833f26165e687c19 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 2 May 2007 19:27:14 +0200 Subject: [PATCH] i386: PARAVIRT: Fix patch site clobbers to include return register Fix a few clobbers to include the return register. The clobbers set is the set of all registers modified (or may be modified) by the code snippet, regardless of whether it was deliberate or accidental. Also, make sure that callsites which are used in contexts which don't allow clobbers actually save and restore all clobberable registers. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Andi Kleen Cc: Rusty Russell Cc: Zachary Amsden --- include/asm-i386/paravirt.h | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) (limited to 'include/asm-i386/paravirt.h') diff --git a/include/asm-i386/paravirt.h b/include/asm-i386/paravirt.h index 1dbc01f4ed4d..87fd4317bee9 100644 --- a/include/asm-i386/paravirt.h +++ b/include/asm-i386/paravirt.h @@ -535,7 +535,7 @@ static inline unsigned long __raw_local_save_flags(void) "popl %%edx; popl %%ecx") : "=a"(f) : paravirt_type(save_fl), - paravirt_clobber(CLBR_NONE) + paravirt_clobber(CLBR_EAX) : "memory", "cc"); return f; } @@ -620,27 +620,29 @@ static inline unsigned long __raw_local_irq_save(void) .popsection #define INTERRUPT_RETURN \ - PARA_SITE(PARA_PATCH(PARAVIRT_iret), CLBR_ANY, \ + PARA_SITE(PARA_PATCH(PARAVIRT_iret), CLBR_NONE, \ jmp *%cs:paravirt_ops+PARAVIRT_iret) #define DISABLE_INTERRUPTS(clobbers) \ PARA_SITE(PARA_PATCH(PARAVIRT_irq_disable), clobbers, \ - pushl %ecx; pushl %edx; \ + pushl %eax; pushl %ecx; pushl %edx; \ call *%cs:paravirt_ops+PARAVIRT_irq_disable; \ - popl %edx; popl %ecx) \ + popl %edx; popl %ecx; popl %eax) \ #define ENABLE_INTERRUPTS(clobbers) \ PARA_SITE(PARA_PATCH(PARAVIRT_irq_enable), clobbers, \ - pushl %ecx; pushl %edx; \ + pushl %eax; pushl %ecx; pushl %edx; \ call *%cs:paravirt_ops+PARAVIRT_irq_enable; \ - popl %edx; popl %ecx) + popl %edx; popl %ecx; popl %eax) #define ENABLE_INTERRUPTS_SYSEXIT \ - PARA_SITE(PARA_PATCH(PARAVIRT_irq_enable_sysexit), CLBR_ANY, \ + PARA_SITE(PARA_PATCH(PARAVIRT_irq_enable_sysexit), CLBR_NONE, \ jmp *%cs:paravirt_ops+PARAVIRT_irq_enable_sysexit) #define GET_CR0_INTO_EAX \ - call *paravirt_ops+PARAVIRT_read_cr0 + push %ecx; push %edx; \ + call *paravirt_ops+PARAVIRT_read_cr0; \ + pop %edx; pop %ecx #endif /* __ASSEMBLY__ */ #endif /* CONFIG_PARAVIRT */ -- cgit v1.2.1 From f8822f42019eceed19cc6c0f985a489e17796ed8 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 2 May 2007 19:27:14 +0200 Subject: [PATCH] i386: PARAVIRT: Consistently wrap paravirt ops callsites to make them patchable Wrap a set of interesting paravirt_ops calls in a wrapper which makes the callsites available for patching. Unfortunately this is pretty ugly because there's no way to get gcc to generate a function call, but also wrap just the callsite itself with the necessary labels. This patch supports functions with 0-4 arguments, and either void or returning a value. 64-bit arguments must be split into a pair of 32-bit arguments (lower word first). Small structures are returned in registers. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Andi Kleen Cc: Rusty Russell Cc: Zachary Amsden Cc: Anthony Liguori --- include/asm-i386/paravirt.h | 686 ++++++++++++++++++++++++++++++++++++-------- 1 file changed, 560 insertions(+), 126 deletions(-) (limited to 'include/asm-i386/paravirt.h') diff --git a/include/asm-i386/paravirt.h b/include/asm-i386/paravirt.h index 87fd4317bee9..837457b42dbe 100644 --- a/include/asm-i386/paravirt.h +++ b/include/asm-i386/paravirt.h @@ -124,7 +124,7 @@ struct paravirt_ops void (*flush_tlb_user)(void); void (*flush_tlb_kernel)(void); - void (*flush_tlb_single)(u32 addr); + void (*flush_tlb_single)(unsigned long addr); void (*map_pt_hook)(int type, pte_t *va, u32 pfn); @@ -188,7 +188,7 @@ extern struct paravirt_ops paravirt_ops; #define paravirt_clobber(clobber) \ [paravirt_clobber] "i" (clobber) -#define PARAVIRT_CALL "call *paravirt_ops+%c[paravirt_typenum]*4;" +#define PARAVIRT_CALL "call *(paravirt_ops+%c[paravirt_typenum]*4);" #define _paravirt_alt(insn_string, type, clobber) \ "771:\n\t" insn_string "\n" "772:\n" \ @@ -199,26 +199,234 @@ extern struct paravirt_ops paravirt_ops; " .short " clobber "\n" \ ".popsection\n" -#define paravirt_alt(insn_string) \ +#define paravirt_alt(insn_string) \ _paravirt_alt(insn_string, "%c[paravirt_typenum]", "%c[paravirt_clobber]") -#define paravirt_enabled() (paravirt_ops.paravirt_enabled) +#define PVOP_CALL0(__rettype, __op) \ + ({ \ + __rettype __ret; \ + if (sizeof(__rettype) > sizeof(unsigned long)) { \ + unsigned long long __tmp; \ + unsigned long __ecx; \ + asm volatile(paravirt_alt(PARAVIRT_CALL) \ + : "=A" (__tmp), "=c" (__ecx) \ + : paravirt_type(__op), \ + paravirt_clobber(CLBR_ANY) \ + : "memory", "cc"); \ + __ret = (__rettype)__tmp; \ + } else { \ + unsigned long __tmp, __edx, __ecx; \ + asm volatile(paravirt_alt(PARAVIRT_CALL) \ + : "=a" (__tmp), "=d" (__edx), \ + "=c" (__ecx) \ + : paravirt_type(__op), \ + paravirt_clobber(CLBR_ANY) \ + : "memory", "cc"); \ + __ret = (__rettype)__tmp; \ + } \ + __ret; \ + }) +#define PVOP_VCALL0(__op) \ + ({ \ + unsigned long __eax, __edx, __ecx; \ + asm volatile(paravirt_alt(PARAVIRT_CALL) \ + : "=a" (__eax), "=d" (__edx), "=c" (__ecx) \ + : paravirt_type(__op), \ + paravirt_clobber(CLBR_ANY) \ + : "memory", "cc"); \ + }) + +#define PVOP_CALL1(__rettype, __op, arg1) \ + ({ \ + __rettype __ret; \ + if (sizeof(__rettype) > sizeof(unsigned long)) { \ + unsigned long long __tmp; \ + unsigned long __ecx; \ + asm volatile(paravirt_alt(PARAVIRT_CALL) \ + : "=A" (__tmp), "=c" (__ecx) \ + : "a" ((u32)(arg1)), \ + paravirt_type(__op), \ + paravirt_clobber(CLBR_ANY) \ + : "memory", "cc"); \ + __ret = (__rettype)__tmp; \ + } else { \ + unsigned long __tmp, __edx, __ecx; \ + asm volatile(paravirt_alt(PARAVIRT_CALL) \ + : "=a" (__tmp), "=d" (__edx), \ + "=c" (__ecx) \ + : "0" ((u32)(arg1)), \ + paravirt_type(__op), \ + paravirt_clobber(CLBR_ANY) \ + : "memory", "cc"); \ + __ret = (__rettype)__tmp; \ + } \ + __ret; \ + }) +#define PVOP_VCALL1(__op, arg1) \ + ({ \ + unsigned long __eax, __edx, __ecx; \ + asm volatile(paravirt_alt(PARAVIRT_CALL) \ + : "=a" (__eax), "=d" (__edx), "=c" (__ecx) \ + : "0" ((u32)(arg1)), \ + paravirt_type(__op), \ + paravirt_clobber(CLBR_ANY) \ + : "memory", "cc"); \ + }) + +#define PVOP_CALL2(__rettype, __op, arg1, arg2) \ + ({ \ + __rettype __ret; \ + if (sizeof(__rettype) > sizeof(unsigned long)) { \ + unsigned long long __tmp; \ + unsigned long __ecx; \ + asm volatile(paravirt_alt(PARAVIRT_CALL) \ + : "=A" (__tmp), "=c" (__ecx) \ + : "a" ((u32)(arg1)), \ + "d" ((u32)(arg2)), \ + paravirt_type(__op), \ + paravirt_clobber(CLBR_ANY) \ + : "memory", "cc"); \ + __ret = (__rettype)__tmp; \ + } else { \ + unsigned long __tmp, __edx, __ecx; \ + asm volatile(paravirt_alt(PARAVIRT_CALL) \ + : "=a" (__tmp), "=d" (__edx), \ + "=c" (__ecx) \ + : "0" ((u32)(arg1)), \ + "1" ((u32)(arg2)), \ + paravirt_type(__op), \ + paravirt_clobber(CLBR_ANY) \ + : "memory", "cc"); \ + __ret = (__rettype)__tmp; \ + } \ + __ret; \ + }) +#define PVOP_VCALL2(__op, arg1, arg2) \ + ({ \ + unsigned long __eax, __edx, __ecx; \ + asm volatile(paravirt_alt(PARAVIRT_CALL) \ + : "=a" (__eax), "=d" (__edx), "=c" (__ecx) \ + : "0" ((u32)(arg1)), \ + "1" ((u32)(arg2)), \ + paravirt_type(__op), \ + paravirt_clobber(CLBR_ANY) \ + : "memory", "cc"); \ + }) + +#define PVOP_CALL3(__rettype, __op, arg1, arg2, arg3) \ + ({ \ + __rettype __ret; \ + if (sizeof(__rettype) > sizeof(unsigned long)) { \ + unsigned long long __tmp; \ + unsigned long __ecx; \ + asm volatile(paravirt_alt(PARAVIRT_CALL) \ + : "=A" (__tmp), "=c" (__ecx) \ + : "a" ((u32)(arg1)), \ + "d" ((u32)(arg2)), \ + "1" ((u32)(arg3)), \ + paravirt_type(__op), \ + paravirt_clobber(CLBR_ANY) \ + : "memory", "cc"); \ + __ret = (__rettype)__tmp; \ + } else { \ + unsigned long __tmp, __edx, __ecx; \ + asm volatile(paravirt_alt(PARAVIRT_CALL) \ + : "=a" (__tmp), "=d" (__edx), \ + "=c" (__ecx) \ + : "0" ((u32)(arg1)), \ + "1" ((u32)(arg2)), \ + "2" ((u32)(arg3)), \ + paravirt_type(__op), \ + paravirt_clobber(CLBR_ANY) \ + : "memory", "cc"); \ + __ret = (__rettype)__tmp; \ + } \ + __ret; \ + }) +#define PVOP_VCALL3(__op, arg1, arg2, arg3) \ + ({ \ + unsigned long __eax, __edx, __ecx; \ + asm volatile(paravirt_alt(PARAVIRT_CALL) \ + : "=a" (__eax), "=d" (__edx), "=c" (__ecx) \ + : "0" ((u32)(arg1)), \ + "1" ((u32)(arg2)), \ + "2" ((u32)(arg3)), \ + paravirt_type(__op), \ + paravirt_clobber(CLBR_ANY) \ + : "memory", "cc"); \ + }) + +#define PVOP_CALL4(__rettype, __op, arg1, arg2, arg3, arg4) \ + ({ \ + __rettype __ret; \ + if (sizeof(__rettype) > sizeof(unsigned long)) { \ + unsigned long long __tmp; \ + unsigned long __ecx; \ + asm volatile("push %[_arg4]; " \ + paravirt_alt(PARAVIRT_CALL) \ + "lea 4(%%esp),%%esp" \ + : "=A" (__tmp), "=c" (__ecx) \ + : "a" ((u32)(arg1)), \ + "d" ((u32)(arg2)), \ + "1" ((u32)(arg3)), \ + [_arg4] "mr" ((u32)(arg4)), \ + paravirt_type(__op), \ + paravirt_clobber(CLBR_ANY) \ + : "memory", "cc",); \ + __ret = (__rettype)__tmp; \ + } else { \ + unsigned long __tmp, __edx, __ecx; \ + asm volatile("push %[_arg4]; " \ + paravirt_alt(PARAVIRT_CALL) \ + "lea 4(%%esp),%%esp" \ + : "=a" (__tmp), "=d" (__edx), "=c" (__ecx) \ + : "0" ((u32)(arg1)), \ + "1" ((u32)(arg2)), \ + "2" ((u32)(arg3)), \ + [_arg4]"mr" ((u32)(arg4)), \ + paravirt_type(__op), \ + paravirt_clobber(CLBR_ANY) \ + : "memory", "cc"); \ + __ret = (__rettype)__tmp; \ + } \ + __ret; \ + }) +#define PVOP_VCALL4(__op, arg1, arg2, arg3, arg4) \ + ({ \ + unsigned long __eax, __edx, __ecx; \ + asm volatile("push %[_arg4]; " \ + paravirt_alt(PARAVIRT_CALL) \ + "lea 4(%%esp),%%esp" \ + : "=a" (__eax), "=d" (__edx), "=c" (__ecx) \ + : "0" ((u32)(arg1)), \ + "1" ((u32)(arg2)), \ + "2" ((u32)(arg3)), \ + [_arg4]"mr" ((u32)(arg4)), \ + paravirt_type(__op), \ + paravirt_clobber(CLBR_ANY) \ + : "memory", "cc"); \ + }) + +static inline int paravirt_enabled(void) +{ + return paravirt_ops.paravirt_enabled; +} static inline void load_esp0(struct tss_struct *tss, struct thread_struct *thread) { - paravirt_ops.load_esp0(tss, thread); + PVOP_VCALL2(load_esp0, tss, thread); } #define ARCH_SETUP paravirt_ops.arch_setup(); static inline unsigned long get_wallclock(void) { - return paravirt_ops.get_wallclock(); + return PVOP_CALL0(unsigned long, get_wallclock); } static inline int set_wallclock(unsigned long nowtime) { - return paravirt_ops.set_wallclock(nowtime); + return PVOP_CALL1(int, set_wallclock, nowtime); } static inline void (*choose_time_init(void))(void) @@ -230,127 +438,208 @@ static inline void (*choose_time_init(void))(void) static inline void __cpuid(unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx) { - paravirt_ops.cpuid(eax, ebx, ecx, edx); + PVOP_VCALL4(cpuid, eax, ebx, ecx, edx); } /* * These special macros can be used to get or set a debugging register */ -#define get_debugreg(var, reg) var = paravirt_ops.get_debugreg(reg) -#define set_debugreg(val, reg) paravirt_ops.set_debugreg(reg, val) +static inline unsigned long paravirt_get_debugreg(int reg) +{ + return PVOP_CALL1(unsigned long, get_debugreg, reg); +} +#define get_debugreg(var, reg) var = paravirt_get_debugreg(reg) +static inline void set_debugreg(unsigned long val, int reg) +{ + PVOP_VCALL2(set_debugreg, reg, val); +} -#define clts() paravirt_ops.clts() +static inline void clts(void) +{ + PVOP_VCALL0(clts); +} -#define read_cr0() paravirt_ops.read_cr0() -#define write_cr0(x) paravirt_ops.write_cr0(x) +static inline unsigned long read_cr0(void) +{ + return PVOP_CALL0(unsigned long, read_cr0); +} -#define read_cr2() paravirt_ops.read_cr2() -#define write_cr2(x) paravirt_ops.write_cr2(x) +static inline void write_cr0(unsigned long x) +{ + PVOP_VCALL1(write_cr0, x); +} + +static inline unsigned long read_cr2(void) +{ + return PVOP_CALL0(unsigned long, read_cr2); +} + +static inline void write_cr2(unsigned long x) +{ + PVOP_VCALL1(write_cr2, x); +} + +static inline unsigned long read_cr3(void) +{ + return PVOP_CALL0(unsigned long, read_cr3); +} -#define read_cr3() paravirt_ops.read_cr3() -#define write_cr3(x) paravirt_ops.write_cr3(x) +static inline void write_cr3(unsigned long x) +{ + PVOP_VCALL1(write_cr3, x); +} -#define read_cr4() paravirt_ops.read_cr4() -#define read_cr4_safe(x) paravirt_ops.read_cr4_safe() -#define write_cr4(x) paravirt_ops.write_cr4(x) +static inline unsigned long read_cr4(void) +{ + return PVOP_CALL0(unsigned long, read_cr4); +} +static inline unsigned long read_cr4_safe(void) +{ + return PVOP_CALL0(unsigned long, read_cr4_safe); +} -#define raw_ptep_get_and_clear(xp) (paravirt_ops.ptep_get_and_clear(xp)) +static inline void write_cr4(unsigned long x) +{ + PVOP_VCALL1(write_cr4, x); +} static inline void raw_safe_halt(void) { - paravirt_ops.safe_halt(); + PVOP_VCALL0(safe_halt); } static inline void halt(void) { - paravirt_ops.safe_halt(); + PVOP_VCALL0(safe_halt); +} + +static inline void wbinvd(void) +{ + PVOP_VCALL0(wbinvd); } -#define wbinvd() paravirt_ops.wbinvd() #define get_kernel_rpl() (paravirt_ops.kernel_rpl) +static inline u64 paravirt_read_msr(unsigned msr, int *err) +{ + return PVOP_CALL2(u64, read_msr, msr, err); +} +static inline int paravirt_write_msr(unsigned msr, unsigned low, unsigned high) +{ + return PVOP_CALL3(int, write_msr, msr, low, high); +} + /* These should all do BUG_ON(_err), but our headers are too tangled. */ -#define rdmsr(msr,val1,val2) do { \ - int _err; \ - u64 _l = paravirt_ops.read_msr(msr,&_err); \ - val1 = (u32)_l; \ - val2 = _l >> 32; \ +#define rdmsr(msr,val1,val2) do { \ + int _err; \ + u64 _l = paravirt_read_msr(msr, &_err); \ + val1 = (u32)_l; \ + val2 = _l >> 32; \ } while(0) -#define wrmsr(msr,val1,val2) do { \ - u64 _l = ((u64)(val2) << 32) | (val1); \ - paravirt_ops.write_msr((msr), _l); \ +#define wrmsr(msr,val1,val2) do { \ + paravirt_write_msr(msr, val1, val2); \ } while(0) -#define rdmsrl(msr,val) do { \ - int _err; \ - val = paravirt_ops.read_msr((msr),&_err); \ +#define rdmsrl(msr,val) do { \ + int _err; \ + val = paravirt_read_msr(msr, &_err); \ } while(0) -#define wrmsrl(msr,val) (paravirt_ops.write_msr((msr),(val))) -#define wrmsr_safe(msr,a,b) ({ \ - u64 _l = ((u64)(b) << 32) | (a); \ - paravirt_ops.write_msr((msr),_l); \ -}) +#define wrmsrl(msr,val) ((void)paravirt_write_msr(msr, val, 0)) +#define wrmsr_safe(msr,a,b) paravirt_write_msr(msr, a, b) /* rdmsr with exception handling */ -#define rdmsr_safe(msr,a,b) ({ \ - int _err; \ - u64 _l = paravirt_ops.read_msr(msr,&_err); \ - (*a) = (u32)_l; \ - (*b) = _l >> 32; \ +#define rdmsr_safe(msr,a,b) ({ \ + int _err; \ + u64 _l = paravirt_read_msr(msr, &_err); \ + (*a) = (u32)_l; \ + (*b) = _l >> 32; \ _err; }) -#define rdtsc(low,high) do { \ - u64 _l = paravirt_ops.read_tsc(); \ - low = (u32)_l; \ - high = _l >> 32; \ + +static inline u64 paravirt_read_tsc(void) +{ + return PVOP_CALL0(u64, read_tsc); +} +#define rdtsc(low,high) do { \ + u64 _l = paravirt_read_tsc(); \ + low = (u32)_l; \ + high = _l >> 32; \ } while(0) -#define rdtscl(low) do { \ - u64 _l = paravirt_ops.read_tsc(); \ - low = (int)_l; \ +#define rdtscl(low) do { \ + u64 _l = paravirt_read_tsc(); \ + low = (int)_l; \ } while(0) -#define rdtscll(val) (val = paravirt_ops.read_tsc()) +#define rdtscll(val) (val = paravirt_read_tsc()) #define get_scheduled_cycles(val) (val = paravirt_ops.get_scheduled_cycles()) #define calculate_cpu_khz() (paravirt_ops.get_cpu_khz()) #define write_tsc(val1,val2) wrmsr(0x10, val1, val2) -#define rdpmc(counter,low,high) do { \ - u64 _l = paravirt_ops.read_pmc(); \ - low = (u32)_l; \ - high = _l >> 32; \ -} while(0) +static inline unsigned long long paravirt_read_pmc(int counter) +{ + return PVOP_CALL1(u64, read_pmc, counter); +} -#define load_TR_desc() (paravirt_ops.load_tr_desc()) -#define load_gdt(dtr) (paravirt_ops.load_gdt(dtr)) -#define load_idt(dtr) (paravirt_ops.load_idt(dtr)) -#define set_ldt(addr, entries) (paravirt_ops.set_ldt((addr), (entries))) -#define store_gdt(dtr) (paravirt_ops.store_gdt(dtr)) -#define store_idt(dtr) (paravirt_ops.store_idt(dtr)) -#define store_tr(tr) ((tr) = paravirt_ops.store_tr()) -#define load_TLS(t,cpu) (paravirt_ops.load_tls((t),(cpu))) -#define write_ldt_entry(dt, entry, low, high) \ - (paravirt_ops.write_ldt_entry((dt), (entry), (low), (high))) -#define write_gdt_entry(dt, entry, low, high) \ - (paravirt_ops.write_gdt_entry((dt), (entry), (low), (high))) -#define write_idt_entry(dt, entry, low, high) \ - (paravirt_ops.write_idt_entry((dt), (entry), (low), (high))) -#define set_iopl_mask(mask) (paravirt_ops.set_iopl_mask(mask)) - -#define __pte(x) paravirt_ops.make_pte(x) -#define __pgd(x) paravirt_ops.make_pgd(x) - -#define pte_val(x) paravirt_ops.pte_val(x) -#define pgd_val(x) paravirt_ops.pgd_val(x) +#define rdpmc(counter,low,high) do { \ + u64 _l = paravirt_read_pmc(counter); \ + low = (u32)_l; \ + high = _l >> 32; \ +} while(0) -#ifdef CONFIG_X86_PAE -#define __pmd(x) paravirt_ops.make_pmd(x) -#define pmd_val(x) paravirt_ops.pmd_val(x) -#endif +static inline void load_TR_desc(void) +{ + PVOP_VCALL0(load_tr_desc); +} +static inline void load_gdt(const struct Xgt_desc_struct *dtr) +{ + PVOP_VCALL1(load_gdt, dtr); +} +static inline void load_idt(const struct Xgt_desc_struct *dtr) +{ + PVOP_VCALL1(load_idt, dtr); +} +static inline void set_ldt(const void *addr, unsigned entries) +{ + PVOP_VCALL2(set_ldt, addr, entries); +} +static inline void store_gdt(struct Xgt_desc_struct *dtr) +{ + PVOP_VCALL1(store_gdt, dtr); +} +static inline void store_idt(struct Xgt_desc_struct *dtr) +{ + PVOP_VCALL1(store_idt, dtr); +} +static inline unsigned long paravirt_store_tr(void) +{ + return PVOP_CALL0(unsigned long, store_tr); +} +#define store_tr(tr) ((tr) = paravirt_store_tr()) +static inline void load_TLS(struct thread_struct *t, unsigned cpu) +{ + PVOP_VCALL2(load_tls, t, cpu); +} +static inline void write_ldt_entry(void *dt, int entry, u32 low, u32 high) +{ + PVOP_VCALL4(write_ldt_entry, dt, entry, low, high); +} +static inline void write_gdt_entry(void *dt, int entry, u32 low, u32 high) +{ + PVOP_VCALL4(write_gdt_entry, dt, entry, low, high); +} +static inline void write_idt_entry(void *dt, int entry, u32 low, u32 high) +{ + PVOP_VCALL4(write_idt_entry, dt, entry, low, high); +} +static inline void set_iopl_mask(unsigned mask) +{ + PVOP_VCALL1(set_iopl_mask, mask); +} /* The paravirtualized I/O functions */ static inline void slow_down_io(void) { @@ -368,27 +657,27 @@ static inline void slow_down_io(void) { */ static inline void apic_write(unsigned long reg, unsigned long v) { - paravirt_ops.apic_write(reg,v); + PVOP_VCALL2(apic_write, reg, v); } static inline void apic_write_atomic(unsigned long reg, unsigned long v) { - paravirt_ops.apic_write_atomic(reg,v); + PVOP_VCALL2(apic_write_atomic, reg, v); } static inline unsigned long apic_read(unsigned long reg) { - return paravirt_ops.apic_read(reg); + return PVOP_CALL1(unsigned long, apic_read, reg); } static inline void setup_boot_clock(void) { - paravirt_ops.setup_boot_clock(); + PVOP_VCALL0(setup_boot_clock); } static inline void setup_secondary_clock(void) { - paravirt_ops.setup_secondary_clock(); + PVOP_VCALL0(setup_secondary_clock); } #endif @@ -408,93 +697,205 @@ static inline void paravirt_pagetable_setup_done(pgd_t *base) static inline void startup_ipi_hook(int phys_apicid, unsigned long start_eip, unsigned long start_esp) { - return paravirt_ops.startup_ipi_hook(phys_apicid, start_eip, start_esp); + PVOP_VCALL3(startup_ipi_hook, phys_apicid, start_eip, start_esp); } #endif static inline void paravirt_activate_mm(struct mm_struct *prev, struct mm_struct *next) { - paravirt_ops.activate_mm(prev, next); + PVOP_VCALL2(activate_mm, prev, next); } static inline void arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm) { - paravirt_ops.dup_mmap(oldmm, mm); + PVOP_VCALL2(dup_mmap, oldmm, mm); } static inline void arch_exit_mmap(struct mm_struct *mm) { - paravirt_ops.exit_mmap(mm); + PVOP_VCALL1(exit_mmap, mm); } -#define __flush_tlb() paravirt_ops.flush_tlb_user() -#define __flush_tlb_global() paravirt_ops.flush_tlb_kernel() -#define __flush_tlb_single(addr) paravirt_ops.flush_tlb_single(addr) +static inline void __flush_tlb(void) +{ + PVOP_VCALL0(flush_tlb_user); +} +static inline void __flush_tlb_global(void) +{ + PVOP_VCALL0(flush_tlb_kernel); +} +static inline void __flush_tlb_single(unsigned long addr) +{ + PVOP_VCALL1(flush_tlb_single, addr); +} -#define paravirt_map_pt_hook(type, va, pfn) paravirt_ops.map_pt_hook(type, va, pfn) +static inline void paravirt_map_pt_hook(int type, pte_t *va, u32 pfn) +{ + PVOP_VCALL3(map_pt_hook, type, va, pfn); +} -#define paravirt_alloc_pt(pfn) paravirt_ops.alloc_pt(pfn) -#define paravirt_release_pt(pfn) paravirt_ops.release_pt(pfn) +static inline void paravirt_alloc_pt(unsigned pfn) +{ + PVOP_VCALL1(alloc_pt, pfn); +} +static inline void paravirt_release_pt(unsigned pfn) +{ + PVOP_VCALL1(release_pt, pfn); +} -#define paravirt_alloc_pd(pfn) paravirt_ops.alloc_pd(pfn) -#define paravirt_alloc_pd_clone(pfn, clonepfn, start, count) \ - paravirt_ops.alloc_pd_clone(pfn, clonepfn, start, count) -#define paravirt_release_pd(pfn) paravirt_ops.release_pd(pfn) +static inline void paravirt_alloc_pd(unsigned pfn) +{ + PVOP_VCALL1(alloc_pd, pfn); +} -static inline void set_pte(pte_t *ptep, pte_t pteval) +static inline void paravirt_alloc_pd_clone(unsigned pfn, unsigned clonepfn, + unsigned start, unsigned count) +{ + PVOP_VCALL4(alloc_pd_clone, pfn, clonepfn, start, count); +} +static inline void paravirt_release_pd(unsigned pfn) { - paravirt_ops.set_pte(ptep, pteval); + PVOP_VCALL1(release_pd, pfn); } -static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, - pte_t *ptep, pte_t pteval) +static inline void pte_update(struct mm_struct *mm, unsigned long addr, + pte_t *ptep) { - paravirt_ops.set_pte_at(mm, addr, ptep, pteval); + PVOP_VCALL3(pte_update, mm, addr, ptep); } -static inline void set_pmd(pmd_t *pmdp, pmd_t pmdval) +static inline void pte_update_defer(struct mm_struct *mm, unsigned long addr, + pte_t *ptep) { - paravirt_ops.set_pmd(pmdp, pmdval); + PVOP_VCALL3(pte_update_defer, mm, addr, ptep); } -static inline void pte_update(struct mm_struct *mm, u32 addr, pte_t *ptep) +#ifdef CONFIG_X86_PAE +static inline pte_t __pte(unsigned long long val) { - paravirt_ops.pte_update(mm, addr, ptep); + unsigned long long ret = PVOP_CALL2(unsigned long long, make_pte, + val, val >> 32); + return (pte_t) { ret, ret >> 32 }; } -static inline void pte_update_defer(struct mm_struct *mm, u32 addr, pte_t *ptep) +static inline pmd_t __pmd(unsigned long long val) { - paravirt_ops.pte_update_defer(mm, addr, ptep); + return (pmd_t) { PVOP_CALL2(unsigned long long, make_pmd, val, val >> 32) }; +} + +static inline pgd_t __pgd(unsigned long long val) +{ + return (pgd_t) { PVOP_CALL2(unsigned long long, make_pgd, val, val >> 32) }; +} + +static inline unsigned long long pte_val(pte_t x) +{ + return PVOP_CALL2(unsigned long long, pte_val, x.pte_low, x.pte_high); +} + +static inline unsigned long long pmd_val(pmd_t x) +{ + return PVOP_CALL2(unsigned long long, pmd_val, x.pmd, x.pmd >> 32); +} + +static inline unsigned long long pgd_val(pgd_t x) +{ + return PVOP_CALL2(unsigned long long, pgd_val, x.pgd, x.pgd >> 32); +} + +static inline void set_pte(pte_t *ptep, pte_t pteval) +{ + PVOP_VCALL3(set_pte, ptep, pteval.pte_low, pteval.pte_high); +} + +static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pteval) +{ + /* 5 arg words */ + paravirt_ops.set_pte_at(mm, addr, ptep, pteval); } -#ifdef CONFIG_X86_PAE static inline void set_pte_atomic(pte_t *ptep, pte_t pteval) { - paravirt_ops.set_pte_atomic(ptep, pteval); + PVOP_VCALL3(set_pte_atomic, ptep, pteval.pte_low, pteval.pte_high); } -static inline void set_pte_present(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte) +static inline void set_pte_present(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte) { + /* 5 arg words */ paravirt_ops.set_pte_present(mm, addr, ptep, pte); } +static inline void set_pmd(pmd_t *pmdp, pmd_t pmdval) +{ + PVOP_VCALL3(set_pmd, pmdp, pmdval.pmd, pmdval.pmd >> 32); +} + static inline void set_pud(pud_t *pudp, pud_t pudval) { - paravirt_ops.set_pud(pudp, pudval); + PVOP_VCALL3(set_pud, pudp, pudval.pgd.pgd, pudval.pgd.pgd >> 32); } static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { - paravirt_ops.pte_clear(mm, addr, ptep); + PVOP_VCALL3(pte_clear, mm, addr, ptep); } static inline void pmd_clear(pmd_t *pmdp) { - paravirt_ops.pmd_clear(pmdp); + PVOP_VCALL1(pmd_clear, pmdp); +} + +static inline pte_t raw_ptep_get_and_clear(pte_t *p) +{ + unsigned long long val = PVOP_CALL1(unsigned long long, ptep_get_and_clear, p); + return (pte_t) { val, val >> 32 }; +} +#else /* !CONFIG_X86_PAE */ +static inline pte_t __pte(unsigned long val) +{ + return (pte_t) { PVOP_CALL1(unsigned long, make_pte, val) }; } -#endif + +static inline pgd_t __pgd(unsigned long val) +{ + return (pgd_t) { PVOP_CALL1(unsigned long, make_pgd, val) }; +} + +static inline unsigned long pte_val(pte_t x) +{ + return PVOP_CALL1(unsigned long, pte_val, x.pte_low); +} + +static inline unsigned long pgd_val(pgd_t x) +{ + return PVOP_CALL1(unsigned long, pgd_val, x.pgd); +} + +static inline void set_pte(pte_t *ptep, pte_t pteval) +{ + PVOP_VCALL2(set_pte, ptep, pteval.pte_low); +} + +static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pteval) +{ + PVOP_VCALL4(set_pte_at, mm, addr, ptep, pteval.pte_low); +} + +static inline void set_pmd(pmd_t *pmdp, pmd_t pmdval) +{ + PVOP_VCALL2(set_pmd, pmdp, pmdval.pud.pgd.pgd); +} + +static inline pte_t raw_ptep_get_and_clear(pte_t *p) +{ + return (pte_t) { PVOP_CALL1(unsigned long, ptep_get_and_clear, p) }; +} +#endif /* CONFIG_X86_PAE */ /* Lazy mode for batching updates / context switch */ #define PARAVIRT_LAZY_NONE 0 @@ -503,14 +904,37 @@ static inline void pmd_clear(pmd_t *pmdp) #define PARAVIRT_LAZY_FLUSH 3 #define __HAVE_ARCH_ENTER_LAZY_CPU_MODE -#define arch_enter_lazy_cpu_mode() paravirt_ops.set_lazy_mode(PARAVIRT_LAZY_CPU) -#define arch_leave_lazy_cpu_mode() paravirt_ops.set_lazy_mode(PARAVIRT_LAZY_NONE) -#define arch_flush_lazy_cpu_mode() paravirt_ops.set_lazy_mode(PARAVIRT_LAZY_FLUSH) +static inline void arch_enter_lazy_cpu_mode(void) +{ + PVOP_VCALL1(set_lazy_mode, PARAVIRT_LAZY_CPU); +} + +static inline void arch_leave_lazy_cpu_mode(void) +{ + PVOP_VCALL1(set_lazy_mode, PARAVIRT_LAZY_NONE); +} + +static inline void arch_flush_lazy_cpu_mode(void) +{ + PVOP_VCALL1(set_lazy_mode, PARAVIRT_LAZY_FLUSH); +} + #define __HAVE_ARCH_ENTER_LAZY_MMU_MODE -#define arch_enter_lazy_mmu_mode() paravirt_ops.set_lazy_mode(PARAVIRT_LAZY_MMU) -#define arch_leave_lazy_mmu_mode() paravirt_ops.set_lazy_mode(PARAVIRT_LAZY_NONE) -#define arch_flush_lazy_mmu_mode() paravirt_ops.set_lazy_mode(PARAVIRT_LAZY_FLUSH) +static inline void arch_enter_lazy_mmu_mode(void) +{ + PVOP_VCALL1(set_lazy_mode, PARAVIRT_LAZY_MMU); +} + +static inline void arch_leave_lazy_mmu_mode(void) +{ + PVOP_VCALL1(set_lazy_mode, PARAVIRT_LAZY_NONE); +} + +static inline void arch_flush_lazy_mmu_mode(void) +{ + PVOP_VCALL1(set_lazy_mode, PARAVIRT_LAZY_FLUSH); +} void _paravirt_nop(void); #define paravirt_nop ((void *)_paravirt_nop) @@ -603,6 +1027,16 @@ static inline unsigned long __raw_local_irq_save(void) paravirt_clobber(CLBR_EAX) #undef PARAVIRT_CALL +#undef PVOP_VCALL0 +#undef PVOP_CALL0 +#undef PVOP_VCALL1 +#undef PVOP_CALL1 +#undef PVOP_VCALL2 +#undef PVOP_CALL2 +#undef PVOP_VCALL3 +#undef PVOP_CALL3 +#undef PVOP_VCALL4 +#undef PVOP_CALL4 #else /* __ASSEMBLY__ */ -- cgit v1.2.1 From 294688c028e80fd467cdd22da79f62c5f311eaf5 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 2 May 2007 19:27:14 +0200 Subject: [PATCH] i386: PARAVIRT: Document asm-i386/paravirt.h Clean things up, and broadly document: - the paravirt_ops functions themselves - the patching mechanism Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Andi Kleen Cc: Rusty Russell --- include/asm-i386/paravirt.h | 131 ++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 121 insertions(+), 10 deletions(-) (limited to 'include/asm-i386/paravirt.h') diff --git a/include/asm-i386/paravirt.h b/include/asm-i386/paravirt.h index 837457b42dbe..8bfaf10d9961 100644 --- a/include/asm-i386/paravirt.h +++ b/include/asm-i386/paravirt.h @@ -21,6 +21,14 @@ struct Xgt_desc_struct; struct tss_struct; struct mm_struct; struct desc_struct; + +/* Lazy mode for batching updates / context switch */ +enum paravirt_lazy_mode { + PARAVIRT_LAZY_NONE = 0, + PARAVIRT_LAZY_MMU = 1, + PARAVIRT_LAZY_CPU = 2, +}; + struct paravirt_ops { unsigned int kernel_rpl; @@ -37,22 +45,33 @@ struct paravirt_ops */ unsigned (*patch)(u8 type, u16 clobber, void *firstinsn, unsigned len); + /* Basic arch-specific setup */ void (*arch_setup)(void); char *(*memory_setup)(void); void (*init_IRQ)(void); + void (*time_init)(void); + /* + * Called before/after init_mm pagetable setup. setup_start + * may reset %cr3, and may pre-install parts of the pagetable; + * pagetable setup is expected to preserve any existing + * mapping. + */ void (*pagetable_setup_start)(pgd_t *pgd_base); void (*pagetable_setup_done)(pgd_t *pgd_base); + /* Print a banner to identify the environment */ void (*banner)(void); + /* Set and set time of day */ unsigned long (*get_wallclock)(void); int (*set_wallclock)(unsigned long); - void (*time_init)(void); + /* cpuid emulation, mostly so that caps bits can be disabled */ void (*cpuid)(unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx); + /* hooks for various privileged instructions */ unsigned long (*get_debugreg)(int regno); void (*set_debugreg)(int regno, unsigned long value); @@ -71,15 +90,23 @@ struct paravirt_ops unsigned long (*read_cr4)(void); void (*write_cr4)(unsigned long); + /* + * Get/set interrupt state. save_fl and restore_fl are only + * expected to use X86_EFLAGS_IF; all other bits + * returned from save_fl are undefined, and may be ignored by + * restore_fl. + */ unsigned long (*save_fl)(void); void (*restore_fl)(unsigned long); void (*irq_disable)(void); void (*irq_enable)(void); void (*safe_halt)(void); void (*halt)(void); + void (*wbinvd)(void); - /* err = 0/-EFAULT. wrmsr returns 0/-EFAULT. */ + /* MSR, PMC and TSR operations. + err = 0/-EFAULT. wrmsr returns 0/-EFAULT. */ u64 (*read_msr)(unsigned int msr, int *err); int (*write_msr)(unsigned int msr, u64 val); @@ -88,6 +115,7 @@ struct paravirt_ops u64 (*get_scheduled_cycles)(void); unsigned long (*get_cpu_khz)(void); + /* Segment descriptor handling */ void (*load_tr_desc)(void); void (*load_gdt)(const struct Xgt_desc_struct *); void (*load_idt)(const struct Xgt_desc_struct *); @@ -105,9 +133,12 @@ struct paravirt_ops void (*load_esp0)(struct tss_struct *tss, struct thread_struct *t); void (*set_iopl_mask)(unsigned mask); - void (*io_delay)(void); + /* + * Hooks for intercepting the creation/use/destruction of an + * mm_struct. + */ void (*activate_mm)(struct mm_struct *prev, struct mm_struct *next); void (*dup_mmap)(struct mm_struct *oldmm, @@ -115,30 +146,43 @@ struct paravirt_ops void (*exit_mmap)(struct mm_struct *mm); #ifdef CONFIG_X86_LOCAL_APIC + /* + * Direct APIC operations, principally for VMI. Ideally + * these shouldn't be in this interface. + */ void (*apic_write)(unsigned long reg, unsigned long v); void (*apic_write_atomic)(unsigned long reg, unsigned long v); unsigned long (*apic_read)(unsigned long reg); void (*setup_boot_clock)(void); void (*setup_secondary_clock)(void); + + void (*startup_ipi_hook)(int phys_apicid, + unsigned long start_eip, + unsigned long start_esp); #endif + /* TLB operations */ void (*flush_tlb_user)(void); void (*flush_tlb_kernel)(void); void (*flush_tlb_single)(unsigned long addr); void (*map_pt_hook)(int type, pte_t *va, u32 pfn); + /* Hooks for allocating/releasing pagetable pages */ void (*alloc_pt)(u32 pfn); void (*alloc_pd)(u32 pfn); void (*alloc_pd_clone)(u32 pfn, u32 clonepfn, u32 start, u32 count); void (*release_pt)(u32 pfn); void (*release_pd)(u32 pfn); + /* Pagetable manipulation functions */ void (*set_pte)(pte_t *ptep, pte_t pteval); - void (*set_pte_at)(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pteval); + void (*set_pte_at)(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pteval); void (*set_pmd)(pmd_t *pmdp, pmd_t pmdval); void (*pte_update)(struct mm_struct *mm, unsigned long addr, pte_t *ptep); - void (*pte_update_defer)(struct mm_struct *mm, unsigned long addr, pte_t *ptep); + void (*pte_update_defer)(struct mm_struct *mm, + unsigned long addr, pte_t *ptep); pte_t (*ptep_get_and_clear)(pte_t *ptep); @@ -164,13 +208,12 @@ struct paravirt_ops pgd_t (*make_pgd)(unsigned long pgd); #endif - void (*set_lazy_mode)(int mode); + /* Set deferred update mode, used for batching operations. */ + void (*set_lazy_mode)(enum paravirt_lazy_mode mode); /* These two are jmp to, not actually called. */ void (*irq_enable_sysexit)(void); void (*iret)(void); - - void (*startup_ipi_hook)(int phys_apicid, unsigned long start_eip, unsigned long start_esp); }; /* Mark a paravirt probe function. */ @@ -188,8 +231,10 @@ extern struct paravirt_ops paravirt_ops; #define paravirt_clobber(clobber) \ [paravirt_clobber] "i" (clobber) -#define PARAVIRT_CALL "call *(paravirt_ops+%c[paravirt_typenum]*4);" - +/* + * Generate some code, and mark it as patchable by the + * apply_paravirt() alternate instruction patcher. + */ #define _paravirt_alt(insn_string, type, clobber) \ "771:\n\t" insn_string "\n" "772:\n" \ ".pushsection .parainstructions,\"a\"\n" \ @@ -199,9 +244,74 @@ extern struct paravirt_ops paravirt_ops; " .short " clobber "\n" \ ".popsection\n" +/* Generate patchable code, with the default asm parameters. */ #define paravirt_alt(insn_string) \ _paravirt_alt(insn_string, "%c[paravirt_typenum]", "%c[paravirt_clobber]") +/* + * This generates an indirect call based on the operation type number. + * The type number, computed in PARAVIRT_PATCH, is derived from the + * offset into the paravirt_ops structure, and can therefore be freely + * converted back into a structure offset. + */ +#define PARAVIRT_CALL "call *(paravirt_ops+%c[paravirt_typenum]*4);" + +/* + * These macros are intended to wrap calls into a paravirt_ops + * operation, so that they can be later identified and patched at + * runtime. + * + * Normally, a call to a pv_op function is a simple indirect call: + * (paravirt_ops.operations)(args...). + * + * Unfortunately, this is a relatively slow operation for modern CPUs, + * because it cannot necessarily determine what the destination + * address is. In this case, the address is a runtime constant, so at + * the very least we can patch the call to e a simple direct call, or + * ideally, patch an inline implementation into the callsite. (Direct + * calls are essentially free, because the call and return addresses + * are completely predictable.) + * + * These macros rely on the standard gcc "regparm(3)" calling + * convention, in which the first three arguments are placed in %eax, + * %edx, %ecx (in that order), and the remaining arguments are placed + * on the stack. All caller-save registers (eax,edx,ecx) are expected + * to be modified (either clobbered or used for return values). + * + * The call instruction itself is marked by placing its start address + * and size into the .parainstructions section, so that + * apply_paravirt() in arch/i386/kernel/alternative.c can do the + * appropriate patching under the control of the backend paravirt_ops + * implementation. + * + * Unfortunately there's no way to get gcc to generate the args setup + * for the call, and then allow the call itself to be generated by an + * inline asm. Because of this, we must do the complete arg setup and + * return value handling from within these macros. This is fairly + * cumbersome. + * + * There are 5 sets of PVOP_* macros for dealing with 0-4 arguments. + * It could be extended to more arguments, but there would be little + * to be gained from that. For each number of arguments, there are + * the two VCALL and CALL variants for void and non-void functions. + * + * When there is a return value, the invoker of the macro must specify + * the return type. The macro then uses sizeof() on that type to + * determine whether its a 32 or 64 bit value, and places the return + * in the right register(s) (just %eax for 32-bit, and %edx:%eax for + * 64-bit). + * + * 64-bit arguments are passed as a pair of adjacent 32-bit arguments + * in low,high order. + * + * Small structures are passed and returned in registers. The macro + * calling convention can't directly deal with this, so the wrapper + * functions must do this. + * + * These PVOP_* macros are only defined within this header. This + * means that all uses must be wrapped in inline functions. This also + * makes sure the incoming and outgoing types are always correct. + */ #define PVOP_CALL0(__rettype, __op) \ ({ \ __rettype __ret; \ @@ -1026,6 +1136,7 @@ static inline unsigned long __raw_local_irq_save(void) [paravirt_sti_type] "i" (PARAVIRT_PATCH(irq_enable)), \ paravirt_clobber(CLBR_EAX) +/* Make sure as little as possible of this mess escapes. */ #undef PARAVIRT_CALL #undef PVOP_VCALL0 #undef PVOP_CALL0 -- cgit v1.2.1 From 63f70270ccd981ce40a8ff58c03a8c2e97e368be Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 2 May 2007 19:27:14 +0200 Subject: [PATCH] i386: PARAVIRT: add common patching machinery Implement the actual patching machinery. paravirt_patch_default() contains the logic to automatically patch a callsite based on a few simple rules: - if the paravirt_op function is paravirt_nop, then patch nops - if the paravirt_op function is a jmp target, then jmp to it - if the paravirt_op function is callable and doesn't clobber too much for the callsite, call it directly paravirt_patch_default is suitable as a default implementation of paravirt_ops.patch, will remove most of the expensive indirect calls in favour of either a direct call or a pile of nops. Backends may implement their own patcher, however. There are several helper functions to help with this: paravirt_patch_nop nop out a callsite paravirt_patch_ignore leave the callsite as-is paravirt_patch_call patch a call if the caller and callee have compatible clobbers paravirt_patch_jmp patch in a jmp paravirt_patch_insns patch some literal instructions over the callsite, if they fit This patch also implements more direct patches for the native case, so that when running on native hardware many common operations are implemented inline. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Andi Kleen Cc: Rusty Russell Cc: Zachary Amsden Cc: Anthony Liguori Acked-by: Ingo Molnar --- include/asm-i386/paravirt.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/asm-i386/paravirt.h') diff --git a/include/asm-i386/paravirt.h b/include/asm-i386/paravirt.h index 8bfaf10d9961..4b3d50858670 100644 --- a/include/asm-i386/paravirt.h +++ b/include/asm-i386/paravirt.h @@ -248,6 +248,18 @@ extern struct paravirt_ops paravirt_ops; #define paravirt_alt(insn_string) \ _paravirt_alt(insn_string, "%c[paravirt_typenum]", "%c[paravirt_clobber]") +unsigned paravirt_patch_nop(void); +unsigned paravirt_patch_ignore(unsigned len); +unsigned paravirt_patch_call(void *target, u16 tgt_clobbers, + void *site, u16 site_clobbers, + unsigned len); +unsigned paravirt_patch_jmp(void *target, void *site, unsigned len); +unsigned paravirt_patch_default(u8 type, u16 clobbers, void *site, unsigned len); + +unsigned paravirt_patch_insns(void *site, unsigned len, + const char *start, const char *end); + + /* * This generates an indirect call based on the operation type number. * The type number, computed in PARAVIRT_PATCH, is derived from the -- cgit v1.2.1 From d4c104771a1c58e3de2a888b73b0ba1b54c0ae76 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 2 May 2007 19:27:15 +0200 Subject: [PATCH] i386: PARAVIRT: add flush_tlb_others paravirt_op This patch adds a pv_op for flush_tlb_others. Linux running on native hardware uses cross-CPU IPIs to flush the TLB on any CPU which may have a particular mm's pagetable entries cached in its TLB. This is inefficient in a paravirtualized environment, since the hypervisor knows which real CPUs actually contain cached mappings, which may be a small subset of a guest's VCPUs. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Andi Kleen --- include/asm-i386/paravirt.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/asm-i386/paravirt.h') diff --git a/include/asm-i386/paravirt.h b/include/asm-i386/paravirt.h index 4b3d50858670..f880b06d6d56 100644 --- a/include/asm-i386/paravirt.h +++ b/include/asm-i386/paravirt.h @@ -15,6 +15,7 @@ #ifndef __ASSEMBLY__ #include +#include struct thread_struct; struct Xgt_desc_struct; @@ -165,6 +166,8 @@ struct paravirt_ops void (*flush_tlb_user)(void); void (*flush_tlb_kernel)(void); void (*flush_tlb_single)(unsigned long addr); + void (*flush_tlb_others)(const cpumask_t *cpus, struct mm_struct *mm, + unsigned long va); void (*map_pt_hook)(int type, pte_t *va, u32 pfn); @@ -853,6 +856,12 @@ static inline void __flush_tlb_single(unsigned long addr) PVOP_VCALL1(flush_tlb_single, addr); } +static inline void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm, + unsigned long va) +{ + PVOP_VCALL3(flush_tlb_others, &cpumask, mm, va); +} + static inline void paravirt_map_pt_hook(int type, pte_t *va, u32 pfn) { PVOP_VCALL3(map_pt_hook, type, va, pfn); -- cgit v1.2.1 From a27fe809b82c5e18932fcceded28d0d1481ce7bb Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 2 May 2007 19:27:15 +0200 Subject: [PATCH] i386: PARAVIRT: revert map_pt_hook. Back out the map_pt_hook to clear the way for kmap_atomic_pte. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Andi Kleen Cc: Zachary Amsden --- include/asm-i386/paravirt.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include/asm-i386/paravirt.h') diff --git a/include/asm-i386/paravirt.h b/include/asm-i386/paravirt.h index f880b06d6d56..10f44af76b49 100644 --- a/include/asm-i386/paravirt.h +++ b/include/asm-i386/paravirt.h @@ -169,8 +169,6 @@ struct paravirt_ops void (*flush_tlb_others)(const cpumask_t *cpus, struct mm_struct *mm, unsigned long va); - void (*map_pt_hook)(int type, pte_t *va, u32 pfn); - /* Hooks for allocating/releasing pagetable pages */ void (*alloc_pt)(u32 pfn); void (*alloc_pd)(u32 pfn); @@ -862,11 +860,6 @@ static inline void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm, PVOP_VCALL3(flush_tlb_others, &cpumask, mm, va); } -static inline void paravirt_map_pt_hook(int type, pte_t *va, u32 pfn) -{ - PVOP_VCALL3(map_pt_hook, type, va, pfn); -} - static inline void paravirt_alloc_pt(unsigned pfn) { PVOP_VCALL1(alloc_pt, pfn); -- cgit v1.2.1 From ce6234b5298902aaec831a67d5f8d9bd2ef5a488 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 2 May 2007 19:27:15 +0200 Subject: [PATCH] i386: PARAVIRT: add kmap_atomic_pte for mapping highpte pages Xen and VMI both have special requirements when mapping a highmem pte page into the kernel address space. These can be dealt with by adding a new kmap_atomic_pte() function for mapping highptes, and hooking it into the paravirt_ops infrastructure. Xen specifically wants to map the pte page RO, so this patch exposes a helper function, kmap_atomic_prot, which maps the page with the specified page protections. This also adds a kmap_flush_unused() function to clear out the cached kmap mappings. Xen needs this to clear out any potential stray RW mappings of pages which will become part of a pagetable. [ Zach - vmi.c will need some attention after this patch. It wasn't immediately obvious to me what needs to be done. ] Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Andi Kleen Cc: Zachary Amsden --- include/asm-i386/paravirt.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/asm-i386/paravirt.h') diff --git a/include/asm-i386/paravirt.h b/include/asm-i386/paravirt.h index 10f44af76b49..5048b41428fa 100644 --- a/include/asm-i386/paravirt.h +++ b/include/asm-i386/paravirt.h @@ -16,7 +16,9 @@ #ifndef __ASSEMBLY__ #include #include +#include +struct page; struct thread_struct; struct Xgt_desc_struct; struct tss_struct; @@ -187,6 +189,10 @@ struct paravirt_ops pte_t (*ptep_get_and_clear)(pte_t *ptep); +#ifdef CONFIG_HIGHPTE + void *(*kmap_atomic_pte)(struct page *page, enum km_type type); +#endif + #ifdef CONFIG_X86_PAE void (*set_pte_atomic)(pte_t *ptep, pte_t pteval); void (*set_pte_present)(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte); @@ -884,6 +890,15 @@ static inline void paravirt_release_pd(unsigned pfn) PVOP_VCALL1(release_pd, pfn); } +#ifdef CONFIG_HIGHPTE +static inline void *kmap_atomic_pte(struct page *page, enum km_type type) +{ + unsigned long ret; + ret = PVOP_CALL2(unsigned long, kmap_atomic_pte, page, type); + return (void *)ret; +} +#endif + static inline void pte_update(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { -- cgit v1.2.1 From 4e0fa85602a4fa219fc3a9c053d5140bf987d3e3 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 2 May 2007 19:27:15 +0200 Subject: [PATCH] i386: PARAVIRT: Use enums for paravirt lazy flush modi Remove #defines, add enum for PARAVIRT_LAZY_FLUSH. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Andi Kleen --- include/asm-i386/paravirt.h | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'include/asm-i386/paravirt.h') diff --git a/include/asm-i386/paravirt.h b/include/asm-i386/paravirt.h index 5048b41428fa..c5451923c79e 100644 --- a/include/asm-i386/paravirt.h +++ b/include/asm-i386/paravirt.h @@ -30,6 +30,7 @@ enum paravirt_lazy_mode { PARAVIRT_LAZY_NONE = 0, PARAVIRT_LAZY_MMU = 1, PARAVIRT_LAZY_CPU = 2, + PARAVIRT_LAZY_FLUSH = 3, }; struct paravirt_ops @@ -1036,12 +1037,6 @@ static inline pte_t raw_ptep_get_and_clear(pte_t *p) } #endif /* CONFIG_X86_PAE */ -/* Lazy mode for batching updates / context switch */ -#define PARAVIRT_LAZY_NONE 0 -#define PARAVIRT_LAZY_MMU 1 -#define PARAVIRT_LAZY_CPU 2 -#define PARAVIRT_LAZY_FLUSH 3 - #define __HAVE_ARCH_ENTER_LAZY_CPU_MODE static inline void arch_enter_lazy_cpu_mode(void) { -- cgit v1.2.1 From 1a45b7aaa5051489b46afbc48509bd91f8b4a1ba Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 2 May 2007 19:27:15 +0200 Subject: [PATCH] i386: PARAVIRT: Clean up paravirt patchable wrappers Replace all the open-coded macros for generating calls with a pair of more general macros (__PVOP_CALL/VCALL), and redefine all the PVOP_V?CALL[0-4] in terms of them. [ Andrew, Andi: this should slot in immediately after "Document asm-i386/paravirt.h" (paravirt_ops-document-asm-i386-paravirth.patch) ] Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Andi Kleen Cc: Ingo Molnar --- include/asm-i386/paravirt.h | 248 +++++++++++--------------------------------- 1 file changed, 60 insertions(+), 188 deletions(-) (limited to 'include/asm-i386/paravirt.h') diff --git a/include/asm-i386/paravirt.h b/include/asm-i386/paravirt.h index c5451923c79e..2ba18963e114 100644 --- a/include/asm-i386/paravirt.h +++ b/include/asm-i386/paravirt.h @@ -332,211 +332,81 @@ unsigned paravirt_patch_insns(void *site, unsigned len, * means that all uses must be wrapped in inline functions. This also * makes sure the incoming and outgoing types are always correct. */ -#define PVOP_CALL0(__rettype, __op) \ - ({ \ - __rettype __ret; \ - if (sizeof(__rettype) > sizeof(unsigned long)) { \ - unsigned long long __tmp; \ - unsigned long __ecx; \ - asm volatile(paravirt_alt(PARAVIRT_CALL) \ - : "=A" (__tmp), "=c" (__ecx) \ - : paravirt_type(__op), \ - paravirt_clobber(CLBR_ANY) \ - : "memory", "cc"); \ - __ret = (__rettype)__tmp; \ - } else { \ - unsigned long __tmp, __edx, __ecx; \ - asm volatile(paravirt_alt(PARAVIRT_CALL) \ - : "=a" (__tmp), "=d" (__edx), \ - "=c" (__ecx) \ - : paravirt_type(__op), \ - paravirt_clobber(CLBR_ANY) \ - : "memory", "cc"); \ - __ret = (__rettype)__tmp; \ - } \ - __ret; \ - }) -#define PVOP_VCALL0(__op) \ +#define __PVOP_CALL(rettype, op, pre, post, ...) \ ({ \ + rettype __ret; \ unsigned long __eax, __edx, __ecx; \ - asm volatile(paravirt_alt(PARAVIRT_CALL) \ - : "=a" (__eax), "=d" (__edx), "=c" (__ecx) \ - : paravirt_type(__op), \ - paravirt_clobber(CLBR_ANY) \ - : "memory", "cc"); \ - }) - -#define PVOP_CALL1(__rettype, __op, arg1) \ - ({ \ - __rettype __ret; \ - if (sizeof(__rettype) > sizeof(unsigned long)) { \ - unsigned long long __tmp; \ - unsigned long __ecx; \ - asm volatile(paravirt_alt(PARAVIRT_CALL) \ - : "=A" (__tmp), "=c" (__ecx) \ - : "a" ((u32)(arg1)), \ - paravirt_type(__op), \ - paravirt_clobber(CLBR_ANY) \ - : "memory", "cc"); \ - __ret = (__rettype)__tmp; \ - } else { \ - unsigned long __tmp, __edx, __ecx; \ - asm volatile(paravirt_alt(PARAVIRT_CALL) \ - : "=a" (__tmp), "=d" (__edx), \ - "=c" (__ecx) \ - : "0" ((u32)(arg1)), \ - paravirt_type(__op), \ - paravirt_clobber(CLBR_ANY) \ - : "memory", "cc"); \ - __ret = (__rettype)__tmp; \ - } \ - __ret; \ - }) -#define PVOP_VCALL1(__op, arg1) \ - ({ \ - unsigned long __eax, __edx, __ecx; \ - asm volatile(paravirt_alt(PARAVIRT_CALL) \ - : "=a" (__eax), "=d" (__edx), "=c" (__ecx) \ - : "0" ((u32)(arg1)), \ - paravirt_type(__op), \ - paravirt_clobber(CLBR_ANY) \ - : "memory", "cc"); \ - }) - -#define PVOP_CALL2(__rettype, __op, arg1, arg2) \ - ({ \ - __rettype __ret; \ - if (sizeof(__rettype) > sizeof(unsigned long)) { \ - unsigned long long __tmp; \ - unsigned long __ecx; \ - asm volatile(paravirt_alt(PARAVIRT_CALL) \ - : "=A" (__tmp), "=c" (__ecx) \ - : "a" ((u32)(arg1)), \ - "d" ((u32)(arg2)), \ - paravirt_type(__op), \ - paravirt_clobber(CLBR_ANY) \ - : "memory", "cc"); \ - __ret = (__rettype)__tmp; \ - } else { \ - unsigned long __tmp, __edx, __ecx; \ - asm volatile(paravirt_alt(PARAVIRT_CALL) \ - : "=a" (__tmp), "=d" (__edx), \ - "=c" (__ecx) \ - : "0" ((u32)(arg1)), \ - "1" ((u32)(arg2)), \ - paravirt_type(__op), \ - paravirt_clobber(CLBR_ANY) \ - : "memory", "cc"); \ - __ret = (__rettype)__tmp; \ - } \ - __ret; \ - }) -#define PVOP_VCALL2(__op, arg1, arg2) \ - ({ \ - unsigned long __eax, __edx, __ecx; \ - asm volatile(paravirt_alt(PARAVIRT_CALL) \ - : "=a" (__eax), "=d" (__edx), "=c" (__ecx) \ - : "0" ((u32)(arg1)), \ - "1" ((u32)(arg2)), \ - paravirt_type(__op), \ - paravirt_clobber(CLBR_ANY) \ - : "memory", "cc"); \ - }) - -#define PVOP_CALL3(__rettype, __op, arg1, arg2, arg3) \ - ({ \ - __rettype __ret; \ - if (sizeof(__rettype) > sizeof(unsigned long)) { \ - unsigned long long __tmp; \ - unsigned long __ecx; \ - asm volatile(paravirt_alt(PARAVIRT_CALL) \ - : "=A" (__tmp), "=c" (__ecx) \ - : "a" ((u32)(arg1)), \ - "d" ((u32)(arg2)), \ - "1" ((u32)(arg3)), \ - paravirt_type(__op), \ - paravirt_clobber(CLBR_ANY) \ - : "memory", "cc"); \ - __ret = (__rettype)__tmp; \ - } else { \ - unsigned long __tmp, __edx, __ecx; \ - asm volatile(paravirt_alt(PARAVIRT_CALL) \ - : "=a" (__tmp), "=d" (__edx), \ + if (sizeof(rettype) > sizeof(unsigned long)) { \ + asm volatile(pre \ + paravirt_alt(PARAVIRT_CALL) \ + post \ + : "=a" (__eax), "=d" (__edx), \ "=c" (__ecx) \ - : "0" ((u32)(arg1)), \ - "1" ((u32)(arg2)), \ - "2" ((u32)(arg3)), \ - paravirt_type(__op), \ - paravirt_clobber(CLBR_ANY) \ + : paravirt_type(op), \ + paravirt_clobber(CLBR_ANY), \ + ##__VA_ARGS__ \ : "memory", "cc"); \ - __ret = (__rettype)__tmp; \ - } \ - __ret; \ - }) -#define PVOP_VCALL3(__op, arg1, arg2, arg3) \ - ({ \ - unsigned long __eax, __edx, __ecx; \ - asm volatile(paravirt_alt(PARAVIRT_CALL) \ - : "=a" (__eax), "=d" (__edx), "=c" (__ecx) \ - : "0" ((u32)(arg1)), \ - "1" ((u32)(arg2)), \ - "2" ((u32)(arg3)), \ - paravirt_type(__op), \ - paravirt_clobber(CLBR_ANY) \ - : "memory", "cc"); \ - }) - -#define PVOP_CALL4(__rettype, __op, arg1, arg2, arg3, arg4) \ - ({ \ - __rettype __ret; \ - if (sizeof(__rettype) > sizeof(unsigned long)) { \ - unsigned long long __tmp; \ - unsigned long __ecx; \ - asm volatile("push %[_arg4]; " \ - paravirt_alt(PARAVIRT_CALL) \ - "lea 4(%%esp),%%esp" \ - : "=A" (__tmp), "=c" (__ecx) \ - : "a" ((u32)(arg1)), \ - "d" ((u32)(arg2)), \ - "1" ((u32)(arg3)), \ - [_arg4] "mr" ((u32)(arg4)), \ - paravirt_type(__op), \ - paravirt_clobber(CLBR_ANY) \ - : "memory", "cc",); \ - __ret = (__rettype)__tmp; \ + __ret = (rettype)((((u64)__edx) << 32) | __eax); \ } else { \ - unsigned long __tmp, __edx, __ecx; \ - asm volatile("push %[_arg4]; " \ + asm volatile(pre \ paravirt_alt(PARAVIRT_CALL) \ - "lea 4(%%esp),%%esp" \ - : "=a" (__tmp), "=d" (__edx), "=c" (__ecx) \ - : "0" ((u32)(arg1)), \ - "1" ((u32)(arg2)), \ - "2" ((u32)(arg3)), \ - [_arg4]"mr" ((u32)(arg4)), \ - paravirt_type(__op), \ - paravirt_clobber(CLBR_ANY) \ + post \ + : "=a" (__eax), "=d" (__edx), \ + "=c" (__ecx) \ + : paravirt_type(op), \ + paravirt_clobber(CLBR_ANY), \ + ##__VA_ARGS__ \ : "memory", "cc"); \ - __ret = (__rettype)__tmp; \ + __ret = (rettype)__eax; \ } \ __ret; \ }) -#define PVOP_VCALL4(__op, arg1, arg2, arg3, arg4) \ +#define __PVOP_VCALL(op, pre, post, ...) \ ({ \ unsigned long __eax, __edx, __ecx; \ - asm volatile("push %[_arg4]; " \ + asm volatile(pre \ paravirt_alt(PARAVIRT_CALL) \ - "lea 4(%%esp),%%esp" \ + post \ : "=a" (__eax), "=d" (__edx), "=c" (__ecx) \ - : "0" ((u32)(arg1)), \ - "1" ((u32)(arg2)), \ - "2" ((u32)(arg3)), \ - [_arg4]"mr" ((u32)(arg4)), \ - paravirt_type(__op), \ - paravirt_clobber(CLBR_ANY) \ + : paravirt_type(op), \ + paravirt_clobber(CLBR_ANY), \ + ##__VA_ARGS__ \ : "memory", "cc"); \ }) +#define PVOP_CALL0(rettype, op) \ + __PVOP_CALL(rettype, op, "", "") +#define PVOP_VCALL0(op) \ + __PVOP_VCALL(op, "", "") + +#define PVOP_CALL1(rettype, op, arg1) \ + __PVOP_CALL(rettype, op, "", "", "0" ((u32)(arg1))) +#define PVOP_VCALL1(op, arg1) \ + __PVOP_VCALL(op, "", "", "0" ((u32)(arg1))) + +#define PVOP_CALL2(rettype, op, arg1, arg2) \ + __PVOP_CALL(rettype, op, "", "", "0" ((u32)(arg1)), "1" ((u32)(arg2))) +#define PVOP_VCALL2(op, arg1, arg2) \ + __PVOP_VCALL(op, "", "", "0" ((u32)(arg1)), "1" ((u32)(arg2))) + +#define PVOP_CALL3(rettype, op, arg1, arg2, arg3) \ + __PVOP_CALL(rettype, op, "", "", "0" ((u32)(arg1)), \ + "1"((u32)(arg2)), "2"((u32)(arg3))) +#define PVOP_VCALL3(op, arg1, arg2, arg3) \ + __PVOP_VCALL(op, "", "", "0" ((u32)(arg1)), "1"((u32)(arg2)), \ + "2"((u32)(arg3))) + +#define PVOP_CALL4(rettype, op, arg1, arg2, arg3, arg4) \ + __PVOP_CALL(rettype, op, \ + "push %[_arg4];", "lea 4(%%esp),%%esp;", \ + "0" ((u32)(arg1)), "1" ((u32)(arg2)), \ + "2" ((u32)(arg3)), [_arg4] "mr" ((u32)(arg4))) +#define PVOP_VCALL4(op, arg1, arg2, arg3, arg4) \ + __PVOP_VCALL(op, \ + "push %[_arg4];", "lea 4(%%esp),%%esp;", \ + "0" ((u32)(arg1)), "1" ((u32)(arg2)), \ + "2" ((u32)(arg3)), [_arg4] "mr" ((u32)(arg4))) + static inline int paravirt_enabled(void) { return paravirt_ops.paravirt_enabled; @@ -1162,6 +1032,8 @@ static inline unsigned long __raw_local_irq_save(void) /* Make sure as little as possible of this mess escapes. */ #undef PARAVIRT_CALL +#undef __PVOP_CALL +#undef __PVOP_VCALL #undef PVOP_VCALL0 #undef PVOP_CALL0 #undef PVOP_VCALL1 -- cgit v1.2.1 From 4cdd9c8931767e1c56a51a1078d33a8c340f4405 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 2 May 2007 19:27:15 +0200 Subject: [PATCH] i386: PARAVIRT: drop unused ptep_get_and_clear In shadow mode hypervisors, ptep_get_and_clear achieves the desired purpose of keeping the shadows in sync by issuing a native_get_and_clear, followed by a call to pte_update, which indicates the PTE has been modified. Direct mode hypervisors (Xen) have no need for this anyway, and will trap the update using writable pagetables. This means no hypervisor makes use of ptep_get_and_clear; there is no reason to have it in the paravirt-ops structure. Change confusing terminology about raw vs. native functions into consistent use of native_pte_xxx for operations which do not invoke paravirt-ops. Signed-off-by: Zachary Amsden Signed-off-by: Andi Kleen --- include/asm-i386/paravirt.h | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) (limited to 'include/asm-i386/paravirt.h') diff --git a/include/asm-i386/paravirt.h b/include/asm-i386/paravirt.h index 2ba18963e114..e2e7f98723c5 100644 --- a/include/asm-i386/paravirt.h +++ b/include/asm-i386/paravirt.h @@ -188,8 +188,6 @@ struct paravirt_ops void (*pte_update_defer)(struct mm_struct *mm, unsigned long addr, pte_t *ptep); - pte_t (*ptep_get_and_clear)(pte_t *ptep); - #ifdef CONFIG_HIGHPTE void *(*kmap_atomic_pte)(struct page *page, enum km_type type); #endif @@ -859,12 +857,8 @@ static inline void pmd_clear(pmd_t *pmdp) PVOP_VCALL1(pmd_clear, pmdp); } -static inline pte_t raw_ptep_get_and_clear(pte_t *p) -{ - unsigned long long val = PVOP_CALL1(unsigned long long, ptep_get_and_clear, p); - return (pte_t) { val, val >> 32 }; -} #else /* !CONFIG_X86_PAE */ + static inline pte_t __pte(unsigned long val) { return (pte_t) { PVOP_CALL1(unsigned long, make_pte, val) }; @@ -900,11 +894,6 @@ static inline void set_pmd(pmd_t *pmdp, pmd_t pmdval) { PVOP_VCALL2(set_pmd, pmdp, pmdval.pud.pgd.pgd); } - -static inline pte_t raw_ptep_get_and_clear(pte_t *p) -{ - return (pte_t) { PVOP_CALL1(unsigned long, ptep_get_and_clear, p) }; -} #endif /* CONFIG_X86_PAE */ #define __HAVE_ARCH_ENTER_LAZY_CPU_MODE -- cgit v1.2.1