diff options
Diffstat (limited to 'include/asm-x86')
209 files changed, 10227 insertions, 11740 deletions
diff --git a/include/asm-x86/Kbuild b/include/asm-x86/Kbuild index 12db5a1cdd74..3c6f0f80e827 100644 --- a/include/asm-x86/Kbuild +++ b/include/asm-x86/Kbuild @@ -3,21 +3,20 @@ include include/asm-generic/Kbuild.asm header-y += boot.h header-y += bootparam.h header-y += debugreg.h +header-y += kvm.h header-y += ldt.h header-y += msr-index.h header-y += prctl.h header-y += ptrace-abi.h header-y += sigcontext32.h header-y += ucontext.h -header-y += vsyscall32.h unifdef-y += e820.h unifdef-y += ist.h unifdef-y += mce.h unifdef-y += msr.h unifdef-y += mtrr.h -unifdef-y += page_32.h -unifdef-y += page_64.h +unifdef-y += page.h unifdef-y += posix_types_32.h unifdef-y += posix_types_64.h unifdef-y += ptrace.h diff --git a/include/asm-x86/acpi.h b/include/asm-x86/acpi.h index f8a89793ac8c..98a9ca266531 100644 --- a/include/asm-x86/acpi.h +++ b/include/asm-x86/acpi.h @@ -1,13 +1,123 @@ #ifndef _ASM_X86_ACPI_H #define _ASM_X86_ACPI_H -#ifdef CONFIG_X86_32 -# include "acpi_32.h" -#else -# include "acpi_64.h" -#endif +/* + * Copyright (C) 2001 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com> + * Copyright (C) 2001 Patrick Mochel <mochel@osdl.org> + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + */ +#include <acpi/pdc_intel.h> +#include <asm/numa.h> #include <asm/processor.h> +#include <asm/mmu.h> + +#define COMPILER_DEPENDENT_INT64 long long +#define COMPILER_DEPENDENT_UINT64 unsigned long long + +/* + * Calling conventions: + * + * ACPI_SYSTEM_XFACE - Interfaces to host OS (handlers, threads) + * ACPI_EXTERNAL_XFACE - External ACPI interfaces + * ACPI_INTERNAL_XFACE - Internal ACPI interfaces + * ACPI_INTERNAL_VAR_XFACE - Internal variable-parameter list interfaces + */ +#define ACPI_SYSTEM_XFACE +#define ACPI_EXTERNAL_XFACE +#define ACPI_INTERNAL_XFACE +#define ACPI_INTERNAL_VAR_XFACE + +/* Asm macros */ + +#define ACPI_ASM_MACROS +#define BREAKPOINT3 +#define ACPI_DISABLE_IRQS() local_irq_disable() +#define ACPI_ENABLE_IRQS() local_irq_enable() +#define ACPI_FLUSH_CPU_CACHE() wbinvd() + +int __acpi_acquire_global_lock(unsigned int *lock); +int __acpi_release_global_lock(unsigned int *lock); + +#define ACPI_ACQUIRE_GLOBAL_LOCK(facs, Acq) \ + ((Acq) = __acpi_acquire_global_lock(&facs->global_lock)) + +#define ACPI_RELEASE_GLOBAL_LOCK(facs, Acq) \ + ((Acq) = __acpi_release_global_lock(&facs->global_lock)) + +/* + * Math helper asm macros + */ +#define ACPI_DIV_64_BY_32(n_hi, n_lo, d32, q32, r32) \ + asm("divl %2;" \ + :"=a"(q32), "=d"(r32) \ + :"r"(d32), \ + "0"(n_lo), "1"(n_hi)) + + +#define ACPI_SHIFT_RIGHT_64(n_hi, n_lo) \ + asm("shrl $1,%2 ;" \ + "rcrl $1,%3;" \ + :"=r"(n_hi), "=r"(n_lo) \ + :"0"(n_hi), "1"(n_lo)) + +#ifdef CONFIG_ACPI +extern int acpi_lapic; +extern int acpi_ioapic; +extern int acpi_noirq; +extern int acpi_strict; +extern int acpi_disabled; +extern int acpi_ht; +extern int acpi_pci_disabled; +extern int acpi_skip_timer_override; +extern int acpi_use_timer_override; + +static inline void disable_acpi(void) +{ + acpi_disabled = 1; + acpi_ht = 0; + acpi_pci_disabled = 1; + acpi_noirq = 1; +} + +/* Fixmap pages to reserve for ACPI boot-time tables (see fixmap.h) */ +#define FIX_ACPI_PAGES 4 + +extern int acpi_gsi_to_irq(u32 gsi, unsigned int *irq); + +static inline void acpi_noirq_set(void) { acpi_noirq = 1; } +static inline void acpi_disable_pci(void) +{ + acpi_pci_disabled = 1; + acpi_noirq_set(); +} +extern int acpi_irq_balance_set(char *str); + +/* routines for saving/restoring kernel state */ +extern int acpi_save_state_mem(void); +extern void acpi_restore_state_mem(void); + +extern unsigned long acpi_wakeup_address; + +/* early initialization routine */ +extern void acpi_reserve_bootmem(void); /* * Check if the CPU can handle C2 and deeper @@ -29,4 +139,35 @@ static inline unsigned int acpi_processor_cstate_check(unsigned int max_cstate) return max_cstate; } +#else /* !CONFIG_ACPI */ + +#define acpi_lapic 0 +#define acpi_ioapic 0 +static inline void acpi_noirq_set(void) { } +static inline void acpi_disable_pci(void) { } +static inline void disable_acpi(void) { } + +#endif /* !CONFIG_ACPI */ + +#define ARCH_HAS_POWER_INIT 1 + +struct bootnode; + +#ifdef CONFIG_ACPI_NUMA +extern int acpi_numa; +extern int acpi_scan_nodes(unsigned long start, unsigned long end); +#ifdef CONFIG_X86_64 +# define NR_NODE_MEMBLKS (MAX_NUMNODES*2) +#endif +extern void acpi_fake_nodes(const struct bootnode *fake_nodes, + int num_nodes); +#else +static inline void acpi_fake_nodes(const struct bootnode *fake_nodes, + int num_nodes) +{ +} #endif + +#define acpi_unlazy_tlb(x) leave_mm(x) + +#endif /*__X86_ASM_ACPI_H*/ diff --git a/include/asm-x86/acpi_32.h b/include/asm-x86/acpi_32.h deleted file mode 100644 index 723493e6c851..000000000000 --- a/include/asm-x86/acpi_32.h +++ /dev/null @@ -1,143 +0,0 @@ -/* - * asm-i386/acpi.h - * - * Copyright (C) 2001 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com> - * Copyright (C) 2001 Patrick Mochel <mochel@osdl.org> - * - * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - */ - -#ifndef _ASM_ACPI_H -#define _ASM_ACPI_H - -#ifdef __KERNEL__ - -#include <acpi/pdc_intel.h> - -#include <asm/system.h> /* defines cmpxchg */ - -#define COMPILER_DEPENDENT_INT64 long long -#define COMPILER_DEPENDENT_UINT64 unsigned long long - -/* - * Calling conventions: - * - * ACPI_SYSTEM_XFACE - Interfaces to host OS (handlers, threads) - * ACPI_EXTERNAL_XFACE - External ACPI interfaces - * ACPI_INTERNAL_XFACE - Internal ACPI interfaces - * ACPI_INTERNAL_VAR_XFACE - Internal variable-parameter list interfaces - */ -#define ACPI_SYSTEM_XFACE -#define ACPI_EXTERNAL_XFACE -#define ACPI_INTERNAL_XFACE -#define ACPI_INTERNAL_VAR_XFACE - -/* Asm macros */ - -#define ACPI_ASM_MACROS -#define BREAKPOINT3 -#define ACPI_DISABLE_IRQS() local_irq_disable() -#define ACPI_ENABLE_IRQS() local_irq_enable() -#define ACPI_FLUSH_CPU_CACHE() wbinvd() - -int __acpi_acquire_global_lock(unsigned int *lock); -int __acpi_release_global_lock(unsigned int *lock); - -#define ACPI_ACQUIRE_GLOBAL_LOCK(facs, Acq) \ - ((Acq) = __acpi_acquire_global_lock(&facs->global_lock)) - -#define ACPI_RELEASE_GLOBAL_LOCK(facs, Acq) \ - ((Acq) = __acpi_release_global_lock(&facs->global_lock)) - -/* - * Math helper asm macros - */ -#define ACPI_DIV_64_BY_32(n_hi, n_lo, d32, q32, r32) \ - asm("divl %2;" \ - :"=a"(q32), "=d"(r32) \ - :"r"(d32), \ - "0"(n_lo), "1"(n_hi)) - - -#define ACPI_SHIFT_RIGHT_64(n_hi, n_lo) \ - asm("shrl $1,%2;" \ - "rcrl $1,%3;" \ - :"=r"(n_hi), "=r"(n_lo) \ - :"0"(n_hi), "1"(n_lo)) - -extern void early_quirks(void); - -#ifdef CONFIG_ACPI -extern int acpi_lapic; -extern int acpi_ioapic; -extern int acpi_noirq; -extern int acpi_strict; -extern int acpi_disabled; -extern int acpi_ht; -extern int acpi_pci_disabled; -static inline void disable_acpi(void) -{ - acpi_disabled = 1; - acpi_ht = 0; - acpi_pci_disabled = 1; - acpi_noirq = 1; -} - -/* Fixmap pages to reserve for ACPI boot-time tables (see fixmap.h) */ -#define FIX_ACPI_PAGES 4 - -extern int acpi_gsi_to_irq(u32 gsi, unsigned int *irq); - -#ifdef CONFIG_X86_IO_APIC -extern int acpi_skip_timer_override; -extern int acpi_use_timer_override; -#endif - -static inline void acpi_noirq_set(void) { acpi_noirq = 1; } -static inline void acpi_disable_pci(void) -{ - acpi_pci_disabled = 1; - acpi_noirq_set(); -} -extern int acpi_irq_balance_set(char *str); - -/* routines for saving/restoring kernel state */ -extern int acpi_save_state_mem(void); -extern void acpi_restore_state_mem(void); - -extern unsigned long acpi_wakeup_address; - -/* early initialization routine */ -extern void acpi_reserve_bootmem(void); - -#else /* !CONFIG_ACPI */ - -#define acpi_lapic 0 -#define acpi_ioapic 0 -static inline void acpi_noirq_set(void) { } -static inline void acpi_disable_pci(void) { } -static inline void disable_acpi(void) { } - -#endif /* !CONFIG_ACPI */ - -#define ARCH_HAS_POWER_INIT 1 - -#endif /*__KERNEL__*/ - -#endif /*_ASM_ACPI_H*/ diff --git a/include/asm-x86/acpi_64.h b/include/asm-x86/acpi_64.h deleted file mode 100644 index 98173357dd89..000000000000 --- a/include/asm-x86/acpi_64.h +++ /dev/null @@ -1,153 +0,0 @@ -/* - * asm-x86_64/acpi.h - * - * Copyright (C) 2001 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com> - * Copyright (C) 2001 Patrick Mochel <mochel@osdl.org> - * - * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - */ - -#ifndef _ASM_ACPI_H -#define _ASM_ACPI_H - -#ifdef __KERNEL__ - -#include <acpi/pdc_intel.h> -#include <asm/numa.h> - -#define COMPILER_DEPENDENT_INT64 long long -#define COMPILER_DEPENDENT_UINT64 unsigned long long - -/* - * Calling conventions: - * - * ACPI_SYSTEM_XFACE - Interfaces to host OS (handlers, threads) - * ACPI_EXTERNAL_XFACE - External ACPI interfaces - * ACPI_INTERNAL_XFACE - Internal ACPI interfaces - * ACPI_INTERNAL_VAR_XFACE - Internal variable-parameter list interfaces - */ -#define ACPI_SYSTEM_XFACE -#define ACPI_EXTERNAL_XFACE -#define ACPI_INTERNAL_XFACE -#define ACPI_INTERNAL_VAR_XFACE - -/* Asm macros */ - -#define ACPI_ASM_MACROS -#define BREAKPOINT3 -#define ACPI_DISABLE_IRQS() local_irq_disable() -#define ACPI_ENABLE_IRQS() local_irq_enable() -#define ACPI_FLUSH_CPU_CACHE() wbinvd() - -int __acpi_acquire_global_lock(unsigned int *lock); -int __acpi_release_global_lock(unsigned int *lock); - -#define ACPI_ACQUIRE_GLOBAL_LOCK(facs, Acq) \ - ((Acq) = __acpi_acquire_global_lock(&facs->global_lock)) - -#define ACPI_RELEASE_GLOBAL_LOCK(facs, Acq) \ - ((Acq) = __acpi_release_global_lock(&facs->global_lock)) - -/* - * Math helper asm macros - */ -#define ACPI_DIV_64_BY_32(n_hi, n_lo, d32, q32, r32) \ - asm("divl %2;" \ - :"=a"(q32), "=d"(r32) \ - :"r"(d32), \ - "0"(n_lo), "1"(n_hi)) - - -#define ACPI_SHIFT_RIGHT_64(n_hi, n_lo) \ - asm("shrl $1,%2;" \ - "rcrl $1,%3;" \ - :"=r"(n_hi), "=r"(n_lo) \ - :"0"(n_hi), "1"(n_lo)) - -#ifdef CONFIG_ACPI -extern int acpi_lapic; -extern int acpi_ioapic; -extern int acpi_noirq; -extern int acpi_strict; -extern int acpi_disabled; -extern int acpi_pci_disabled; -extern int acpi_ht; -static inline void disable_acpi(void) -{ - acpi_disabled = 1; - acpi_ht = 0; - acpi_pci_disabled = 1; - acpi_noirq = 1; -} - -/* Fixmap pages to reserve for ACPI boot-time tables (see fixmap.h) */ -#define FIX_ACPI_PAGES 4 - -extern int acpi_gsi_to_irq(u32 gsi, unsigned int *irq); -static inline void acpi_noirq_set(void) { acpi_noirq = 1; } -static inline void acpi_disable_pci(void) -{ - acpi_pci_disabled = 1; - acpi_noirq_set(); -} -extern int acpi_irq_balance_set(char *str); - -/* routines for saving/restoring kernel state */ -extern int acpi_save_state_mem(void); -extern void acpi_restore_state_mem(void); - -extern unsigned long acpi_wakeup_address; - -/* early initialization routine */ -extern void acpi_reserve_bootmem(void); - -#else /* !CONFIG_ACPI */ - -#define acpi_lapic 0 -#define acpi_ioapic 0 -static inline void acpi_noirq_set(void) { } -static inline void acpi_disable_pci(void) { } - -#endif /* !CONFIG_ACPI */ - -extern int acpi_numa; -extern int acpi_scan_nodes(unsigned long start, unsigned long end); -#define NR_NODE_MEMBLKS (MAX_NUMNODES*2) - -extern int acpi_disabled; -extern int acpi_pci_disabled; - -#define ARCH_HAS_POWER_INIT 1 - -extern int acpi_skip_timer_override; -extern int acpi_use_timer_override; - -#ifdef CONFIG_ACPI_NUMA -extern void __init acpi_fake_nodes(const struct bootnode *fake_nodes, - int num_nodes); -#else -static inline void acpi_fake_nodes(const struct bootnode *fake_nodes, - int num_nodes) -{ -} -#endif - -#endif /*__KERNEL__*/ - -#endif /*_ASM_ACPI_H*/ diff --git a/include/asm-x86/agp.h b/include/asm-x86/agp.h index 62df2a9e7130..e4004a9f6a9a 100644 --- a/include/asm-x86/agp.h +++ b/include/asm-x86/agp.h @@ -12,13 +12,8 @@ * page. This avoids data corruption on some CPUs. */ -/* - * Caller's responsibility to call global_flush_tlb() for performance - * reasons - */ -#define map_page_into_agp(page) change_page_attr(page, 1, PAGE_KERNEL_NOCACHE) -#define unmap_page_from_agp(page) change_page_attr(page, 1, PAGE_KERNEL) -#define flush_agp_mappings() global_flush_tlb() +#define map_page_into_agp(page) set_pages_uc(page, 1) +#define unmap_page_from_agp(page) set_pages_wb(page, 1) /* * Could use CLFLUSH here if the cpu supports it. But then it would diff --git a/include/asm-x86/alternative.h b/include/asm-x86/alternative.h index 9eef6a32a130..d8bacf3c4b08 100644 --- a/include/asm-x86/alternative.h +++ b/include/asm-x86/alternative.h @@ -1,5 +1,161 @@ -#ifdef CONFIG_X86_32 -# include "alternative_32.h" +#ifndef _ASM_X86_ALTERNATIVE_H +#define _ASM_X86_ALTERNATIVE_H + +#include <linux/types.h> +#include <linux/stddef.h> +#include <asm/asm.h> + +/* + * Alternative inline assembly for SMP. + * + * The LOCK_PREFIX macro defined here replaces the LOCK and + * LOCK_PREFIX macros used everywhere in the source tree. + * + * SMP alternatives use the same data structures as the other + * alternatives and the X86_FEATURE_UP flag to indicate the case of a + * UP system running a SMP kernel. The existing apply_alternatives() + * works fine for patching a SMP kernel for UP. + * + * The SMP alternative tables can be kept after boot and contain both + * UP and SMP versions of the instructions to allow switching back to + * SMP at runtime, when hotplugging in a new CPU, which is especially + * useful in virtualized environments. + * + * The very common lock prefix is handled as special case in a + * separate table which is a pure address list without replacement ptr + * and size information. That keeps the table sizes small. + */ + +#ifdef CONFIG_SMP +#define LOCK_PREFIX \ + ".section .smp_locks,\"a\"\n" \ + _ASM_ALIGN "\n" \ + _ASM_PTR "661f\n" /* address */ \ + ".previous\n" \ + "661:\n\tlock; " + +#else /* ! CONFIG_SMP */ +#define LOCK_PREFIX "" +#endif + +/* This must be included *after* the definition of LOCK_PREFIX */ +#include <asm/cpufeature.h> + +struct alt_instr { + u8 *instr; /* original instruction */ + u8 *replacement; + u8 cpuid; /* cpuid bit set for replacement */ + u8 instrlen; /* length of original instruction */ + u8 replacementlen; /* length of new instruction, <= instrlen */ + u8 pad1; +#ifdef CONFIG_X86_64 + u32 pad2; +#endif +}; + +extern void alternative_instructions(void); +extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end); + +struct module; + +#ifdef CONFIG_SMP +extern void alternatives_smp_module_add(struct module *mod, char *name, + void *locks, void *locks_end, + void *text, void *text_end); +extern void alternatives_smp_module_del(struct module *mod); +extern void alternatives_smp_switch(int smp); +#else +static inline void alternatives_smp_module_add(struct module *mod, char *name, + void *locks, void *locks_end, + void *text, void *text_end) {} +static inline void alternatives_smp_module_del(struct module *mod) {} +static inline void alternatives_smp_switch(int smp) {} +#endif /* CONFIG_SMP */ + +/* + * Alternative instructions for different CPU types or capabilities. + * + * This allows to use optimized instructions even on generic binary + * kernels. + * + * length of oldinstr must be longer or equal the length of newinstr + * It can be padded with nops as needed. + * + * For non barrier like inlines please define new variants + * without volatile and memory clobber. + */ +#define alternative(oldinstr, newinstr, feature) \ + asm volatile ("661:\n\t" oldinstr "\n662:\n" \ + ".section .altinstructions,\"a\"\n" \ + _ASM_ALIGN "\n" \ + _ASM_PTR "661b\n" /* label */ \ + _ASM_PTR "663f\n" /* new instruction */ \ + " .byte %c0\n" /* feature bit */ \ + " .byte 662b-661b\n" /* sourcelen */ \ + " .byte 664f-663f\n" /* replacementlen */ \ + ".previous\n" \ + ".section .altinstr_replacement,\"ax\"\n" \ + "663:\n\t" newinstr "\n664:\n" /* replacement */ \ + ".previous" :: "i" (feature) : "memory") + +/* + * Alternative inline assembly with input. + * + * Pecularities: + * No memory clobber here. + * Argument numbers start with 1. + * Best is to use constraints that are fixed size (like (%1) ... "r") + * If you use variable sized constraints like "m" or "g" in the + * replacement make sure to pad to the worst case length. + */ +#define alternative_input(oldinstr, newinstr, feature, input...) \ + asm volatile ("661:\n\t" oldinstr "\n662:\n" \ + ".section .altinstructions,\"a\"\n" \ + _ASM_ALIGN "\n" \ + _ASM_PTR "661b\n" /* label */ \ + _ASM_PTR "663f\n" /* new instruction */ \ + " .byte %c0\n" /* feature bit */ \ + " .byte 662b-661b\n" /* sourcelen */ \ + " .byte 664f-663f\n" /* replacementlen */ \ + ".previous\n" \ + ".section .altinstr_replacement,\"ax\"\n" \ + "663:\n\t" newinstr "\n664:\n" /* replacement */ \ + ".previous" :: "i" (feature), ##input) + +/* Like alternative_input, but with a single output argument */ +#define alternative_io(oldinstr, newinstr, feature, output, input...) \ + asm volatile ("661:\n\t" oldinstr "\n662:\n" \ + ".section .altinstructions,\"a\"\n" \ + _ASM_ALIGN "\n" \ + _ASM_PTR "661b\n" /* label */ \ + _ASM_PTR "663f\n" /* new instruction */ \ + " .byte %c[feat]\n" /* feature bit */ \ + " .byte 662b-661b\n" /* sourcelen */ \ + " .byte 664f-663f\n" /* replacementlen */ \ + ".previous\n" \ + ".section .altinstr_replacement,\"ax\"\n" \ + "663:\n\t" newinstr "\n664:\n" /* replacement */ \ + ".previous" : output : [feat] "i" (feature), ##input) + +/* + * use this macro(s) if you need more than one output parameter + * in alternative_io + */ +#define ASM_OUTPUT2(a, b) a, b + +struct paravirt_patch_site; +#ifdef CONFIG_PARAVIRT +void apply_paravirt(struct paravirt_patch_site *start, + struct paravirt_patch_site *end); #else -# include "alternative_64.h" +static inline void +apply_paravirt(struct paravirt_patch_site *start, + struct paravirt_patch_site *end) +{} +#define __parainstructions NULL +#define __parainstructions_end NULL #endif + +extern void text_poke(void *addr, unsigned char *opcode, int len); + +#endif /* _ASM_X86_ALTERNATIVE_H */ diff --git a/include/asm-x86/alternative_32.h b/include/asm-x86/alternative_32.h deleted file mode 100644 index bda6c810c0f4..000000000000 --- a/include/asm-x86/alternative_32.h +++ /dev/null @@ -1,154 +0,0 @@ -#ifndef _I386_ALTERNATIVE_H -#define _I386_ALTERNATIVE_H - -#include <asm/types.h> -#include <linux/stddef.h> -#include <linux/types.h> - -struct alt_instr { - u8 *instr; /* original instruction */ - u8 *replacement; - u8 cpuid; /* cpuid bit set for replacement */ - u8 instrlen; /* length of original instruction */ - u8 replacementlen; /* length of new instruction, <= instrlen */ - u8 pad; -}; - -extern void alternative_instructions(void); -extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end); - -struct module; -#ifdef CONFIG_SMP -extern void alternatives_smp_module_add(struct module *mod, char *name, - void *locks, void *locks_end, - void *text, void *text_end); -extern void alternatives_smp_module_del(struct module *mod); -extern void alternatives_smp_switch(int smp); -#else -static inline void alternatives_smp_module_add(struct module *mod, char *name, - void *locks, void *locks_end, - void *text, void *text_end) {} -static inline void alternatives_smp_module_del(struct module *mod) {} -static inline void alternatives_smp_switch(int smp) {} -#endif /* CONFIG_SMP */ - -/* - * Alternative instructions for different CPU types or capabilities. - * - * This allows to use optimized instructions even on generic binary - * kernels. - * - * length of oldinstr must be longer or equal the length of newinstr - * It can be padded with nops as needed. - * - * For non barrier like inlines please define new variants - * without volatile and memory clobber. - */ -#define alternative(oldinstr, newinstr, feature) \ - asm volatile ("661:\n\t" oldinstr "\n662:\n" \ - ".section .altinstructions,\"a\"\n" \ - " .align 4\n" \ - " .long 661b\n" /* label */ \ - " .long 663f\n" /* new instruction */ \ - " .byte %c0\n" /* feature bit */ \ - " .byte 662b-661b\n" /* sourcelen */ \ - " .byte 664f-663f\n" /* replacementlen */ \ - ".previous\n" \ - ".section .altinstr_replacement,\"ax\"\n" \ - "663:\n\t" newinstr "\n664:\n" /* replacement */\ - ".previous" :: "i" (feature) : "memory") - -/* - * Alternative inline assembly with input. - * - * Pecularities: - * No memory clobber here. - * Argument numbers start with 1. - * Best is to use constraints that are fixed size (like (%1) ... "r") - * If you use variable sized constraints like "m" or "g" in the - * replacement maake sure to pad to the worst case length. - */ -#define alternative_input(oldinstr, newinstr, feature, input...) \ - asm volatile ("661:\n\t" oldinstr "\n662:\n" \ - ".section .altinstructions,\"a\"\n" \ - " .align 4\n" \ - " .long 661b\n" /* label */ \ - " .long 663f\n" /* new instruction */ \ - " .byte %c0\n" /* feature bit */ \ - " .byte 662b-661b\n" /* sourcelen */ \ - " .byte 664f-663f\n" /* replacementlen */ \ - ".previous\n" \ - ".section .altinstr_replacement,\"ax\"\n" \ - "663:\n\t" newinstr "\n664:\n" /* replacement */\ - ".previous" :: "i" (feature), ##input) - -/* Like alternative_input, but with a single output argument */ -#define alternative_io(oldinstr, newinstr, feature, output, input...) \ - asm volatile ("661:\n\t" oldinstr "\n662:\n" \ - ".section .altinstructions,\"a\"\n" \ - " .align 4\n" \ - " .long 661b\n" /* label */ \ - " .long 663f\n" /* new instruction */ \ - " .byte %c[feat]\n" /* feature bit */ \ - " .byte 662b-661b\n" /* sourcelen */ \ - " .byte 664f-663f\n" /* replacementlen */ \ - ".previous\n" \ - ".section .altinstr_replacement,\"ax\"\n" \ - "663:\n\t" newinstr "\n664:\n" /* replacement */ \ - ".previous" : output : [feat] "i" (feature), ##input) - -/* - * use this macro(s) if you need more than one output parameter - * in alternative_io - */ -#define ASM_OUTPUT2(a, b) a, b - -/* - * Alternative inline assembly for SMP. - * - * The LOCK_PREFIX macro defined here replaces the LOCK and - * LOCK_PREFIX macros used everywhere in the source tree. - * - * SMP alternatives use the same data structures as the other - * alternatives and the X86_FEATURE_UP flag to indicate the case of a - * UP system running a SMP kernel. The existing apply_alternatives() - * works fine for patching a SMP kernel for UP. - * - * The SMP alternative tables can be kept after boot and contain both - * UP and SMP versions of the instructions to allow switching back to - * SMP at runtime, when hotplugging in a new CPU, which is especially - * useful in virtualized environments. - * - * The very common lock prefix is handled as special case in a - * separate table which is a pure address list without replacement ptr - * and size information. That keeps the table sizes small. - */ - -#ifdef CONFIG_SMP -#define LOCK_PREFIX \ - ".section .smp_locks,\"a\"\n" \ - " .align 4\n" \ - " .long 661f\n" /* address */ \ - ".previous\n" \ - "661:\n\tlock; " - -#else /* ! CONFIG_SMP */ -#define LOCK_PREFIX "" -#endif - -struct paravirt_patch_site; -#ifdef CONFIG_PARAVIRT -void apply_paravirt(struct paravirt_patch_site *start, - struct paravirt_patch_site *end); -#else -static inline void -apply_paravirt(struct paravirt_patch_site *start, - struct paravirt_patch_site *end) -{} -#define __parainstructions NULL -#define __parainstructions_end NULL -#endif - -extern void text_poke(void *addr, unsigned char *opcode, int len); - -#endif /* _I386_ALTERNATIVE_H */ diff --git a/include/asm-x86/alternative_64.h b/include/asm-x86/alternative_64.h deleted file mode 100644 index ab161e810151..000000000000 --- a/include/asm-x86/alternative_64.h +++ /dev/null @@ -1,159 +0,0 @@ -#ifndef _X86_64_ALTERNATIVE_H -#define _X86_64_ALTERNATIVE_H - -#ifdef __KERNEL__ - -#include <linux/types.h> -#include <linux/stddef.h> - -/* - * Alternative inline assembly for SMP. - * - * The LOCK_PREFIX macro defined here replaces the LOCK and - * LOCK_PREFIX macros used everywhere in the source tree. - * - * SMP alternatives use the same data structures as the other - * alternatives and the X86_FEATURE_UP flag to indicate the case of a - * UP system running a SMP kernel. The existing apply_alternatives() - * works fine for patching a SMP kernel for UP. - * - * The SMP alternative tables can be kept after boot and contain both - * UP and SMP versions of the instructions to allow switching back to - * SMP at runtime, when hotplugging in a new CPU, which is especially - * useful in virtualized environments. - * - * The very common lock prefix is handled as special case in a - * separate table which is a pure address list without replacement ptr - * and size information. That keeps the table sizes small. - */ - -#ifdef CONFIG_SMP -#define LOCK_PREFIX \ - ".section .smp_locks,\"a\"\n" \ - " .align 8\n" \ - " .quad 661f\n" /* address */ \ - ".previous\n" \ - "661:\n\tlock; " - -#else /* ! CONFIG_SMP */ -#define LOCK_PREFIX "" -#endif - -/* This must be included *after* the definition of LOCK_PREFIX */ -#include <asm/cpufeature.h> - -struct alt_instr { - u8 *instr; /* original instruction */ - u8 *replacement; - u8 cpuid; /* cpuid bit set for replacement */ - u8 instrlen; /* length of original instruction */ - u8 replacementlen; /* length of new instruction, <= instrlen */ - u8 pad[5]; -}; - -extern void alternative_instructions(void); -extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end); - -struct module; - -#ifdef CONFIG_SMP -extern void alternatives_smp_module_add(struct module *mod, char *name, - void *locks, void *locks_end, - void *text, void *text_end); -extern void alternatives_smp_module_del(struct module *mod); -extern void alternatives_smp_switch(int smp); -#else -static inline void alternatives_smp_module_add(struct module *mod, char *name, - void *locks, void *locks_end, - void *text, void *text_end) {} -static inline void alternatives_smp_module_del(struct module *mod) {} -static inline void alternatives_smp_switch(int smp) {} -#endif - -#endif - -/* - * Alternative instructions for different CPU types or capabilities. - * - * This allows to use optimized instructions even on generic binary - * kernels. - * - * length of oldinstr must be longer or equal the length of newinstr - * It can be padded with nops as needed. - * - * For non barrier like inlines please define new variants - * without volatile and memory clobber. - */ -#define alternative(oldinstr, newinstr, feature) \ - asm volatile ("661:\n\t" oldinstr "\n662:\n" \ - ".section .altinstructions,\"a\"\n" \ - " .align 8\n" \ - " .quad 661b\n" /* label */ \ - " .quad 663f\n" /* new instruction */ \ - " .byte %c0\n" /* feature bit */ \ - " .byte 662b-661b\n" /* sourcelen */ \ - " .byte 664f-663f\n" /* replacementlen */ \ - ".previous\n" \ - ".section .altinstr_replacement,\"ax\"\n" \ - "663:\n\t" newinstr "\n664:\n" /* replacement */ \ - ".previous" :: "i" (feature) : "memory") - -/* - * Alternative inline assembly with input. - * - * Pecularities: - * No memory clobber here. - * Argument numbers start with 1. - * Best is to use constraints that are fixed size (like (%1) ... "r") - * If you use variable sized constraints like "m" or "g" in the - * replacement make sure to pad to the worst case length. - */ -#define alternative_input(oldinstr, newinstr, feature, input...) \ - asm volatile ("661:\n\t" oldinstr "\n662:\n" \ - ".section .altinstructions,\"a\"\n" \ - " .align 8\n" \ - " .quad 661b\n" /* label */ \ - " .quad 663f\n" /* new instruction */ \ - " .byte %c0\n" /* feature bit */ \ - " .byte 662b-661b\n" /* sourcelen */ \ - " .byte 664f-663f\n" /* replacementlen */ \ - ".previous\n" \ - ".section .altinstr_replacement,\"ax\"\n" \ - "663:\n\t" newinstr "\n664:\n" /* replacement */ \ - ".previous" :: "i" (feature), ##input) - -/* Like alternative_input, but with a single output argument */ -#define alternative_io(oldinstr, newinstr, feature, output, input...) \ - asm volatile ("661:\n\t" oldinstr "\n662:\n" \ - ".section .altinstructions,\"a\"\n" \ - " .align 8\n" \ - " .quad 661b\n" /* label */ \ - " .quad 663f\n" /* new instruction */ \ - " .byte %c[feat]\n" /* feature bit */ \ - " .byte 662b-661b\n" /* sourcelen */ \ - " .byte 664f-663f\n" /* replacementlen */ \ - ".previous\n" \ - ".section .altinstr_replacement,\"ax\"\n" \ - "663:\n\t" newinstr "\n664:\n" /* replacement */ \ - ".previous" : output : [feat] "i" (feature), ##input) - -/* - * use this macro(s) if you need more than one output parameter - * in alternative_io - */ -#define ASM_OUTPUT2(a, b) a, b - -struct paravirt_patch; -#ifdef CONFIG_PARAVIRT -void apply_paravirt(struct paravirt_patch *start, struct paravirt_patch *end); -#else -static inline void -apply_paravirt(struct paravirt_patch *start, struct paravirt_patch *end) -{} -#define __parainstructions NULL -#define __parainstructions_end NULL -#endif - -extern void text_poke(void *addr, unsigned char *opcode, int len); - -#endif /* _X86_64_ALTERNATIVE_H */ diff --git a/include/asm-x86/apic.h b/include/asm-x86/apic.h index 9fbcc0bd2ac4..bcfc07fd3661 100644 --- a/include/asm-x86/apic.h +++ b/include/asm-x86/apic.h @@ -1,5 +1,140 @@ -#ifdef CONFIG_X86_32 -# include "apic_32.h" +#ifndef _ASM_X86_APIC_H +#define _ASM_X86_APIC_H + +#include <linux/pm.h> +#include <linux/delay.h> +#include <asm/fixmap.h> +#include <asm/apicdef.h> +#include <asm/processor.h> +#include <asm/system.h> + +#define ARCH_APICTIMER_STOPS_ON_C3 1 + +#define Dprintk(x...) + +/* + * Debugging macros + */ +#define APIC_QUIET 0 +#define APIC_VERBOSE 1 +#define APIC_DEBUG 2 + +/* + * Define the default level of output to be very little + * This can be turned up by using apic=verbose for more + * information and apic=debug for _lots_ of information. + * apic_verbosity is defined in apic.c + */ +#define apic_printk(v, s, a...) do { \ + if ((v) <= apic_verbosity) \ + printk(s, ##a); \ + } while (0) + + +extern void generic_apic_probe(void); + +#ifdef CONFIG_X86_LOCAL_APIC + +extern int apic_verbosity; +extern int timer_over_8254; +extern int local_apic_timer_c2_ok; +extern int local_apic_timer_disabled; + +extern int apic_runs_main_timer; +extern int ioapic_force; +extern int disable_apic; +extern int disable_apic_timer; +extern unsigned boot_cpu_id; + +/* + * Basic functions accessing APICs. + */ +#ifdef CONFIG_PARAVIRT +#include <asm/paravirt.h> #else -# include "apic_64.h" +#define apic_write native_apic_write +#define apic_write_atomic native_apic_write_atomic +#define apic_read native_apic_read +#define setup_boot_clock setup_boot_APIC_clock +#define setup_secondary_clock setup_secondary_APIC_clock #endif + +static inline void native_apic_write(unsigned long reg, u32 v) +{ + *((volatile u32 *)(APIC_BASE + reg)) = v; +} + +static inline void native_apic_write_atomic(unsigned long reg, u32 v) +{ + (void) xchg((u32*)(APIC_BASE + reg), v); +} + +static inline u32 native_apic_read(unsigned long reg) +{ + return *((volatile u32 *)(APIC_BASE + reg)); +} + +extern void apic_wait_icr_idle(void); +extern u32 safe_apic_wait_icr_idle(void); +extern int get_physical_broadcast(void); + +#ifdef CONFIG_X86_GOOD_APIC +# define FORCE_READ_AROUND_WRITE 0 +# define apic_read_around(x) +# define apic_write_around(x, y) apic_write((x), (y)) +#else +# define FORCE_READ_AROUND_WRITE 1 +# define apic_read_around(x) apic_read(x) +# define apic_write_around(x, y) apic_write_atomic((x), (y)) +#endif + +static inline void ack_APIC_irq(void) +{ + /* + * ack_APIC_irq() actually gets compiled as a single instruction: + * - a single rmw on Pentium/82489DX + * - a single write on P6+ cores (CONFIG_X86_GOOD_APIC) + * ... yummie. + */ + + /* Docs say use 0 for future compatibility */ + apic_write_around(APIC_EOI, 0); +} + +extern int lapic_get_maxlvt(void); +extern void clear_local_APIC(void); +extern void connect_bsp_APIC(void); +extern void disconnect_bsp_APIC(int virt_wire_setup); +extern void disable_local_APIC(void); +extern void lapic_shutdown(void); +extern int verify_local_APIC(void); +extern void cache_APIC_registers(void); +extern void sync_Arb_IDs(void); +extern void init_bsp_APIC(void); +extern void setup_local_APIC(void); +extern void end_local_APIC_setup(void); +extern void init_apic_mappings(void); +extern void setup_boot_APIC_clock(void); +extern void setup_secondary_APIC_clock(void); +extern int APIC_init_uniprocessor(void); +extern void enable_NMI_through_LVT0(void); + +/* + * On 32bit this is mach-xxx local + */ +#ifdef CONFIG_X86_64 +extern void setup_apic_routing(void); +#endif + +extern u8 setup_APIC_eilvt_mce(u8 vector, u8 msg_type, u8 mask); +extern u8 setup_APIC_eilvt_ibs(u8 vector, u8 msg_type, u8 mask); + +extern int apic_is_clustered_box(void); + +#else /* !CONFIG_X86_LOCAL_APIC */ +static inline void lapic_shutdown(void) { } +#define local_apic_timer_c2_ok 1 + +#endif /* !CONFIG_X86_LOCAL_APIC */ + +#endif /* __ASM_APIC_H */ diff --git a/include/asm-x86/apic_32.h b/include/asm-x86/apic_32.h deleted file mode 100644 index be158b27d54b..000000000000 --- a/include/asm-x86/apic_32.h +++ /dev/null @@ -1,127 +0,0 @@ -#ifndef __ASM_APIC_H -#define __ASM_APIC_H - -#include <linux/pm.h> -#include <linux/delay.h> -#include <asm/fixmap.h> -#include <asm/apicdef.h> -#include <asm/processor.h> -#include <asm/system.h> - -#define Dprintk(x...) - -/* - * Debugging macros - */ -#define APIC_QUIET 0 -#define APIC_VERBOSE 1 -#define APIC_DEBUG 2 - -extern int apic_verbosity; - -/* - * Define the default level of output to be very little - * This can be turned up by using apic=verbose for more - * information and apic=debug for _lots_ of information. - * apic_verbosity is defined in apic.c - */ -#define apic_printk(v, s, a...) do { \ - if ((v) <= apic_verbosity) \ - printk(s, ##a); \ - } while (0) - - -extern void generic_apic_probe(void); - -#ifdef CONFIG_X86_LOCAL_APIC - -/* - * Basic functions accessing APICs. - */ -#ifdef CONFIG_PARAVIRT -#include <asm/paravirt.h> -#else -#define apic_write native_apic_write -#define apic_write_atomic native_apic_write_atomic -#define apic_read native_apic_read -#define setup_boot_clock setup_boot_APIC_clock -#define setup_secondary_clock setup_secondary_APIC_clock -#endif - -static __inline fastcall void native_apic_write(unsigned long reg, - unsigned long v) -{ - *((volatile unsigned long *)(APIC_BASE+reg)) = v; -} - -static __inline fastcall void native_apic_write_atomic(unsigned long reg, - unsigned long v) -{ - xchg((volatile unsigned long *)(APIC_BASE+reg), v); -} - -static __inline fastcall unsigned long native_apic_read(unsigned long reg) -{ - return *((volatile unsigned long *)(APIC_BASE+reg)); -} - -void apic_wait_icr_idle(void); -unsigned long safe_apic_wait_icr_idle(void); -int get_physical_broadcast(void); - -#ifdef CONFIG_X86_GOOD_APIC -# define FORCE_READ_AROUND_WRITE 0 -# define apic_read_around(x) -# define apic_write_around(x,y) apic_write((x),(y)) -#else -# define FORCE_READ_AROUND_WRITE 1 -# define apic_read_around(x) apic_read(x) -# define apic_write_around(x,y) apic_write_atomic((x),(y)) -#endif - -static inline void ack_APIC_irq(void) -{ - /* - * ack_APIC_irq() actually gets compiled as a single instruction: - * - a single rmw on Pentium/82489DX - * - a single write on P6+ cores (CONFIG_X86_GOOD_APIC) - * ... yummie. - */ - - /* Docs say use 0 for future compatibility */ - apic_write_around(APIC_EOI, 0); -} - -extern int lapic_get_maxlvt(void); -extern void clear_local_APIC(void); -extern void connect_bsp_APIC (void); -extern void disconnect_bsp_APIC (int virt_wire_setup); -extern void disable_local_APIC (void); -extern void lapic_shutdown (void); -extern int verify_local_APIC (void); -extern void cache_APIC_registers (void); -extern void sync_Arb_IDs (void); -extern void init_bsp_APIC (void); -extern void setup_local_APIC (void); -extern void init_apic_mappings (void); -extern void smp_local_timer_interrupt (void); -extern void setup_boot_APIC_clock (void); -extern void setup_secondary_APIC_clock (void); -extern int APIC_init_uniprocessor (void); - -extern void enable_NMI_through_LVT0 (void * dummy); - -#define ARCH_APICTIMER_STOPS_ON_C3 1 - -extern int timer_over_8254; -extern int local_apic_timer_c2_ok; - -extern int local_apic_timer_disabled; - -#else /* !CONFIG_X86_LOCAL_APIC */ -static inline void lapic_shutdown(void) { } -#define local_apic_timer_c2_ok 1 - -#endif /* !CONFIG_X86_LOCAL_APIC */ - -#endif /* __ASM_APIC_H */ diff --git a/include/asm-x86/apic_64.h b/include/asm-x86/apic_64.h deleted file mode 100644 index 2747a11a2b19..000000000000 --- a/include/asm-x86/apic_64.h +++ /dev/null @@ -1,102 +0,0 @@ -#ifndef __ASM_APIC_H -#define __ASM_APIC_H - -#include <linux/pm.h> -#include <linux/delay.h> -#include <asm/fixmap.h> -#include <asm/apicdef.h> -#include <asm/system.h> - -#define Dprintk(x...) - -/* - * Debugging macros - */ -#define APIC_QUIET 0 -#define APIC_VERBOSE 1 -#define APIC_DEBUG 2 - -extern int apic_verbosity; -extern int apic_runs_main_timer; -extern int ioapic_force; -extern int disable_apic_timer; - -/* - * Define the default level of output to be very little - * This can be turned up by using apic=verbose for more - * information and apic=debug for _lots_ of information. - * apic_verbosity is defined in apic.c - */ -#define apic_printk(v, s, a...) do { \ - if ((v) <= apic_verbosity) \ - printk(s, ##a); \ - } while (0) - -struct pt_regs; - -/* - * Basic functions accessing APICs. - */ - -static __inline void apic_write(unsigned long reg, unsigned int v) -{ - *((volatile unsigned int *)(APIC_BASE+reg)) = v; -} - -static __inline unsigned int apic_read(unsigned long reg) -{ - return *((volatile unsigned int *)(APIC_BASE+reg)); -} - -extern void apic_wait_icr_idle(void); -extern unsigned int safe_apic_wait_icr_idle(void); - -static inline void ack_APIC_irq(void) -{ - /* - * ack_APIC_irq() actually gets compiled as a single instruction: - * - a single rmw on Pentium/82489DX - * - a single write on P6+ cores (CONFIG_X86_GOOD_APIC) - * ... yummie. - */ - - /* Docs say use 0 for future compatibility */ - apic_write(APIC_EOI, 0); -} - -extern int get_maxlvt (void); -extern void clear_local_APIC (void); -extern void connect_bsp_APIC (void); -extern void disconnect_bsp_APIC (int virt_wire_setup); -extern void disable_local_APIC (void); -extern void lapic_shutdown (void); -extern int verify_local_APIC (void); -extern void cache_APIC_registers (void); -extern void sync_Arb_IDs (void); -extern void init_bsp_APIC (void); -extern void setup_local_APIC (void); -extern void init_apic_mappings (void); -extern void smp_local_timer_interrupt (void); -extern void setup_boot_APIC_clock (void); -extern void setup_secondary_APIC_clock (void); -extern int APIC_init_uniprocessor (void); -extern void setup_apic_routing(void); - -extern void setup_APIC_extended_lvt(unsigned char lvt_off, unsigned char vector, - unsigned char msg_type, unsigned char mask); - -extern int apic_is_clustered_box(void); - -#define K8_APIC_EXT_LVT_BASE 0x500 -#define K8_APIC_EXT_INT_MSG_FIX 0x0 -#define K8_APIC_EXT_INT_MSG_SMI 0x2 -#define K8_APIC_EXT_INT_MSG_NMI 0x4 -#define K8_APIC_EXT_INT_MSG_EXT 0x7 -#define K8_APIC_EXT_LVT_ENTRY_THRESHOLD 0 - -#define ARCH_APICTIMER_STOPS_ON_C3 1 - -extern unsigned boot_cpu_id; -extern int local_apic_timer_c2_ok; - -#endif /* __ASM_APIC_H */ diff --git a/include/asm-x86/apicdef.h b/include/asm-x86/apicdef.h index 4542c220bf4d..550af7a6f88e 100644 --- a/include/asm-x86/apicdef.h +++ b/include/asm-x86/apicdef.h @@ -1,5 +1,413 @@ +#ifndef _ASM_X86_APICDEF_H +#define _ASM_X86_APICDEF_H + +/* + * Constants for various Intel APICs. (local APIC, IOAPIC, etc.) + * + * Alan Cox <Alan.Cox@linux.org>, 1995. + * Ingo Molnar <mingo@redhat.com>, 1999, 2000 + */ + +#define APIC_DEFAULT_PHYS_BASE 0xfee00000 + +#define APIC_ID 0x20 + +#ifdef CONFIG_X86_64 +# define APIC_ID_MASK (0xFFu<<24) +# define GET_APIC_ID(x) (((x)>>24)&0xFFu) +# define SET_APIC_ID(x) (((x)<<24)) +#endif + +#define APIC_LVR 0x30 +#define APIC_LVR_MASK 0xFF00FF +#define GET_APIC_VERSION(x) ((x)&0xFFu) +#define GET_APIC_MAXLVT(x) (((x)>>16)&0xFFu) +#define APIC_INTEGRATED(x) ((x)&0xF0u) +#define APIC_XAPIC(x) ((x) >= 0x14) +#define APIC_TASKPRI 0x80 +#define APIC_TPRI_MASK 0xFFu +#define APIC_ARBPRI 0x90 +#define APIC_ARBPRI_MASK 0xFFu +#define APIC_PROCPRI 0xA0 +#define APIC_EOI 0xB0 +#define APIC_EIO_ACK 0x0 +#define APIC_RRR 0xC0 +#define APIC_LDR 0xD0 +#define APIC_LDR_MASK (0xFFu<<24) +#define GET_APIC_LOGICAL_ID(x) (((x)>>24)&0xFFu) +#define SET_APIC_LOGICAL_ID(x) (((x)<<24)) +#define APIC_ALL_CPUS 0xFFu +#define APIC_DFR 0xE0 +#define APIC_DFR_CLUSTER 0x0FFFFFFFul +#define APIC_DFR_FLAT 0xFFFFFFFFul +#define APIC_SPIV 0xF0 +#define APIC_SPIV_FOCUS_DISABLED (1<<9) +#define APIC_SPIV_APIC_ENABLED (1<<8) +#define APIC_ISR 0x100 +#define APIC_ISR_NR 0x8 /* Number of 32 bit ISR registers. */ +#define APIC_TMR 0x180 +#define APIC_IRR 0x200 +#define APIC_ESR 0x280 +#define APIC_ESR_SEND_CS 0x00001 +#define APIC_ESR_RECV_CS 0x00002 +#define APIC_ESR_SEND_ACC 0x00004 +#define APIC_ESR_RECV_ACC 0x00008 +#define APIC_ESR_SENDILL 0x00020 +#define APIC_ESR_RECVILL 0x00040 +#define APIC_ESR_ILLREGA 0x00080 +#define APIC_ICR 0x300 +#define APIC_DEST_SELF 0x40000 +#define APIC_DEST_ALLINC 0x80000 +#define APIC_DEST_ALLBUT 0xC0000 +#define APIC_ICR_RR_MASK 0x30000 +#define APIC_ICR_RR_INVALID 0x00000 +#define APIC_ICR_RR_INPROG 0x10000 +#define APIC_ICR_RR_VALID 0x20000 +#define APIC_INT_LEVELTRIG 0x08000 +#define APIC_INT_ASSERT 0x04000 +#define APIC_ICR_BUSY 0x01000 +#define APIC_DEST_LOGICAL 0x00800 +#define APIC_DEST_PHYSICAL 0x00000 +#define APIC_DM_FIXED 0x00000 +#define APIC_DM_LOWEST 0x00100 +#define APIC_DM_SMI 0x00200 +#define APIC_DM_REMRD 0x00300 +#define APIC_DM_NMI 0x00400 +#define APIC_DM_INIT 0x00500 +#define APIC_DM_STARTUP 0x00600 +#define APIC_DM_EXTINT 0x00700 +#define APIC_VECTOR_MASK 0x000FF +#define APIC_ICR2 0x310 +#define GET_APIC_DEST_FIELD(x) (((x)>>24)&0xFF) +#define SET_APIC_DEST_FIELD(x) ((x)<<24) +#define APIC_LVTT 0x320 +#define APIC_LVTTHMR 0x330 +#define APIC_LVTPC 0x340 +#define APIC_LVT0 0x350 +#define APIC_LVT_TIMER_BASE_MASK (0x3<<18) +#define GET_APIC_TIMER_BASE(x) (((x)>>18)&0x3) +#define SET_APIC_TIMER_BASE(x) (((x)<<18)) +#define APIC_TIMER_BASE_CLKIN 0x0 +#define APIC_TIMER_BASE_TMBASE 0x1 +#define APIC_TIMER_BASE_DIV 0x2 +#define APIC_LVT_TIMER_PERIODIC (1<<17) +#define APIC_LVT_MASKED (1<<16) +#define APIC_LVT_LEVEL_TRIGGER (1<<15) +#define APIC_LVT_REMOTE_IRR (1<<14) +#define APIC_INPUT_POLARITY (1<<13) +#define APIC_SEND_PENDING (1<<12) +#define APIC_MODE_MASK 0x700 +#define GET_APIC_DELIVERY_MODE(x) (((x)>>8)&0x7) +#define SET_APIC_DELIVERY_MODE(x, y) (((x)&~0x700)|((y)<<8)) +#define APIC_MODE_FIXED 0x0 +#define APIC_MODE_NMI 0x4 +#define APIC_MODE_EXTINT 0x7 +#define APIC_LVT1 0x360 +#define APIC_LVTERR 0x370 +#define APIC_TMICT 0x380 +#define APIC_TMCCT 0x390 +#define APIC_TDCR 0x3E0 +#define APIC_TDR_DIV_TMBASE (1<<2) +#define APIC_TDR_DIV_1 0xB +#define APIC_TDR_DIV_2 0x0 +#define APIC_TDR_DIV_4 0x1 +#define APIC_TDR_DIV_8 0x2 +#define APIC_TDR_DIV_16 0x3 +#define APIC_TDR_DIV_32 0x8 +#define APIC_TDR_DIV_64 0x9 +#define APIC_TDR_DIV_128 0xA +#define APIC_EILVT0 0x500 +#define APIC_EILVT_NR_AMD_K8 1 /* Number of extended interrupts */ +#define APIC_EILVT_NR_AMD_10H 4 +#define APIC_EILVT_LVTOFF(x) (((x)>>4)&0xF) +#define APIC_EILVT_MSG_FIX 0x0 +#define APIC_EILVT_MSG_SMI 0x2 +#define APIC_EILVT_MSG_NMI 0x4 +#define APIC_EILVT_MSG_EXT 0x7 +#define APIC_EILVT_MASKED (1<<16) +#define APIC_EILVT1 0x510 +#define APIC_EILVT2 0x520 +#define APIC_EILVT3 0x530 + +#define APIC_BASE (fix_to_virt(FIX_APIC_BASE)) + #ifdef CONFIG_X86_32 -# include "apicdef_32.h" +# define MAX_IO_APICS 64 #else -# include "apicdef_64.h" +# define MAX_IO_APICS 128 +# define MAX_LOCAL_APIC 256 +#endif + +/* + * All x86-64 systems are xAPIC compatible. + * In the following, "apicid" is a physical APIC ID. + */ +#define XAPIC_DEST_CPUS_SHIFT 4 +#define XAPIC_DEST_CPUS_MASK ((1u << XAPIC_DEST_CPUS_SHIFT) - 1) +#define XAPIC_DEST_CLUSTER_MASK (XAPIC_DEST_CPUS_MASK << XAPIC_DEST_CPUS_SHIFT) +#define APIC_CLUSTER(apicid) ((apicid) & XAPIC_DEST_CLUSTER_MASK) +#define APIC_CLUSTERID(apicid) (APIC_CLUSTER(apicid) >> XAPIC_DEST_CPUS_SHIFT) +#define APIC_CPUID(apicid) ((apicid) & XAPIC_DEST_CPUS_MASK) +#define NUM_APIC_CLUSTERS ((BAD_APICID + 1) >> XAPIC_DEST_CPUS_SHIFT) + +/* + * the local APIC register structure, memory mapped. Not terribly well + * tested, but we might eventually use this one in the future - the + * problem why we cannot use it right now is the P5 APIC, it has an + * errata which cannot take 8-bit reads and writes, only 32-bit ones ... + */ +#define u32 unsigned int + +struct local_apic { + +/*000*/ struct { u32 __reserved[4]; } __reserved_01; + +/*010*/ struct { u32 __reserved[4]; } __reserved_02; + +/*020*/ struct { /* APIC ID Register */ + u32 __reserved_1 : 24, + phys_apic_id : 4, + __reserved_2 : 4; + u32 __reserved[3]; + } id; + +/*030*/ const + struct { /* APIC Version Register */ + u32 version : 8, + __reserved_1 : 8, + max_lvt : 8, + __reserved_2 : 8; + u32 __reserved[3]; + } version; + +/*040*/ struct { u32 __reserved[4]; } __reserved_03; + +/*050*/ struct { u32 __reserved[4]; } __reserved_04; + +/*060*/ struct { u32 __reserved[4]; } __reserved_05; + +/*070*/ struct { u32 __reserved[4]; } __reserved_06; + +/*080*/ struct { /* Task Priority Register */ + u32 priority : 8, + __reserved_1 : 24; + u32 __reserved_2[3]; + } tpr; + +/*090*/ const + struct { /* Arbitration Priority Register */ + u32 priority : 8, + __reserved_1 : 24; + u32 __reserved_2[3]; + } apr; + +/*0A0*/ const + struct { /* Processor Priority Register */ + u32 priority : 8, + __reserved_1 : 24; + u32 __reserved_2[3]; + } ppr; + +/*0B0*/ struct { /* End Of Interrupt Register */ + u32 eoi; + u32 __reserved[3]; + } eoi; + +/*0C0*/ struct { u32 __reserved[4]; } __reserved_07; + +/*0D0*/ struct { /* Logical Destination Register */ + u32 __reserved_1 : 24, + logical_dest : 8; + u32 __reserved_2[3]; + } ldr; + +/*0E0*/ struct { /* Destination Format Register */ + u32 __reserved_1 : 28, + model : 4; + u32 __reserved_2[3]; + } dfr; + +/*0F0*/ struct { /* Spurious Interrupt Vector Register */ + u32 spurious_vector : 8, + apic_enabled : 1, + focus_cpu : 1, + __reserved_2 : 22; + u32 __reserved_3[3]; + } svr; + +/*100*/ struct { /* In Service Register */ +/*170*/ u32 bitfield; + u32 __reserved[3]; + } isr [8]; + +/*180*/ struct { /* Trigger Mode Register */ +/*1F0*/ u32 bitfield; + u32 __reserved[3]; + } tmr [8]; + +/*200*/ struct { /* Interrupt Request Register */ +/*270*/ u32 bitfield; + u32 __reserved[3]; + } irr [8]; + +/*280*/ union { /* Error Status Register */ + struct { + u32 send_cs_error : 1, + receive_cs_error : 1, + send_accept_error : 1, + receive_accept_error : 1, + __reserved_1 : 1, + send_illegal_vector : 1, + receive_illegal_vector : 1, + illegal_register_address : 1, + __reserved_2 : 24; + u32 __reserved_3[3]; + } error_bits; + struct { + u32 errors; + u32 __reserved_3[3]; + } all_errors; + } esr; + +/*290*/ struct { u32 __reserved[4]; } __reserved_08; + +/*2A0*/ struct { u32 __reserved[4]; } __reserved_09; + +/*2B0*/ struct { u32 __reserved[4]; } __reserved_10; + +/*2C0*/ struct { u32 __reserved[4]; } __reserved_11; + +/*2D0*/ struct { u32 __reserved[4]; } __reserved_12; + +/*2E0*/ struct { u32 __reserved[4]; } __reserved_13; + +/*2F0*/ struct { u32 __reserved[4]; } __reserved_14; + +/*300*/ struct { /* Interrupt Command Register 1 */ + u32 vector : 8, + delivery_mode : 3, + destination_mode : 1, + delivery_status : 1, + __reserved_1 : 1, + level : 1, + trigger : 1, + __reserved_2 : 2, + shorthand : 2, + __reserved_3 : 12; + u32 __reserved_4[3]; + } icr1; + +/*310*/ struct { /* Interrupt Command Register 2 */ + union { + u32 __reserved_1 : 24, + phys_dest : 4, + __reserved_2 : 4; + u32 __reserved_3 : 24, + logical_dest : 8; + } dest; + u32 __reserved_4[3]; + } icr2; + +/*320*/ struct { /* LVT - Timer */ + u32 vector : 8, + __reserved_1 : 4, + delivery_status : 1, + __reserved_2 : 3, + mask : 1, + timer_mode : 1, + __reserved_3 : 14; + u32 __reserved_4[3]; + } lvt_timer; + +/*330*/ struct { /* LVT - Thermal Sensor */ + u32 vector : 8, + delivery_mode : 3, + __reserved_1 : 1, + delivery_status : 1, + __reserved_2 : 3, + mask : 1, + __reserved_3 : 15; + u32 __reserved_4[3]; + } lvt_thermal; + +/*340*/ struct { /* LVT - Performance Counter */ + u32 vector : 8, + delivery_mode : 3, + __reserved_1 : 1, + delivery_status : 1, + __reserved_2 : 3, + mask : 1, + __reserved_3 : 15; + u32 __reserved_4[3]; + } lvt_pc; + +/*350*/ struct { /* LVT - LINT0 */ + u32 vector : 8, + delivery_mode : 3, + __reserved_1 : 1, + delivery_status : 1, + polarity : 1, + remote_irr : 1, + trigger : 1, + mask : 1, + __reserved_2 : 15; + u32 __reserved_3[3]; + } lvt_lint0; + +/*360*/ struct { /* LVT - LINT1 */ + u32 vector : 8, + delivery_mode : 3, + __reserved_1 : 1, + delivery_status : 1, + polarity : 1, + remote_irr : 1, + trigger : 1, + mask : 1, + __reserved_2 : 15; + u32 __reserved_3[3]; + } lvt_lint1; + +/*370*/ struct { /* LVT - Error */ + u32 vector : 8, + __reserved_1 : 4, + delivery_status : 1, + __reserved_2 : 3, + mask : 1, + __reserved_3 : 15; + u32 __reserved_4[3]; + } lvt_error; + +/*380*/ struct { /* Timer Initial Count Register */ + u32 initial_count; + u32 __reserved_2[3]; + } timer_icr; + +/*390*/ const + struct { /* Timer Current Count Register */ + u32 curr_count; + u32 __reserved_2[3]; + } timer_ccr; + +/*3A0*/ struct { u32 __reserved[4]; } __reserved_16; + +/*3B0*/ struct { u32 __reserved[4]; } __reserved_17; + +/*3C0*/ struct { u32 __reserved[4]; } __reserved_18; + +/*3D0*/ struct { u32 __reserved[4]; } __reserved_19; + +/*3E0*/ struct { /* Timer Divide Configuration Register */ + u32 divisor : 4, + __reserved_1 : 28; + u32 __reserved_2[3]; + } timer_dcr; + +/*3F0*/ struct { u32 __reserved[4]; } __reserved_20; + +} __attribute__ ((packed)); + +#undef u32 + +#define BAD_APICID 0xFFu + #endif diff --git a/include/asm-x86/apicdef_32.h b/include/asm-x86/apicdef_32.h deleted file mode 100644 index 9f6995341fdc..000000000000 --- a/include/asm-x86/apicdef_32.h +++ /dev/null @@ -1,375 +0,0 @@ -#ifndef __ASM_APICDEF_H -#define __ASM_APICDEF_H - -/* - * Constants for various Intel APICs. (local APIC, IOAPIC, etc.) - * - * Alan Cox <Alan.Cox@linux.org>, 1995. - * Ingo Molnar <mingo@redhat.com>, 1999, 2000 - */ - -#define APIC_DEFAULT_PHYS_BASE 0xfee00000 - -#define APIC_ID 0x20 -#define APIC_LVR 0x30 -#define APIC_LVR_MASK 0xFF00FF -#define GET_APIC_VERSION(x) ((x)&0xFF) -#define GET_APIC_MAXLVT(x) (((x)>>16)&0xFF) -#define APIC_INTEGRATED(x) ((x)&0xF0) -#define APIC_XAPIC(x) ((x) >= 0x14) -#define APIC_TASKPRI 0x80 -#define APIC_TPRI_MASK 0xFF -#define APIC_ARBPRI 0x90 -#define APIC_ARBPRI_MASK 0xFF -#define APIC_PROCPRI 0xA0 -#define APIC_EOI 0xB0 -#define APIC_EIO_ACK 0x0 /* Write this to the EOI register */ -#define APIC_RRR 0xC0 -#define APIC_LDR 0xD0 -#define APIC_LDR_MASK (0xFF<<24) -#define GET_APIC_LOGICAL_ID(x) (((x)>>24)&0xFF) -#define SET_APIC_LOGICAL_ID(x) (((x)<<24)) -#define APIC_ALL_CPUS 0xFF -#define APIC_DFR 0xE0 -#define APIC_DFR_CLUSTER 0x0FFFFFFFul -#define APIC_DFR_FLAT 0xFFFFFFFFul -#define APIC_SPIV 0xF0 -#define APIC_SPIV_FOCUS_DISABLED (1<<9) -#define APIC_SPIV_APIC_ENABLED (1<<8) -#define APIC_ISR 0x100 -#define APIC_ISR_NR 0x8 /* Number of 32 bit ISR registers. */ -#define APIC_TMR 0x180 -#define APIC_IRR 0x200 -#define APIC_ESR 0x280 -#define APIC_ESR_SEND_CS 0x00001 -#define APIC_ESR_RECV_CS 0x00002 -#define APIC_ESR_SEND_ACC 0x00004 -#define APIC_ESR_RECV_ACC 0x00008 -#define APIC_ESR_SENDILL 0x00020 -#define APIC_ESR_RECVILL 0x00040 -#define APIC_ESR_ILLREGA 0x00080 -#define APIC_ICR 0x300 -#define APIC_DEST_SELF 0x40000 -#define APIC_DEST_ALLINC 0x80000 -#define APIC_DEST_ALLBUT 0xC0000 -#define APIC_ICR_RR_MASK 0x30000 -#define APIC_ICR_RR_INVALID 0x00000 -#define APIC_ICR_RR_INPROG 0x10000 -#define APIC_ICR_RR_VALID 0x20000 -#define APIC_INT_LEVELTRIG 0x08000 -#define APIC_INT_ASSERT 0x04000 -#define APIC_ICR_BUSY 0x01000 -#define APIC_DEST_LOGICAL 0x00800 -#define APIC_DM_FIXED 0x00000 -#define APIC_DM_LOWEST 0x00100 -#define APIC_DM_SMI 0x00200 -#define APIC_DM_REMRD 0x00300 -#define APIC_DM_NMI 0x00400 -#define APIC_DM_INIT 0x00500 -#define APIC_DM_STARTUP 0x00600 -#define APIC_DM_EXTINT 0x00700 -#define APIC_VECTOR_MASK 0x000FF -#define APIC_ICR2 0x310 -#define GET_APIC_DEST_FIELD(x) (((x)>>24)&0xFF) -#define SET_APIC_DEST_FIELD(x) ((x)<<24) -#define APIC_LVTT 0x320 -#define APIC_LVTTHMR 0x330 -#define APIC_LVTPC 0x340 -#define APIC_LVT0 0x350 -#define APIC_LVT_TIMER_BASE_MASK (0x3<<18) -#define GET_APIC_TIMER_BASE(x) (((x)>>18)&0x3) -#define SET_APIC_TIMER_BASE(x) (((x)<<18)) -#define APIC_TIMER_BASE_CLKIN 0x0 -#define APIC_TIMER_BASE_TMBASE 0x1 -#define APIC_TIMER_BASE_DIV 0x2 -#define APIC_LVT_TIMER_PERIODIC (1<<17) -#define APIC_LVT_MASKED (1<<16) -#define APIC_LVT_LEVEL_TRIGGER (1<<15) -#define APIC_LVT_REMOTE_IRR (1<<14) -#define APIC_INPUT_POLARITY (1<<13) -#define APIC_SEND_PENDING (1<<12) -#define APIC_MODE_MASK 0x700 -#define GET_APIC_DELIVERY_MODE(x) (((x)>>8)&0x7) -#define SET_APIC_DELIVERY_MODE(x,y) (((x)&~0x700)|((y)<<8)) -#define APIC_MODE_FIXED 0x0 -#define APIC_MODE_NMI 0x4 -#define APIC_MODE_EXTINT 0x7 -#define APIC_LVT1 0x360 -#define APIC_LVTERR 0x370 -#define APIC_TMICT 0x380 -#define APIC_TMCCT 0x390 -#define APIC_TDCR 0x3E0 -#define APIC_TDR_DIV_TMBASE (1<<2) -#define APIC_TDR_DIV_1 0xB -#define APIC_TDR_DIV_2 0x0 -#define APIC_TDR_DIV_4 0x1 -#define APIC_TDR_DIV_8 0x2 -#define APIC_TDR_DIV_16 0x3 -#define APIC_TDR_DIV_32 0x8 -#define APIC_TDR_DIV_64 0x9 -#define APIC_TDR_DIV_128 0xA - -#define APIC_BASE (fix_to_virt(FIX_APIC_BASE)) - -#define MAX_IO_APICS 64 - -/* - * the local APIC register structure, memory mapped. Not terribly well - * tested, but we might eventually use this one in the future - the - * problem why we cannot use it right now is the P5 APIC, it has an - * errata which cannot take 8-bit reads and writes, only 32-bit ones ... - */ -#define u32 unsigned int - - -struct local_apic { - -/*000*/ struct { u32 __reserved[4]; } __reserved_01; - -/*010*/ struct { u32 __reserved[4]; } __reserved_02; - -/*020*/ struct { /* APIC ID Register */ - u32 __reserved_1 : 24, - phys_apic_id : 4, - __reserved_2 : 4; - u32 __reserved[3]; - } id; - -/*030*/ const - struct { /* APIC Version Register */ - u32 version : 8, - __reserved_1 : 8, - max_lvt : 8, - __reserved_2 : 8; - u32 __reserved[3]; - } version; - -/*040*/ struct { u32 __reserved[4]; } __reserved_03; - -/*050*/ struct { u32 __reserved[4]; } __reserved_04; - -/*060*/ struct { u32 __reserved[4]; } __reserved_05; - -/*070*/ struct { u32 __reserved[4]; } __reserved_06; - -/*080*/ struct { /* Task Priority Register */ - u32 priority : 8, - __reserved_1 : 24; - u32 __reserved_2[3]; - } tpr; - -/*090*/ const - struct { /* Arbitration Priority Register */ - u32 priority : 8, - __reserved_1 : 24; - u32 __reserved_2[3]; - } apr; - -/*0A0*/ const - struct { /* Processor Priority Register */ - u32 priority : 8, - __reserved_1 : 24; - u32 __reserved_2[3]; - } ppr; - -/*0B0*/ struct { /* End Of Interrupt Register */ - u32 eoi; - u32 __reserved[3]; - } eoi; - -/*0C0*/ struct { u32 __reserved[4]; } __reserved_07; - -/*0D0*/ struct { /* Logical Destination Register */ - u32 __reserved_1 : 24, - logical_dest : 8; - u32 __reserved_2[3]; - } ldr; - -/*0E0*/ struct { /* Destination Format Register */ - u32 __reserved_1 : 28, - model : 4; - u32 __reserved_2[3]; - } dfr; - -/*0F0*/ struct { /* Spurious Interrupt Vector Register */ - u32 spurious_vector : 8, - apic_enabled : 1, - focus_cpu : 1, - __reserved_2 : 22; - u32 __reserved_3[3]; - } svr; - -/*100*/ struct { /* In Service Register */ -/*170*/ u32 bitfield; - u32 __reserved[3]; - } isr [8]; - -/*180*/ struct { /* Trigger Mode Register */ -/*1F0*/ u32 bitfield; - u32 __reserved[3]; - } tmr [8]; - -/*200*/ struct { /* Interrupt Request Register */ -/*270*/ u32 bitfield; - u32 __reserved[3]; - } irr [8]; - -/*280*/ union { /* Error Status Register */ - struct { - u32 send_cs_error : 1, - receive_cs_error : 1, - send_accept_error : 1, - receive_accept_error : 1, - __reserved_1 : 1, - send_illegal_vector : 1, - receive_illegal_vector : 1, - illegal_register_address : 1, - __reserved_2 : 24; - u32 __reserved_3[3]; - } error_bits; - struct { - u32 errors; - u32 __reserved_3[3]; - } all_errors; - } esr; - -/*290*/ struct { u32 __reserved[4]; } __reserved_08; - -/*2A0*/ struct { u32 __reserved[4]; } __reserved_09; - -/*2B0*/ struct { u32 __reserved[4]; } __reserved_10; - -/*2C0*/ struct { u32 __reserved[4]; } __reserved_11; - -/*2D0*/ struct { u32 __reserved[4]; } __reserved_12; - -/*2E0*/ struct { u32 __reserved[4]; } __reserved_13; - -/*2F0*/ struct { u32 __reserved[4]; } __reserved_14; - -/*300*/ struct { /* Interrupt Command Register 1 */ - u32 vector : 8, - delivery_mode : 3, - destination_mode : 1, - delivery_status : 1, - __reserved_1 : 1, - level : 1, - trigger : 1, - __reserved_2 : 2, - shorthand : 2, - __reserved_3 : 12; - u32 __reserved_4[3]; - } icr1; - -/*310*/ struct { /* Interrupt Command Register 2 */ - union { - u32 __reserved_1 : 24, - phys_dest : 4, - __reserved_2 : 4; - u32 __reserved_3 : 24, - logical_dest : 8; - } dest; - u32 __reserved_4[3]; - } icr2; - -/*320*/ struct { /* LVT - Timer */ - u32 vector : 8, - __reserved_1 : 4, - delivery_status : 1, - __reserved_2 : 3, - mask : 1, - timer_mode : 1, - __reserved_3 : 14; - u32 __reserved_4[3]; - } lvt_timer; - -/*330*/ struct { /* LVT - Thermal Sensor */ - u32 vector : 8, - delivery_mode : 3, - __reserved_1 : 1, - delivery_status : 1, - __reserved_2 : 3, - mask : 1, - __reserved_3 : 15; - u32 __reserved_4[3]; - } lvt_thermal; - -/*340*/ struct { /* LVT - Performance Counter */ - u32 vector : 8, - delivery_mode : 3, - __reserved_1 : 1, - delivery_status : 1, - __reserved_2 : 3, - mask : 1, - __reserved_3 : 15; - u32 __reserved_4[3]; - } lvt_pc; - -/*350*/ struct { /* LVT - LINT0 */ - u32 vector : 8, - delivery_mode : 3, - __reserved_1 : 1, - delivery_status : 1, - polarity : 1, - remote_irr : 1, - trigger : 1, - mask : 1, - __reserved_2 : 15; - u32 __reserved_3[3]; - } lvt_lint0; - -/*360*/ struct { /* LVT - LINT1 */ - u32 vector : 8, - delivery_mode : 3, - __reserved_1 : 1, - delivery_status : 1, - polarity : 1, - remote_irr : 1, - trigger : 1, - mask : 1, - __reserved_2 : 15; - u32 __reserved_3[3]; - } lvt_lint1; - -/*370*/ struct { /* LVT - Error */ - u32 vector : 8, - __reserved_1 : 4, - delivery_status : 1, - __reserved_2 : 3, - mask : 1, - __reserved_3 : 15; - u32 __reserved_4[3]; - } lvt_error; - -/*380*/ struct { /* Timer Initial Count Register */ - u32 initial_count; - u32 __reserved_2[3]; - } timer_icr; - -/*390*/ const - struct { /* Timer Current Count Register */ - u32 curr_count; - u32 __reserved_2[3]; - } timer_ccr; - -/*3A0*/ struct { u32 __reserved[4]; } __reserved_16; - -/*3B0*/ struct { u32 __reserved[4]; } __reserved_17; - -/*3C0*/ struct { u32 __reserved[4]; } __reserved_18; - -/*3D0*/ struct { u32 __reserved[4]; } __reserved_19; - -/*3E0*/ struct { /* Timer Divide Configuration Register */ - u32 divisor : 4, - __reserved_1 : 28; - u32 __reserved_2[3]; - } timer_dcr; - -/*3F0*/ struct { u32 __reserved[4]; } __reserved_20; - -} __attribute__ ((packed)); - -#undef u32 - -#endif diff --git a/include/asm-x86/apicdef_64.h b/include/asm-x86/apicdef_64.h deleted file mode 100644 index 1dd40067c67c..000000000000 --- a/include/asm-x86/apicdef_64.h +++ /dev/null @@ -1,392 +0,0 @@ -#ifndef __ASM_APICDEF_H -#define __ASM_APICDEF_H - -/* - * Constants for various Intel APICs. (local APIC, IOAPIC, etc.) - * - * Alan Cox <Alan.Cox@linux.org>, 1995. - * Ingo Molnar <mingo@redhat.com>, 1999, 2000 - */ - -#define APIC_DEFAULT_PHYS_BASE 0xfee00000 - -#define APIC_ID 0x20 -#define APIC_ID_MASK (0xFFu<<24) -#define GET_APIC_ID(x) (((x)>>24)&0xFFu) -#define SET_APIC_ID(x) (((x)<<24)) -#define APIC_LVR 0x30 -#define APIC_LVR_MASK 0xFF00FF -#define GET_APIC_VERSION(x) ((x)&0xFFu) -#define GET_APIC_MAXLVT(x) (((x)>>16)&0xFFu) -#define APIC_INTEGRATED(x) ((x)&0xF0u) -#define APIC_TASKPRI 0x80 -#define APIC_TPRI_MASK 0xFFu -#define APIC_ARBPRI 0x90 -#define APIC_ARBPRI_MASK 0xFFu -#define APIC_PROCPRI 0xA0 -#define APIC_EOI 0xB0 -#define APIC_EIO_ACK 0x0 /* Write this to the EOI register */ -#define APIC_RRR 0xC0 -#define APIC_LDR 0xD0 -#define APIC_LDR_MASK (0xFFu<<24) -#define GET_APIC_LOGICAL_ID(x) (((x)>>24)&0xFFu) -#define SET_APIC_LOGICAL_ID(x) (((x)<<24)) -#define APIC_ALL_CPUS 0xFFu -#define APIC_DFR 0xE0 -#define APIC_DFR_CLUSTER 0x0FFFFFFFul -#define APIC_DFR_FLAT 0xFFFFFFFFul -#define APIC_SPIV 0xF0 -#define APIC_SPIV_FOCUS_DISABLED (1<<9) -#define APIC_SPIV_APIC_ENABLED (1<<8) -#define APIC_ISR 0x100 -#define APIC_ISR_NR 0x8 /* Number of 32 bit ISR registers. */ -#define APIC_TMR 0x180 -#define APIC_IRR 0x200 -#define APIC_ESR 0x280 -#define APIC_ESR_SEND_CS 0x00001 -#define APIC_ESR_RECV_CS 0x00002 -#define APIC_ESR_SEND_ACC 0x00004 -#define APIC_ESR_RECV_ACC 0x00008 -#define APIC_ESR_SENDILL 0x00020 -#define APIC_ESR_RECVILL 0x00040 -#define APIC_ESR_ILLREGA 0x00080 -#define APIC_ICR 0x300 -#define APIC_DEST_SELF 0x40000 -#define APIC_DEST_ALLINC 0x80000 -#define APIC_DEST_ALLBUT 0xC0000 -#define APIC_ICR_RR_MASK 0x30000 -#define APIC_ICR_RR_INVALID 0x00000 -#define APIC_ICR_RR_INPROG 0x10000 -#define APIC_ICR_RR_VALID 0x20000 -#define APIC_INT_LEVELTRIG 0x08000 -#define APIC_INT_ASSERT 0x04000 -#define APIC_ICR_BUSY 0x01000 -#define APIC_DEST_LOGICAL 0x00800 -#define APIC_DEST_PHYSICAL 0x00000 -#define APIC_DM_FIXED 0x00000 -#define APIC_DM_LOWEST 0x00100 -#define APIC_DM_SMI 0x00200 -#define APIC_DM_REMRD 0x00300 -#define APIC_DM_NMI 0x00400 -#define APIC_DM_INIT 0x00500 -#define APIC_DM_STARTUP 0x00600 -#define APIC_DM_EXTINT 0x00700 -#define APIC_VECTOR_MASK 0x000FF -#define APIC_ICR2 0x310 -#define GET_APIC_DEST_FIELD(x) (((x)>>24)&0xFF) -#define SET_APIC_DEST_FIELD(x) ((x)<<24) -#define APIC_LVTT 0x320 -#define APIC_LVTTHMR 0x330 -#define APIC_LVTPC 0x340 -#define APIC_LVT0 0x350 -#define APIC_LVT_TIMER_BASE_MASK (0x3<<18) -#define GET_APIC_TIMER_BASE(x) (((x)>>18)&0x3) -#define SET_APIC_TIMER_BASE(x) (((x)<<18)) -#define APIC_TIMER_BASE_CLKIN 0x0 -#define APIC_TIMER_BASE_TMBASE 0x1 -#define APIC_TIMER_BASE_DIV 0x2 -#define APIC_LVT_TIMER_PERIODIC (1<<17) -#define APIC_LVT_MASKED (1<<16) -#define APIC_LVT_LEVEL_TRIGGER (1<<15) -#define APIC_LVT_REMOTE_IRR (1<<14) -#define APIC_INPUT_POLARITY (1<<13) -#define APIC_SEND_PENDING (1<<12) -#define APIC_MODE_MASK 0x700 -#define GET_APIC_DELIVERY_MODE(x) (((x)>>8)&0x7) -#define SET_APIC_DELIVERY_MODE(x,y) (((x)&~0x700)|((y)<<8)) -#define APIC_MODE_FIXED 0x0 -#define APIC_MODE_NMI 0x4 -#define APIC_MODE_EXTINT 0x7 -#define APIC_LVT1 0x360 -#define APIC_LVTERR 0x370 -#define APIC_TMICT 0x380 -#define APIC_TMCCT 0x390 -#define APIC_TDCR 0x3E0 -#define APIC_TDR_DIV_TMBASE (1<<2) -#define APIC_TDR_DIV_1 0xB -#define APIC_TDR_DIV_2 0x0 -#define APIC_TDR_DIV_4 0x1 -#define APIC_TDR_DIV_8 0x2 -#define APIC_TDR_DIV_16 0x3 -#define APIC_TDR_DIV_32 0x8 -#define APIC_TDR_DIV_64 0x9 -#define APIC_TDR_DIV_128 0xA - -#define APIC_BASE (fix_to_virt(FIX_APIC_BASE)) - -#define MAX_IO_APICS 128 -#define MAX_LOCAL_APIC 256 - -/* - * All x86-64 systems are xAPIC compatible. - * In the following, "apicid" is a physical APIC ID. - */ -#define XAPIC_DEST_CPUS_SHIFT 4 -#define XAPIC_DEST_CPUS_MASK ((1u << XAPIC_DEST_CPUS_SHIFT) - 1) -#define XAPIC_DEST_CLUSTER_MASK (XAPIC_DEST_CPUS_MASK << XAPIC_DEST_CPUS_SHIFT) -#define APIC_CLUSTER(apicid) ((apicid) & XAPIC_DEST_CLUSTER_MASK) -#define APIC_CLUSTERID(apicid) (APIC_CLUSTER(apicid) >> XAPIC_DEST_CPUS_SHIFT) -#define APIC_CPUID(apicid) ((apicid) & XAPIC_DEST_CPUS_MASK) -#define NUM_APIC_CLUSTERS ((BAD_APICID + 1) >> XAPIC_DEST_CPUS_SHIFT) - -/* - * the local APIC register structure, memory mapped. Not terribly well - * tested, but we might eventually use this one in the future - the - * problem why we cannot use it right now is the P5 APIC, it has an - * errata which cannot take 8-bit reads and writes, only 32-bit ones ... - */ -#define u32 unsigned int - -struct local_apic { - -/*000*/ struct { u32 __reserved[4]; } __reserved_01; - -/*010*/ struct { u32 __reserved[4]; } __reserved_02; - -/*020*/ struct { /* APIC ID Register */ - u32 __reserved_1 : 24, - phys_apic_id : 4, - __reserved_2 : 4; - u32 __reserved[3]; - } id; - -/*030*/ const - struct { /* APIC Version Register */ - u32 version : 8, - __reserved_1 : 8, - max_lvt : 8, - __reserved_2 : 8; - u32 __reserved[3]; - } version; - -/*040*/ struct { u32 __reserved[4]; } __reserved_03; - -/*050*/ struct { u32 __reserved[4]; } __reserved_04; - -/*060*/ struct { u32 __reserved[4]; } __reserved_05; - -/*070*/ struct { u32 __reserved[4]; } __reserved_06; - -/*080*/ struct { /* Task Priority Register */ - u32 priority : 8, - __reserved_1 : 24; - u32 __reserved_2[3]; - } tpr; - -/*090*/ const - struct { /* Arbitration Priority Register */ - u32 priority : 8, - __reserved_1 : 24; - u32 __reserved_2[3]; - } apr; - -/*0A0*/ const - struct { /* Processor Priority Register */ - u32 priority : 8, - __reserved_1 : 24; - u32 __reserved_2[3]; - } ppr; - -/*0B0*/ struct { /* End Of Interrupt Register */ - u32 eoi; - u32 __reserved[3]; - } eoi; - -/*0C0*/ struct { u32 __reserved[4]; } __reserved_07; - -/*0D0*/ struct { /* Logical Destination Register */ - u32 __reserved_1 : 24, - logical_dest : 8; - u32 __reserved_2[3]; - } ldr; - -/*0E0*/ struct { /* Destination Format Register */ - u32 __reserved_1 : 28, - model : 4; - u32 __reserved_2[3]; - } dfr; - -/*0F0*/ struct { /* Spurious Interrupt Vector Register */ - u32 spurious_vector : 8, - apic_enabled : 1, - focus_cpu : 1, - __reserved_2 : 22; - u32 __reserved_3[3]; - } svr; - -/*100*/ struct { /* In Service Register */ -/*170*/ u32 bitfield; - u32 __reserved[3]; - } isr [8]; - -/*180*/ struct { /* Trigger Mode Register */ -/*1F0*/ u32 bitfield; - u32 __reserved[3]; - } tmr [8]; - -/*200*/ struct { /* Interrupt Request Register */ -/*270*/ u32 bitfield; - u32 __reserved[3]; - } irr [8]; - -/*280*/ union { /* Error Status Register */ - struct { - u32 send_cs_error : 1, - receive_cs_error : 1, - send_accept_error : 1, - receive_accept_error : 1, - __reserved_1 : 1, - send_illegal_vector : 1, - receive_illegal_vector : 1, - illegal_register_address : 1, - __reserved_2 : 24; - u32 __reserved_3[3]; - } error_bits; - struct { - u32 errors; - u32 __reserved_3[3]; - } all_errors; - } esr; - -/*290*/ struct { u32 __reserved[4]; } __reserved_08; - -/*2A0*/ struct { u32 __reserved[4]; } __reserved_09; - -/*2B0*/ struct { u32 __reserved[4]; } __reserved_10; - -/*2C0*/ struct { u32 __reserved[4]; } __reserved_11; - -/*2D0*/ struct { u32 __reserved[4]; } __reserved_12; - -/*2E0*/ struct { u32 __reserved[4]; } __reserved_13; - -/*2F0*/ struct { u32 __reserved[4]; } __reserved_14; - -/*300*/ struct { /* Interrupt Command Register 1 */ - u32 vector : 8, - delivery_mode : 3, - destination_mode : 1, - delivery_status : 1, - __reserved_1 : 1, - level : 1, - trigger : 1, - __reserved_2 : 2, - shorthand : 2, - __reserved_3 : 12; - u32 __reserved_4[3]; - } icr1; - -/*310*/ struct { /* Interrupt Command Register 2 */ - union { - u32 __reserved_1 : 24, - phys_dest : 4, - __reserved_2 : 4; - u32 __reserved_3 : 24, - logical_dest : 8; - } dest; - u32 __reserved_4[3]; - } icr2; - -/*320*/ struct { /* LVT - Timer */ - u32 vector : 8, - __reserved_1 : 4, - delivery_status : 1, - __reserved_2 : 3, - mask : 1, - timer_mode : 1, - __reserved_3 : 14; - u32 __reserved_4[3]; - } lvt_timer; - -/*330*/ struct { /* LVT - Thermal Sensor */ - u32 vector : 8, - delivery_mode : 3, - __reserved_1 : 1, - delivery_status : 1, - __reserved_2 : 3, - mask : 1, - __reserved_3 : 15; - u32 __reserved_4[3]; - } lvt_thermal; - -/*340*/ struct { /* LVT - Performance Counter */ - u32 vector : 8, - delivery_mode : 3, - __reserved_1 : 1, - delivery_status : 1, - __reserved_2 : 3, - mask : 1, - __reserved_3 : 15; - u32 __reserved_4[3]; - } lvt_pc; - -/*350*/ struct { /* LVT - LINT0 */ - u32 vector : 8, - delivery_mode : 3, - __reserved_1 : 1, - delivery_status : 1, - polarity : 1, - remote_irr : 1, - trigger : 1, - mask : 1, - __reserved_2 : 15; - u32 __reserved_3[3]; - } lvt_lint0; - -/*360*/ struct { /* LVT - LINT1 */ - u32 vector : 8, - delivery_mode : 3, - __reserved_1 : 1, - delivery_status : 1, - polarity : 1, - remote_irr : 1, - trigger : 1, - mask : 1, - __reserved_2 : 15; - u32 __reserved_3[3]; - } lvt_lint1; - -/*370*/ struct { /* LVT - Error */ - u32 vector : 8, - __reserved_1 : 4, - delivery_status : 1, - __reserved_2 : 3, - mask : 1, - __reserved_3 : 15; - u32 __reserved_4[3]; - } lvt_error; - -/*380*/ struct { /* Timer Initial Count Register */ - u32 initial_count; - u32 __reserved_2[3]; - } timer_icr; - -/*390*/ const - struct { /* Timer Current Count Register */ - u32 curr_count; - u32 __reserved_2[3]; - } timer_ccr; - -/*3A0*/ struct { u32 __reserved[4]; } __reserved_16; - -/*3B0*/ struct { u32 __reserved[4]; } __reserved_17; - -/*3C0*/ struct { u32 __reserved[4]; } __reserved_18; - -/*3D0*/ struct { u32 __reserved[4]; } __reserved_19; - -/*3E0*/ struct { /* Timer Divide Configuration Register */ - u32 divisor : 4, - __reserved_1 : 28; - u32 __reserved_2[3]; - } timer_dcr; - -/*3F0*/ struct { u32 __reserved[4]; } __reserved_20; - -} __attribute__ ((packed)); - -#undef u32 - -#define BAD_APICID 0xFFu - -#endif diff --git a/include/asm-x86/arch_hooks.h b/include/asm-x86/arch_hooks.h index a8c1fca9726d..768aee8a04ef 100644 --- a/include/asm-x86/arch_hooks.h +++ b/include/asm-x86/arch_hooks.h @@ -6,7 +6,7 @@ /* * linux/include/asm/arch_hooks.h * - * define the architecture specific hooks + * define the architecture specific hooks */ /* these aren't arch hooks, they are generic routines @@ -24,7 +24,4 @@ extern void trap_init_hook(void); extern void time_init_hook(void); extern void mca_nmi_hook(void); -extern int setup_early_printk(char *); -extern void early_printk(const char *fmt, ...) __attribute__((format(printf,1,2))); - #endif diff --git a/include/asm-x86/asm.h b/include/asm-x86/asm.h new file mode 100644 index 000000000000..1a6980a60fc6 --- /dev/null +++ b/include/asm-x86/asm.h @@ -0,0 +1,32 @@ +#ifndef _ASM_X86_ASM_H +#define _ASM_X86_ASM_H + +#ifdef CONFIG_X86_32 +/* 32 bits */ + +# define _ASM_PTR " .long " +# define _ASM_ALIGN " .balign 4 " +# define _ASM_MOV_UL " movl " + +# define _ASM_INC " incl " +# define _ASM_DEC " decl " +# define _ASM_ADD " addl " +# define _ASM_SUB " subl " +# define _ASM_XADD " xaddl " + +#else +/* 64 bits */ + +# define _ASM_PTR " .quad " +# define _ASM_ALIGN " .balign 8 " +# define _ASM_MOV_UL " movq " + +# define _ASM_INC " incq " +# define _ASM_DEC " decq " +# define _ASM_ADD " addq " +# define _ASM_SUB " subq " +# define _ASM_XADD " xaddq " + +#endif /* CONFIG_X86_32 */ + +#endif /* _ASM_X86_ASM_H */ diff --git a/include/asm-x86/bitops.h b/include/asm-x86/bitops.h index 07e3f6d4fe47..1a23ce1a5697 100644 --- a/include/asm-x86/bitops.h +++ b/include/asm-x86/bitops.h @@ -1,5 +1,321 @@ +#ifndef _ASM_X86_BITOPS_H +#define _ASM_X86_BITOPS_H + +/* + * Copyright 1992, Linus Torvalds. + */ + +#ifndef _LINUX_BITOPS_H +#error only <linux/bitops.h> can be included directly +#endif + +#include <linux/compiler.h> +#include <asm/alternative.h> + +/* + * These have to be done with inline assembly: that way the bit-setting + * is guaranteed to be atomic. All bit operations return 0 if the bit + * was cleared before the operation and != 0 if it was not. + * + * bit 0 is the LSB of addr; bit 32 is the LSB of (addr+1). + */ + +#if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 1) +/* Technically wrong, but this avoids compilation errors on some gcc + versions. */ +#define ADDR "=m" (*(volatile long *) addr) +#else +#define ADDR "+m" (*(volatile long *) addr) +#endif + +/** + * set_bit - Atomically set a bit in memory + * @nr: the bit to set + * @addr: the address to start counting from + * + * This function is atomic and may not be reordered. See __set_bit() + * if you do not require the atomic guarantees. + * + * Note: there are no guarantees that this function will not be reordered + * on non x86 architectures, so if you are writing portable code, + * make sure not to rely on its reordering guarantees. + * + * Note that @nr may be almost arbitrarily large; this function is not + * restricted to acting on a single-word quantity. + */ +static inline void set_bit(int nr, volatile void *addr) +{ + asm volatile(LOCK_PREFIX "bts %1,%0" + : ADDR + : "Ir" (nr) : "memory"); +} + +/** + * __set_bit - Set a bit in memory + * @nr: the bit to set + * @addr: the address to start counting from + * + * Unlike set_bit(), this function is non-atomic and may be reordered. + * If it's called on the same region of memory simultaneously, the effect + * may be that only one operation succeeds. + */ +static inline void __set_bit(int nr, volatile void *addr) +{ + asm volatile("bts %1,%0" + : ADDR + : "Ir" (nr) : "memory"); +} + + +/** + * clear_bit - Clears a bit in memory + * @nr: Bit to clear + * @addr: Address to start counting from + * + * clear_bit() is atomic and may not be reordered. However, it does + * not contain a memory barrier, so if it is used for locking purposes, + * you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit() + * in order to ensure changes are visible on other processors. + */ +static inline void clear_bit(int nr, volatile void *addr) +{ + asm volatile(LOCK_PREFIX "btr %1,%0" + : ADDR + : "Ir" (nr)); +} + +/* + * clear_bit_unlock - Clears a bit in memory + * @nr: Bit to clear + * @addr: Address to start counting from + * + * clear_bit() is atomic and implies release semantics before the memory + * operation. It can be used for an unlock. + */ +static inline void clear_bit_unlock(unsigned nr, volatile void *addr) +{ + barrier(); + clear_bit(nr, addr); +} + +static inline void __clear_bit(int nr, volatile void *addr) +{ + asm volatile("btr %1,%0" : ADDR : "Ir" (nr)); +} + +/* + * __clear_bit_unlock - Clears a bit in memory + * @nr: Bit to clear + * @addr: Address to start counting from + * + * __clear_bit() is non-atomic and implies release semantics before the memory + * operation. It can be used for an unlock if no other CPUs can concurrently + * modify other bits in the word. + * + * No memory barrier is required here, because x86 cannot reorder stores past + * older loads. Same principle as spin_unlock. + */ +static inline void __clear_bit_unlock(unsigned nr, volatile void *addr) +{ + barrier(); + __clear_bit(nr, addr); +} + +#define smp_mb__before_clear_bit() barrier() +#define smp_mb__after_clear_bit() barrier() + +/** + * __change_bit - Toggle a bit in memory + * @nr: the bit to change + * @addr: the address to start counting from + * + * Unlike change_bit(), this function is non-atomic and may be reordered. + * If it's called on the same region of memory simultaneously, the effect + * may be that only one operation succeeds. + */ +static inline void __change_bit(int nr, volatile void *addr) +{ + asm volatile("btc %1,%0" : ADDR : "Ir" (nr)); +} + +/** + * change_bit - Toggle a bit in memory + * @nr: Bit to change + * @addr: Address to start counting from + * + * change_bit() is atomic and may not be reordered. + * Note that @nr may be almost arbitrarily large; this function is not + * restricted to acting on a single-word quantity. + */ +static inline void change_bit(int nr, volatile void *addr) +{ + asm volatile(LOCK_PREFIX "btc %1,%0" + : ADDR : "Ir" (nr)); +} + +/** + * test_and_set_bit - Set a bit and return its old value + * @nr: Bit to set + * @addr: Address to count from + * + * This operation is atomic and cannot be reordered. + * It also implies a memory barrier. + */ +static inline int test_and_set_bit(int nr, volatile void *addr) +{ + int oldbit; + + asm volatile(LOCK_PREFIX "bts %2,%1\n\t" + "sbb %0,%0" + : "=r" (oldbit), ADDR + : "Ir" (nr) : "memory"); + + return oldbit; +} + +/** + * test_and_set_bit_lock - Set a bit and return its old value for lock + * @nr: Bit to set + * @addr: Address to count from + * + * This is the same as test_and_set_bit on x86. + */ +static inline int test_and_set_bit_lock(int nr, volatile void *addr) +{ + return test_and_set_bit(nr, addr); +} + +/** + * __test_and_set_bit - Set a bit and return its old value + * @nr: Bit to set + * @addr: Address to count from + * + * This operation is non-atomic and can be reordered. + * If two examples of this operation race, one can appear to succeed + * but actually fail. You must protect multiple accesses with a lock. + */ +static inline int __test_and_set_bit(int nr, volatile void *addr) +{ + int oldbit; + + asm("bts %2,%1\n\t" + "sbb %0,%0" + : "=r" (oldbit), ADDR + : "Ir" (nr)); + return oldbit; +} + +/** + * test_and_clear_bit - Clear a bit and return its old value + * @nr: Bit to clear + * @addr: Address to count from + * + * This operation is atomic and cannot be reordered. + * It also implies a memory barrier. + */ +static inline int test_and_clear_bit(int nr, volatile void *addr) +{ + int oldbit; + + asm volatile(LOCK_PREFIX "btr %2,%1\n\t" + "sbb %0,%0" + : "=r" (oldbit), ADDR + : "Ir" (nr) : "memory"); + + return oldbit; +} + +/** + * __test_and_clear_bit - Clear a bit and return its old value + * @nr: Bit to clear + * @addr: Address to count from + * + * This operation is non-atomic and can be reordered. + * If two examples of this operation race, one can appear to succeed + * but actually fail. You must protect multiple accesses with a lock. + */ +static inline int __test_and_clear_bit(int nr, volatile void *addr) +{ + int oldbit; + + asm volatile("btr %2,%1\n\t" + "sbb %0,%0" + : "=r" (oldbit), ADDR + : "Ir" (nr)); + return oldbit; +} + +/* WARNING: non atomic and it can be reordered! */ +static inline int __test_and_change_bit(int nr, volatile void *addr) +{ + int oldbit; + + asm volatile("btc %2,%1\n\t" + "sbb %0,%0" + : "=r" (oldbit), ADDR + : "Ir" (nr) : "memory"); + + return oldbit; +} + +/** + * test_and_change_bit - Change a bit and return its old value + * @nr: Bit to change + * @addr: Address to count from + * + * This operation is atomic and cannot be reordered. + * It also implies a memory barrier. + */ +static inline int test_and_change_bit(int nr, volatile void *addr) +{ + int oldbit; + + asm volatile(LOCK_PREFIX "btc %2,%1\n\t" + "sbb %0,%0" + : "=r" (oldbit), ADDR + : "Ir" (nr) : "memory"); + + return oldbit; +} + +static inline int constant_test_bit(int nr, const volatile void *addr) +{ + return ((1UL << (nr % BITS_PER_LONG)) & + (((unsigned long *)addr)[nr / BITS_PER_LONG])) != 0; +} + +static inline int variable_test_bit(int nr, volatile const void *addr) +{ + int oldbit; + + asm volatile("bt %2,%1\n\t" + "sbb %0,%0" + : "=r" (oldbit) + : "m" (*(unsigned long *)addr), "Ir" (nr)); + + return oldbit; +} + +#if 0 /* Fool kernel-doc since it doesn't do macros yet */ +/** + * test_bit - Determine whether a bit is set + * @nr: bit number to test + * @addr: Address to start counting from + */ +static int test_bit(int nr, const volatile unsigned long *addr); +#endif + +#define test_bit(nr,addr) \ + (__builtin_constant_p(nr) ? \ + constant_test_bit((nr),(addr)) : \ + variable_test_bit((nr),(addr))) + +#undef ADDR + #ifdef CONFIG_X86_32 # include "bitops_32.h" #else # include "bitops_64.h" #endif + +#endif /* _ASM_X86_BITOPS_H */ diff --git a/include/asm-x86/bitops_32.h b/include/asm-x86/bitops_32.h index 0b40f6d20bea..e4d75fcf9c03 100644 --- a/include/asm-x86/bitops_32.h +++ b/include/asm-x86/bitops_32.h @@ -5,320 +5,12 @@ * Copyright 1992, Linus Torvalds. */ -#ifndef _LINUX_BITOPS_H -#error only <linux/bitops.h> can be included directly -#endif - -#include <linux/compiler.h> -#include <asm/alternative.h> - -/* - * These have to be done with inline assembly: that way the bit-setting - * is guaranteed to be atomic. All bit operations return 0 if the bit - * was cleared before the operation and != 0 if it was not. - * - * bit 0 is the LSB of addr; bit 32 is the LSB of (addr+1). - */ - -#define ADDR (*(volatile long *) addr) - -/** - * set_bit - Atomically set a bit in memory - * @nr: the bit to set - * @addr: the address to start counting from - * - * This function is atomic and may not be reordered. See __set_bit() - * if you do not require the atomic guarantees. - * - * Note: there are no guarantees that this function will not be reordered - * on non x86 architectures, so if you are writing portable code, - * make sure not to rely on its reordering guarantees. - * - * Note that @nr may be almost arbitrarily large; this function is not - * restricted to acting on a single-word quantity. - */ -static inline void set_bit(int nr, volatile unsigned long * addr) -{ - __asm__ __volatile__( LOCK_PREFIX - "btsl %1,%0" - :"+m" (ADDR) - :"Ir" (nr)); -} - -/** - * __set_bit - Set a bit in memory - * @nr: the bit to set - * @addr: the address to start counting from - * - * Unlike set_bit(), this function is non-atomic and may be reordered. - * If it's called on the same region of memory simultaneously, the effect - * may be that only one operation succeeds. - */ -static inline void __set_bit(int nr, volatile unsigned long * addr) -{ - __asm__( - "btsl %1,%0" - :"+m" (ADDR) - :"Ir" (nr)); -} - -/** - * clear_bit - Clears a bit in memory - * @nr: Bit to clear - * @addr: Address to start counting from - * - * clear_bit() is atomic and may not be reordered. However, it does - * not contain a memory barrier, so if it is used for locking purposes, - * you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit() - * in order to ensure changes are visible on other processors. - */ -static inline void clear_bit(int nr, volatile unsigned long * addr) -{ - __asm__ __volatile__( LOCK_PREFIX - "btrl %1,%0" - :"+m" (ADDR) - :"Ir" (nr)); -} - -/* - * clear_bit_unlock - Clears a bit in memory - * @nr: Bit to clear - * @addr: Address to start counting from - * - * clear_bit() is atomic and implies release semantics before the memory - * operation. It can be used for an unlock. - */ -static inline void clear_bit_unlock(unsigned long nr, volatile unsigned long *addr) -{ - barrier(); - clear_bit(nr, addr); -} - -static inline void __clear_bit(int nr, volatile unsigned long * addr) -{ - __asm__ __volatile__( - "btrl %1,%0" - :"+m" (ADDR) - :"Ir" (nr)); -} - -/* - * __clear_bit_unlock - Clears a bit in memory - * @nr: Bit to clear - * @addr: Address to start counting from - * - * __clear_bit() is non-atomic and implies release semantics before the memory - * operation. It can be used for an unlock if no other CPUs can concurrently - * modify other bits in the word. - * - * No memory barrier is required here, because x86 cannot reorder stores past - * older loads. Same principle as spin_unlock. - */ -static inline void __clear_bit_unlock(unsigned long nr, volatile unsigned long *addr) -{ - barrier(); - __clear_bit(nr, addr); -} - -#define smp_mb__before_clear_bit() barrier() -#define smp_mb__after_clear_bit() barrier() - -/** - * __change_bit - Toggle a bit in memory - * @nr: the bit to change - * @addr: the address to start counting from - * - * Unlike change_bit(), this function is non-atomic and may be reordered. - * If it's called on the same region of memory simultaneously, the effect - * may be that only one operation succeeds. - */ -static inline void __change_bit(int nr, volatile unsigned long * addr) -{ - __asm__ __volatile__( - "btcl %1,%0" - :"+m" (ADDR) - :"Ir" (nr)); -} - -/** - * change_bit - Toggle a bit in memory - * @nr: Bit to change - * @addr: Address to start counting from - * - * change_bit() is atomic and may not be reordered. It may be - * reordered on other architectures than x86. - * Note that @nr may be almost arbitrarily large; this function is not - * restricted to acting on a single-word quantity. - */ -static inline void change_bit(int nr, volatile unsigned long * addr) -{ - __asm__ __volatile__( LOCK_PREFIX - "btcl %1,%0" - :"+m" (ADDR) - :"Ir" (nr)); -} - -/** - * test_and_set_bit - Set a bit and return its old value - * @nr: Bit to set - * @addr: Address to count from - * - * This operation is atomic and cannot be reordered. - * It may be reordered on other architectures than x86. - * It also implies a memory barrier. - */ -static inline int test_and_set_bit(int nr, volatile unsigned long * addr) -{ - int oldbit; - - __asm__ __volatile__( LOCK_PREFIX - "btsl %2,%1\n\tsbbl %0,%0" - :"=r" (oldbit),"+m" (ADDR) - :"Ir" (nr) : "memory"); - return oldbit; -} - -/** - * test_and_set_bit_lock - Set a bit and return its old value for lock - * @nr: Bit to set - * @addr: Address to count from - * - * This is the same as test_and_set_bit on x86. - */ -static inline int test_and_set_bit_lock(int nr, volatile unsigned long *addr) -{ - return test_and_set_bit(nr, addr); -} - -/** - * __test_and_set_bit - Set a bit and return its old value - * @nr: Bit to set - * @addr: Address to count from - * - * This operation is non-atomic and can be reordered. - * If two examples of this operation race, one can appear to succeed - * but actually fail. You must protect multiple accesses with a lock. - */ -static inline int __test_and_set_bit(int nr, volatile unsigned long * addr) -{ - int oldbit; - - __asm__( - "btsl %2,%1\n\tsbbl %0,%0" - :"=r" (oldbit),"+m" (ADDR) - :"Ir" (nr)); - return oldbit; -} - -/** - * test_and_clear_bit - Clear a bit and return its old value - * @nr: Bit to clear - * @addr: Address to count from - * - * This operation is atomic and cannot be reordered. - * It can be reorderdered on other architectures other than x86. - * It also implies a memory barrier. - */ -static inline int test_and_clear_bit(int nr, volatile unsigned long * addr) -{ - int oldbit; - - __asm__ __volatile__( LOCK_PREFIX - "btrl %2,%1\n\tsbbl %0,%0" - :"=r" (oldbit),"+m" (ADDR) - :"Ir" (nr) : "memory"); - return oldbit; -} - -/** - * __test_and_clear_bit - Clear a bit and return its old value - * @nr: Bit to clear - * @addr: Address to count from - * - * This operation is non-atomic and can be reordered. - * If two examples of this operation race, one can appear to succeed - * but actually fail. You must protect multiple accesses with a lock. - */ -static inline int __test_and_clear_bit(int nr, volatile unsigned long *addr) -{ - int oldbit; - - __asm__( - "btrl %2,%1\n\tsbbl %0,%0" - :"=r" (oldbit),"+m" (ADDR) - :"Ir" (nr)); - return oldbit; -} - -/* WARNING: non atomic and it can be reordered! */ -static inline int __test_and_change_bit(int nr, volatile unsigned long *addr) -{ - int oldbit; - - __asm__ __volatile__( - "btcl %2,%1\n\tsbbl %0,%0" - :"=r" (oldbit),"+m" (ADDR) - :"Ir" (nr) : "memory"); - return oldbit; -} - -/** - * test_and_change_bit - Change a bit and return its old value - * @nr: Bit to change - * @addr: Address to count from - * - * This operation is atomic and cannot be reordered. - * It also implies a memory barrier. - */ -static inline int test_and_change_bit(int nr, volatile unsigned long* addr) -{ - int oldbit; - - __asm__ __volatile__( LOCK_PREFIX - "btcl %2,%1\n\tsbbl %0,%0" - :"=r" (oldbit),"+m" (ADDR) - :"Ir" (nr) : "memory"); - return oldbit; -} - -#if 0 /* Fool kernel-doc since it doesn't do macros yet */ -/** - * test_bit - Determine whether a bit is set - * @nr: bit number to test - * @addr: Address to start counting from - */ -static int test_bit(int nr, const volatile void * addr); -#endif - -static __always_inline int constant_test_bit(int nr, const volatile unsigned long *addr) -{ - return ((1UL << (nr & 31)) & (addr[nr >> 5])) != 0; -} - -static inline int variable_test_bit(int nr, const volatile unsigned long * addr) -{ - int oldbit; - - __asm__ __volatile__( - "btl %2,%1\n\tsbbl %0,%0" - :"=r" (oldbit) - :"m" (ADDR),"Ir" (nr)); - return oldbit; -} - -#define test_bit(nr,addr) \ -(__builtin_constant_p(nr) ? \ - constant_test_bit((nr),(addr)) : \ - variable_test_bit((nr),(addr))) - -#undef ADDR - /** * find_first_zero_bit - find the first zero bit in a memory region * @addr: The address to start the search at * @size: The maximum size to search * - * Returns the bit-number of the first zero bit, not the number of the byte + * Returns the bit number of the first zero bit, not the number of the byte * containing a bit. */ static inline int find_first_zero_bit(const unsigned long *addr, unsigned size) @@ -348,7 +40,7 @@ static inline int find_first_zero_bit(const unsigned long *addr, unsigned size) /** * find_next_zero_bit - find the first zero bit in a memory region * @addr: The address to base the search on - * @offset: The bitnumber to start searching at + * @offset: The bit number to start searching at * @size: The maximum size to search */ int find_next_zero_bit(const unsigned long *addr, int size, int offset); @@ -372,7 +64,7 @@ static inline unsigned long __ffs(unsigned long word) * @addr: The address to start the search at * @size: The maximum size to search * - * Returns the bit-number of the first set bit, not the number of the byte + * Returns the bit number of the first set bit, not the number of the byte * containing a bit. */ static inline unsigned find_first_bit(const unsigned long *addr, unsigned size) @@ -391,7 +83,7 @@ static inline unsigned find_first_bit(const unsigned long *addr, unsigned size) /** * find_next_bit - find the first set bit in a memory region * @addr: The address to base the search on - * @offset: The bitnumber to start searching at + * @offset: The bit number to start searching at * @size: The maximum size to search */ int find_next_bit(const unsigned long *addr, int size, int offset); @@ -460,10 +152,10 @@ static inline int fls(int x) #include <asm-generic/bitops/ext2-non-atomic.h> -#define ext2_set_bit_atomic(lock,nr,addr) \ - test_and_set_bit((nr),(unsigned long*)addr) -#define ext2_clear_bit_atomic(lock,nr, addr) \ - test_and_clear_bit((nr),(unsigned long*)addr) +#define ext2_set_bit_atomic(lock, nr, addr) \ + test_and_set_bit((nr), (unsigned long *)addr) +#define ext2_clear_bit_atomic(lock, nr, addr) \ + test_and_clear_bit((nr), (unsigned long *)addr) #include <asm-generic/bitops/minix.h> diff --git a/include/asm-x86/bitops_64.h b/include/asm-x86/bitops_64.h index 766bcc0470a6..48adbf56ca60 100644 --- a/include/asm-x86/bitops_64.h +++ b/include/asm-x86/bitops_64.h @@ -5,303 +5,6 @@ * Copyright 1992, Linus Torvalds. */ -#ifndef _LINUX_BITOPS_H -#error only <linux/bitops.h> can be included directly -#endif - -#include <asm/alternative.h> - -#if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 1) -/* Technically wrong, but this avoids compilation errors on some gcc - versions. */ -#define ADDR "=m" (*(volatile long *) addr) -#else -#define ADDR "+m" (*(volatile long *) addr) -#endif - -/** - * set_bit - Atomically set a bit in memory - * @nr: the bit to set - * @addr: the address to start counting from - * - * This function is atomic and may not be reordered. See __set_bit() - * if you do not require the atomic guarantees. - * Note that @nr may be almost arbitrarily large; this function is not - * restricted to acting on a single-word quantity. - */ -static inline void set_bit(int nr, volatile void *addr) -{ - __asm__ __volatile__( LOCK_PREFIX - "btsl %1,%0" - :ADDR - :"dIr" (nr) : "memory"); -} - -/** - * __set_bit - Set a bit in memory - * @nr: the bit to set - * @addr: the address to start counting from - * - * Unlike set_bit(), this function is non-atomic and may be reordered. - * If it's called on the same region of memory simultaneously, the effect - * may be that only one operation succeeds. - */ -static inline void __set_bit(int nr, volatile void *addr) -{ - __asm__ volatile( - "btsl %1,%0" - :ADDR - :"dIr" (nr) : "memory"); -} - -/** - * clear_bit - Clears a bit in memory - * @nr: Bit to clear - * @addr: Address to start counting from - * - * clear_bit() is atomic and may not be reordered. However, it does - * not contain a memory barrier, so if it is used for locking purposes, - * you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit() - * in order to ensure changes are visible on other processors. - */ -static inline void clear_bit(int nr, volatile void *addr) -{ - __asm__ __volatile__( LOCK_PREFIX - "btrl %1,%0" - :ADDR - :"dIr" (nr)); -} - -/* - * clear_bit_unlock - Clears a bit in memory - * @nr: Bit to clear - * @addr: Address to start counting from - * - * clear_bit() is atomic and implies release semantics before the memory - * operation. It can be used for an unlock. - */ -static inline void clear_bit_unlock(unsigned long nr, volatile unsigned long *addr) -{ - barrier(); - clear_bit(nr, addr); -} - -static inline void __clear_bit(int nr, volatile void *addr) -{ - __asm__ __volatile__( - "btrl %1,%0" - :ADDR - :"dIr" (nr)); -} - -/* - * __clear_bit_unlock - Clears a bit in memory - * @nr: Bit to clear - * @addr: Address to start counting from - * - * __clear_bit() is non-atomic and implies release semantics before the memory - * operation. It can be used for an unlock if no other CPUs can concurrently - * modify other bits in the word. - * - * No memory barrier is required here, because x86 cannot reorder stores past - * older loads. Same principle as spin_unlock. - */ -static inline void __clear_bit_unlock(unsigned long nr, volatile unsigned long *addr) -{ - barrier(); - __clear_bit(nr, addr); -} - -#define smp_mb__before_clear_bit() barrier() -#define smp_mb__after_clear_bit() barrier() - -/** - * __change_bit - Toggle a bit in memory - * @nr: the bit to change - * @addr: the address to start counting from - * - * Unlike change_bit(), this function is non-atomic and may be reordered. - * If it's called on the same region of memory simultaneously, the effect - * may be that only one operation succeeds. - */ -static inline void __change_bit(int nr, volatile void *addr) -{ - __asm__ __volatile__( - "btcl %1,%0" - :ADDR - :"dIr" (nr)); -} - -/** - * change_bit - Toggle a bit in memory - * @nr: Bit to change - * @addr: Address to start counting from - * - * change_bit() is atomic and may not be reordered. - * Note that @nr may be almost arbitrarily large; this function is not - * restricted to acting on a single-word quantity. - */ -static inline void change_bit(int nr, volatile void *addr) -{ - __asm__ __volatile__( LOCK_PREFIX - "btcl %1,%0" - :ADDR - :"dIr" (nr)); -} - -/** - * test_and_set_bit - Set a bit and return its old value - * @nr: Bit to set - * @addr: Address to count from - * - * This operation is atomic and cannot be reordered. - * It also implies a memory barrier. - */ -static inline int test_and_set_bit(int nr, volatile void *addr) -{ - int oldbit; - - __asm__ __volatile__( LOCK_PREFIX - "btsl %2,%1\n\tsbbl %0,%0" - :"=r" (oldbit),ADDR - :"dIr" (nr) : "memory"); - return oldbit; -} - -/** - * test_and_set_bit_lock - Set a bit and return its old value for lock - * @nr: Bit to set - * @addr: Address to count from - * - * This is the same as test_and_set_bit on x86. - */ -static inline int test_and_set_bit_lock(int nr, volatile void *addr) -{ - return test_and_set_bit(nr, addr); -} - -/** - * __test_and_set_bit - Set a bit and return its old value - * @nr: Bit to set - * @addr: Address to count from - * - * This operation is non-atomic and can be reordered. - * If two examples of this operation race, one can appear to succeed - * but actually fail. You must protect multiple accesses with a lock. - */ -static inline int __test_and_set_bit(int nr, volatile void *addr) -{ - int oldbit; - - __asm__( - "btsl %2,%1\n\tsbbl %0,%0" - :"=r" (oldbit),ADDR - :"dIr" (nr)); - return oldbit; -} - -/** - * test_and_clear_bit - Clear a bit and return its old value - * @nr: Bit to clear - * @addr: Address to count from - * - * This operation is atomic and cannot be reordered. - * It also implies a memory barrier. - */ -static inline int test_and_clear_bit(int nr, volatile void *addr) -{ - int oldbit; - - __asm__ __volatile__( LOCK_PREFIX - "btrl %2,%1\n\tsbbl %0,%0" - :"=r" (oldbit),ADDR - :"dIr" (nr) : "memory"); - return oldbit; -} - -/** - * __test_and_clear_bit - Clear a bit and return its old value - * @nr: Bit to clear - * @addr: Address to count from - * - * This operation is non-atomic and can be reordered. - * If two examples of this operation race, one can appear to succeed - * but actually fail. You must protect multiple accesses with a lock. - */ -static inline int __test_and_clear_bit(int nr, volatile void *addr) -{ - int oldbit; - - __asm__( - "btrl %2,%1\n\tsbbl %0,%0" - :"=r" (oldbit),ADDR - :"dIr" (nr)); - return oldbit; -} - -/* WARNING: non atomic and it can be reordered! */ -static inline int __test_and_change_bit(int nr, volatile void *addr) -{ - int oldbit; - - __asm__ __volatile__( - "btcl %2,%1\n\tsbbl %0,%0" - :"=r" (oldbit),ADDR - :"dIr" (nr) : "memory"); - return oldbit; -} - -/** - * test_and_change_bit - Change a bit and return its old value - * @nr: Bit to change - * @addr: Address to count from - * - * This operation is atomic and cannot be reordered. - * It also implies a memory barrier. - */ -static inline int test_and_change_bit(int nr, volatile void *addr) -{ - int oldbit; - - __asm__ __volatile__( LOCK_PREFIX - "btcl %2,%1\n\tsbbl %0,%0" - :"=r" (oldbit),ADDR - :"dIr" (nr) : "memory"); - return oldbit; -} - -#if 0 /* Fool kernel-doc since it doesn't do macros yet */ -/** - * test_bit - Determine whether a bit is set - * @nr: bit number to test - * @addr: Address to start counting from - */ -static int test_bit(int nr, const volatile void *addr); -#endif - -static inline int constant_test_bit(int nr, const volatile void *addr) -{ - return ((1UL << (nr & 31)) & (((const volatile unsigned int *) addr)[nr >> 5])) != 0; -} - -static inline int variable_test_bit(int nr, volatile const void *addr) -{ - int oldbit; - - __asm__ __volatile__( - "btl %2,%1\n\tsbbl %0,%0" - :"=r" (oldbit) - :"m" (*(volatile long *)addr),"dIr" (nr)); - return oldbit; -} - -#define test_bit(nr,addr) \ -(__builtin_constant_p(nr) ? \ - constant_test_bit((nr),(addr)) : \ - variable_test_bit((nr),(addr))) - -#undef ADDR - extern long find_first_zero_bit(const unsigned long *addr, unsigned long size); extern long find_next_zero_bit(const unsigned long *addr, long size, long offset); extern long find_first_bit(const unsigned long *addr, unsigned long size); diff --git a/include/asm-x86/bootparam.h b/include/asm-x86/bootparam.h index 19f3ddf2df4b..51151356840f 100644 --- a/include/asm-x86/bootparam.h +++ b/include/asm-x86/bootparam.h @@ -54,13 +54,14 @@ struct sys_desc_table { }; struct efi_info { - __u32 _pad1; + __u32 efi_loader_signature; __u32 efi_systab; __u32 efi_memdesc_size; __u32 efi_memdesc_version; __u32 efi_memmap; __u32 efi_memmap_size; - __u32 _pad2[2]; + __u32 efi_systab_hi; + __u32 efi_memmap_hi; }; /* The so-called "zeropage" */ diff --git a/include/asm-x86/bug.h b/include/asm-x86/bug.h index fd8bdc639c48..8d477a201392 100644 --- a/include/asm-x86/bug.h +++ b/include/asm-x86/bug.h @@ -33,9 +33,6 @@ } while(0) #endif -void out_of_line_bug(void); -#else /* CONFIG_BUG */ -static inline void out_of_line_bug(void) { } #endif /* !CONFIG_BUG */ #include <asm-generic/bug.h> diff --git a/include/asm-x86/bugs.h b/include/asm-x86/bugs.h index aac8317420af..3fcc30dc0731 100644 --- a/include/asm-x86/bugs.h +++ b/include/asm-x86/bugs.h @@ -1,6 +1,7 @@ #ifndef _ASM_X86_BUGS_H #define _ASM_X86_BUGS_H -void check_bugs(void); +extern void check_bugs(void); +extern int ppro_with_ram_bug(void); #endif /* _ASM_X86_BUGS_H */ diff --git a/include/asm-x86/cacheflush.h b/include/asm-x86/cacheflush.h index 9411a2d3f19c..8dd8c5e3cc7f 100644 --- a/include/asm-x86/cacheflush.h +++ b/include/asm-x86/cacheflush.h @@ -24,18 +24,35 @@ #define copy_from_user_page(vma, page, vaddr, dst, src, len) \ memcpy(dst, src, len) -void global_flush_tlb(void); -int change_page_attr(struct page *page, int numpages, pgprot_t prot); -int change_page_attr_addr(unsigned long addr, int numpages, pgprot_t prot); -void clflush_cache_range(void *addr, int size); - -#ifdef CONFIG_DEBUG_PAGEALLOC -/* internal debugging function */ -void kernel_map_pages(struct page *page, int numpages, int enable); -#endif +int __deprecated_for_modules change_page_attr(struct page *page, int numpages, + pgprot_t prot); + +int set_pages_uc(struct page *page, int numpages); +int set_pages_wb(struct page *page, int numpages); +int set_pages_x(struct page *page, int numpages); +int set_pages_nx(struct page *page, int numpages); +int set_pages_ro(struct page *page, int numpages); +int set_pages_rw(struct page *page, int numpages); + +int set_memory_uc(unsigned long addr, int numpages); +int set_memory_wb(unsigned long addr, int numpages); +int set_memory_x(unsigned long addr, int numpages); +int set_memory_nx(unsigned long addr, int numpages); +int set_memory_ro(unsigned long addr, int numpages); +int set_memory_rw(unsigned long addr, int numpages); +int set_memory_np(unsigned long addr, int numpages); + +void clflush_cache_range(void *addr, unsigned int size); #ifdef CONFIG_DEBUG_RODATA void mark_rodata_ro(void); #endif +#ifdef CONFIG_DEBUG_RODATA_TEST +void rodata_test(void); +#else +static inline void rodata_test(void) +{ +} +#endif #endif diff --git a/include/asm-x86/calling.h b/include/asm-x86/calling.h index 6f4f63af96e1..f13e62e2cb3e 100644 --- a/include/asm-x86/calling.h +++ b/include/asm-x86/calling.h @@ -1,162 +1,168 @@ -/* +/* * Some macros to handle stack frames in assembly. - */ + */ +#define R15 0 +#define R14 8 +#define R13 16 +#define R12 24 +#define RBP 32 +#define RBX 40 -#define R15 0 -#define R14 8 -#define R13 16 -#define R12 24 -#define RBP 32 -#define RBX 40 /* arguments: interrupts/non tracing syscalls only save upto here*/ -#define R11 48 -#define R10 56 -#define R9 64 -#define R8 72 -#define RAX 80 -#define RCX 88 -#define RDX 96 -#define RSI 104 -#define RDI 112 -#define ORIG_RAX 120 /* + error_code */ -/* end of arguments */ +#define R11 48 +#define R10 56 +#define R9 64 +#define R8 72 +#define RAX 80 +#define RCX 88 +#define RDX 96 +#define RSI 104 +#define RDI 112 +#define ORIG_RAX 120 /* + error_code */ +/* end of arguments */ + /* cpu exception frame or undefined in case of fast syscall. */ -#define RIP 128 -#define CS 136 -#define EFLAGS 144 -#define RSP 152 -#define SS 160 -#define ARGOFFSET R11 -#define SWFRAME ORIG_RAX +#define RIP 128 +#define CS 136 +#define EFLAGS 144 +#define RSP 152 +#define SS 160 + +#define ARGOFFSET R11 +#define SWFRAME ORIG_RAX - .macro SAVE_ARGS addskip=0,norcx=0,nor891011=0 - subq $9*8+\addskip,%rsp + .macro SAVE_ARGS addskip=0, norcx=0, nor891011=0 + subq $9*8+\addskip, %rsp CFI_ADJUST_CFA_OFFSET 9*8+\addskip - movq %rdi,8*8(%rsp) - CFI_REL_OFFSET rdi,8*8 - movq %rsi,7*8(%rsp) - CFI_REL_OFFSET rsi,7*8 - movq %rdx,6*8(%rsp) - CFI_REL_OFFSET rdx,6*8 + movq %rdi, 8*8(%rsp) + CFI_REL_OFFSET rdi, 8*8 + movq %rsi, 7*8(%rsp) + CFI_REL_OFFSET rsi, 7*8 + movq %rdx, 6*8(%rsp) + CFI_REL_OFFSET rdx, 6*8 .if \norcx .else - movq %rcx,5*8(%rsp) - CFI_REL_OFFSET rcx,5*8 + movq %rcx, 5*8(%rsp) + CFI_REL_OFFSET rcx, 5*8 .endif - movq %rax,4*8(%rsp) - CFI_REL_OFFSET rax,4*8 + movq %rax, 4*8(%rsp) + CFI_REL_OFFSET rax, 4*8 .if \nor891011 .else - movq %r8,3*8(%rsp) - CFI_REL_OFFSET r8,3*8 - movq %r9,2*8(%rsp) - CFI_REL_OFFSET r9,2*8 - movq %r10,1*8(%rsp) - CFI_REL_OFFSET r10,1*8 - movq %r11,(%rsp) - CFI_REL_OFFSET r11,0*8 + movq %r8, 3*8(%rsp) + CFI_REL_OFFSET r8, 3*8 + movq %r9, 2*8(%rsp) + CFI_REL_OFFSET r9, 2*8 + movq %r10, 1*8(%rsp) + CFI_REL_OFFSET r10, 1*8 + movq %r11, (%rsp) + CFI_REL_OFFSET r11, 0*8 .endif .endm -#define ARG_SKIP 9*8 - .macro RESTORE_ARGS skiprax=0,addskip=0,skiprcx=0,skipr11=0,skipr8910=0,skiprdx=0 +#define ARG_SKIP 9*8 + + .macro RESTORE_ARGS skiprax=0, addskip=0, skiprcx=0, skipr11=0, \ + skipr8910=0, skiprdx=0 .if \skipr11 .else - movq (%rsp),%r11 + movq (%rsp), %r11 CFI_RESTORE r11 .endif .if \skipr8910 .else - movq 1*8(%rsp),%r10 + movq 1*8(%rsp), %r10 CFI_RESTORE r10 - movq 2*8(%rsp),%r9 + movq 2*8(%rsp), %r9 CFI_RESTORE r9 - movq 3*8(%rsp),%r8 + movq 3*8(%rsp), %r8 CFI_RESTORE r8 .endif .if \skiprax .else - movq 4*8(%rsp),%rax + movq 4*8(%rsp), %rax CFI_RESTORE rax .endif .if \skiprcx .else - movq 5*8(%rsp),%rcx + movq 5*8(%rsp), %rcx CFI_RESTORE rcx .endif .if \skiprdx .else - movq 6*8(%rsp),%rdx + movq 6*8(%rsp), %rdx CFI_RESTORE rdx .endif - movq 7*8(%rsp),%rsi + movq 7*8(%rsp), %rsi CFI_RESTORE rsi - movq 8*8(%rsp),%rdi + movq 8*8(%rsp), %rdi CFI_RESTORE rdi .if ARG_SKIP+\addskip > 0 - addq $ARG_SKIP+\addskip,%rsp + addq $ARG_SKIP+\addskip, %rsp CFI_ADJUST_CFA_OFFSET -(ARG_SKIP+\addskip) .endif - .endm + .endm .macro LOAD_ARGS offset - movq \offset(%rsp),%r11 - movq \offset+8(%rsp),%r10 - movq \offset+16(%rsp),%r9 - movq \offset+24(%rsp),%r8 - movq \offset+40(%rsp),%rcx - movq \offset+48(%rsp),%rdx - movq \offset+56(%rsp),%rsi - movq \offset+64(%rsp),%rdi - movq \offset+72(%rsp),%rax + movq \offset(%rsp), %r11 + movq \offset+8(%rsp), %r10 + movq \offset+16(%rsp), %r9 + movq \offset+24(%rsp), %r8 + movq \offset+40(%rsp), %rcx + movq \offset+48(%rsp), %rdx + movq \offset+56(%rsp), %rsi + movq \offset+64(%rsp), %rdi + movq \offset+72(%rsp), %rax .endm - -#define REST_SKIP 6*8 + +#define REST_SKIP 6*8 + .macro SAVE_REST - subq $REST_SKIP,%rsp + subq $REST_SKIP, %rsp CFI_ADJUST_CFA_OFFSET REST_SKIP - movq %rbx,5*8(%rsp) - CFI_REL_OFFSET rbx,5*8 - movq %rbp,4*8(%rsp) - CFI_REL_OFFSET rbp,4*8 - movq %r12,3*8(%rsp) - CFI_REL_OFFSET r12,3*8 - movq %r13,2*8(%rsp) - CFI_REL_OFFSET r13,2*8 - movq %r14,1*8(%rsp) - CFI_REL_OFFSET r14,1*8 - movq %r15,(%rsp) - CFI_REL_OFFSET r15,0*8 - .endm + movq %rbx, 5*8(%rsp) + CFI_REL_OFFSET rbx, 5*8 + movq %rbp, 4*8(%rsp) + CFI_REL_OFFSET rbp, 4*8 + movq %r12, 3*8(%rsp) + CFI_REL_OFFSET r12, 3*8 + movq %r13, 2*8(%rsp) + CFI_REL_OFFSET r13, 2*8 + movq %r14, 1*8(%rsp) + CFI_REL_OFFSET r14, 1*8 + movq %r15, (%rsp) + CFI_REL_OFFSET r15, 0*8 + .endm .macro RESTORE_REST - movq (%rsp),%r15 + movq (%rsp), %r15 CFI_RESTORE r15 - movq 1*8(%rsp),%r14 + movq 1*8(%rsp), %r14 CFI_RESTORE r14 - movq 2*8(%rsp),%r13 + movq 2*8(%rsp), %r13 CFI_RESTORE r13 - movq 3*8(%rsp),%r12 + movq 3*8(%rsp), %r12 CFI_RESTORE r12 - movq 4*8(%rsp),%rbp + movq 4*8(%rsp), %rbp CFI_RESTORE rbp - movq 5*8(%rsp),%rbx + movq 5*8(%rsp), %rbx CFI_RESTORE rbx - addq $REST_SKIP,%rsp + addq $REST_SKIP, %rsp CFI_ADJUST_CFA_OFFSET -(REST_SKIP) .endm - + .macro SAVE_ALL SAVE_ARGS SAVE_REST .endm - + .macro RESTORE_ALL addskip=0 RESTORE_REST - RESTORE_ARGS 0,\addskip + RESTORE_ARGS 0, \addskip .endm .macro icebp .byte 0xf1 .endm + diff --git a/include/asm-x86/checksum_64.h b/include/asm-x86/checksum_64.h index 419fe88a0342..e5f79997decc 100644 --- a/include/asm-x86/checksum_64.h +++ b/include/asm-x86/checksum_64.h @@ -4,7 +4,7 @@ /* * Checksums for x86-64 * Copyright 2002 by Andi Kleen, SuSE Labs - * with some code from asm-i386/checksum.h + * with some code from asm-x86/checksum.h */ #include <linux/compiler.h> diff --git a/include/asm-x86/cmpxchg_32.h b/include/asm-x86/cmpxchg_32.h index f86ede28f6dc..cea1dae288a7 100644 --- a/include/asm-x86/cmpxchg_32.h +++ b/include/asm-x86/cmpxchg_32.h @@ -105,15 +105,24 @@ static inline unsigned long __xchg(unsigned long x, volatile void * ptr, int siz #ifdef CONFIG_X86_CMPXCHG #define __HAVE_ARCH_CMPXCHG 1 -#define cmpxchg(ptr,o,n)\ - ((__typeof__(*(ptr)))__cmpxchg((ptr),(unsigned long)(o),\ - (unsigned long)(n),sizeof(*(ptr)))) -#define sync_cmpxchg(ptr,o,n)\ - ((__typeof__(*(ptr)))__sync_cmpxchg((ptr),(unsigned long)(o),\ - (unsigned long)(n),sizeof(*(ptr)))) -#define cmpxchg_local(ptr,o,n)\ - ((__typeof__(*(ptr)))__cmpxchg_local((ptr),(unsigned long)(o),\ - (unsigned long)(n),sizeof(*(ptr)))) +#define cmpxchg(ptr, o, n) \ + ((__typeof__(*(ptr)))__cmpxchg((ptr), (unsigned long)(o), \ + (unsigned long)(n), sizeof(*(ptr)))) +#define sync_cmpxchg(ptr, o, n) \ + ((__typeof__(*(ptr)))__sync_cmpxchg((ptr), (unsigned long)(o), \ + (unsigned long)(n), sizeof(*(ptr)))) +#define cmpxchg_local(ptr, o, n) \ + ((__typeof__(*(ptr)))__cmpxchg_local((ptr), (unsigned long)(o), \ + (unsigned long)(n), sizeof(*(ptr)))) +#endif + +#ifdef CONFIG_X86_CMPXCHG64 +#define cmpxchg64(ptr, o, n) \ + ((__typeof__(*(ptr)))__cmpxchg64((ptr), (unsigned long long)(o), \ + (unsigned long long)(n))) +#define cmpxchg64_local(ptr, o, n) \ + ((__typeof__(*(ptr)))__cmpxchg64_local((ptr), (unsigned long long)(o),\ + (unsigned long long)(n))) #endif static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old, @@ -203,6 +212,34 @@ static inline unsigned long __cmpxchg_local(volatile void *ptr, return old; } +static inline unsigned long long __cmpxchg64(volatile void *ptr, + unsigned long long old, unsigned long long new) +{ + unsigned long long prev; + __asm__ __volatile__(LOCK_PREFIX "cmpxchg8b %3" + : "=A"(prev) + : "b"((unsigned long)new), + "c"((unsigned long)(new >> 32)), + "m"(*__xg(ptr)), + "0"(old) + : "memory"); + return prev; +} + +static inline unsigned long long __cmpxchg64_local(volatile void *ptr, + unsigned long long old, unsigned long long new) +{ + unsigned long long prev; + __asm__ __volatile__("cmpxchg8b %3" + : "=A"(prev) + : "b"((unsigned long)new), + "c"((unsigned long)(new >> 32)), + "m"(*__xg(ptr)), + "0"(old) + : "memory"); + return prev; +} + #ifndef CONFIG_X86_CMPXCHG /* * Building a kernel capable running on 80386. It may be necessary to @@ -228,7 +265,7 @@ static inline unsigned long cmpxchg_386(volatile void *ptr, unsigned long old, return old; } -#define cmpxchg(ptr,o,n) \ +#define cmpxchg(ptr, o, n) \ ({ \ __typeof__(*(ptr)) __ret; \ if (likely(boot_cpu_data.x86 > 3)) \ @@ -239,7 +276,7 @@ static inline unsigned long cmpxchg_386(volatile void *ptr, unsigned long old, (unsigned long)(n), sizeof(*(ptr))); \ __ret; \ }) -#define cmpxchg_local(ptr,o,n) \ +#define cmpxchg_local(ptr, o, n) \ ({ \ __typeof__(*(ptr)) __ret; \ if (likely(boot_cpu_data.x86 > 3)) \ @@ -252,38 +289,37 @@ static inline unsigned long cmpxchg_386(volatile void *ptr, unsigned long old, }) #endif -static inline unsigned long long __cmpxchg64(volatile void *ptr, unsigned long long old, - unsigned long long new) -{ - unsigned long long prev; - __asm__ __volatile__(LOCK_PREFIX "cmpxchg8b %3" - : "=A"(prev) - : "b"((unsigned long)new), - "c"((unsigned long)(new >> 32)), - "m"(*__xg(ptr)), - "0"(old) - : "memory"); - return prev; -} +#ifndef CONFIG_X86_CMPXCHG64 +/* + * Building a kernel capable running on 80386 and 80486. It may be necessary + * to simulate the cmpxchg8b on the 80386 and 80486 CPU. + */ -static inline unsigned long long __cmpxchg64_local(volatile void *ptr, - unsigned long long old, unsigned long long new) -{ - unsigned long long prev; - __asm__ __volatile__("cmpxchg8b %3" - : "=A"(prev) - : "b"((unsigned long)new), - "c"((unsigned long)(new >> 32)), - "m"(*__xg(ptr)), - "0"(old) - : "memory"); - return prev; -} +extern unsigned long long cmpxchg_486_u64(volatile void *, u64, u64); + +#define cmpxchg64(ptr, o, n) \ +({ \ + __typeof__(*(ptr)) __ret; \ + if (likely(boot_cpu_data.x86 > 4)) \ + __ret = __cmpxchg64((ptr), (unsigned long long)(o), \ + (unsigned long long)(n)); \ + else \ + __ret = cmpxchg_486_u64((ptr), (unsigned long long)(o), \ + (unsigned long long)(n)); \ + __ret; \ +}) +#define cmpxchg64_local(ptr, o, n) \ +({ \ + __typeof__(*(ptr)) __ret; \ + if (likely(boot_cpu_data.x86 > 4)) \ + __ret = __cmpxchg64_local((ptr), (unsigned long long)(o), \ + (unsigned long long)(n)); \ + else \ + __ret = cmpxchg_486_u64((ptr), (unsigned long long)(o), \ + (unsigned long long)(n)); \ + __ret; \ +}) + +#endif -#define cmpxchg64(ptr,o,n)\ - ((__typeof__(*(ptr)))__cmpxchg64((ptr),(unsigned long long)(o),\ - (unsigned long long)(n))) -#define cmpxchg64_local(ptr,o,n)\ - ((__typeof__(*(ptr)))__cmpxchg64_local((ptr),(unsigned long long)(o),\ - (unsigned long long)(n))) #endif diff --git a/include/asm-x86/compat.h b/include/asm-x86/compat.h index 66ba7987184a..b270ee04959e 100644 --- a/include/asm-x86/compat.h +++ b/include/asm-x86/compat.h @@ -207,7 +207,7 @@ static inline compat_uptr_t ptr_to_compat(void __user *uptr) static __inline__ void __user *compat_alloc_user_space(long len) { struct pt_regs *regs = task_pt_regs(current); - return (void __user *)regs->rsp - len; + return (void __user *)regs->sp - len; } static inline int is_compat_task(void) diff --git a/include/asm-x86/cpu.h b/include/asm-x86/cpu.h index b1bc7b1b64b0..85ece5f10e9e 100644 --- a/include/asm-x86/cpu.h +++ b/include/asm-x86/cpu.h @@ -7,7 +7,7 @@ #include <linux/nodemask.h> #include <linux/percpu.h> -struct i386_cpu { +struct x86_cpu { struct cpu cpu; }; extern int arch_register_cpu(int num); diff --git a/include/asm-x86/cpufeature.h b/include/asm-x86/cpufeature.h index b7160a4598d7..3fb7dfa7fc91 100644 --- a/include/asm-x86/cpufeature.h +++ b/include/asm-x86/cpufeature.h @@ -1,5 +1,207 @@ -#ifdef CONFIG_X86_32 -# include "cpufeature_32.h" +/* + * Defines x86 CPU feature bits + */ +#ifndef _ASM_X86_CPUFEATURE_H +#define _ASM_X86_CPUFEATURE_H + +#ifndef __ASSEMBLY__ +#include <linux/bitops.h> +#endif +#include <asm/required-features.h> + +#define NCAPINTS 8 /* N 32-bit words worth of info */ + +/* Intel-defined CPU features, CPUID level 0x00000001 (edx), word 0 */ +#define X86_FEATURE_FPU (0*32+ 0) /* Onboard FPU */ +#define X86_FEATURE_VME (0*32+ 1) /* Virtual Mode Extensions */ +#define X86_FEATURE_DE (0*32+ 2) /* Debugging Extensions */ +#define X86_FEATURE_PSE (0*32+ 3) /* Page Size Extensions */ +#define X86_FEATURE_TSC (0*32+ 4) /* Time Stamp Counter */ +#define X86_FEATURE_MSR (0*32+ 5) /* Model-Specific Registers, RDMSR, WRMSR */ +#define X86_FEATURE_PAE (0*32+ 6) /* Physical Address Extensions */ +#define X86_FEATURE_MCE (0*32+ 7) /* Machine Check Architecture */ +#define X86_FEATURE_CX8 (0*32+ 8) /* CMPXCHG8 instruction */ +#define X86_FEATURE_APIC (0*32+ 9) /* Onboard APIC */ +#define X86_FEATURE_SEP (0*32+11) /* SYSENTER/SYSEXIT */ +#define X86_FEATURE_MTRR (0*32+12) /* Memory Type Range Registers */ +#define X86_FEATURE_PGE (0*32+13) /* Page Global Enable */ +#define X86_FEATURE_MCA (0*32+14) /* Machine Check Architecture */ +#define X86_FEATURE_CMOV (0*32+15) /* CMOV instruction (FCMOVCC and FCOMI too if FPU present) */ +#define X86_FEATURE_PAT (0*32+16) /* Page Attribute Table */ +#define X86_FEATURE_PSE36 (0*32+17) /* 36-bit PSEs */ +#define X86_FEATURE_PN (0*32+18) /* Processor serial number */ +#define X86_FEATURE_CLFLSH (0*32+19) /* Supports the CLFLUSH instruction */ +#define X86_FEATURE_DS (0*32+21) /* Debug Store */ +#define X86_FEATURE_ACPI (0*32+22) /* ACPI via MSR */ +#define X86_FEATURE_MMX (0*32+23) /* Multimedia Extensions */ +#define X86_FEATURE_FXSR (0*32+24) /* FXSAVE and FXRSTOR instructions (fast save and restore */ + /* of FPU context), and CR4.OSFXSR available */ +#define X86_FEATURE_XMM (0*32+25) /* Streaming SIMD Extensions */ +#define X86_FEATURE_XMM2 (0*32+26) /* Streaming SIMD Extensions-2 */ +#define X86_FEATURE_SELFSNOOP (0*32+27) /* CPU self snoop */ +#define X86_FEATURE_HT (0*32+28) /* Hyper-Threading */ +#define X86_FEATURE_ACC (0*32+29) /* Automatic clock control */ +#define X86_FEATURE_IA64 (0*32+30) /* IA-64 processor */ + +/* AMD-defined CPU features, CPUID level 0x80000001, word 1 */ +/* Don't duplicate feature flags which are redundant with Intel! */ +#define X86_FEATURE_SYSCALL (1*32+11) /* SYSCALL/SYSRET */ +#define X86_FEATURE_MP (1*32+19) /* MP Capable. */ +#define X86_FEATURE_NX (1*32+20) /* Execute Disable */ +#define X86_FEATURE_MMXEXT (1*32+22) /* AMD MMX extensions */ +#define X86_FEATURE_RDTSCP (1*32+27) /* RDTSCP */ +#define X86_FEATURE_LM (1*32+29) /* Long Mode (x86-64) */ +#define X86_FEATURE_3DNOWEXT (1*32+30) /* AMD 3DNow! extensions */ +#define X86_FEATURE_3DNOW (1*32+31) /* 3DNow! */ + +/* Transmeta-defined CPU features, CPUID level 0x80860001, word 2 */ +#define X86_FEATURE_RECOVERY (2*32+ 0) /* CPU in recovery mode */ +#define X86_FEATURE_LONGRUN (2*32+ 1) /* Longrun power control */ +#define X86_FEATURE_LRTI (2*32+ 3) /* LongRun table interface */ + +/* Other features, Linux-defined mapping, word 3 */ +/* This range is used for feature bits which conflict or are synthesized */ +#define X86_FEATURE_CXMMX (3*32+ 0) /* Cyrix MMX extensions */ +#define X86_FEATURE_K6_MTRR (3*32+ 1) /* AMD K6 nonstandard MTRRs */ +#define X86_FEATURE_CYRIX_ARR (3*32+ 2) /* Cyrix ARRs (= MTRRs) */ +#define X86_FEATURE_CENTAUR_MCR (3*32+ 3) /* Centaur MCRs (= MTRRs) */ +/* cpu types for specific tunings: */ +#define X86_FEATURE_K8 (3*32+ 4) /* Opteron, Athlon64 */ +#define X86_FEATURE_K7 (3*32+ 5) /* Athlon */ +#define X86_FEATURE_P3 (3*32+ 6) /* P3 */ +#define X86_FEATURE_P4 (3*32+ 7) /* P4 */ +#define X86_FEATURE_CONSTANT_TSC (3*32+ 8) /* TSC ticks at a constant rate */ +#define X86_FEATURE_UP (3*32+ 9) /* smp kernel running on up */ +#define X86_FEATURE_FXSAVE_LEAK (3*32+10) /* FXSAVE leaks FOP/FIP/FOP */ +#define X86_FEATURE_ARCH_PERFMON (3*32+11) /* Intel Architectural PerfMon */ +#define X86_FEATURE_PEBS (3*32+12) /* Precise-Event Based Sampling */ +#define X86_FEATURE_BTS (3*32+13) /* Branch Trace Store */ +/* 14 free */ +/* 15 free */ +#define X86_FEATURE_REP_GOOD (3*32+16) /* rep microcode works well on this CPU */ +#define X86_FEATURE_MFENCE_RDTSC (3*32+17) /* Mfence synchronizes RDTSC */ +#define X86_FEATURE_LFENCE_RDTSC (3*32+18) /* Lfence synchronizes RDTSC */ + +/* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */ +#define X86_FEATURE_XMM3 (4*32+ 0) /* Streaming SIMD Extensions-3 */ +#define X86_FEATURE_MWAIT (4*32+ 3) /* Monitor/Mwait support */ +#define X86_FEATURE_DSCPL (4*32+ 4) /* CPL Qualified Debug Store */ +#define X86_FEATURE_EST (4*32+ 7) /* Enhanced SpeedStep */ +#define X86_FEATURE_TM2 (4*32+ 8) /* Thermal Monitor 2 */ +#define X86_FEATURE_CID (4*32+10) /* Context ID */ +#define X86_FEATURE_CX16 (4*32+13) /* CMPXCHG16B */ +#define X86_FEATURE_XTPR (4*32+14) /* Send Task Priority Messages */ +#define X86_FEATURE_DCA (4*32+18) /* Direct Cache Access */ + +/* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */ +#define X86_FEATURE_XSTORE (5*32+ 2) /* on-CPU RNG present (xstore insn) */ +#define X86_FEATURE_XSTORE_EN (5*32+ 3) /* on-CPU RNG enabled */ +#define X86_FEATURE_XCRYPT (5*32+ 6) /* on-CPU crypto (xcrypt insn) */ +#define X86_FEATURE_XCRYPT_EN (5*32+ 7) /* on-CPU crypto enabled */ +#define X86_FEATURE_ACE2 (5*32+ 8) /* Advanced Cryptography Engine v2 */ +#define X86_FEATURE_ACE2_EN (5*32+ 9) /* ACE v2 enabled */ +#define X86_FEATURE_PHE (5*32+ 10) /* PadLock Hash Engine */ +#define X86_FEATURE_PHE_EN (5*32+ 11) /* PHE enabled */ +#define X86_FEATURE_PMM (5*32+ 12) /* PadLock Montgomery Multiplier */ +#define X86_FEATURE_PMM_EN (5*32+ 13) /* PMM enabled */ + +/* More extended AMD flags: CPUID level 0x80000001, ecx, word 6 */ +#define X86_FEATURE_LAHF_LM (6*32+ 0) /* LAHF/SAHF in long mode */ +#define X86_FEATURE_CMP_LEGACY (6*32+ 1) /* If yes HyperThreading not valid */ + +/* + * Auxiliary flags: Linux defined - For features scattered in various + * CPUID levels like 0x6, 0xA etc + */ +#define X86_FEATURE_IDA (7*32+ 0) /* Intel Dynamic Acceleration */ + +#define cpu_has(c, bit) \ + (__builtin_constant_p(bit) && \ + ( (((bit)>>5)==0 && (1UL<<((bit)&31) & REQUIRED_MASK0)) || \ + (((bit)>>5)==1 && (1UL<<((bit)&31) & REQUIRED_MASK1)) || \ + (((bit)>>5)==2 && (1UL<<((bit)&31) & REQUIRED_MASK2)) || \ + (((bit)>>5)==3 && (1UL<<((bit)&31) & REQUIRED_MASK3)) || \ + (((bit)>>5)==4 && (1UL<<((bit)&31) & REQUIRED_MASK4)) || \ + (((bit)>>5)==5 && (1UL<<((bit)&31) & REQUIRED_MASK5)) || \ + (((bit)>>5)==6 && (1UL<<((bit)&31) & REQUIRED_MASK6)) || \ + (((bit)>>5)==7 && (1UL<<((bit)&31) & REQUIRED_MASK7)) ) \ + ? 1 : \ + test_bit(bit, (unsigned long *)((c)->x86_capability))) +#define boot_cpu_has(bit) cpu_has(&boot_cpu_data, bit) + +#define set_cpu_cap(c, bit) set_bit(bit, (unsigned long *)((c)->x86_capability)) +#define clear_cpu_cap(c, bit) clear_bit(bit, (unsigned long *)((c)->x86_capability)) +#define setup_clear_cpu_cap(bit) do { \ + clear_cpu_cap(&boot_cpu_data, bit); \ + set_bit(bit, cleared_cpu_caps); \ +} while (0) +#define setup_force_cpu_cap(bit) do { \ + set_cpu_cap(&boot_cpu_data, bit); \ + clear_bit(bit, cleared_cpu_caps); \ +} while (0) + +#define cpu_has_fpu boot_cpu_has(X86_FEATURE_FPU) +#define cpu_has_vme boot_cpu_has(X86_FEATURE_VME) +#define cpu_has_de boot_cpu_has(X86_FEATURE_DE) +#define cpu_has_pse boot_cpu_has(X86_FEATURE_PSE) +#define cpu_has_tsc boot_cpu_has(X86_FEATURE_TSC) +#define cpu_has_pae boot_cpu_has(X86_FEATURE_PAE) +#define cpu_has_pge boot_cpu_has(X86_FEATURE_PGE) +#define cpu_has_apic boot_cpu_has(X86_FEATURE_APIC) +#define cpu_has_sep boot_cpu_has(X86_FEATURE_SEP) +#define cpu_has_mtrr boot_cpu_has(X86_FEATURE_MTRR) +#define cpu_has_mmx boot_cpu_has(X86_FEATURE_MMX) +#define cpu_has_fxsr boot_cpu_has(X86_FEATURE_FXSR) +#define cpu_has_xmm boot_cpu_has(X86_FEATURE_XMM) +#define cpu_has_xmm2 boot_cpu_has(X86_FEATURE_XMM2) +#define cpu_has_xmm3 boot_cpu_has(X86_FEATURE_XMM3) +#define cpu_has_ht boot_cpu_has(X86_FEATURE_HT) +#define cpu_has_mp boot_cpu_has(X86_FEATURE_MP) +#define cpu_has_nx boot_cpu_has(X86_FEATURE_NX) +#define cpu_has_k6_mtrr boot_cpu_has(X86_FEATURE_K6_MTRR) +#define cpu_has_cyrix_arr boot_cpu_has(X86_FEATURE_CYRIX_ARR) +#define cpu_has_centaur_mcr boot_cpu_has(X86_FEATURE_CENTAUR_MCR) +#define cpu_has_xstore boot_cpu_has(X86_FEATURE_XSTORE) +#define cpu_has_xstore_enabled boot_cpu_has(X86_FEATURE_XSTORE_EN) +#define cpu_has_xcrypt boot_cpu_has(X86_FEATURE_XCRYPT) +#define cpu_has_xcrypt_enabled boot_cpu_has(X86_FEATURE_XCRYPT_EN) +#define cpu_has_ace2 boot_cpu_has(X86_FEATURE_ACE2) +#define cpu_has_ace2_enabled boot_cpu_has(X86_FEATURE_ACE2_EN) +#define cpu_has_phe boot_cpu_has(X86_FEATURE_PHE) +#define cpu_has_phe_enabled boot_cpu_has(X86_FEATURE_PHE_EN) +#define cpu_has_pmm boot_cpu_has(X86_FEATURE_PMM) +#define cpu_has_pmm_enabled boot_cpu_has(X86_FEATURE_PMM_EN) +#define cpu_has_ds boot_cpu_has(X86_FEATURE_DS) +#define cpu_has_pebs boot_cpu_has(X86_FEATURE_PEBS) +#define cpu_has_clflush boot_cpu_has(X86_FEATURE_CLFLSH) +#define cpu_has_bts boot_cpu_has(X86_FEATURE_BTS) + +#if defined(CONFIG_X86_INVLPG) || defined(CONFIG_X86_64) +# define cpu_has_invlpg 1 #else -# include "cpufeature_64.h" +# define cpu_has_invlpg (boot_cpu_data.x86 > 3) #endif + +#ifdef CONFIG_X86_64 + +#undef cpu_has_vme +#define cpu_has_vme 0 + +#undef cpu_has_pae +#define cpu_has_pae ___BUG___ + +#undef cpu_has_mp +#define cpu_has_mp 1 + +#undef cpu_has_k6_mtrr +#define cpu_has_k6_mtrr 0 + +#undef cpu_has_cyrix_arr +#define cpu_has_cyrix_arr 0 + +#undef cpu_has_centaur_mcr +#define cpu_has_centaur_mcr 0 + +#endif /* CONFIG_X86_64 */ + +#endif /* _ASM_X86_CPUFEATURE_H */ diff --git a/include/asm-x86/cpufeature_32.h b/include/asm-x86/cpufeature_32.h deleted file mode 100644 index f17e688dfb05..000000000000 --- a/include/asm-x86/cpufeature_32.h +++ /dev/null @@ -1,176 +0,0 @@ -/* - * cpufeature.h - * - * Defines x86 CPU feature bits - */ - -#ifndef __ASM_I386_CPUFEATURE_H -#define __ASM_I386_CPUFEATURE_H - -#ifndef __ASSEMBLY__ -#include <linux/bitops.h> -#endif -#include <asm/required-features.h> - -#define NCAPINTS 8 /* N 32-bit words worth of info */ - -/* Intel-defined CPU features, CPUID level 0x00000001 (edx), word 0 */ -#define X86_FEATURE_FPU (0*32+ 0) /* Onboard FPU */ -#define X86_FEATURE_VME (0*32+ 1) /* Virtual Mode Extensions */ -#define X86_FEATURE_DE (0*32+ 2) /* Debugging Extensions */ -#define X86_FEATURE_PSE (0*32+ 3) /* Page Size Extensions */ -#define X86_FEATURE_TSC (0*32+ 4) /* Time Stamp Counter */ -#define X86_FEATURE_MSR (0*32+ 5) /* Model-Specific Registers, RDMSR, WRMSR */ -#define X86_FEATURE_PAE (0*32+ 6) /* Physical Address Extensions */ -#define X86_FEATURE_MCE (0*32+ 7) /* Machine Check Architecture */ -#define X86_FEATURE_CX8 (0*32+ 8) /* CMPXCHG8 instruction */ -#define X86_FEATURE_APIC (0*32+ 9) /* Onboard APIC */ -#define X86_FEATURE_SEP (0*32+11) /* SYSENTER/SYSEXIT */ -#define X86_FEATURE_MTRR (0*32+12) /* Memory Type Range Registers */ -#define X86_FEATURE_PGE (0*32+13) /* Page Global Enable */ -#define X86_FEATURE_MCA (0*32+14) /* Machine Check Architecture */ -#define X86_FEATURE_CMOV (0*32+15) /* CMOV instruction (FCMOVCC and FCOMI too if FPU present) */ -#define X86_FEATURE_PAT (0*32+16) /* Page Attribute Table */ -#define X86_FEATURE_PSE36 (0*32+17) /* 36-bit PSEs */ -#define X86_FEATURE_PN (0*32+18) /* Processor serial number */ -#define X86_FEATURE_CLFLSH (0*32+19) /* Supports the CLFLUSH instruction */ -#define X86_FEATURE_DS (0*32+21) /* Debug Store */ -#define X86_FEATURE_ACPI (0*32+22) /* ACPI via MSR */ -#define X86_FEATURE_MMX (0*32+23) /* Multimedia Extensions */ -#define X86_FEATURE_FXSR (0*32+24) /* FXSAVE and FXRSTOR instructions (fast save and restore */ - /* of FPU context), and CR4.OSFXSR available */ -#define X86_FEATURE_XMM (0*32+25) /* Streaming SIMD Extensions */ -#define X86_FEATURE_XMM2 (0*32+26) /* Streaming SIMD Extensions-2 */ -#define X86_FEATURE_SELFSNOOP (0*32+27) /* CPU self snoop */ -#define X86_FEATURE_HT (0*32+28) /* Hyper-Threading */ -#define X86_FEATURE_ACC (0*32+29) /* Automatic clock control */ -#define X86_FEATURE_IA64 (0*32+30) /* IA-64 processor */ - -/* AMD-defined CPU features, CPUID level 0x80000001, word 1 */ -/* Don't duplicate feature flags which are redundant with Intel! */ -#define X86_FEATURE_SYSCALL (1*32+11) /* SYSCALL/SYSRET */ -#define X86_FEATURE_MP (1*32+19) /* MP Capable. */ -#define X86_FEATURE_NX (1*32+20) /* Execute Disable */ -#define X86_FEATURE_MMXEXT (1*32+22) /* AMD MMX extensions */ -#define X86_FEATURE_RDTSCP (1*32+27) /* RDTSCP */ -#define X86_FEATURE_LM (1*32+29) /* Long Mode (x86-64) */ -#define X86_FEATURE_3DNOWEXT (1*32+30) /* AMD 3DNow! extensions */ -#define X86_FEATURE_3DNOW (1*32+31) /* 3DNow! */ - -/* Transmeta-defined CPU features, CPUID level 0x80860001, word 2 */ -#define X86_FEATURE_RECOVERY (2*32+ 0) /* CPU in recovery mode */ -#define X86_FEATURE_LONGRUN (2*32+ 1) /* Longrun power control */ -#define X86_FEATURE_LRTI (2*32+ 3) /* LongRun table interface */ - -/* Other features, Linux-defined mapping, word 3 */ -/* This range is used for feature bits which conflict or are synthesized */ -#define X86_FEATURE_CXMMX (3*32+ 0) /* Cyrix MMX extensions */ -#define X86_FEATURE_K6_MTRR (3*32+ 1) /* AMD K6 nonstandard MTRRs */ -#define X86_FEATURE_CYRIX_ARR (3*32+ 2) /* Cyrix ARRs (= MTRRs) */ -#define X86_FEATURE_CENTAUR_MCR (3*32+ 3) /* Centaur MCRs (= MTRRs) */ -/* cpu types for specific tunings: */ -#define X86_FEATURE_K8 (3*32+ 4) /* Opteron, Athlon64 */ -#define X86_FEATURE_K7 (3*32+ 5) /* Athlon */ -#define X86_FEATURE_P3 (3*32+ 6) /* P3 */ -#define X86_FEATURE_P4 (3*32+ 7) /* P4 */ -#define X86_FEATURE_CONSTANT_TSC (3*32+ 8) /* TSC ticks at a constant rate */ -#define X86_FEATURE_UP (3*32+ 9) /* smp kernel running on up */ -#define X86_FEATURE_FXSAVE_LEAK (3*32+10) /* FXSAVE leaks FOP/FIP/FOP */ -#define X86_FEATURE_ARCH_PERFMON (3*32+11) /* Intel Architectural PerfMon */ -#define X86_FEATURE_PEBS (3*32+12) /* Precise-Event Based Sampling */ -#define X86_FEATURE_BTS (3*32+13) /* Branch Trace Store */ -/* 14 free */ -#define X86_FEATURE_SYNC_RDTSC (3*32+15) /* RDTSC synchronizes the CPU */ -#define X86_FEATURE_REP_GOOD (3*32+16) /* rep microcode works well on this CPU */ - -/* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */ -#define X86_FEATURE_XMM3 (4*32+ 0) /* Streaming SIMD Extensions-3 */ -#define X86_FEATURE_MWAIT (4*32+ 3) /* Monitor/Mwait support */ -#define X86_FEATURE_DSCPL (4*32+ 4) /* CPL Qualified Debug Store */ -#define X86_FEATURE_EST (4*32+ 7) /* Enhanced SpeedStep */ -#define X86_FEATURE_TM2 (4*32+ 8) /* Thermal Monitor 2 */ -#define X86_FEATURE_CID (4*32+10) /* Context ID */ -#define X86_FEATURE_CX16 (4*32+13) /* CMPXCHG16B */ -#define X86_FEATURE_XTPR (4*32+14) /* Send Task Priority Messages */ -#define X86_FEATURE_DCA (4*32+18) /* Direct Cache Access */ - -/* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */ -#define X86_FEATURE_XSTORE (5*32+ 2) /* on-CPU RNG present (xstore insn) */ -#define X86_FEATURE_XSTORE_EN (5*32+ 3) /* on-CPU RNG enabled */ -#define X86_FEATURE_XCRYPT (5*32+ 6) /* on-CPU crypto (xcrypt insn) */ -#define X86_FEATURE_XCRYPT_EN (5*32+ 7) /* on-CPU crypto enabled */ -#define X86_FEATURE_ACE2 (5*32+ 8) /* Advanced Cryptography Engine v2 */ -#define X86_FEATURE_ACE2_EN (5*32+ 9) /* ACE v2 enabled */ -#define X86_FEATURE_PHE (5*32+ 10) /* PadLock Hash Engine */ -#define X86_FEATURE_PHE_EN (5*32+ 11) /* PHE enabled */ -#define X86_FEATURE_PMM (5*32+ 12) /* PadLock Montgomery Multiplier */ -#define X86_FEATURE_PMM_EN (5*32+ 13) /* PMM enabled */ - -/* More extended AMD flags: CPUID level 0x80000001, ecx, word 6 */ -#define X86_FEATURE_LAHF_LM (6*32+ 0) /* LAHF/SAHF in long mode */ -#define X86_FEATURE_CMP_LEGACY (6*32+ 1) /* If yes HyperThreading not valid */ - -/* - * Auxiliary flags: Linux defined - For features scattered in various - * CPUID levels like 0x6, 0xA etc - */ -#define X86_FEATURE_IDA (7*32+ 0) /* Intel Dynamic Acceleration */ - -#define cpu_has(c, bit) \ - (__builtin_constant_p(bit) && \ - ( (((bit)>>5)==0 && (1UL<<((bit)&31) & REQUIRED_MASK0)) || \ - (((bit)>>5)==1 && (1UL<<((bit)&31) & REQUIRED_MASK1)) || \ - (((bit)>>5)==2 && (1UL<<((bit)&31) & REQUIRED_MASK2)) || \ - (((bit)>>5)==3 && (1UL<<((bit)&31) & REQUIRED_MASK3)) || \ - (((bit)>>5)==4 && (1UL<<((bit)&31) & REQUIRED_MASK4)) || \ - (((bit)>>5)==5 && (1UL<<((bit)&31) & REQUIRED_MASK5)) || \ - (((bit)>>5)==6 && (1UL<<((bit)&31) & REQUIRED_MASK6)) || \ - (((bit)>>5)==7 && (1UL<<((bit)&31) & REQUIRED_MASK7)) ) \ - ? 1 : \ - test_bit(bit, (c)->x86_capability)) -#define boot_cpu_has(bit) cpu_has(&boot_cpu_data, bit) - -#define cpu_has_fpu boot_cpu_has(X86_FEATURE_FPU) -#define cpu_has_vme boot_cpu_has(X86_FEATURE_VME) -#define cpu_has_de boot_cpu_has(X86_FEATURE_DE) -#define cpu_has_pse boot_cpu_has(X86_FEATURE_PSE) -#define cpu_has_tsc boot_cpu_has(X86_FEATURE_TSC) -#define cpu_has_pae boot_cpu_has(X86_FEATURE_PAE) -#define cpu_has_pge boot_cpu_has(X86_FEATURE_PGE) -#define cpu_has_apic boot_cpu_has(X86_FEATURE_APIC) -#define cpu_has_sep boot_cpu_has(X86_FEATURE_SEP) -#define cpu_has_mtrr boot_cpu_has(X86_FEATURE_MTRR) -#define cpu_has_mmx boot_cpu_has(X86_FEATURE_MMX) -#define cpu_has_fxsr boot_cpu_has(X86_FEATURE_FXSR) -#define cpu_has_xmm boot_cpu_has(X86_FEATURE_XMM) -#define cpu_has_xmm2 boot_cpu_has(X86_FEATURE_XMM2) -#define cpu_has_xmm3 boot_cpu_has(X86_FEATURE_XMM3) -#define cpu_has_ht boot_cpu_has(X86_FEATURE_HT) -#define cpu_has_mp boot_cpu_has(X86_FEATURE_MP) -#define cpu_has_nx boot_cpu_has(X86_FEATURE_NX) -#define cpu_has_k6_mtrr boot_cpu_has(X86_FEATURE_K6_MTRR) -#define cpu_has_cyrix_arr boot_cpu_has(X86_FEATURE_CYRIX_ARR) -#define cpu_has_centaur_mcr boot_cpu_has(X86_FEATURE_CENTAUR_MCR) -#define cpu_has_xstore boot_cpu_has(X86_FEATURE_XSTORE) -#define cpu_has_xstore_enabled boot_cpu_has(X86_FEATURE_XSTORE_EN) -#define cpu_has_xcrypt boot_cpu_has(X86_FEATURE_XCRYPT) -#define cpu_has_xcrypt_enabled boot_cpu_has(X86_FEATURE_XCRYPT_EN) -#define cpu_has_ace2 boot_cpu_has(X86_FEATURE_ACE2) -#define cpu_has_ace2_enabled boot_cpu_has(X86_FEATURE_ACE2_EN) -#define cpu_has_phe boot_cpu_has(X86_FEATURE_PHE) -#define cpu_has_phe_enabled boot_cpu_has(X86_FEATURE_PHE_EN) -#define cpu_has_pmm boot_cpu_has(X86_FEATURE_PMM) -#define cpu_has_pmm_enabled boot_cpu_has(X86_FEATURE_PMM_EN) -#define cpu_has_ds boot_cpu_has(X86_FEATURE_DS) -#define cpu_has_pebs boot_cpu_has(X86_FEATURE_PEBS) -#define cpu_has_clflush boot_cpu_has(X86_FEATURE_CLFLSH) -#define cpu_has_bts boot_cpu_has(X86_FEATURE_BTS) - -#endif /* __ASM_I386_CPUFEATURE_H */ - -/* - * Local Variables: - * mode:c - * comment-column:42 - * End: - */ diff --git a/include/asm-x86/cpufeature_64.h b/include/asm-x86/cpufeature_64.h deleted file mode 100644 index e18496b7b850..000000000000 --- a/include/asm-x86/cpufeature_64.h +++ /dev/null @@ -1,30 +0,0 @@ -/* - * cpufeature_32.h - * - * Defines x86 CPU feature bits - */ - -#ifndef __ASM_X8664_CPUFEATURE_H -#define __ASM_X8664_CPUFEATURE_H - -#include "cpufeature_32.h" - -#undef cpu_has_vme -#define cpu_has_vme 0 - -#undef cpu_has_pae -#define cpu_has_pae ___BUG___ - -#undef cpu_has_mp -#define cpu_has_mp 1 /* XXX */ - -#undef cpu_has_k6_mtrr -#define cpu_has_k6_mtrr 0 - -#undef cpu_has_cyrix_arr -#define cpu_has_cyrix_arr 0 - -#undef cpu_has_centaur_mcr -#define cpu_has_centaur_mcr 0 - -#endif /* __ASM_X8664_CPUFEATURE_H */ diff --git a/include/asm-x86/desc.h b/include/asm-x86/desc.h index 6065c5092265..5b6a05d3a771 100644 --- a/include/asm-x86/desc.h +++ b/include/asm-x86/desc.h @@ -1,5 +1,381 @@ +#ifndef _ASM_DESC_H_ +#define _ASM_DESC_H_ + +#ifndef __ASSEMBLY__ +#include <asm/desc_defs.h> +#include <asm/ldt.h> +#include <asm/mmu.h> +#include <linux/smp.h> + +static inline void fill_ldt(struct desc_struct *desc, + const struct user_desc *info) +{ + desc->limit0 = info->limit & 0x0ffff; + desc->base0 = info->base_addr & 0x0000ffff; + + desc->base1 = (info->base_addr & 0x00ff0000) >> 16; + desc->type = (info->read_exec_only ^ 1) << 1; + desc->type |= info->contents << 2; + desc->s = 1; + desc->dpl = 0x3; + desc->p = info->seg_not_present ^ 1; + desc->limit = (info->limit & 0xf0000) >> 16; + desc->avl = info->useable; + desc->d = info->seg_32bit; + desc->g = info->limit_in_pages; + desc->base2 = (info->base_addr & 0xff000000) >> 24; +} + +extern struct desc_ptr idt_descr; +extern gate_desc idt_table[]; + +#ifdef CONFIG_X86_64 +extern struct desc_struct cpu_gdt_table[GDT_ENTRIES]; +extern struct desc_ptr cpu_gdt_descr[]; +/* the cpu gdt accessor */ +#define get_cpu_gdt_table(x) ((struct desc_struct *)cpu_gdt_descr[x].address) + +static inline void pack_gate(gate_desc *gate, unsigned type, unsigned long func, + unsigned dpl, unsigned ist, unsigned seg) +{ + gate->offset_low = PTR_LOW(func); + gate->segment = __KERNEL_CS; + gate->ist = ist; + gate->p = 1; + gate->dpl = dpl; + gate->zero0 = 0; + gate->zero1 = 0; + gate->type = type; + gate->offset_middle = PTR_MIDDLE(func); + gate->offset_high = PTR_HIGH(func); +} + +#else +struct gdt_page { + struct desc_struct gdt[GDT_ENTRIES]; +} __attribute__((aligned(PAGE_SIZE))); +DECLARE_PER_CPU(struct gdt_page, gdt_page); + +static inline struct desc_struct *get_cpu_gdt_table(unsigned int cpu) +{ + return per_cpu(gdt_page, cpu).gdt; +} + +static inline void pack_gate(gate_desc *gate, unsigned char type, + unsigned long base, unsigned dpl, unsigned flags, unsigned short seg) + +{ + gate->a = (seg << 16) | (base & 0xffff); + gate->b = (base & 0xffff0000) | + (((0x80 | type | (dpl << 5)) & 0xff) << 8); +} + +#endif + +static inline int desc_empty(const void *ptr) +{ + const u32 *desc = ptr; + return !(desc[0] | desc[1]); +} + +#ifdef CONFIG_PARAVIRT +#include <asm/paravirt.h> +#else +#define load_TR_desc() native_load_tr_desc() +#define load_gdt(dtr) native_load_gdt(dtr) +#define load_idt(dtr) native_load_idt(dtr) +#define load_tr(tr) __asm__ __volatile("ltr %0"::"m" (tr)) +#define load_ldt(ldt) __asm__ __volatile("lldt %0"::"m" (ldt)) + +#define store_gdt(dtr) native_store_gdt(dtr) +#define store_idt(dtr) native_store_idt(dtr) +#define store_tr(tr) (tr = native_store_tr()) +#define store_ldt(ldt) __asm__ ("sldt %0":"=m" (ldt)) + +#define load_TLS(t, cpu) native_load_tls(t, cpu) +#define set_ldt native_set_ldt + +#define write_ldt_entry(dt, entry, desc) \ + native_write_ldt_entry(dt, entry, desc) +#define write_gdt_entry(dt, entry, desc, type) \ + native_write_gdt_entry(dt, entry, desc, type) +#define write_idt_entry(dt, entry, g) native_write_idt_entry(dt, entry, g) +#endif + +static inline void native_write_idt_entry(gate_desc *idt, int entry, + const gate_desc *gate) +{ + memcpy(&idt[entry], gate, sizeof(*gate)); +} + +static inline void native_write_ldt_entry(struct desc_struct *ldt, int entry, + const void *desc) +{ + memcpy(&ldt[entry], desc, 8); +} + +static inline void native_write_gdt_entry(struct desc_struct *gdt, int entry, + const void *desc, int type) +{ + unsigned int size; + switch (type) { + case DESC_TSS: + size = sizeof(tss_desc); + break; + case DESC_LDT: + size = sizeof(ldt_desc); + break; + default: + size = sizeof(struct desc_struct); + break; + } + memcpy(&gdt[entry], desc, size); +} + +static inline void pack_descriptor(struct desc_struct *desc, unsigned long base, + unsigned long limit, unsigned char type, + unsigned char flags) +{ + desc->a = ((base & 0xffff) << 16) | (limit & 0xffff); + desc->b = (base & 0xff000000) | ((base & 0xff0000) >> 16) | + (limit & 0x000f0000) | ((type & 0xff) << 8) | + ((flags & 0xf) << 20); + desc->p = 1; +} + + +static inline void set_tssldt_descriptor(void *d, unsigned long addr, + unsigned type, unsigned size) +{ +#ifdef CONFIG_X86_64 + struct ldttss_desc64 *desc = d; + memset(desc, 0, sizeof(*desc)); + desc->limit0 = size & 0xFFFF; + desc->base0 = PTR_LOW(addr); + desc->base1 = PTR_MIDDLE(addr) & 0xFF; + desc->type = type; + desc->p = 1; + desc->limit1 = (size >> 16) & 0xF; + desc->base2 = (PTR_MIDDLE(addr) >> 8) & 0xFF; + desc->base3 = PTR_HIGH(addr); +#else + + pack_descriptor((struct desc_struct *)d, addr, size, 0x80 | type, 0); +#endif +} + +static inline void __set_tss_desc(unsigned cpu, unsigned int entry, void *addr) +{ + struct desc_struct *d = get_cpu_gdt_table(cpu); + tss_desc tss; + + /* + * sizeof(unsigned long) coming from an extra "long" at the end + * of the iobitmap. See tss_struct definition in processor.h + * + * -1? seg base+limit should be pointing to the address of the + * last valid byte + */ + set_tssldt_descriptor(&tss, (unsigned long)addr, DESC_TSS, + IO_BITMAP_OFFSET + IO_BITMAP_BYTES + sizeof(unsigned long) - 1); + write_gdt_entry(d, entry, &tss, DESC_TSS); +} + +#define set_tss_desc(cpu, addr) __set_tss_desc(cpu, GDT_ENTRY_TSS, addr) + +static inline void native_set_ldt(const void *addr, unsigned int entries) +{ + if (likely(entries == 0)) + __asm__ __volatile__("lldt %w0"::"q" (0)); + else { + unsigned cpu = smp_processor_id(); + ldt_desc ldt; + + set_tssldt_descriptor(&ldt, (unsigned long)addr, + DESC_LDT, entries * sizeof(ldt) - 1); + write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_LDT, + &ldt, DESC_LDT); + __asm__ __volatile__("lldt %w0"::"q" (GDT_ENTRY_LDT*8)); + } +} + +static inline void native_load_tr_desc(void) +{ + asm volatile("ltr %w0"::"q" (GDT_ENTRY_TSS*8)); +} + +static inline void native_load_gdt(const struct desc_ptr *dtr) +{ + asm volatile("lgdt %0"::"m" (*dtr)); +} + +static inline void native_load_idt(const struct desc_ptr *dtr) +{ + asm volatile("lidt %0"::"m" (*dtr)); +} + +static inline void native_store_gdt(struct desc_ptr *dtr) +{ + asm volatile("sgdt %0":"=m" (*dtr)); +} + +static inline void native_store_idt(struct desc_ptr *dtr) +{ + asm volatile("sidt %0":"=m" (*dtr)); +} + +static inline unsigned long native_store_tr(void) +{ + unsigned long tr; + asm volatile("str %0":"=r" (tr)); + return tr; +} + +static inline void native_load_tls(struct thread_struct *t, unsigned int cpu) +{ + unsigned int i; + struct desc_struct *gdt = get_cpu_gdt_table(cpu); + + for (i = 0; i < GDT_ENTRY_TLS_ENTRIES; i++) + gdt[GDT_ENTRY_TLS_MIN + i] = t->tls_array[i]; +} + +#define _LDT_empty(info) (\ + (info)->base_addr == 0 && \ + (info)->limit == 0 && \ + (info)->contents == 0 && \ + (info)->read_exec_only == 1 && \ + (info)->seg_32bit == 0 && \ + (info)->limit_in_pages == 0 && \ + (info)->seg_not_present == 1 && \ + (info)->useable == 0) + +#ifdef CONFIG_X86_64 +#define LDT_empty(info) (_LDT_empty(info) && ((info)->lm == 0)) +#else +#define LDT_empty(info) (_LDT_empty(info)) +#endif + +static inline void clear_LDT(void) +{ + set_ldt(NULL, 0); +} + +/* + * load one particular LDT into the current CPU + */ +static inline void load_LDT_nolock(mm_context_t *pc) +{ + set_ldt(pc->ldt, pc->size); +} + +static inline void load_LDT(mm_context_t *pc) +{ + preempt_disable(); + load_LDT_nolock(pc); + preempt_enable(); +} + +static inline unsigned long get_desc_base(const struct desc_struct *desc) +{ + return desc->base0 | ((desc->base1) << 16) | ((desc->base2) << 24); +} + +static inline unsigned long get_desc_limit(const struct desc_struct *desc) +{ + return desc->limit0 | (desc->limit << 16); +} + +static inline void _set_gate(int gate, unsigned type, void *addr, + unsigned dpl, unsigned ist, unsigned seg) +{ + gate_desc s; + pack_gate(&s, type, (unsigned long)addr, dpl, ist, seg); + /* + * does not need to be atomic because it is only done once at + * setup time + */ + write_idt_entry(idt_table, gate, &s); +} + +/* + * This needs to use 'idt_table' rather than 'idt', and + * thus use the _nonmapped_ version of the IDT, as the + * Pentium F0 0F bugfix can have resulted in the mapped + * IDT being write-protected. + */ +static inline void set_intr_gate(unsigned int n, void *addr) +{ + BUG_ON((unsigned)n > 0xFF); + _set_gate(n, GATE_INTERRUPT, addr, 0, 0, __KERNEL_CS); +} + +/* + * This routine sets up an interrupt gate at directory privilege level 3. + */ +static inline void set_system_intr_gate(unsigned int n, void *addr) +{ + BUG_ON((unsigned)n > 0xFF); + _set_gate(n, GATE_INTERRUPT, addr, 0x3, 0, __KERNEL_CS); +} + +static inline void set_trap_gate(unsigned int n, void *addr) +{ + BUG_ON((unsigned)n > 0xFF); + _set_gate(n, GATE_TRAP, addr, 0, 0, __KERNEL_CS); +} + +static inline void set_system_gate(unsigned int n, void *addr) +{ + BUG_ON((unsigned)n > 0xFF); #ifdef CONFIG_X86_32 -# include "desc_32.h" + _set_gate(n, GATE_TRAP, addr, 0x3, 0, __KERNEL_CS); +#else + _set_gate(n, GATE_INTERRUPT, addr, 0x3, 0, __KERNEL_CS); +#endif +} + +static inline void set_task_gate(unsigned int n, unsigned int gdt_entry) +{ + BUG_ON((unsigned)n > 0xFF); + _set_gate(n, GATE_TASK, (void *)0, 0, 0, (gdt_entry<<3)); +} + +static inline void set_intr_gate_ist(int n, void *addr, unsigned ist) +{ + BUG_ON((unsigned)n > 0xFF); + _set_gate(n, GATE_INTERRUPT, addr, 0, ist, __KERNEL_CS); +} + +static inline void set_system_gate_ist(int n, void *addr, unsigned ist) +{ + BUG_ON((unsigned)n > 0xFF); + _set_gate(n, GATE_INTERRUPT, addr, 0x3, ist, __KERNEL_CS); +} + #else -# include "desc_64.h" +/* + * GET_DESC_BASE reads the descriptor base of the specified segment. + * + * Args: + * idx - descriptor index + * gdt - GDT pointer + * base - 32bit register to which the base will be written + * lo_w - lo word of the "base" register + * lo_b - lo byte of the "base" register + * hi_b - hi byte of the low word of the "base" register + * + * Example: + * GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah) + * Will read the base address of GDT_ENTRY_ESPFIX_SS and put it into %eax. + */ +#define GET_DESC_BASE(idx, gdt, base, lo_w, lo_b, hi_b) \ + movb idx*8+4(gdt), lo_b; \ + movb idx*8+7(gdt), hi_b; \ + shll $16, base; \ + movw idx*8+2(gdt), lo_w; + + +#endif /* __ASSEMBLY__ */ + #endif diff --git a/include/asm-x86/desc_32.h b/include/asm-x86/desc_32.h deleted file mode 100644 index c547403f341d..000000000000 --- a/include/asm-x86/desc_32.h +++ /dev/null @@ -1,244 +0,0 @@ -#ifndef __ARCH_DESC_H -#define __ARCH_DESC_H - -#include <asm/ldt.h> -#include <asm/segment.h> - -#ifndef __ASSEMBLY__ - -#include <linux/preempt.h> -#include <linux/smp.h> -#include <linux/percpu.h> - -#include <asm/mmu.h> - -struct Xgt_desc_struct { - unsigned short size; - unsigned long address __attribute__((packed)); - unsigned short pad; -} __attribute__ ((packed)); - -struct gdt_page -{ - struct desc_struct gdt[GDT_ENTRIES]; -} __attribute__((aligned(PAGE_SIZE))); -DECLARE_PER_CPU(struct gdt_page, gdt_page); - -static inline struct desc_struct *get_cpu_gdt_table(unsigned int cpu) -{ - return per_cpu(gdt_page, cpu).gdt; -} - -extern struct Xgt_desc_struct idt_descr; -extern struct desc_struct idt_table[]; -extern void set_intr_gate(unsigned int irq, void * addr); - -static inline void pack_descriptor(__u32 *a, __u32 *b, - unsigned long base, unsigned long limit, unsigned char type, unsigned char flags) -{ - *a = ((base & 0xffff) << 16) | (limit & 0xffff); - *b = (base & 0xff000000) | ((base & 0xff0000) >> 16) | - (limit & 0x000f0000) | ((type & 0xff) << 8) | ((flags & 0xf) << 20); -} - -static inline void pack_gate(__u32 *a, __u32 *b, - unsigned long base, unsigned short seg, unsigned char type, unsigned char flags) -{ - *a = (seg << 16) | (base & 0xffff); - *b = (base & 0xffff0000) | ((type & 0xff) << 8) | (flags & 0xff); -} - -#define DESCTYPE_LDT 0x82 /* present, system, DPL-0, LDT */ -#define DESCTYPE_TSS 0x89 /* present, system, DPL-0, 32-bit TSS */ -#define DESCTYPE_TASK 0x85 /* present, system, DPL-0, task gate */ -#define DESCTYPE_INT 0x8e /* present, system, DPL-0, interrupt gate */ -#define DESCTYPE_TRAP 0x8f /* present, system, DPL-0, trap gate */ -#define DESCTYPE_DPL3 0x60 /* DPL-3 */ -#define DESCTYPE_S 0x10 /* !system */ - -#ifdef CONFIG_PARAVIRT -#include <asm/paravirt.h> -#else -#define load_TR_desc() native_load_tr_desc() -#define load_gdt(dtr) native_load_gdt(dtr) -#define load_idt(dtr) native_load_idt(dtr) -#define load_tr(tr) __asm__ __volatile("ltr %0"::"m" (tr)) -#define load_ldt(ldt) __asm__ __volatile("lldt %0"::"m" (ldt)) - -#define store_gdt(dtr) native_store_gdt(dtr) -#define store_idt(dtr) native_store_idt(dtr) -#define store_tr(tr) (tr = native_store_tr()) -#define store_ldt(ldt) __asm__ ("sldt %0":"=m" (ldt)) - -#define load_TLS(t, cpu) native_load_tls(t, cpu) -#define set_ldt native_set_ldt - -#define write_ldt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b) -#define write_gdt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b) -#define write_idt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b) -#endif - -static inline void write_dt_entry(struct desc_struct *dt, - int entry, u32 entry_low, u32 entry_high) -{ - dt[entry].a = entry_low; - dt[entry].b = entry_high; -} - -static inline void native_set_ldt(const void *addr, unsigned int entries) -{ - if (likely(entries == 0)) - __asm__ __volatile__("lldt %w0"::"q" (0)); - else { - unsigned cpu = smp_processor_id(); - __u32 a, b; - - pack_descriptor(&a, &b, (unsigned long)addr, - entries * sizeof(struct desc_struct) - 1, - DESCTYPE_LDT, 0); - write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_LDT, a, b); - __asm__ __volatile__("lldt %w0"::"q" (GDT_ENTRY_LDT*8)); - } -} - - -static inline void native_load_tr_desc(void) -{ - asm volatile("ltr %w0"::"q" (GDT_ENTRY_TSS*8)); -} - -static inline void native_load_gdt(const struct Xgt_desc_struct *dtr) -{ - asm volatile("lgdt %0"::"m" (*dtr)); -} - -static inline void native_load_idt(const struct Xgt_desc_struct *dtr) -{ - asm volatile("lidt %0"::"m" (*dtr)); -} - -static inline void native_store_gdt(struct Xgt_desc_struct *dtr) -{ - asm ("sgdt %0":"=m" (*dtr)); -} - -static inline void native_store_idt(struct Xgt_desc_struct *dtr) -{ - asm ("sidt %0":"=m" (*dtr)); -} - -static inline unsigned long native_store_tr(void) -{ - unsigned long tr; - asm ("str %0":"=r" (tr)); - return tr; -} - -static inline void native_load_tls(struct thread_struct *t, unsigned int cpu) -{ - unsigned int i; - struct desc_struct *gdt = get_cpu_gdt_table(cpu); - - for (i = 0; i < GDT_ENTRY_TLS_ENTRIES; i++) - gdt[GDT_ENTRY_TLS_MIN + i] = t->tls_array[i]; -} - -static inline void _set_gate(int gate, unsigned int type, void *addr, unsigned short seg) -{ - __u32 a, b; - pack_gate(&a, &b, (unsigned long)addr, seg, type, 0); - write_idt_entry(idt_table, gate, a, b); -} - -static inline void __set_tss_desc(unsigned int cpu, unsigned int entry, const void *addr) -{ - __u32 a, b; - pack_descriptor(&a, &b, (unsigned long)addr, - offsetof(struct tss_struct, __cacheline_filler) - 1, - DESCTYPE_TSS, 0); - write_gdt_entry(get_cpu_gdt_table(cpu), entry, a, b); -} - - -#define set_tss_desc(cpu,addr) __set_tss_desc(cpu, GDT_ENTRY_TSS, addr) - -#define LDT_entry_a(info) \ - ((((info)->base_addr & 0x0000ffff) << 16) | ((info)->limit & 0x0ffff)) - -#define LDT_entry_b(info) \ - (((info)->base_addr & 0xff000000) | \ - (((info)->base_addr & 0x00ff0000) >> 16) | \ - ((info)->limit & 0xf0000) | \ - (((info)->read_exec_only ^ 1) << 9) | \ - ((info)->contents << 10) | \ - (((info)->seg_not_present ^ 1) << 15) | \ - ((info)->seg_32bit << 22) | \ - ((info)->limit_in_pages << 23) | \ - ((info)->useable << 20) | \ - 0x7000) - -#define LDT_empty(info) (\ - (info)->base_addr == 0 && \ - (info)->limit == 0 && \ - (info)->contents == 0 && \ - (info)->read_exec_only == 1 && \ - (info)->seg_32bit == 0 && \ - (info)->limit_in_pages == 0 && \ - (info)->seg_not_present == 1 && \ - (info)->useable == 0 ) - -static inline void clear_LDT(void) -{ - set_ldt(NULL, 0); -} - -/* - * load one particular LDT into the current CPU - */ -static inline void load_LDT_nolock(mm_context_t *pc) -{ - set_ldt(pc->ldt, pc->size); -} - -static inline void load_LDT(mm_context_t *pc) -{ - preempt_disable(); - load_LDT_nolock(pc); - preempt_enable(); -} - -static inline unsigned long get_desc_base(unsigned long *desc) -{ - unsigned long base; - base = ((desc[0] >> 16) & 0x0000ffff) | - ((desc[1] << 16) & 0x00ff0000) | - (desc[1] & 0xff000000); - return base; -} - -#else /* __ASSEMBLY__ */ - -/* - * GET_DESC_BASE reads the descriptor base of the specified segment. - * - * Args: - * idx - descriptor index - * gdt - GDT pointer - * base - 32bit register to which the base will be written - * lo_w - lo word of the "base" register - * lo_b - lo byte of the "base" register - * hi_b - hi byte of the low word of the "base" register - * - * Example: - * GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah) - * Will read the base address of GDT_ENTRY_ESPFIX_SS and put it into %eax. - */ -#define GET_DESC_BASE(idx, gdt, base, lo_w, lo_b, hi_b) \ - movb idx*8+4(gdt), lo_b; \ - movb idx*8+7(gdt), hi_b; \ - shll $16, base; \ - movw idx*8+2(gdt), lo_w; - -#endif /* !__ASSEMBLY__ */ - -#endif diff --git a/include/asm-x86/desc_64.h b/include/asm-x86/desc_64.h index 7d9c938e69fd..8b137891791f 100644 --- a/include/asm-x86/desc_64.h +++ b/include/asm-x86/desc_64.h @@ -1,204 +1 @@ -/* Written 2000 by Andi Kleen */ -#ifndef __ARCH_DESC_H -#define __ARCH_DESC_H -#include <linux/threads.h> -#include <asm/ldt.h> - -#ifndef __ASSEMBLY__ - -#include <linux/string.h> -#include <linux/smp.h> -#include <asm/desc_defs.h> - -#include <asm/segment.h> -#include <asm/mmu.h> - -extern struct desc_struct cpu_gdt_table[GDT_ENTRIES]; - -#define load_TR_desc() asm volatile("ltr %w0"::"r" (GDT_ENTRY_TSS*8)) -#define load_LDT_desc() asm volatile("lldt %w0"::"r" (GDT_ENTRY_LDT*8)) -#define clear_LDT() asm volatile("lldt %w0"::"r" (0)) - -static inline unsigned long __store_tr(void) -{ - unsigned long tr; - - asm volatile ("str %w0":"=r" (tr)); - return tr; -} - -#define store_tr(tr) (tr) = __store_tr() - -/* - * This is the ldt that every process will get unless we need - * something other than this. - */ -extern struct desc_struct default_ldt[]; -extern struct gate_struct idt_table[]; -extern struct desc_ptr cpu_gdt_descr[]; - -/* the cpu gdt accessor */ -#define cpu_gdt(_cpu) ((struct desc_struct *)cpu_gdt_descr[_cpu].address) - -static inline void load_gdt(const struct desc_ptr *ptr) -{ - asm volatile("lgdt %w0"::"m" (*ptr)); -} - -static inline void store_gdt(struct desc_ptr *ptr) -{ - asm("sgdt %w0":"=m" (*ptr)); -} - -static inline void _set_gate(void *adr, unsigned type, unsigned long func, unsigned dpl, unsigned ist) -{ - struct gate_struct s; - s.offset_low = PTR_LOW(func); - s.segment = __KERNEL_CS; - s.ist = ist; - s.p = 1; - s.dpl = dpl; - s.zero0 = 0; - s.zero1 = 0; - s.type = type; - s.offset_middle = PTR_MIDDLE(func); - s.offset_high = PTR_HIGH(func); - /* does not need to be atomic because it is only done once at setup time */ - memcpy(adr, &s, 16); -} - -static inline void set_intr_gate(int nr, void *func) -{ - BUG_ON((unsigned)nr > 0xFF); - _set_gate(&idt_table[nr], GATE_INTERRUPT, (unsigned long) func, 0, 0); -} - -static inline void set_intr_gate_ist(int nr, void *func, unsigned ist) -{ - BUG_ON((unsigned)nr > 0xFF); - _set_gate(&idt_table[nr], GATE_INTERRUPT, (unsigned long) func, 0, ist); -} - -static inline void set_system_gate(int nr, void *func) -{ - BUG_ON((unsigned)nr > 0xFF); - _set_gate(&idt_table[nr], GATE_INTERRUPT, (unsigned long) func, 3, 0); -} - -static inline void set_system_gate_ist(int nr, void *func, unsigned ist) -{ - _set_gate(&idt_table[nr], GATE_INTERRUPT, (unsigned long) func, 3, ist); -} - -static inline void load_idt(const struct desc_ptr *ptr) -{ - asm volatile("lidt %w0"::"m" (*ptr)); -} - -static inline void store_idt(struct desc_ptr *dtr) -{ - asm("sidt %w0":"=m" (*dtr)); -} - -static inline void set_tssldt_descriptor(void *ptr, unsigned long tss, unsigned type, - unsigned size) -{ - struct ldttss_desc d; - memset(&d,0,sizeof(d)); - d.limit0 = size & 0xFFFF; - d.base0 = PTR_LOW(tss); - d.base1 = PTR_MIDDLE(tss) & 0xFF; - d.type = type; - d.p = 1; - d.limit1 = (size >> 16) & 0xF; - d.base2 = (PTR_MIDDLE(tss) >> 8) & 0xFF; - d.base3 = PTR_HIGH(tss); - memcpy(ptr, &d, 16); -} - -static inline void set_tss_desc(unsigned cpu, void *addr) -{ - /* - * sizeof(unsigned long) coming from an extra "long" at the end - * of the iobitmap. See tss_struct definition in processor.h - * - * -1? seg base+limit should be pointing to the address of the - * last valid byte - */ - set_tssldt_descriptor(&cpu_gdt(cpu)[GDT_ENTRY_TSS], - (unsigned long)addr, DESC_TSS, - IO_BITMAP_OFFSET + IO_BITMAP_BYTES + sizeof(unsigned long) - 1); -} - -static inline void set_ldt_desc(unsigned cpu, void *addr, int size) -{ - set_tssldt_descriptor(&cpu_gdt(cpu)[GDT_ENTRY_LDT], (unsigned long)addr, - DESC_LDT, size * 8 - 1); -} - -#define LDT_entry_a(info) \ - ((((info)->base_addr & 0x0000ffff) << 16) | ((info)->limit & 0x0ffff)) -/* Don't allow setting of the lm bit. It is useless anyways because - 64bit system calls require __USER_CS. */ -#define LDT_entry_b(info) \ - (((info)->base_addr & 0xff000000) | \ - (((info)->base_addr & 0x00ff0000) >> 16) | \ - ((info)->limit & 0xf0000) | \ - (((info)->read_exec_only ^ 1) << 9) | \ - ((info)->contents << 10) | \ - (((info)->seg_not_present ^ 1) << 15) | \ - ((info)->seg_32bit << 22) | \ - ((info)->limit_in_pages << 23) | \ - ((info)->useable << 20) | \ - /* ((info)->lm << 21) | */ \ - 0x7000) - -#define LDT_empty(info) (\ - (info)->base_addr == 0 && \ - (info)->limit == 0 && \ - (info)->contents == 0 && \ - (info)->read_exec_only == 1 && \ - (info)->seg_32bit == 0 && \ - (info)->limit_in_pages == 0 && \ - (info)->seg_not_present == 1 && \ - (info)->useable == 0 && \ - (info)->lm == 0) - -static inline void load_TLS(struct thread_struct *t, unsigned int cpu) -{ - unsigned int i; - u64 *gdt = (u64 *)(cpu_gdt(cpu) + GDT_ENTRY_TLS_MIN); - - for (i = 0; i < GDT_ENTRY_TLS_ENTRIES; i++) - gdt[i] = t->tls_array[i]; -} - -/* - * load one particular LDT into the current CPU - */ -static inline void load_LDT_nolock (mm_context_t *pc, int cpu) -{ - int count = pc->size; - - if (likely(!count)) { - clear_LDT(); - return; - } - - set_ldt_desc(cpu, pc->ldt, count); - load_LDT_desc(); -} - -static inline void load_LDT(mm_context_t *pc) -{ - int cpu = get_cpu(); - load_LDT_nolock(pc, cpu); - put_cpu(); -} - -extern struct desc_ptr idt_descr; - -#endif /* !__ASSEMBLY__ */ - -#endif diff --git a/include/asm-x86/desc_defs.h b/include/asm-x86/desc_defs.h index 089004070099..e33f078b3e54 100644 --- a/include/asm-x86/desc_defs.h +++ b/include/asm-x86/desc_defs.h @@ -11,26 +11,36 @@ #include <linux/types.h> +/* + * FIXME: Acessing the desc_struct through its fields is more elegant, + * and should be the one valid thing to do. However, a lot of open code + * still touches the a and b acessors, and doing this allow us to do it + * incrementally. We keep the signature as a struct, rather than an union, + * so we can get rid of it transparently in the future -- glommer + */ // 8 byte segment descriptor struct desc_struct { - u16 limit0; - u16 base0; - unsigned base1 : 8, type : 4, s : 1, dpl : 2, p : 1; - unsigned limit : 4, avl : 1, l : 1, d : 1, g : 1, base2 : 8; -} __attribute__((packed)); + union { + struct { unsigned int a, b; }; + struct { + u16 limit0; + u16 base0; + unsigned base1: 8, type: 4, s: 1, dpl: 2, p: 1; + unsigned limit: 4, avl: 1, l: 1, d: 1, g: 1, base2: 8; + }; -struct n_desc_struct { - unsigned int a,b; -}; + }; +} __attribute__((packed)); enum { GATE_INTERRUPT = 0xE, GATE_TRAP = 0xF, GATE_CALL = 0xC, + GATE_TASK = 0x5, }; // 16byte gate -struct gate_struct { +struct gate_struct64 { u16 offset_low; u16 segment; unsigned ist : 3, zero0 : 5, type : 5, dpl : 2, p : 1; @@ -39,17 +49,18 @@ struct gate_struct { u32 zero1; } __attribute__((packed)); -#define PTR_LOW(x) ((unsigned long)(x) & 0xFFFF) -#define PTR_MIDDLE(x) (((unsigned long)(x) >> 16) & 0xFFFF) -#define PTR_HIGH(x) ((unsigned long)(x) >> 32) +#define PTR_LOW(x) ((unsigned long long)(x) & 0xFFFF) +#define PTR_MIDDLE(x) (((unsigned long long)(x) >> 16) & 0xFFFF) +#define PTR_HIGH(x) ((unsigned long long)(x) >> 32) enum { DESC_TSS = 0x9, DESC_LDT = 0x2, + DESCTYPE_S = 0x10, /* !system */ }; // LDT or TSS descriptor in the GDT. 16 bytes. -struct ldttss_desc { +struct ldttss_desc64 { u16 limit0; u16 base0; unsigned base1 : 8, type : 5, dpl : 2, p : 1; @@ -58,6 +69,16 @@ struct ldttss_desc { u32 zero1; } __attribute__((packed)); +#ifdef CONFIG_X86_64 +typedef struct gate_struct64 gate_desc; +typedef struct ldttss_desc64 ldt_desc; +typedef struct ldttss_desc64 tss_desc; +#else +typedef struct desc_struct gate_desc; +typedef struct desc_struct ldt_desc; +typedef struct desc_struct tss_desc; +#endif + struct desc_ptr { unsigned short size; unsigned long address; diff --git a/include/asm-x86/dma.h b/include/asm-x86/dma.h index 9f936c61a4e5..e9733ce89880 100644 --- a/include/asm-x86/dma.h +++ b/include/asm-x86/dma.h @@ -1,5 +1,319 @@ +/* + * linux/include/asm/dma.h: Defines for using and allocating dma channels. + * Written by Hennus Bergman, 1992. + * High DMA channel support & info by Hannu Savolainen + * and John Boyd, Nov. 1992. + */ + +#ifndef _ASM_X86_DMA_H +#define _ASM_X86_DMA_H + +#include <linux/spinlock.h> /* And spinlocks */ +#include <asm/io.h> /* need byte IO */ +#include <linux/delay.h> + + +#ifdef HAVE_REALLY_SLOW_DMA_CONTROLLER +#define dma_outb outb_p +#else +#define dma_outb outb +#endif + +#define dma_inb inb + +/* + * NOTES about DMA transfers: + * + * controller 1: channels 0-3, byte operations, ports 00-1F + * controller 2: channels 4-7, word operations, ports C0-DF + * + * - ALL registers are 8 bits only, regardless of transfer size + * - channel 4 is not used - cascades 1 into 2. + * - channels 0-3 are byte - addresses/counts are for physical bytes + * - channels 5-7 are word - addresses/counts are for physical words + * - transfers must not cross physical 64K (0-3) or 128K (5-7) boundaries + * - transfer count loaded to registers is 1 less than actual count + * - controller 2 offsets are all even (2x offsets for controller 1) + * - page registers for 5-7 don't use data bit 0, represent 128K pages + * - page registers for 0-3 use bit 0, represent 64K pages + * + * DMA transfers are limited to the lower 16MB of _physical_ memory. + * Note that addresses loaded into registers must be _physical_ addresses, + * not logical addresses (which may differ if paging is active). + * + * Address mapping for channels 0-3: + * + * A23 ... A16 A15 ... A8 A7 ... A0 (Physical addresses) + * | ... | | ... | | ... | + * | ... | | ... | | ... | + * | ... | | ... | | ... | + * P7 ... P0 A7 ... A0 A7 ... A0 + * | Page | Addr MSB | Addr LSB | (DMA registers) + * + * Address mapping for channels 5-7: + * + * A23 ... A17 A16 A15 ... A9 A8 A7 ... A1 A0 (Physical addresses) + * | ... | \ \ ... \ \ \ ... \ \ + * | ... | \ \ ... \ \ \ ... \ (not used) + * | ... | \ \ ... \ \ \ ... \ + * P7 ... P1 (0) A7 A6 ... A0 A7 A6 ... A0 + * | Page | Addr MSB | Addr LSB | (DMA registers) + * + * Again, channels 5-7 transfer _physical_ words (16 bits), so addresses + * and counts _must_ be word-aligned (the lowest address bit is _ignored_ at + * the hardware level, so odd-byte transfers aren't possible). + * + * Transfer count (_not # bytes_) is limited to 64K, represented as actual + * count - 1 : 64K => 0xFFFF, 1 => 0x0000. Thus, count is always 1 or more, + * and up to 128K bytes may be transferred on channels 5-7 in one operation. + * + */ + +#define MAX_DMA_CHANNELS 8 + #ifdef CONFIG_X86_32 -# include "dma_32.h" + +/* The maximum address that we can perform a DMA transfer to on this platform */ +#define MAX_DMA_ADDRESS (PAGE_OFFSET+0x1000000) + +#else + +/* 16MB ISA DMA zone */ +#define MAX_DMA_PFN ((16*1024*1024) >> PAGE_SHIFT) + +/* 4GB broken PCI/AGP hardware bus master zone */ +#define MAX_DMA32_PFN ((4UL*1024*1024*1024) >> PAGE_SHIFT) + +/* Compat define for old dma zone */ +#define MAX_DMA_ADDRESS ((unsigned long)__va(MAX_DMA_PFN << PAGE_SHIFT)) + +#endif + +/* 8237 DMA controllers */ +#define IO_DMA1_BASE 0x00 /* 8 bit slave DMA, channels 0..3 */ +#define IO_DMA2_BASE 0xC0 /* 16 bit master DMA, ch 4(=slave input)..7 */ + +/* DMA controller registers */ +#define DMA1_CMD_REG 0x08 /* command register (w) */ +#define DMA1_STAT_REG 0x08 /* status register (r) */ +#define DMA1_REQ_REG 0x09 /* request register (w) */ +#define DMA1_MASK_REG 0x0A /* single-channel mask (w) */ +#define DMA1_MODE_REG 0x0B /* mode register (w) */ +#define DMA1_CLEAR_FF_REG 0x0C /* clear pointer flip-flop (w) */ +#define DMA1_TEMP_REG 0x0D /* Temporary Register (r) */ +#define DMA1_RESET_REG 0x0D /* Master Clear (w) */ +#define DMA1_CLR_MASK_REG 0x0E /* Clear Mask */ +#define DMA1_MASK_ALL_REG 0x0F /* all-channels mask (w) */ + +#define DMA2_CMD_REG 0xD0 /* command register (w) */ +#define DMA2_STAT_REG 0xD0 /* status register (r) */ +#define DMA2_REQ_REG 0xD2 /* request register (w) */ +#define DMA2_MASK_REG 0xD4 /* single-channel mask (w) */ +#define DMA2_MODE_REG 0xD6 /* mode register (w) */ +#define DMA2_CLEAR_FF_REG 0xD8 /* clear pointer flip-flop (w) */ +#define DMA2_TEMP_REG 0xDA /* Temporary Register (r) */ +#define DMA2_RESET_REG 0xDA /* Master Clear (w) */ +#define DMA2_CLR_MASK_REG 0xDC /* Clear Mask */ +#define DMA2_MASK_ALL_REG 0xDE /* all-channels mask (w) */ + +#define DMA_ADDR_0 0x00 /* DMA address registers */ +#define DMA_ADDR_1 0x02 +#define DMA_ADDR_2 0x04 +#define DMA_ADDR_3 0x06 +#define DMA_ADDR_4 0xC0 +#define DMA_ADDR_5 0xC4 +#define DMA_ADDR_6 0xC8 +#define DMA_ADDR_7 0xCC + +#define DMA_CNT_0 0x01 /* DMA count registers */ +#define DMA_CNT_1 0x03 +#define DMA_CNT_2 0x05 +#define DMA_CNT_3 0x07 +#define DMA_CNT_4 0xC2 +#define DMA_CNT_5 0xC6 +#define DMA_CNT_6 0xCA +#define DMA_CNT_7 0xCE + +#define DMA_PAGE_0 0x87 /* DMA page registers */ +#define DMA_PAGE_1 0x83 +#define DMA_PAGE_2 0x81 +#define DMA_PAGE_3 0x82 +#define DMA_PAGE_5 0x8B +#define DMA_PAGE_6 0x89 +#define DMA_PAGE_7 0x8A + +/* I/O to memory, no autoinit, increment, single mode */ +#define DMA_MODE_READ 0x44 +/* memory to I/O, no autoinit, increment, single mode */ +#define DMA_MODE_WRITE 0x48 +/* pass thru DREQ->HRQ, DACK<-HLDA only */ +#define DMA_MODE_CASCADE 0xC0 + +#define DMA_AUTOINIT 0x10 + + +extern spinlock_t dma_spin_lock; + +static __inline__ unsigned long claim_dma_lock(void) +{ + unsigned long flags; + spin_lock_irqsave(&dma_spin_lock, flags); + return flags; +} + +static __inline__ void release_dma_lock(unsigned long flags) +{ + spin_unlock_irqrestore(&dma_spin_lock, flags); +} + +/* enable/disable a specific DMA channel */ +static __inline__ void enable_dma(unsigned int dmanr) +{ + if (dmanr <= 3) + dma_outb(dmanr, DMA1_MASK_REG); + else + dma_outb(dmanr & 3, DMA2_MASK_REG); +} + +static __inline__ void disable_dma(unsigned int dmanr) +{ + if (dmanr <= 3) + dma_outb(dmanr | 4, DMA1_MASK_REG); + else + dma_outb((dmanr & 3) | 4, DMA2_MASK_REG); +} + +/* Clear the 'DMA Pointer Flip Flop'. + * Write 0 for LSB/MSB, 1 for MSB/LSB access. + * Use this once to initialize the FF to a known state. + * After that, keep track of it. :-) + * --- In order to do that, the DMA routines below should --- + * --- only be used while holding the DMA lock ! --- + */ +static __inline__ void clear_dma_ff(unsigned int dmanr) +{ + if (dmanr <= 3) + dma_outb(0, DMA1_CLEAR_FF_REG); + else + dma_outb(0, DMA2_CLEAR_FF_REG); +} + +/* set mode (above) for a specific DMA channel */ +static __inline__ void set_dma_mode(unsigned int dmanr, char mode) +{ + if (dmanr <= 3) + dma_outb(mode | dmanr, DMA1_MODE_REG); + else + dma_outb(mode | (dmanr & 3), DMA2_MODE_REG); +} + +/* Set only the page register bits of the transfer address. + * This is used for successive transfers when we know the contents of + * the lower 16 bits of the DMA current address register, but a 64k boundary + * may have been crossed. + */ +static __inline__ void set_dma_page(unsigned int dmanr, char pagenr) +{ + switch (dmanr) { + case 0: + dma_outb(pagenr, DMA_PAGE_0); + break; + case 1: + dma_outb(pagenr, DMA_PAGE_1); + break; + case 2: + dma_outb(pagenr, DMA_PAGE_2); + break; + case 3: + dma_outb(pagenr, DMA_PAGE_3); + break; + case 5: + dma_outb(pagenr & 0xfe, DMA_PAGE_5); + break; + case 6: + dma_outb(pagenr & 0xfe, DMA_PAGE_6); + break; + case 7: + dma_outb(pagenr & 0xfe, DMA_PAGE_7); + break; + } +} + + +/* Set transfer address & page bits for specific DMA channel. + * Assumes dma flipflop is clear. + */ +static __inline__ void set_dma_addr(unsigned int dmanr, unsigned int a) +{ + set_dma_page(dmanr, a>>16); + if (dmanr <= 3) { + dma_outb(a & 0xff, ((dmanr & 3) << 1) + IO_DMA1_BASE); + dma_outb((a >> 8) & 0xff, ((dmanr & 3) << 1) + IO_DMA1_BASE); + } else { + dma_outb((a >> 1) & 0xff, ((dmanr & 3) << 2) + IO_DMA2_BASE); + dma_outb((a >> 9) & 0xff, ((dmanr & 3) << 2) + IO_DMA2_BASE); + } +} + + +/* Set transfer size (max 64k for DMA0..3, 128k for DMA5..7) for + * a specific DMA channel. + * You must ensure the parameters are valid. + * NOTE: from a manual: "the number of transfers is one more + * than the initial word count"! This is taken into account. + * Assumes dma flip-flop is clear. + * NOTE 2: "count" represents _bytes_ and must be even for channels 5-7. + */ +static __inline__ void set_dma_count(unsigned int dmanr, unsigned int count) +{ + count--; + if (dmanr <= 3) { + dma_outb(count & 0xff, ((dmanr & 3) << 1) + 1 + IO_DMA1_BASE); + dma_outb((count >> 8) & 0xff, + ((dmanr & 3) << 1) + 1 + IO_DMA1_BASE); + } else { + dma_outb((count >> 1) & 0xff, + ((dmanr & 3) << 2) + 2 + IO_DMA2_BASE); + dma_outb((count >> 9) & 0xff, + ((dmanr & 3) << 2) + 2 + IO_DMA2_BASE); + } +} + + +/* Get DMA residue count. After a DMA transfer, this + * should return zero. Reading this while a DMA transfer is + * still in progress will return unpredictable results. + * If called before the channel has been used, it may return 1. + * Otherwise, it returns the number of _bytes_ left to transfer. + * + * Assumes DMA flip-flop is clear. + */ +static __inline__ int get_dma_residue(unsigned int dmanr) +{ + unsigned int io_port; + /* using short to get 16-bit wrap around */ + unsigned short count; + + io_port = (dmanr <= 3) ? ((dmanr & 3) << 1) + 1 + IO_DMA1_BASE + : ((dmanr & 3) << 2) + 2 + IO_DMA2_BASE; + + count = 1 + dma_inb(io_port); + count += dma_inb(io_port) << 8; + + return (dmanr <= 3) ? count : (count << 1); +} + + +/* These are in kernel/dma.c: */ +extern int request_dma(unsigned int dmanr, const char *device_id); +extern void free_dma(unsigned int dmanr); + +/* From PCI */ + +#ifdef CONFIG_PCI +extern int isa_dma_bridge_buggy; #else -# include "dma_64.h" +#define isa_dma_bridge_buggy (0) #endif + +#endif /* _ASM_X86_DMA_H */ diff --git a/include/asm-x86/dma_32.h b/include/asm-x86/dma_32.h deleted file mode 100644 index d23aac8e1a50..000000000000 --- a/include/asm-x86/dma_32.h +++ /dev/null @@ -1,297 +0,0 @@ -/* $Id: dma.h,v 1.7 1992/12/14 00:29:34 root Exp root $ - * linux/include/asm/dma.h: Defines for using and allocating dma channels. - * Written by Hennus Bergman, 1992. - * High DMA channel support & info by Hannu Savolainen - * and John Boyd, Nov. 1992. - */ - -#ifndef _ASM_DMA_H -#define _ASM_DMA_H - -#include <linux/spinlock.h> /* And spinlocks */ -#include <asm/io.h> /* need byte IO */ -#include <linux/delay.h> - - -#ifdef HAVE_REALLY_SLOW_DMA_CONTROLLER -#define dma_outb outb_p -#else -#define dma_outb outb -#endif - -#define dma_inb inb - -/* - * NOTES about DMA transfers: - * - * controller 1: channels 0-3, byte operations, ports 00-1F - * controller 2: channels 4-7, word operations, ports C0-DF - * - * - ALL registers are 8 bits only, regardless of transfer size - * - channel 4 is not used - cascades 1 into 2. - * - channels 0-3 are byte - addresses/counts are for physical bytes - * - channels 5-7 are word - addresses/counts are for physical words - * - transfers must not cross physical 64K (0-3) or 128K (5-7) boundaries - * - transfer count loaded to registers is 1 less than actual count - * - controller 2 offsets are all even (2x offsets for controller 1) - * - page registers for 5-7 don't use data bit 0, represent 128K pages - * - page registers for 0-3 use bit 0, represent 64K pages - * - * DMA transfers are limited to the lower 16MB of _physical_ memory. - * Note that addresses loaded into registers must be _physical_ addresses, - * not logical addresses (which may differ if paging is active). - * - * Address mapping for channels 0-3: - * - * A23 ... A16 A15 ... A8 A7 ... A0 (Physical addresses) - * | ... | | ... | | ... | - * | ... | | ... | | ... | - * | ... | | ... | | ... | - * P7 ... P0 A7 ... A0 A7 ... A0 - * | Page | Addr MSB | Addr LSB | (DMA registers) - * - * Address mapping for channels 5-7: - * - * A23 ... A17 A16 A15 ... A9 A8 A7 ... A1 A0 (Physical addresses) - * | ... | \ \ ... \ \ \ ... \ \ - * | ... | \ \ ... \ \ \ ... \ (not used) - * | ... | \ \ ... \ \ \ ... \ - * P7 ... P1 (0) A7 A6 ... A0 A7 A6 ... A0 - * | Page | Addr MSB | Addr LSB | (DMA registers) - * - * Again, channels 5-7 transfer _physical_ words (16 bits), so addresses - * and counts _must_ be word-aligned (the lowest address bit is _ignored_ at - * the hardware level, so odd-byte transfers aren't possible). - * - * Transfer count (_not # bytes_) is limited to 64K, represented as actual - * count - 1 : 64K => 0xFFFF, 1 => 0x0000. Thus, count is always 1 or more, - * and up to 128K bytes may be transferred on channels 5-7 in one operation. - * - */ - -#define MAX_DMA_CHANNELS 8 - -/* The maximum address that we can perform a DMA transfer to on this platform */ -#define MAX_DMA_ADDRESS (PAGE_OFFSET+0x1000000) - -/* 8237 DMA controllers */ -#define IO_DMA1_BASE 0x00 /* 8 bit slave DMA, channels 0..3 */ -#define IO_DMA2_BASE 0xC0 /* 16 bit master DMA, ch 4(=slave input)..7 */ - -/* DMA controller registers */ -#define DMA1_CMD_REG 0x08 /* command register (w) */ -#define DMA1_STAT_REG 0x08 /* status register (r) */ -#define DMA1_REQ_REG 0x09 /* request register (w) */ -#define DMA1_MASK_REG 0x0A /* single-channel mask (w) */ -#define DMA1_MODE_REG 0x0B /* mode register (w) */ -#define DMA1_CLEAR_FF_REG 0x0C /* clear pointer flip-flop (w) */ -#define DMA1_TEMP_REG 0x0D /* Temporary Register (r) */ -#define DMA1_RESET_REG 0x0D /* Master Clear (w) */ -#define DMA1_CLR_MASK_REG 0x0E /* Clear Mask */ -#define DMA1_MASK_ALL_REG 0x0F /* all-channels mask (w) */ - -#define DMA2_CMD_REG 0xD0 /* command register (w) */ -#define DMA2_STAT_REG 0xD0 /* status register (r) */ -#define DMA2_REQ_REG 0xD2 /* request register (w) */ -#define DMA2_MASK_REG 0xD4 /* single-channel mask (w) */ -#define DMA2_MODE_REG 0xD6 /* mode register (w) */ -#define DMA2_CLEAR_FF_REG 0xD8 /* clear pointer flip-flop (w) */ -#define DMA2_TEMP_REG 0xDA /* Temporary Register (r) */ -#define DMA2_RESET_REG 0xDA /* Master Clear (w) */ -#define DMA2_CLR_MASK_REG 0xDC /* Clear Mask */ -#define DMA2_MASK_ALL_REG 0xDE /* all-channels mask (w) */ - -#define DMA_ADDR_0 0x00 /* DMA address registers */ -#define DMA_ADDR_1 0x02 -#define DMA_ADDR_2 0x04 -#define DMA_ADDR_3 0x06 -#define DMA_ADDR_4 0xC0 -#define DMA_ADDR_5 0xC4 -#define DMA_ADDR_6 0xC8 -#define DMA_ADDR_7 0xCC - -#define DMA_CNT_0 0x01 /* DMA count registers */ -#define DMA_CNT_1 0x03 -#define DMA_CNT_2 0x05 -#define DMA_CNT_3 0x07 -#define DMA_CNT_4 0xC2 -#define DMA_CNT_5 0xC6 -#define DMA_CNT_6 0xCA -#define DMA_CNT_7 0xCE - -#define DMA_PAGE_0 0x87 /* DMA page registers */ -#define DMA_PAGE_1 0x83 -#define DMA_PAGE_2 0x81 -#define DMA_PAGE_3 0x82 -#define DMA_PAGE_5 0x8B -#define DMA_PAGE_6 0x89 -#define DMA_PAGE_7 0x8A - -#define DMA_MODE_READ 0x44 /* I/O to memory, no autoinit, increment, single mode */ -#define DMA_MODE_WRITE 0x48 /* memory to I/O, no autoinit, increment, single mode */ -#define DMA_MODE_CASCADE 0xC0 /* pass thru DREQ->HRQ, DACK<-HLDA only */ - -#define DMA_AUTOINIT 0x10 - - -extern spinlock_t dma_spin_lock; - -static __inline__ unsigned long claim_dma_lock(void) -{ - unsigned long flags; - spin_lock_irqsave(&dma_spin_lock, flags); - return flags; -} - -static __inline__ void release_dma_lock(unsigned long flags) -{ - spin_unlock_irqrestore(&dma_spin_lock, flags); -} - -/* enable/disable a specific DMA channel */ -static __inline__ void enable_dma(unsigned int dmanr) -{ - if (dmanr<=3) - dma_outb(dmanr, DMA1_MASK_REG); - else - dma_outb(dmanr & 3, DMA2_MASK_REG); -} - -static __inline__ void disable_dma(unsigned int dmanr) -{ - if (dmanr<=3) - dma_outb(dmanr | 4, DMA1_MASK_REG); - else - dma_outb((dmanr & 3) | 4, DMA2_MASK_REG); -} - -/* Clear the 'DMA Pointer Flip Flop'. - * Write 0 for LSB/MSB, 1 for MSB/LSB access. - * Use this once to initialize the FF to a known state. - * After that, keep track of it. :-) - * --- In order to do that, the DMA routines below should --- - * --- only be used while holding the DMA lock ! --- - */ -static __inline__ void clear_dma_ff(unsigned int dmanr) -{ - if (dmanr<=3) - dma_outb(0, DMA1_CLEAR_FF_REG); - else - dma_outb(0, DMA2_CLEAR_FF_REG); -} - -/* set mode (above) for a specific DMA channel */ -static __inline__ void set_dma_mode(unsigned int dmanr, char mode) -{ - if (dmanr<=3) - dma_outb(mode | dmanr, DMA1_MODE_REG); - else - dma_outb(mode | (dmanr&3), DMA2_MODE_REG); -} - -/* Set only the page register bits of the transfer address. - * This is used for successive transfers when we know the contents of - * the lower 16 bits of the DMA current address register, but a 64k boundary - * may have been crossed. - */ -static __inline__ void set_dma_page(unsigned int dmanr, char pagenr) -{ - switch(dmanr) { - case 0: - dma_outb(pagenr, DMA_PAGE_0); - break; - case 1: - dma_outb(pagenr, DMA_PAGE_1); - break; - case 2: - dma_outb(pagenr, DMA_PAGE_2); - break; - case 3: - dma_outb(pagenr, DMA_PAGE_3); - break; - case 5: - dma_outb(pagenr & 0xfe, DMA_PAGE_5); - break; - case 6: - dma_outb(pagenr & 0xfe, DMA_PAGE_6); - break; - case 7: - dma_outb(pagenr & 0xfe, DMA_PAGE_7); - break; - } -} - - -/* Set transfer address & page bits for specific DMA channel. - * Assumes dma flipflop is clear. - */ -static __inline__ void set_dma_addr(unsigned int dmanr, unsigned int a) -{ - set_dma_page(dmanr, a>>16); - if (dmanr <= 3) { - dma_outb( a & 0xff, ((dmanr&3)<<1) + IO_DMA1_BASE ); - dma_outb( (a>>8) & 0xff, ((dmanr&3)<<1) + IO_DMA1_BASE ); - } else { - dma_outb( (a>>1) & 0xff, ((dmanr&3)<<2) + IO_DMA2_BASE ); - dma_outb( (a>>9) & 0xff, ((dmanr&3)<<2) + IO_DMA2_BASE ); - } -} - - -/* Set transfer size (max 64k for DMA0..3, 128k for DMA5..7) for - * a specific DMA channel. - * You must ensure the parameters are valid. - * NOTE: from a manual: "the number of transfers is one more - * than the initial word count"! This is taken into account. - * Assumes dma flip-flop is clear. - * NOTE 2: "count" represents _bytes_ and must be even for channels 5-7. - */ -static __inline__ void set_dma_count(unsigned int dmanr, unsigned int count) -{ - count--; - if (dmanr <= 3) { - dma_outb( count & 0xff, ((dmanr&3)<<1) + 1 + IO_DMA1_BASE ); - dma_outb( (count>>8) & 0xff, ((dmanr&3)<<1) + 1 + IO_DMA1_BASE ); - } else { - dma_outb( (count>>1) & 0xff, ((dmanr&3)<<2) + 2 + IO_DMA2_BASE ); - dma_outb( (count>>9) & 0xff, ((dmanr&3)<<2) + 2 + IO_DMA2_BASE ); - } -} - - -/* Get DMA residue count. After a DMA transfer, this - * should return zero. Reading this while a DMA transfer is - * still in progress will return unpredictable results. - * If called before the channel has been used, it may return 1. - * Otherwise, it returns the number of _bytes_ left to transfer. - * - * Assumes DMA flip-flop is clear. - */ -static __inline__ int get_dma_residue(unsigned int dmanr) -{ - unsigned int io_port = (dmanr<=3)? ((dmanr&3)<<1) + 1 + IO_DMA1_BASE - : ((dmanr&3)<<2) + 2 + IO_DMA2_BASE; - - /* using short to get 16-bit wrap around */ - unsigned short count; - - count = 1 + dma_inb(io_port); - count += dma_inb(io_port) << 8; - - return (dmanr<=3)? count : (count<<1); -} - - -/* These are in kernel/dma.c: */ -extern int request_dma(unsigned int dmanr, const char * device_id); /* reserve a DMA channel */ -extern void free_dma(unsigned int dmanr); /* release it again */ - -/* From PCI */ - -#ifdef CONFIG_PCI -extern int isa_dma_bridge_buggy; -#else -#define isa_dma_bridge_buggy (0) -#endif - -#endif /* _ASM_DMA_H */ diff --git a/include/asm-x86/dma_64.h b/include/asm-x86/dma_64.h deleted file mode 100644 index a37c16f06289..000000000000 --- a/include/asm-x86/dma_64.h +++ /dev/null @@ -1,304 +0,0 @@ -/* - * linux/include/asm/dma.h: Defines for using and allocating dma channels. - * Written by Hennus Bergman, 1992. - * High DMA channel support & info by Hannu Savolainen - * and John Boyd, Nov. 1992. - */ - -#ifndef _ASM_DMA_H -#define _ASM_DMA_H - -#include <linux/spinlock.h> /* And spinlocks */ -#include <asm/io.h> /* need byte IO */ -#include <linux/delay.h> - - -#ifdef HAVE_REALLY_SLOW_DMA_CONTROLLER -#define dma_outb outb_p -#else -#define dma_outb outb -#endif - -#define dma_inb inb - -/* - * NOTES about DMA transfers: - * - * controller 1: channels 0-3, byte operations, ports 00-1F - * controller 2: channels 4-7, word operations, ports C0-DF - * - * - ALL registers are 8 bits only, regardless of transfer size - * - channel 4 is not used - cascades 1 into 2. - * - channels 0-3 are byte - addresses/counts are for physical bytes - * - channels 5-7 are word - addresses/counts are for physical words - * - transfers must not cross physical 64K (0-3) or 128K (5-7) boundaries - * - transfer count loaded to registers is 1 less than actual count - * - controller 2 offsets are all even (2x offsets for controller 1) - * - page registers for 5-7 don't use data bit 0, represent 128K pages - * - page registers for 0-3 use bit 0, represent 64K pages - * - * DMA transfers are limited to the lower 16MB of _physical_ memory. - * Note that addresses loaded into registers must be _physical_ addresses, - * not logical addresses (which may differ if paging is active). - * - * Address mapping for channels 0-3: - * - * A23 ... A16 A15 ... A8 A7 ... A0 (Physical addresses) - * | ... | | ... | | ... | - * | ... | | ... | | ... | - * | ... | | ... | | ... | - * P7 ... P0 A7 ... A0 A7 ... A0 - * | Page | Addr MSB | Addr LSB | (DMA registers) - * - * Address mapping for channels 5-7: - * - * A23 ... A17 A16 A15 ... A9 A8 A7 ... A1 A0 (Physical addresses) - * | ... | \ \ ... \ \ \ ... \ \ - * | ... | \ \ ... \ \ \ ... \ (not used) - * | ... | \ \ ... \ \ \ ... \ - * P7 ... P1 (0) A7 A6 ... A0 A7 A6 ... A0 - * | Page | Addr MSB | Addr LSB | (DMA registers) - * - * Again, channels 5-7 transfer _physical_ words (16 bits), so addresses - * and counts _must_ be word-aligned (the lowest address bit is _ignored_ at - * the hardware level, so odd-byte transfers aren't possible). - * - * Transfer count (_not # bytes_) is limited to 64K, represented as actual - * count - 1 : 64K => 0xFFFF, 1 => 0x0000. Thus, count is always 1 or more, - * and up to 128K bytes may be transferred on channels 5-7 in one operation. - * - */ - -#define MAX_DMA_CHANNELS 8 - - -/* 16MB ISA DMA zone */ -#define MAX_DMA_PFN ((16*1024*1024) >> PAGE_SHIFT) - -/* 4GB broken PCI/AGP hardware bus master zone */ -#define MAX_DMA32_PFN ((4UL*1024*1024*1024) >> PAGE_SHIFT) - -/* Compat define for old dma zone */ -#define MAX_DMA_ADDRESS ((unsigned long)__va(MAX_DMA_PFN << PAGE_SHIFT)) - -/* 8237 DMA controllers */ -#define IO_DMA1_BASE 0x00 /* 8 bit slave DMA, channels 0..3 */ -#define IO_DMA2_BASE 0xC0 /* 16 bit master DMA, ch 4(=slave input)..7 */ - -/* DMA controller registers */ -#define DMA1_CMD_REG 0x08 /* command register (w) */ -#define DMA1_STAT_REG 0x08 /* status register (r) */ -#define DMA1_REQ_REG 0x09 /* request register (w) */ -#define DMA1_MASK_REG 0x0A /* single-channel mask (w) */ -#define DMA1_MODE_REG 0x0B /* mode register (w) */ -#define DMA1_CLEAR_FF_REG 0x0C /* clear pointer flip-flop (w) */ -#define DMA1_TEMP_REG 0x0D /* Temporary Register (r) */ -#define DMA1_RESET_REG 0x0D /* Master Clear (w) */ -#define DMA1_CLR_MASK_REG 0x0E /* Clear Mask */ -#define DMA1_MASK_ALL_REG 0x0F /* all-channels mask (w) */ - -#define DMA2_CMD_REG 0xD0 /* command register (w) */ -#define DMA2_STAT_REG 0xD0 /* status register (r) */ -#define DMA2_REQ_REG 0xD2 /* request register (w) */ -#define DMA2_MASK_REG 0xD4 /* single-channel mask (w) */ -#define DMA2_MODE_REG 0xD6 /* mode register (w) */ -#define DMA2_CLEAR_FF_REG 0xD8 /* clear pointer flip-flop (w) */ -#define DMA2_TEMP_REG 0xDA /* Temporary Register (r) */ -#define DMA2_RESET_REG 0xDA /* Master Clear (w) */ -#define DMA2_CLR_MASK_REG 0xDC /* Clear Mask */ -#define DMA2_MASK_ALL_REG 0xDE /* all-channels mask (w) */ - -#define DMA_ADDR_0 0x00 /* DMA address registers */ -#define DMA_ADDR_1 0x02 -#define DMA_ADDR_2 0x04 -#define DMA_ADDR_3 0x06 -#define DMA_ADDR_4 0xC0 -#define DMA_ADDR_5 0xC4 -#define DMA_ADDR_6 0xC8 -#define DMA_ADDR_7 0xCC - -#define DMA_CNT_0 0x01 /* DMA count registers */ -#define DMA_CNT_1 0x03 -#define DMA_CNT_2 0x05 -#define DMA_CNT_3 0x07 -#define DMA_CNT_4 0xC2 -#define DMA_CNT_5 0xC6 -#define DMA_CNT_6 0xCA -#define DMA_CNT_7 0xCE - -#define DMA_PAGE_0 0x87 /* DMA page registers */ -#define DMA_PAGE_1 0x83 -#define DMA_PAGE_2 0x81 -#define DMA_PAGE_3 0x82 -#define DMA_PAGE_5 0x8B -#define DMA_PAGE_6 0x89 -#define DMA_PAGE_7 0x8A - -#define DMA_MODE_READ 0x44 /* I/O to memory, no autoinit, increment, single mode */ -#define DMA_MODE_WRITE 0x48 /* memory to I/O, no autoinit, increment, single mode */ -#define DMA_MODE_CASCADE 0xC0 /* pass thru DREQ->HRQ, DACK<-HLDA only */ - -#define DMA_AUTOINIT 0x10 - - -extern spinlock_t dma_spin_lock; - -static __inline__ unsigned long claim_dma_lock(void) -{ - unsigned long flags; - spin_lock_irqsave(&dma_spin_lock, flags); - return flags; -} - -static __inline__ void release_dma_lock(unsigned long flags) -{ - spin_unlock_irqrestore(&dma_spin_lock, flags); -} - -/* enable/disable a specific DMA channel */ -static __inline__ void enable_dma(unsigned int dmanr) -{ - if (dmanr<=3) - dma_outb(dmanr, DMA1_MASK_REG); - else - dma_outb(dmanr & 3, DMA2_MASK_REG); -} - -static __inline__ void disable_dma(unsigned int dmanr) -{ - if (dmanr<=3) - dma_outb(dmanr | 4, DMA1_MASK_REG); - else - dma_outb((dmanr & 3) | 4, DMA2_MASK_REG); -} - -/* Clear the 'DMA Pointer Flip Flop'. - * Write 0 for LSB/MSB, 1 for MSB/LSB access. - * Use this once to initialize the FF to a known state. - * After that, keep track of it. :-) - * --- In order to do that, the DMA routines below should --- - * --- only be used while holding the DMA lock ! --- - */ -static __inline__ void clear_dma_ff(unsigned int dmanr) -{ - if (dmanr<=3) - dma_outb(0, DMA1_CLEAR_FF_REG); - else - dma_outb(0, DMA2_CLEAR_FF_REG); -} - -/* set mode (above) for a specific DMA channel */ -static __inline__ void set_dma_mode(unsigned int dmanr, char mode) -{ - if (dmanr<=3) - dma_outb(mode | dmanr, DMA1_MODE_REG); - else - dma_outb(mode | (dmanr&3), DMA2_MODE_REG); -} - -/* Set only the page register bits of the transfer address. - * This is used for successive transfers when we know the contents of - * the lower 16 bits of the DMA current address register, but a 64k boundary - * may have been crossed. - */ -static __inline__ void set_dma_page(unsigned int dmanr, char pagenr) -{ - switch(dmanr) { - case 0: - dma_outb(pagenr, DMA_PAGE_0); - break; - case 1: - dma_outb(pagenr, DMA_PAGE_1); - break; - case 2: - dma_outb(pagenr, DMA_PAGE_2); - break; - case 3: - dma_outb(pagenr, DMA_PAGE_3); - break; - case 5: - dma_outb(pagenr & 0xfe, DMA_PAGE_5); - break; - case 6: - dma_outb(pagenr & 0xfe, DMA_PAGE_6); - break; - case 7: - dma_outb(pagenr & 0xfe, DMA_PAGE_7); - break; - } -} - - -/* Set transfer address & page bits for specific DMA channel. - * Assumes dma flipflop is clear. - */ -static __inline__ void set_dma_addr(unsigned int dmanr, unsigned int a) -{ - set_dma_page(dmanr, a>>16); - if (dmanr <= 3) { - dma_outb( a & 0xff, ((dmanr&3)<<1) + IO_DMA1_BASE ); - dma_outb( (a>>8) & 0xff, ((dmanr&3)<<1) + IO_DMA1_BASE ); - } else { - dma_outb( (a>>1) & 0xff, ((dmanr&3)<<2) + IO_DMA2_BASE ); - dma_outb( (a>>9) & 0xff, ((dmanr&3)<<2) + IO_DMA2_BASE ); - } -} - - -/* Set transfer size (max 64k for DMA1..3, 128k for DMA5..7) for - * a specific DMA channel. - * You must ensure the parameters are valid. - * NOTE: from a manual: "the number of transfers is one more - * than the initial word count"! This is taken into account. - * Assumes dma flip-flop is clear. - * NOTE 2: "count" represents _bytes_ and must be even for channels 5-7. - */ -static __inline__ void set_dma_count(unsigned int dmanr, unsigned int count) -{ - count--; - if (dmanr <= 3) { - dma_outb( count & 0xff, ((dmanr&3)<<1) + 1 + IO_DMA1_BASE ); - dma_outb( (count>>8) & 0xff, ((dmanr&3)<<1) + 1 + IO_DMA1_BASE ); - } else { - dma_outb( (count>>1) & 0xff, ((dmanr&3)<<2) + 2 + IO_DMA2_BASE ); - dma_outb( (count>>9) & 0xff, ((dmanr&3)<<2) + 2 + IO_DMA2_BASE ); - } -} - - -/* Get DMA residue count. After a DMA transfer, this - * should return zero. Reading this while a DMA transfer is - * still in progress will return unpredictable results. - * If called before the channel has been used, it may return 1. - * Otherwise, it returns the number of _bytes_ left to transfer. - * - * Assumes DMA flip-flop is clear. - */ -static __inline__ int get_dma_residue(unsigned int dmanr) -{ - unsigned int io_port = (dmanr<=3)? ((dmanr&3)<<1) + 1 + IO_DMA1_BASE - : ((dmanr&3)<<2) + 2 + IO_DMA2_BASE; - - /* using short to get 16-bit wrap around */ - unsigned short count; - - count = 1 + dma_inb(io_port); - count += dma_inb(io_port) << 8; - - return (dmanr<=3)? count : (count<<1); -} - - -/* These are in kernel/dma.c: */ -extern int request_dma(unsigned int dmanr, const char * device_id); /* reserve a DMA channel */ -extern void free_dma(unsigned int dmanr); /* release it again */ - -/* From PCI */ - -#ifdef CONFIG_PCI -extern int isa_dma_bridge_buggy; -#else -#define isa_dma_bridge_buggy (0) -#endif - -#endif /* _ASM_DMA_H */ diff --git a/include/asm-x86/dmi.h b/include/asm-x86/dmi.h index 8e2b0e6aa8e7..1241e6ad1935 100644 --- a/include/asm-x86/dmi.h +++ b/include/asm-x86/dmi.h @@ -5,9 +5,6 @@ #ifdef CONFIG_X86_32 -/* Use early IO mappings for DMI because it's initialized early */ -#define dmi_ioremap bt_ioremap -#define dmi_iounmap bt_iounmap #define dmi_alloc alloc_bootmem #else /* CONFIG_X86_32 */ @@ -22,14 +19,15 @@ extern char dmi_alloc_data[DMI_MAX_DATA]; static inline void *dmi_alloc(unsigned len) { int idx = dmi_alloc_index; - if ((dmi_alloc_index += len) > DMI_MAX_DATA) + if ((dmi_alloc_index + len) > DMI_MAX_DATA) return NULL; + dmi_alloc_index += len; return dmi_alloc_data + idx; } +#endif + #define dmi_ioremap early_ioremap #define dmi_iounmap early_iounmap #endif - -#endif diff --git a/include/asm-x86/ds.h b/include/asm-x86/ds.h new file mode 100644 index 000000000000..7881368142fa --- /dev/null +++ b/include/asm-x86/ds.h @@ -0,0 +1,72 @@ +/* + * Debug Store (DS) support + * + * This provides a low-level interface to the hardware's Debug Store + * feature that is used for last branch recording (LBR) and + * precise-event based sampling (PEBS). + * + * Different architectures use a different DS layout/pointer size. + * The below functions therefore work on a void*. + * + * + * Since there is no user for PEBS, yet, only LBR (or branch + * trace store, BTS) is supported. + * + * + * Copyright (C) 2007 Intel Corporation. + * Markus Metzger <markus.t.metzger@intel.com>, Dec 2007 + */ + +#ifndef _ASM_X86_DS_H +#define _ASM_X86_DS_H + +#include <linux/types.h> +#include <linux/init.h> + +struct cpuinfo_x86; + + +/* a branch trace record entry + * + * In order to unify the interface between various processor versions, + * we use the below data structure for all processors. + */ +enum bts_qualifier { + BTS_INVALID = 0, + BTS_BRANCH, + BTS_TASK_ARRIVES, + BTS_TASK_DEPARTS +}; + +struct bts_struct { + u64 qualifier; + union { + /* BTS_BRANCH */ + struct { + u64 from_ip; + u64 to_ip; + } lbr; + /* BTS_TASK_ARRIVES or + BTS_TASK_DEPARTS */ + u64 jiffies; + } variant; +}; + +/* Overflow handling mechanisms */ +#define DS_O_SIGNAL 1 /* send overflow signal */ +#define DS_O_WRAP 2 /* wrap around */ + +extern int ds_allocate(void **, size_t); +extern int ds_free(void **); +extern int ds_get_bts_size(void *); +extern int ds_get_bts_end(void *); +extern int ds_get_bts_index(void *); +extern int ds_set_overflow(void *, int); +extern int ds_get_overflow(void *); +extern int ds_clear(void *); +extern int ds_read_bts(void *, int, struct bts_struct *); +extern int ds_write_bts(void *, const struct bts_struct *); +extern unsigned long ds_debugctl_mask(void); +extern void __cpuinit ds_init_intel(struct cpuinfo_x86 *c); + +#endif /* _ASM_X86_DS_H */ diff --git a/include/asm-x86/e820.h b/include/asm-x86/e820.h index 3e214f39fad3..7004251fc66b 100644 --- a/include/asm-x86/e820.h +++ b/include/asm-x86/e820.h @@ -22,6 +22,12 @@ struct e820map { }; #endif /* __ASSEMBLY__ */ +#define ISA_START_ADDRESS 0xa0000 +#define ISA_END_ADDRESS 0x100000 + +#define BIOS_BEGIN 0x000a0000 +#define BIOS_END 0x00100000 + #ifdef __KERNEL__ #ifdef CONFIG_X86_32 # include "e820_32.h" diff --git a/include/asm-x86/e820_32.h b/include/asm-x86/e820_32.h index 03f60c690c8a..f1da7ebd1905 100644 --- a/include/asm-x86/e820_32.h +++ b/include/asm-x86/e820_32.h @@ -12,20 +12,28 @@ #ifndef __E820_HEADER #define __E820_HEADER +#include <linux/ioport.h> + #define HIGH_MEMORY (1024*1024) #ifndef __ASSEMBLY__ extern struct e820map e820; +extern void update_e820(void); extern int e820_all_mapped(unsigned long start, unsigned long end, unsigned type); extern int e820_any_mapped(u64 start, u64 end, unsigned type); extern void find_max_pfn(void); extern void register_bootmem_low_pages(unsigned long max_low_pfn); +extern void add_memory_region(unsigned long long start, + unsigned long long size, int type); extern void e820_register_memory(void); extern void limit_regions(unsigned long long size); extern void print_memory_map(char *who); +extern void init_iomem_resources(struct resource *code_resource, + struct resource *data_resource, + struct resource *bss_resource); #if defined(CONFIG_PM) && defined(CONFIG_HIBERNATION) extern void e820_mark_nosave_regions(void); @@ -35,5 +43,6 @@ static inline void e820_mark_nosave_regions(void) } #endif + #endif/*!__ASSEMBLY__*/ #endif/*__E820_HEADER*/ diff --git a/include/asm-x86/e820_64.h b/include/asm-x86/e820_64.h index 0bd4787a5d57..51e4170f9ca5 100644 --- a/include/asm-x86/e820_64.h +++ b/include/asm-x86/e820_64.h @@ -11,6 +11,8 @@ #ifndef __E820_HEADER #define __E820_HEADER +#include <linux/ioport.h> + #ifndef __ASSEMBLY__ extern unsigned long find_e820_area(unsigned long start, unsigned long end, unsigned size); @@ -19,11 +21,15 @@ extern void add_memory_region(unsigned long start, unsigned long size, extern void setup_memory_region(void); extern void contig_e820_setup(void); extern unsigned long e820_end_of_ram(void); -extern void e820_reserve_resources(void); +extern void e820_reserve_resources(struct resource *code_resource, + struct resource *data_resource, struct resource *bss_resource); extern void e820_mark_nosave_regions(void); -extern void e820_print_map(char *who); extern int e820_any_mapped(unsigned long start, unsigned long end, unsigned type); extern int e820_all_mapped(unsigned long start, unsigned long end, unsigned type); +extern int e820_any_non_reserved(unsigned long start, unsigned long end); +extern int is_memory_any_valid(unsigned long start, unsigned long end); +extern int e820_all_non_reserved(unsigned long start, unsigned long end); +extern int is_memory_all_valid(unsigned long start, unsigned long end); extern unsigned long e820_hole_size(unsigned long start, unsigned long end); extern void e820_setup_gap(void); @@ -33,9 +39,11 @@ extern void e820_register_active_regions(int nid, extern void finish_e820_parsing(void); extern struct e820map e820; +extern void update_e820(void); + +extern void reserve_early(unsigned long start, unsigned long end); +extern void early_res_to_bootmem(void); -extern unsigned ebda_addr, ebda_size; -extern unsigned long nodemap_addr, nodemap_size; #endif/*!__ASSEMBLY__*/ #endif/*__E820_HEADER*/ diff --git a/include/asm-x86/efi.h b/include/asm-x86/efi.h new file mode 100644 index 000000000000..9c68a1f098d8 --- /dev/null +++ b/include/asm-x86/efi.h @@ -0,0 +1,97 @@ +#ifndef _ASM_X86_EFI_H +#define _ASM_X86_EFI_H + +#ifdef CONFIG_X86_32 + +extern unsigned long asmlinkage efi_call_phys(void *, ...); + +#define efi_call_phys0(f) efi_call_phys(f) +#define efi_call_phys1(f, a1) efi_call_phys(f, a1) +#define efi_call_phys2(f, a1, a2) efi_call_phys(f, a1, a2) +#define efi_call_phys3(f, a1, a2, a3) efi_call_phys(f, a1, a2, a3) +#define efi_call_phys4(f, a1, a2, a3, a4) \ + efi_call_phys(f, a1, a2, a3, a4) +#define efi_call_phys5(f, a1, a2, a3, a4, a5) \ + efi_call_phys(f, a1, a2, a3, a4, a5) +#define efi_call_phys6(f, a1, a2, a3, a4, a5, a6) \ + efi_call_phys(f, a1, a2, a3, a4, a5, a6) +/* + * Wrap all the virtual calls in a way that forces the parameters on the stack. + */ + +#define efi_call_virt(f, args...) \ + ((efi_##f##_t __attribute__((regparm(0)))*)efi.systab->runtime->f)(args) + +#define efi_call_virt0(f) efi_call_virt(f) +#define efi_call_virt1(f, a1) efi_call_virt(f, a1) +#define efi_call_virt2(f, a1, a2) efi_call_virt(f, a1, a2) +#define efi_call_virt3(f, a1, a2, a3) efi_call_virt(f, a1, a2, a3) +#define efi_call_virt4(f, a1, a2, a3, a4) \ + efi_call_virt(f, a1, a2, a3, a4) +#define efi_call_virt5(f, a1, a2, a3, a4, a5) \ + efi_call_virt(f, a1, a2, a3, a4, a5) +#define efi_call_virt6(f, a1, a2, a3, a4, a5, a6) \ + efi_call_virt(f, a1, a2, a3, a4, a5, a6) + +#define efi_ioremap(addr, size) ioremap(addr, size) + +#else /* !CONFIG_X86_32 */ + +#define MAX_EFI_IO_PAGES 100 + +extern u64 efi_call0(void *fp); +extern u64 efi_call1(void *fp, u64 arg1); +extern u64 efi_call2(void *fp, u64 arg1, u64 arg2); +extern u64 efi_call3(void *fp, u64 arg1, u64 arg2, u64 arg3); +extern u64 efi_call4(void *fp, u64 arg1, u64 arg2, u64 arg3, u64 arg4); +extern u64 efi_call5(void *fp, u64 arg1, u64 arg2, u64 arg3, + u64 arg4, u64 arg5); +extern u64 efi_call6(void *fp, u64 arg1, u64 arg2, u64 arg3, + u64 arg4, u64 arg5, u64 arg6); + +#define efi_call_phys0(f) \ + efi_call0((void *)(f)) +#define efi_call_phys1(f, a1) \ + efi_call1((void *)(f), (u64)(a1)) +#define efi_call_phys2(f, a1, a2) \ + efi_call2((void *)(f), (u64)(a1), (u64)(a2)) +#define efi_call_phys3(f, a1, a2, a3) \ + efi_call3((void *)(f), (u64)(a1), (u64)(a2), (u64)(a3)) +#define efi_call_phys4(f, a1, a2, a3, a4) \ + efi_call4((void *)(f), (u64)(a1), (u64)(a2), (u64)(a3), \ + (u64)(a4)) +#define efi_call_phys5(f, a1, a2, a3, a4, a5) \ + efi_call5((void *)(f), (u64)(a1), (u64)(a2), (u64)(a3), \ + (u64)(a4), (u64)(a5)) +#define efi_call_phys6(f, a1, a2, a3, a4, a5, a6) \ + efi_call6((void *)(f), (u64)(a1), (u64)(a2), (u64)(a3), \ + (u64)(a4), (u64)(a5), (u64)(a6)) + +#define efi_call_virt0(f) \ + efi_call0((void *)(efi.systab->runtime->f)) +#define efi_call_virt1(f, a1) \ + efi_call1((void *)(efi.systab->runtime->f), (u64)(a1)) +#define efi_call_virt2(f, a1, a2) \ + efi_call2((void *)(efi.systab->runtime->f), (u64)(a1), (u64)(a2)) +#define efi_call_virt3(f, a1, a2, a3) \ + efi_call3((void *)(efi.systab->runtime->f), (u64)(a1), (u64)(a2), \ + (u64)(a3)) +#define efi_call_virt4(f, a1, a2, a3, a4) \ + efi_call4((void *)(efi.systab->runtime->f), (u64)(a1), (u64)(a2), \ + (u64)(a3), (u64)(a4)) +#define efi_call_virt5(f, a1, a2, a3, a4, a5) \ + efi_call5((void *)(efi.systab->runtime->f), (u64)(a1), (u64)(a2), \ + (u64)(a3), (u64)(a4), (u64)(a5)) +#define efi_call_virt6(f, a1, a2, a3, a4, a5, a6) \ + efi_call6((void *)(efi.systab->runtime->f), (u64)(a1), (u64)(a2), \ + (u64)(a3), (u64)(a4), (u64)(a5), (u64)(a6)) + +extern void *efi_ioremap(unsigned long offset, unsigned long size); + +#endif /* CONFIG_X86_32 */ + +extern void efi_reserve_bootmem(void); +extern void efi_call_phys_prelog(void); +extern void efi_call_phys_epilog(void); + +#endif diff --git a/include/asm-x86/elf.h b/include/asm-x86/elf.h index ec42a4d2e83b..d9c94e707289 100644 --- a/include/asm-x86/elf.h +++ b/include/asm-x86/elf.h @@ -73,18 +73,23 @@ typedef struct user_fxsr_struct elf_fpxregset_t; #endif #ifdef __KERNEL__ +#include <asm/vdso.h> -#ifdef CONFIG_X86_32 -#include <asm/processor.h> -#include <asm/system.h> /* for savesegment */ -#include <asm/desc.h> +extern unsigned int vdso_enabled; /* * This is used to ensure we don't load something for the wrong architecture. */ -#define elf_check_arch(x) \ +#define elf_check_arch_ia32(x) \ (((x)->e_machine == EM_386) || ((x)->e_machine == EM_486)) +#ifdef CONFIG_X86_32 +#include <asm/processor.h> +#include <asm/system.h> /* for savesegment */ +#include <asm/desc.h> + +#define elf_check_arch(x) elf_check_arch_ia32(x) + /* SVR4/i386 ABI (pages 3-31, 3-32) says that when the program starts %edx contains a pointer to a function which might be registered using `atexit'. This provides a mean for the dynamic linker to call DT_FINI functions for @@ -96,36 +101,38 @@ typedef struct user_fxsr_struct elf_fpxregset_t; just to make things more deterministic. */ #define ELF_PLAT_INIT(_r, load_addr) do { \ - _r->ebx = 0; _r->ecx = 0; _r->edx = 0; \ - _r->esi = 0; _r->edi = 0; _r->ebp = 0; \ - _r->eax = 0; \ + _r->bx = 0; _r->cx = 0; _r->dx = 0; \ + _r->si = 0; _r->di = 0; _r->bp = 0; \ + _r->ax = 0; \ } while (0) -/* regs is struct pt_regs, pr_reg is elf_gregset_t (which is - now struct_user_regs, they are different) */ - -#define ELF_CORE_COPY_REGS(pr_reg, regs) \ - pr_reg[0] = regs->ebx; \ - pr_reg[1] = regs->ecx; \ - pr_reg[2] = regs->edx; \ - pr_reg[3] = regs->esi; \ - pr_reg[4] = regs->edi; \ - pr_reg[5] = regs->ebp; \ - pr_reg[6] = regs->eax; \ - pr_reg[7] = regs->xds & 0xffff; \ - pr_reg[8] = regs->xes & 0xffff; \ - pr_reg[9] = regs->xfs & 0xffff; \ - savesegment(gs,pr_reg[10]); \ - pr_reg[11] = regs->orig_eax; \ - pr_reg[12] = regs->eip; \ - pr_reg[13] = regs->xcs & 0xffff; \ - pr_reg[14] = regs->eflags; \ - pr_reg[15] = regs->esp; \ - pr_reg[16] = regs->xss & 0xffff; +/* + * regs is struct pt_regs, pr_reg is elf_gregset_t (which is + * now struct_user_regs, they are different) + */ + +#define ELF_CORE_COPY_REGS(pr_reg, regs) do { \ + pr_reg[0] = regs->bx; \ + pr_reg[1] = regs->cx; \ + pr_reg[2] = regs->dx; \ + pr_reg[3] = regs->si; \ + pr_reg[4] = regs->di; \ + pr_reg[5] = regs->bp; \ + pr_reg[6] = regs->ax; \ + pr_reg[7] = regs->ds & 0xffff; \ + pr_reg[8] = regs->es & 0xffff; \ + pr_reg[9] = regs->fs & 0xffff; \ + savesegment(gs, pr_reg[10]); \ + pr_reg[11] = regs->orig_ax; \ + pr_reg[12] = regs->ip; \ + pr_reg[13] = regs->cs & 0xffff; \ + pr_reg[14] = regs->flags; \ + pr_reg[15] = regs->sp; \ + pr_reg[16] = regs->ss & 0xffff; \ +} while (0); #define ELF_PLATFORM (utsname()->machine) #define set_personality_64bit() do { } while (0) -extern unsigned int vdso_enabled; #else /* CONFIG_X86_32 */ @@ -137,28 +144,57 @@ extern unsigned int vdso_enabled; #define elf_check_arch(x) \ ((x)->e_machine == EM_X86_64) +#define compat_elf_check_arch(x) elf_check_arch_ia32(x) + +static inline void start_ia32_thread(struct pt_regs *regs, u32 ip, u32 sp) +{ + asm volatile("movl %0,%%fs" :: "r" (0)); + asm volatile("movl %0,%%es; movl %0,%%ds" : : "r" (__USER32_DS)); + load_gs_index(0); + regs->ip = ip; + regs->sp = sp; + regs->flags = X86_EFLAGS_IF; + regs->cs = __USER32_CS; + regs->ss = __USER32_DS; +} + +static inline void elf_common_init(struct thread_struct *t, + struct pt_regs *regs, const u16 ds) +{ + regs->ax = regs->bx = regs->cx = regs->dx = 0; + regs->si = regs->di = regs->bp = 0; + regs->r8 = regs->r9 = regs->r10 = regs->r11 = 0; + regs->r12 = regs->r13 = regs->r14 = regs->r15 = 0; + t->fs = t->gs = 0; + t->fsindex = t->gsindex = 0; + t->ds = t->es = ds; +} + #define ELF_PLAT_INIT(_r, load_addr) do { \ - struct task_struct *cur = current; \ - (_r)->rbx = 0; (_r)->rcx = 0; (_r)->rdx = 0; \ - (_r)->rsi = 0; (_r)->rdi = 0; (_r)->rbp = 0; \ - (_r)->rax = 0; \ - (_r)->r8 = 0; \ - (_r)->r9 = 0; \ - (_r)->r10 = 0; \ - (_r)->r11 = 0; \ - (_r)->r12 = 0; \ - (_r)->r13 = 0; \ - (_r)->r14 = 0; \ - (_r)->r15 = 0; \ - cur->thread.fs = 0; cur->thread.gs = 0; \ - cur->thread.fsindex = 0; cur->thread.gsindex = 0; \ - cur->thread.ds = 0; cur->thread.es = 0; \ + elf_common_init(¤t->thread, _r, 0); \ clear_thread_flag(TIF_IA32); \ } while (0) -/* regs is struct pt_regs, pr_reg is elf_gregset_t (which is - now struct_user_regs, they are different). Assumes current is the process - getting dumped. */ +#define COMPAT_ELF_PLAT_INIT(regs, load_addr) \ + elf_common_init(¤t->thread, regs, __USER_DS) +#define compat_start_thread(regs, ip, sp) do { \ + start_ia32_thread(regs, ip, sp); \ + set_fs(USER_DS); \ + } while (0) +#define COMPAT_SET_PERSONALITY(ex, ibcs2) do { \ + if (test_thread_flag(TIF_IA32)) \ + clear_thread_flag(TIF_ABI_PENDING); \ + else \ + set_thread_flag(TIF_ABI_PENDING); \ + current->personality |= force_personality32; \ + } while (0) +#define COMPAT_ELF_PLATFORM ("i686") + +/* + * regs is struct pt_regs, pr_reg is elf_gregset_t (which is + * now struct_user_regs, they are different). Assumes current is the process + * getting dumped. + */ #define ELF_CORE_COPY_REGS(pr_reg, regs) do { \ unsigned v; \ @@ -166,22 +202,22 @@ extern unsigned int vdso_enabled; (pr_reg)[1] = (regs)->r14; \ (pr_reg)[2] = (regs)->r13; \ (pr_reg)[3] = (regs)->r12; \ - (pr_reg)[4] = (regs)->rbp; \ - (pr_reg)[5] = (regs)->rbx; \ + (pr_reg)[4] = (regs)->bp; \ + (pr_reg)[5] = (regs)->bx; \ (pr_reg)[6] = (regs)->r11; \ (pr_reg)[7] = (regs)->r10; \ (pr_reg)[8] = (regs)->r9; \ (pr_reg)[9] = (regs)->r8; \ - (pr_reg)[10] = (regs)->rax; \ - (pr_reg)[11] = (regs)->rcx; \ - (pr_reg)[12] = (regs)->rdx; \ - (pr_reg)[13] = (regs)->rsi; \ - (pr_reg)[14] = (regs)->rdi; \ - (pr_reg)[15] = (regs)->orig_rax; \ - (pr_reg)[16] = (regs)->rip; \ + (pr_reg)[10] = (regs)->ax; \ + (pr_reg)[11] = (regs)->cx; \ + (pr_reg)[12] = (regs)->dx; \ + (pr_reg)[13] = (regs)->si; \ + (pr_reg)[14] = (regs)->di; \ + (pr_reg)[15] = (regs)->orig_ax; \ + (pr_reg)[16] = (regs)->ip; \ (pr_reg)[17] = (regs)->cs; \ - (pr_reg)[18] = (regs)->eflags; \ - (pr_reg)[19] = (regs)->rsp; \ + (pr_reg)[18] = (regs)->flags; \ + (pr_reg)[19] = (regs)->sp; \ (pr_reg)[20] = (regs)->ss; \ (pr_reg)[21] = current->thread.fs; \ (pr_reg)[22] = current->thread.gs; \ @@ -189,15 +225,17 @@ extern unsigned int vdso_enabled; asm("movl %%es,%0" : "=r" (v)); (pr_reg)[24] = v; \ asm("movl %%fs,%0" : "=r" (v)); (pr_reg)[25] = v; \ asm("movl %%gs,%0" : "=r" (v)); (pr_reg)[26] = v; \ -} while(0); +} while (0); /* I'm not sure if we can use '-' here */ #define ELF_PLATFORM ("x86_64") extern void set_personality_64bit(void); -extern int vdso_enabled; +extern unsigned int sysctl_vsyscall32; +extern int force_personality32; #endif /* !CONFIG_X86_32 */ +#define CORE_DUMP_USE_REGSET #define USE_ELF_CORE_DUMP #define ELF_EXEC_PAGESIZE 4096 @@ -232,43 +270,24 @@ extern int vdso_enabled; struct task_struct; -extern int dump_task_regs (struct task_struct *, elf_gregset_t *); -extern int dump_task_fpu (struct task_struct *, elf_fpregset_t *); - -#define ELF_CORE_COPY_TASK_REGS(tsk, elf_regs) dump_task_regs(tsk, elf_regs) -#define ELF_CORE_COPY_FPREGS(tsk, elf_fpregs) dump_task_fpu(tsk, elf_fpregs) +#define ARCH_DLINFO_IA32(vdso_enabled) \ +do if (vdso_enabled) { \ + NEW_AUX_ENT(AT_SYSINFO, VDSO_ENTRY); \ + NEW_AUX_ENT(AT_SYSINFO_EHDR, VDSO_CURRENT_BASE); \ +} while (0) #ifdef CONFIG_X86_32 -extern int dump_task_extended_fpu (struct task_struct *, - struct user_fxsr_struct *); -#define ELF_CORE_COPY_XFPREGS(tsk, elf_xfpregs) \ - dump_task_extended_fpu(tsk, elf_xfpregs) -#define ELF_CORE_XFPREG_TYPE NT_PRXFPREG #define VDSO_HIGH_BASE (__fix_to_virt(FIX_VDSO)) -#define VDSO_CURRENT_BASE ((unsigned long)current->mm->context.vdso) -#define VDSO_PRELINK 0 - -#define VDSO_SYM(x) \ - (VDSO_CURRENT_BASE + (unsigned long)(x) - VDSO_PRELINK) - -#define VDSO_HIGH_EHDR ((const struct elfhdr *) VDSO_HIGH_BASE) -#define VDSO_EHDR ((const struct elfhdr *) VDSO_CURRENT_BASE) -extern void __kernel_vsyscall; - -#define VDSO_ENTRY VDSO_SYM(&__kernel_vsyscall) +#define ARCH_DLINFO ARCH_DLINFO_IA32(vdso_enabled) /* update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT entries changes */ -#define ARCH_DLINFO \ -do if (vdso_enabled) { \ - NEW_AUX_ENT(AT_SYSINFO, VDSO_ENTRY); \ - NEW_AUX_ENT(AT_SYSINFO_EHDR, VDSO_CURRENT_BASE); \ -} while (0) - #else /* CONFIG_X86_32 */ +#define VDSO_HIGH_BASE 0xffffe000U /* CONFIG_COMPAT_VDSO address */ + /* 1GB for 64bit, 8MB for 32bit */ #define STACK_RND_MASK (test_thread_flag(TIF_IA32) ? 0x7ff : 0x3fffff) @@ -277,14 +296,31 @@ do if (vdso_enabled) { \ NEW_AUX_ENT(AT_SYSINFO_EHDR,(unsigned long)current->mm->context.vdso);\ } while (0) +#define AT_SYSINFO 32 + +#define COMPAT_ARCH_DLINFO ARCH_DLINFO_IA32(sysctl_vsyscall32) + +#define COMPAT_ELF_ET_DYN_BASE (TASK_UNMAPPED_BASE + 0x1000000) + #endif /* !CONFIG_X86_32 */ +#define VDSO_CURRENT_BASE ((unsigned long)current->mm->context.vdso) + +#define VDSO_ENTRY \ + ((unsigned long) VDSO32_SYMBOL(VDSO_CURRENT_BASE, vsyscall)) + struct linux_binprm; #define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1 extern int arch_setup_additional_pages(struct linux_binprm *bprm, int executable_stack); +extern int syscall32_setup_pages(struct linux_binprm *, int exstack); +#define compat_arch_setup_additional_pages syscall32_setup_pages + +extern unsigned long arch_randomize_brk(struct mm_struct *mm); +#define arch_randomize_brk arch_randomize_brk + #endif /* __KERNEL__ */ #endif diff --git a/include/asm-x86/emergency-restart.h b/include/asm-x86/emergency-restart.h index 680c39563345..8e6aef19f8f0 100644 --- a/include/asm-x86/emergency-restart.h +++ b/include/asm-x86/emergency-restart.h @@ -1,6 +1,18 @@ #ifndef _ASM_EMERGENCY_RESTART_H #define _ASM_EMERGENCY_RESTART_H +enum reboot_type { + BOOT_TRIPLE = 't', + BOOT_KBD = 'k', +#ifdef CONFIG_X86_32 + BOOT_BIOS = 'b', +#endif + BOOT_ACPI = 'a', + BOOT_EFI = 'e' +}; + +extern enum reboot_type reboot_type; + extern void machine_emergency_restart(void); #endif /* _ASM_EMERGENCY_RESTART_H */ diff --git a/include/asm-x86/fixmap_32.h b/include/asm-x86/fixmap_32.h index 249e753ac805..a7404d50686b 100644 --- a/include/asm-x86/fixmap_32.h +++ b/include/asm-x86/fixmap_32.h @@ -65,7 +65,7 @@ enum fixed_addresses { #endif #ifdef CONFIG_X86_VISWS_APIC FIX_CO_CPU, /* Cobalt timer */ - FIX_CO_APIC, /* Cobalt APIC Redirection Table */ + FIX_CO_APIC, /* Cobalt APIC Redirection Table */ FIX_LI_PCIA, /* Lithium PCI Bridge A */ FIX_LI_PCIB, /* Lithium PCI Bridge B */ #endif @@ -74,7 +74,7 @@ enum fixed_addresses { #endif #ifdef CONFIG_X86_CYCLONE_TIMER FIX_CYCLONE_TIMER, /*cyclone timer register*/ -#endif +#endif #ifdef CONFIG_HIGHMEM FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */ FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1, @@ -90,11 +90,23 @@ enum fixed_addresses { FIX_PARAVIRT_BOOTMAP, #endif __end_of_permanent_fixed_addresses, - /* temporary boot-time mappings, used before ioremap() is functional */ -#define NR_FIX_BTMAPS 16 - FIX_BTMAP_END = __end_of_permanent_fixed_addresses, - FIX_BTMAP_BEGIN = FIX_BTMAP_END + NR_FIX_BTMAPS - 1, + /* + * 256 temporary boot-time mappings, used by early_ioremap(), + * before ioremap() is functional. + * + * We round it up to the next 512 pages boundary so that we + * can have a single pgd entry and a single pte table: + */ +#define NR_FIX_BTMAPS 64 +#define FIX_BTMAPS_NESTING 4 + FIX_BTMAP_END = + __end_of_permanent_fixed_addresses + 512 - + (__end_of_permanent_fixed_addresses & 511), + FIX_BTMAP_BEGIN = FIX_BTMAP_END + NR_FIX_BTMAPS*FIX_BTMAPS_NESTING - 1, FIX_WP_TEST, +#ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT + FIX_OHCI1394_BASE, +#endif __end_of_fixed_addresses }; diff --git a/include/asm-x86/fixmap_64.h b/include/asm-x86/fixmap_64.h index cdfbe4a6ae6f..70ddb21e6458 100644 --- a/include/asm-x86/fixmap_64.h +++ b/include/asm-x86/fixmap_64.h @@ -15,6 +15,7 @@ #include <asm/apicdef.h> #include <asm/page.h> #include <asm/vsyscall.h> +#include <asm/efi.h> /* * Here we define all the compile-time 'special' virtual @@ -41,6 +42,11 @@ enum fixed_addresses { FIX_APIC_BASE, /* local (CPU) APIC) -- required for SMP or not */ FIX_IO_APIC_BASE_0, FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS-1, + FIX_EFI_IO_MAP_LAST_PAGE, + FIX_EFI_IO_MAP_FIRST_PAGE = FIX_EFI_IO_MAP_LAST_PAGE+MAX_EFI_IO_PAGES-1, +#ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT + FIX_OHCI1394_BASE, +#endif __end_of_fixed_addresses }; diff --git a/include/asm-x86/fpu32.h b/include/asm-x86/fpu32.h deleted file mode 100644 index 4153db5c0c31..000000000000 --- a/include/asm-x86/fpu32.h +++ /dev/null @@ -1,10 +0,0 @@ -#ifndef _FPU32_H -#define _FPU32_H 1 - -struct _fpstate_ia32; - -int restore_i387_ia32(struct task_struct *tsk, struct _fpstate_ia32 __user *buf, int fsave); -int save_i387_ia32(struct task_struct *tsk, struct _fpstate_ia32 __user *buf, - struct pt_regs *regs, int fsave); - -#endif diff --git a/include/asm-x86/futex.h b/include/asm-x86/futex.h index 1f4610e0c613..62828d63f1b1 100644 --- a/include/asm-x86/futex.h +++ b/include/asm-x86/futex.h @@ -1,5 +1,135 @@ -#ifdef CONFIG_X86_32 -# include "futex_32.h" -#else -# include "futex_64.h" +#ifndef _ASM_X86_FUTEX_H +#define _ASM_X86_FUTEX_H + +#ifdef __KERNEL__ + +#include <linux/futex.h> + +#include <asm/asm.h> +#include <asm/errno.h> +#include <asm/processor.h> +#include <asm/system.h> +#include <asm/uaccess.h> + +#define __futex_atomic_op1(insn, ret, oldval, uaddr, oparg) \ + __asm__ __volatile( \ +"1: " insn "\n" \ +"2: .section .fixup,\"ax\"\n \ +3: mov %3, %1\n \ + jmp 2b\n \ + .previous\n \ + .section __ex_table,\"a\"\n \ + .align 8\n" \ + _ASM_PTR "1b,3b\n \ + .previous" \ + : "=r" (oldval), "=r" (ret), "+m" (*uaddr) \ + : "i" (-EFAULT), "0" (oparg), "1" (0)) + +#define __futex_atomic_op2(insn, ret, oldval, uaddr, oparg) \ + __asm__ __volatile( \ +"1: movl %2, %0\n \ + movl %0, %3\n" \ + insn "\n" \ +"2: " LOCK_PREFIX "cmpxchgl %3, %2\n \ + jnz 1b\n \ +3: .section .fixup,\"ax\"\n \ +4: mov %5, %1\n \ + jmp 3b\n \ + .previous\n \ + .section __ex_table,\"a\"\n \ + .align 8\n" \ + _ASM_PTR "1b,4b,2b,4b\n \ + .previous" \ + : "=&a" (oldval), "=&r" (ret), "+m" (*uaddr), \ + "=&r" (tem) \ + : "r" (oparg), "i" (-EFAULT), "1" (0)) + +static inline int +futex_atomic_op_inuser(int encoded_op, int __user *uaddr) +{ + int op = (encoded_op >> 28) & 7; + int cmp = (encoded_op >> 24) & 15; + int oparg = (encoded_op << 8) >> 20; + int cmparg = (encoded_op << 20) >> 20; + int oldval = 0, ret, tem; + + if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) + oparg = 1 << oparg; + + if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int))) + return -EFAULT; + +#if defined(CONFIG_X86_32) && !defined(CONFIG_X86_BSWAP) + /* Real i386 machines can only support FUTEX_OP_SET */ + if (op != FUTEX_OP_SET && boot_cpu_data.x86 == 3) + return -ENOSYS; +#endif + + pagefault_disable(); + + switch (op) { + case FUTEX_OP_SET: + __futex_atomic_op1("xchgl %0, %2", ret, oldval, uaddr, oparg); + break; + case FUTEX_OP_ADD: + __futex_atomic_op1(LOCK_PREFIX "xaddl %0, %2", ret, oldval, + uaddr, oparg); + break; + case FUTEX_OP_OR: + __futex_atomic_op2("orl %4, %3", ret, oldval, uaddr, oparg); + break; + case FUTEX_OP_ANDN: + __futex_atomic_op2("andl %4, %3", ret, oldval, uaddr, ~oparg); + break; + case FUTEX_OP_XOR: + __futex_atomic_op2("xorl %4, %3", ret, oldval, uaddr, oparg); + break; + default: + ret = -ENOSYS; + } + + pagefault_enable(); + + if (!ret) { + switch (cmp) { + case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break; + case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break; + case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break; + case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break; + case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break; + case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break; + default: ret = -ENOSYS; + } + } + return ret; +} + +static inline int +futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval) +{ + if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int))) + return -EFAULT; + + __asm__ __volatile__( + "1: " LOCK_PREFIX "cmpxchgl %3, %1 \n" + + "2: .section .fixup, \"ax\" \n" + "3: mov %2, %0 \n" + " jmp 2b \n" + " .previous \n" + + " .section __ex_table, \"a\" \n" + " .align 8 \n" + _ASM_PTR " 1b,3b \n" + " .previous \n" + + : "=a" (oldval), "+m" (*uaddr) + : "i" (-EFAULT), "r" (newval), "0" (oldval) + : "memory" + ); + + return oldval; +} + +#endif #endif diff --git a/include/asm-x86/futex_32.h b/include/asm-x86/futex_32.h deleted file mode 100644 index 438ef0ec7101..000000000000 --- a/include/asm-x86/futex_32.h +++ /dev/null @@ -1,135 +0,0 @@ -#ifndef _ASM_FUTEX_H -#define _ASM_FUTEX_H - -#ifdef __KERNEL__ - -#include <linux/futex.h> -#include <asm/errno.h> -#include <asm/system.h> -#include <asm/processor.h> -#include <asm/uaccess.h> - -#define __futex_atomic_op1(insn, ret, oldval, uaddr, oparg) \ - __asm__ __volatile ( \ -"1: " insn "\n" \ -"2: .section .fixup,\"ax\"\n\ -3: mov %3, %1\n\ - jmp 2b\n\ - .previous\n\ - .section __ex_table,\"a\"\n\ - .align 8\n\ - .long 1b,3b\n\ - .previous" \ - : "=r" (oldval), "=r" (ret), "+m" (*uaddr) \ - : "i" (-EFAULT), "0" (oparg), "1" (0)) - -#define __futex_atomic_op2(insn, ret, oldval, uaddr, oparg) \ - __asm__ __volatile ( \ -"1: movl %2, %0\n\ - movl %0, %3\n" \ - insn "\n" \ -"2: " LOCK_PREFIX "cmpxchgl %3, %2\n\ - jnz 1b\n\ -3: .section .fixup,\"ax\"\n\ -4: mov %5, %1\n\ - jmp 3b\n\ - .previous\n\ - .section __ex_table,\"a\"\n\ - .align 8\n\ - .long 1b,4b,2b,4b\n\ - .previous" \ - : "=&a" (oldval), "=&r" (ret), "+m" (*uaddr), \ - "=&r" (tem) \ - : "r" (oparg), "i" (-EFAULT), "1" (0)) - -static inline int -futex_atomic_op_inuser (int encoded_op, int __user *uaddr) -{ - int op = (encoded_op >> 28) & 7; - int cmp = (encoded_op >> 24) & 15; - int oparg = (encoded_op << 8) >> 20; - int cmparg = (encoded_op << 20) >> 20; - int oldval = 0, ret, tem; - if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) - oparg = 1 << oparg; - - if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int))) - return -EFAULT; - - pagefault_disable(); - - if (op == FUTEX_OP_SET) - __futex_atomic_op1("xchgl %0, %2", ret, oldval, uaddr, oparg); - else { -#ifndef CONFIG_X86_BSWAP - if (boot_cpu_data.x86 == 3) - ret = -ENOSYS; - else -#endif - switch (op) { - case FUTEX_OP_ADD: - __futex_atomic_op1(LOCK_PREFIX "xaddl %0, %2", ret, - oldval, uaddr, oparg); - break; - case FUTEX_OP_OR: - __futex_atomic_op2("orl %4, %3", ret, oldval, uaddr, - oparg); - break; - case FUTEX_OP_ANDN: - __futex_atomic_op2("andl %4, %3", ret, oldval, uaddr, - ~oparg); - break; - case FUTEX_OP_XOR: - __futex_atomic_op2("xorl %4, %3", ret, oldval, uaddr, - oparg); - break; - default: - ret = -ENOSYS; - } - } - - pagefault_enable(); - - if (!ret) { - switch (cmp) { - case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break; - case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break; - case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break; - case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break; - case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break; - case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break; - default: ret = -ENOSYS; - } - } - return ret; -} - -static inline int -futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval) -{ - if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int))) - return -EFAULT; - - __asm__ __volatile__( - "1: " LOCK_PREFIX "cmpxchgl %3, %1 \n" - - "2: .section .fixup, \"ax\" \n" - "3: mov %2, %0 \n" - " jmp 2b \n" - " .previous \n" - - " .section __ex_table, \"a\" \n" - " .align 8 \n" - " .long 1b,3b \n" - " .previous \n" - - : "=a" (oldval), "+m" (*uaddr) - : "i" (-EFAULT), "r" (newval), "0" (oldval) - : "memory" - ); - - return oldval; -} - -#endif -#endif diff --git a/include/asm-x86/futex_64.h b/include/asm-x86/futex_64.h deleted file mode 100644 index 5cdfb08013c3..000000000000 --- a/include/asm-x86/futex_64.h +++ /dev/null @@ -1,125 +0,0 @@ -#ifndef _ASM_FUTEX_H -#define _ASM_FUTEX_H - -#ifdef __KERNEL__ - -#include <linux/futex.h> -#include <asm/errno.h> -#include <asm/system.h> -#include <asm/uaccess.h> - -#define __futex_atomic_op1(insn, ret, oldval, uaddr, oparg) \ - __asm__ __volatile ( \ -"1: " insn "\n" \ -"2: .section .fixup,\"ax\"\n\ -3: mov %3, %1\n\ - jmp 2b\n\ - .previous\n\ - .section __ex_table,\"a\"\n\ - .align 8\n\ - .quad 1b,3b\n\ - .previous" \ - : "=r" (oldval), "=r" (ret), "=m" (*uaddr) \ - : "i" (-EFAULT), "m" (*uaddr), "0" (oparg), "1" (0)) - -#define __futex_atomic_op2(insn, ret, oldval, uaddr, oparg) \ - __asm__ __volatile ( \ -"1: movl %2, %0\n\ - movl %0, %3\n" \ - insn "\n" \ -"2: " LOCK_PREFIX "cmpxchgl %3, %2\n\ - jnz 1b\n\ -3: .section .fixup,\"ax\"\n\ -4: mov %5, %1\n\ - jmp 3b\n\ - .previous\n\ - .section __ex_table,\"a\"\n\ - .align 8\n\ - .quad 1b,4b,2b,4b\n\ - .previous" \ - : "=&a" (oldval), "=&r" (ret), "=m" (*uaddr), \ - "=&r" (tem) \ - : "r" (oparg), "i" (-EFAULT), "m" (*uaddr), "1" (0)) - -static inline int -futex_atomic_op_inuser (int encoded_op, int __user *uaddr) -{ - int op = (encoded_op >> 28) & 7; - int cmp = (encoded_op >> 24) & 15; - int oparg = (encoded_op << 8) >> 20; - int cmparg = (encoded_op << 20) >> 20; - int oldval = 0, ret, tem; - if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) - oparg = 1 << oparg; - - if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int))) - return -EFAULT; - - pagefault_disable(); - - switch (op) { - case FUTEX_OP_SET: - __futex_atomic_op1("xchgl %0, %2", ret, oldval, uaddr, oparg); - break; - case FUTEX_OP_ADD: - __futex_atomic_op1(LOCK_PREFIX "xaddl %0, %2", ret, oldval, - uaddr, oparg); - break; - case FUTEX_OP_OR: - __futex_atomic_op2("orl %4, %3", ret, oldval, uaddr, oparg); - break; - case FUTEX_OP_ANDN: - __futex_atomic_op2("andl %4, %3", ret, oldval, uaddr, ~oparg); - break; - case FUTEX_OP_XOR: - __futex_atomic_op2("xorl %4, %3", ret, oldval, uaddr, oparg); - break; - default: - ret = -ENOSYS; - } - - pagefault_enable(); - - if (!ret) { - switch (cmp) { - case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break; - case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break; - case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break; - case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break; - case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break; - case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break; - default: ret = -ENOSYS; - } - } - return ret; -} - -static inline int -futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval) -{ - if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int))) - return -EFAULT; - - __asm__ __volatile__( - "1: " LOCK_PREFIX "cmpxchgl %3, %1 \n" - - "2: .section .fixup, \"ax\" \n" - "3: mov %2, %0 \n" - " jmp 2b \n" - " .previous \n" - - " .section __ex_table, \"a\" \n" - " .align 8 \n" - " .quad 1b,3b \n" - " .previous \n" - - : "=a" (oldval), "=m" (*uaddr) - : "i" (-EFAULT), "r" (newval), "0" (oldval) - : "memory" - ); - - return oldval; -} - -#endif -#endif diff --git a/include/asm-x86/gart.h b/include/asm-x86/gart.h index f704c50519b8..90958ed993fa 100644 --- a/include/asm-x86/gart.h +++ b/include/asm-x86/gart.h @@ -9,6 +9,7 @@ extern int iommu_detected; extern void gart_iommu_init(void); extern void gart_iommu_shutdown(void); extern void __init gart_parse_options(char *); +extern void early_gart_iommu_check(void); extern void gart_iommu_hole_init(void); extern int fallback_aper_order; extern int fallback_aper_force; @@ -20,6 +21,10 @@ extern int fix_aperture; #define gart_iommu_aperture 0 #define gart_iommu_aperture_allowed 0 +static inline void early_gart_iommu_check(void) +{ +} + static inline void gart_iommu_shutdown(void) { } diff --git a/include/asm-x86/geode.h b/include/asm-x86/geode.h index 771af336734f..811fe14f70b2 100644 --- a/include/asm-x86/geode.h +++ b/include/asm-x86/geode.h @@ -121,9 +121,15 @@ extern int geode_get_dev_base(unsigned int dev); #define GPIO_MAP_Z 0xE8 #define GPIO_MAP_W 0xEC -extern void geode_gpio_set(unsigned int, unsigned int); -extern void geode_gpio_clear(unsigned int, unsigned int); -extern int geode_gpio_isset(unsigned int, unsigned int); +static inline u32 geode_gpio(unsigned int nr) +{ + BUG_ON(nr > 28); + return 1 << nr; +} + +extern void geode_gpio_set(u32, unsigned int); +extern void geode_gpio_clear(u32, unsigned int); +extern int geode_gpio_isset(u32, unsigned int); extern void geode_gpio_setup_event(unsigned int, int, int); extern void geode_gpio_set_irq(unsigned int, unsigned int); diff --git a/include/asm-x86/gpio.h b/include/asm-x86/gpio.h new file mode 100644 index 000000000000..ff87fca0caf9 --- /dev/null +++ b/include/asm-x86/gpio.h @@ -0,0 +1,6 @@ +#ifndef _ASM_I386_GPIO_H +#define _ASM_I386_GPIO_H + +#include <gpio.h> + +#endif /* _ASM_I386_GPIO_H */ diff --git a/include/asm-x86/hpet.h b/include/asm-x86/hpet.h index ad8d6e758785..6a9b4ac59bf7 100644 --- a/include/asm-x86/hpet.h +++ b/include/asm-x86/hpet.h @@ -69,6 +69,7 @@ extern void force_hpet_resume(void); #include <linux/interrupt.h> +typedef irqreturn_t (*rtc_irq_handler)(int interrupt, void *cookie); extern int hpet_mask_rtc_irq_bit(unsigned long bit_mask); extern int hpet_set_rtc_irq_bit(unsigned long bit_mask); extern int hpet_set_alarm_time(unsigned char hrs, unsigned char min, @@ -77,13 +78,16 @@ extern int hpet_set_periodic_freq(unsigned long freq); extern int hpet_rtc_dropped_irq(void); extern int hpet_rtc_timer_init(void); extern irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id); +extern int hpet_register_irq_handler(rtc_irq_handler handler); +extern void hpet_unregister_irq_handler(rtc_irq_handler handler); #endif /* CONFIG_HPET_EMULATE_RTC */ -#else +#else /* CONFIG_HPET_TIMER */ static inline int hpet_enable(void) { return 0; } static inline unsigned long hpet_readl(unsigned long a) { return 0; } +static inline int is_hpet_enabled(void) { return 0; } -#endif /* CONFIG_HPET_TIMER */ +#endif #endif /* ASM_X86_HPET_H */ diff --git a/include/asm-x86/hw_irq_32.h b/include/asm-x86/hw_irq_32.h index 0bedbdf5e907..6d65fbb6358b 100644 --- a/include/asm-x86/hw_irq_32.h +++ b/include/asm-x86/hw_irq_32.h @@ -26,19 +26,19 @@ * Interrupt entry/exit code at both C and assembly level */ -extern void (*interrupt[NR_IRQS])(void); +extern void (*const interrupt[NR_IRQS])(void); #ifdef CONFIG_SMP -fastcall void reschedule_interrupt(void); -fastcall void invalidate_interrupt(void); -fastcall void call_function_interrupt(void); +void reschedule_interrupt(void); +void invalidate_interrupt(void); +void call_function_interrupt(void); #endif #ifdef CONFIG_X86_LOCAL_APIC -fastcall void apic_timer_interrupt(void); -fastcall void error_interrupt(void); -fastcall void spurious_interrupt(void); -fastcall void thermal_interrupt(void); +void apic_timer_interrupt(void); +void error_interrupt(void); +void spurious_interrupt(void); +void thermal_interrupt(void); #define platform_legacy_irq(irq) ((irq) < 16) #endif diff --git a/include/asm-x86/hw_irq_64.h b/include/asm-x86/hw_irq_64.h index a470d59da678..312a58d6dac6 100644 --- a/include/asm-x86/hw_irq_64.h +++ b/include/asm-x86/hw_irq_64.h @@ -135,11 +135,13 @@ extern void init_8259A(int aeoi); extern void send_IPI_self(int vector); extern void init_VISWS_APIC_irqs(void); extern void setup_IO_APIC(void); +extern void enable_IO_APIC(void); extern void disable_IO_APIC(void); extern void print_IO_APIC(void); extern int IO_APIC_get_PCI_irq_vector(int bus, int slot, int fn); extern void send_IPI(int dest, int vector); extern void setup_ioapic_dest(void); +extern void native_init_IRQ(void); extern unsigned long io_apic_irqs; diff --git a/include/asm-x86/i387.h b/include/asm-x86/i387.h index a8bbed349664..ba8105ca822b 100644 --- a/include/asm-x86/i387.h +++ b/include/asm-x86/i387.h @@ -1,5 +1,360 @@ -#ifdef CONFIG_X86_32 -# include "i387_32.h" +/* + * Copyright (C) 1994 Linus Torvalds + * + * Pentium III FXSR, SSE support + * General FPU state handling cleanups + * Gareth Hughes <gareth@valinux.com>, May 2000 + * x86-64 work by Andi Kleen 2002 + */ + +#ifndef _ASM_X86_I387_H +#define _ASM_X86_I387_H + +#include <linux/sched.h> +#include <linux/kernel_stat.h> +#include <linux/regset.h> +#include <asm/processor.h> +#include <asm/sigcontext.h> +#include <asm/user.h> +#include <asm/uaccess.h> + +extern void fpu_init(void); +extern unsigned int mxcsr_feature_mask; +extern void mxcsr_feature_mask_init(void); +extern void init_fpu(struct task_struct *child); +extern asmlinkage void math_state_restore(void); + +extern user_regset_active_fn fpregs_active, xfpregs_active; +extern user_regset_get_fn fpregs_get, xfpregs_get, fpregs_soft_get; +extern user_regset_set_fn fpregs_set, xfpregs_set, fpregs_soft_set; + +#ifdef CONFIG_IA32_EMULATION +struct _fpstate_ia32; +extern int save_i387_ia32(struct _fpstate_ia32 __user *buf); +extern int restore_i387_ia32(struct _fpstate_ia32 __user *buf); +#endif + +#ifdef CONFIG_X86_64 + +/* Ignore delayed exceptions from user space */ +static inline void tolerant_fwait(void) +{ + asm volatile("1: fwait\n" + "2:\n" + " .section __ex_table,\"a\"\n" + " .align 8\n" + " .quad 1b,2b\n" + " .previous\n"); +} + +static inline int restore_fpu_checking(struct i387_fxsave_struct *fx) +{ + int err; + + asm volatile("1: rex64/fxrstor (%[fx])\n\t" + "2:\n" + ".section .fixup,\"ax\"\n" + "3: movl $-1,%[err]\n" + " jmp 2b\n" + ".previous\n" + ".section __ex_table,\"a\"\n" + " .align 8\n" + " .quad 1b,3b\n" + ".previous" + : [err] "=r" (err) +#if 0 /* See comment in __save_init_fpu() below. */ + : [fx] "r" (fx), "m" (*fx), "0" (0)); +#else + : [fx] "cdaSDb" (fx), "m" (*fx), "0" (0)); +#endif + if (unlikely(err)) + init_fpu(current); + return err; +} + +#define X87_FSW_ES (1 << 7) /* Exception Summary */ + +/* AMD CPUs don't save/restore FDP/FIP/FOP unless an exception + is pending. Clear the x87 state here by setting it to fixed + values. The kernel data segment can be sometimes 0 and sometimes + new user value. Both should be ok. + Use the PDA as safe address because it should be already in L1. */ +static inline void clear_fpu_state(struct i387_fxsave_struct *fx) +{ + if (unlikely(fx->swd & X87_FSW_ES)) + asm volatile("fnclex"); + alternative_input(ASM_NOP8 ASM_NOP2, + " emms\n" /* clear stack tags */ + " fildl %%gs:0", /* load to clear state */ + X86_FEATURE_FXSAVE_LEAK); +} + +static inline int save_i387_checking(struct i387_fxsave_struct __user *fx) +{ + int err; + + asm volatile("1: rex64/fxsave (%[fx])\n\t" + "2:\n" + ".section .fixup,\"ax\"\n" + "3: movl $-1,%[err]\n" + " jmp 2b\n" + ".previous\n" + ".section __ex_table,\"a\"\n" + " .align 8\n" + " .quad 1b,3b\n" + ".previous" + : [err] "=r" (err), "=m" (*fx) +#if 0 /* See comment in __fxsave_clear() below. */ + : [fx] "r" (fx), "0" (0)); +#else + : [fx] "cdaSDb" (fx), "0" (0)); +#endif + if (unlikely(err) && __clear_user(fx, sizeof(struct i387_fxsave_struct))) + err = -EFAULT; + /* No need to clear here because the caller clears USED_MATH */ + return err; +} + +static inline void __save_init_fpu(struct task_struct *tsk) +{ + /* Using "rex64; fxsave %0" is broken because, if the memory operand + uses any extended registers for addressing, a second REX prefix + will be generated (to the assembler, rex64 followed by semicolon + is a separate instruction), and hence the 64-bitness is lost. */ +#if 0 + /* Using "fxsaveq %0" would be the ideal choice, but is only supported + starting with gas 2.16. */ + __asm__ __volatile__("fxsaveq %0" + : "=m" (tsk->thread.i387.fxsave)); +#elif 0 + /* Using, as a workaround, the properly prefixed form below isn't + accepted by any binutils version so far released, complaining that + the same type of prefix is used twice if an extended register is + needed for addressing (fix submitted to mainline 2005-11-21). */ + __asm__ __volatile__("rex64/fxsave %0" + : "=m" (tsk->thread.i387.fxsave)); +#else + /* This, however, we can work around by forcing the compiler to select + an addressing mode that doesn't require extended registers. */ + __asm__ __volatile__("rex64/fxsave %P2(%1)" + : "=m" (tsk->thread.i387.fxsave) + : "cdaSDb" (tsk), + "i" (offsetof(__typeof__(*tsk), + thread.i387.fxsave))); +#endif + clear_fpu_state(&tsk->thread.i387.fxsave); + task_thread_info(tsk)->status &= ~TS_USEDFPU; +} + +/* + * Signal frame handlers. + */ + +static inline int save_i387(struct _fpstate __user *buf) +{ + struct task_struct *tsk = current; + int err = 0; + + BUILD_BUG_ON(sizeof(struct user_i387_struct) != + sizeof(tsk->thread.i387.fxsave)); + + if ((unsigned long)buf % 16) + printk("save_i387: bad fpstate %p\n", buf); + + if (!used_math()) + return 0; + clear_used_math(); /* trigger finit */ + if (task_thread_info(tsk)->status & TS_USEDFPU) { + err = save_i387_checking((struct i387_fxsave_struct __user *)buf); + if (err) return err; + task_thread_info(tsk)->status &= ~TS_USEDFPU; + stts(); + } else { + if (__copy_to_user(buf, &tsk->thread.i387.fxsave, + sizeof(struct i387_fxsave_struct))) + return -1; + } + return 1; +} + +/* + * This restores directly out of user space. Exceptions are handled. + */ +static inline int restore_i387(struct _fpstate __user *buf) +{ + set_used_math(); + if (!(task_thread_info(current)->status & TS_USEDFPU)) { + clts(); + task_thread_info(current)->status |= TS_USEDFPU; + } + return restore_fpu_checking((__force struct i387_fxsave_struct *)buf); +} + +#else /* CONFIG_X86_32 */ + +static inline void tolerant_fwait(void) +{ + asm volatile("fnclex ; fwait"); +} + +static inline void restore_fpu(struct task_struct *tsk) +{ + /* + * The "nop" is needed to make the instructions the same + * length. + */ + alternative_input( + "nop ; frstor %1", + "fxrstor %1", + X86_FEATURE_FXSR, + "m" ((tsk)->thread.i387.fxsave)); +} + +/* We need a safe address that is cheap to find and that is already + in L1 during context switch. The best choices are unfortunately + different for UP and SMP */ +#ifdef CONFIG_SMP +#define safe_address (__per_cpu_offset[0]) #else -# include "i387_64.h" +#define safe_address (kstat_cpu(0).cpustat.user) #endif + +/* + * These must be called with preempt disabled + */ +static inline void __save_init_fpu(struct task_struct *tsk) +{ + /* Use more nops than strictly needed in case the compiler + varies code */ + alternative_input( + "fnsave %[fx] ;fwait;" GENERIC_NOP8 GENERIC_NOP4, + "fxsave %[fx]\n" + "bt $7,%[fsw] ; jnc 1f ; fnclex\n1:", + X86_FEATURE_FXSR, + [fx] "m" (tsk->thread.i387.fxsave), + [fsw] "m" (tsk->thread.i387.fxsave.swd) : "memory"); + /* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception + is pending. Clear the x87 state here by setting it to fixed + values. safe_address is a random variable that should be in L1 */ + alternative_input( + GENERIC_NOP8 GENERIC_NOP2, + "emms\n\t" /* clear stack tags */ + "fildl %[addr]", /* set F?P to defined value */ + X86_FEATURE_FXSAVE_LEAK, + [addr] "m" (safe_address)); + task_thread_info(tsk)->status &= ~TS_USEDFPU; +} + +/* + * Signal frame handlers... + */ +extern int save_i387(struct _fpstate __user *buf); +extern int restore_i387(struct _fpstate __user *buf); + +#endif /* CONFIG_X86_64 */ + +static inline void __unlazy_fpu(struct task_struct *tsk) +{ + if (task_thread_info(tsk)->status & TS_USEDFPU) { + __save_init_fpu(tsk); + stts(); + } else + tsk->fpu_counter = 0; +} + +static inline void __clear_fpu(struct task_struct *tsk) +{ + if (task_thread_info(tsk)->status & TS_USEDFPU) { + tolerant_fwait(); + task_thread_info(tsk)->status &= ~TS_USEDFPU; + stts(); + } +} + +static inline void kernel_fpu_begin(void) +{ + struct thread_info *me = current_thread_info(); + preempt_disable(); + if (me->status & TS_USEDFPU) + __save_init_fpu(me->task); + else + clts(); +} + +static inline void kernel_fpu_end(void) +{ + stts(); + preempt_enable(); +} + +#ifdef CONFIG_X86_64 + +static inline void save_init_fpu(struct task_struct *tsk) +{ + __save_init_fpu(tsk); + stts(); +} + +#define unlazy_fpu __unlazy_fpu +#define clear_fpu __clear_fpu + +#else /* CONFIG_X86_32 */ + +/* + * These disable preemption on their own and are safe + */ +static inline void save_init_fpu(struct task_struct *tsk) +{ + preempt_disable(); + __save_init_fpu(tsk); + stts(); + preempt_enable(); +} + +static inline void unlazy_fpu(struct task_struct *tsk) +{ + preempt_disable(); + __unlazy_fpu(tsk); + preempt_enable(); +} + +static inline void clear_fpu(struct task_struct *tsk) +{ + preempt_disable(); + __clear_fpu(tsk); + preempt_enable(); +} + +#endif /* CONFIG_X86_64 */ + +/* + * i387 state interaction + */ +static inline unsigned short get_fpu_cwd(struct task_struct *tsk) +{ + if (cpu_has_fxsr) { + return tsk->thread.i387.fxsave.cwd; + } else { + return (unsigned short)tsk->thread.i387.fsave.cwd; + } +} + +static inline unsigned short get_fpu_swd(struct task_struct *tsk) +{ + if (cpu_has_fxsr) { + return tsk->thread.i387.fxsave.swd; + } else { + return (unsigned short)tsk->thread.i387.fsave.swd; + } +} + +static inline unsigned short get_fpu_mxcsr(struct task_struct *tsk) +{ + if (cpu_has_xmm) { + return tsk->thread.i387.fxsave.mxcsr; + } else { + return MXCSR_DEFAULT; + } +} + +#endif /* _ASM_X86_I387_H */ diff --git a/include/asm-x86/i387_32.h b/include/asm-x86/i387_32.h deleted file mode 100644 index cdd1e248e3b4..000000000000 --- a/include/asm-x86/i387_32.h +++ /dev/null @@ -1,151 +0,0 @@ -/* - * include/asm-i386/i387.h - * - * Copyright (C) 1994 Linus Torvalds - * - * Pentium III FXSR, SSE support - * General FPU state handling cleanups - * Gareth Hughes <gareth@valinux.com>, May 2000 - */ - -#ifndef __ASM_I386_I387_H -#define __ASM_I386_I387_H - -#include <linux/sched.h> -#include <linux/init.h> -#include <linux/kernel_stat.h> -#include <asm/processor.h> -#include <asm/sigcontext.h> -#include <asm/user.h> - -extern void mxcsr_feature_mask_init(void); -extern void init_fpu(struct task_struct *); - -/* - * FPU lazy state save handling... - */ - -/* - * The "nop" is needed to make the instructions the same - * length. - */ -#define restore_fpu(tsk) \ - alternative_input( \ - "nop ; frstor %1", \ - "fxrstor %1", \ - X86_FEATURE_FXSR, \ - "m" ((tsk)->thread.i387.fxsave)) - -extern void kernel_fpu_begin(void); -#define kernel_fpu_end() do { stts(); preempt_enable(); } while(0) - -/* We need a safe address that is cheap to find and that is already - in L1 during context switch. The best choices are unfortunately - different for UP and SMP */ -#ifdef CONFIG_SMP -#define safe_address (__per_cpu_offset[0]) -#else -#define safe_address (kstat_cpu(0).cpustat.user) -#endif - -/* - * These must be called with preempt disabled - */ -static inline void __save_init_fpu( struct task_struct *tsk ) -{ - /* Use more nops than strictly needed in case the compiler - varies code */ - alternative_input( - "fnsave %[fx] ;fwait;" GENERIC_NOP8 GENERIC_NOP4, - "fxsave %[fx]\n" - "bt $7,%[fsw] ; jnc 1f ; fnclex\n1:", - X86_FEATURE_FXSR, - [fx] "m" (tsk->thread.i387.fxsave), - [fsw] "m" (tsk->thread.i387.fxsave.swd) : "memory"); - /* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception - is pending. Clear the x87 state here by setting it to fixed - values. safe_address is a random variable that should be in L1 */ - alternative_input( - GENERIC_NOP8 GENERIC_NOP2, - "emms\n\t" /* clear stack tags */ - "fildl %[addr]", /* set F?P to defined value */ - X86_FEATURE_FXSAVE_LEAK, - [addr] "m" (safe_address)); - task_thread_info(tsk)->status &= ~TS_USEDFPU; -} - -#define __unlazy_fpu( tsk ) do { \ - if (task_thread_info(tsk)->status & TS_USEDFPU) { \ - __save_init_fpu(tsk); \ - stts(); \ - } else \ - tsk->fpu_counter = 0; \ -} while (0) - -#define __clear_fpu( tsk ) \ -do { \ - if (task_thread_info(tsk)->status & TS_USEDFPU) { \ - asm volatile("fnclex ; fwait"); \ - task_thread_info(tsk)->status &= ~TS_USEDFPU; \ - stts(); \ - } \ -} while (0) - - -/* - * These disable preemption on their own and are safe - */ -static inline void save_init_fpu( struct task_struct *tsk ) -{ - preempt_disable(); - __save_init_fpu(tsk); - stts(); - preempt_enable(); -} - -#define unlazy_fpu( tsk ) do { \ - preempt_disable(); \ - __unlazy_fpu(tsk); \ - preempt_enable(); \ -} while (0) - -#define clear_fpu( tsk ) do { \ - preempt_disable(); \ - __clear_fpu( tsk ); \ - preempt_enable(); \ -} while (0) - -/* - * FPU state interaction... - */ -extern unsigned short get_fpu_cwd( struct task_struct *tsk ); -extern unsigned short get_fpu_swd( struct task_struct *tsk ); -extern unsigned short get_fpu_mxcsr( struct task_struct *tsk ); -extern asmlinkage void math_state_restore(void); - -/* - * Signal frame handlers... - */ -extern int save_i387( struct _fpstate __user *buf ); -extern int restore_i387( struct _fpstate __user *buf ); - -/* - * ptrace request handers... - */ -extern int get_fpregs( struct user_i387_struct __user *buf, - struct task_struct *tsk ); -extern int set_fpregs( struct task_struct *tsk, - struct user_i387_struct __user *buf ); - -extern int get_fpxregs( struct user_fxsr_struct __user *buf, - struct task_struct *tsk ); -extern int set_fpxregs( struct task_struct *tsk, - struct user_fxsr_struct __user *buf ); - -/* - * FPU state for core dumps... - */ -extern int dump_fpu( struct pt_regs *regs, - struct user_i387_struct *fpu ); - -#endif /* __ASM_I386_I387_H */ diff --git a/include/asm-x86/i387_64.h b/include/asm-x86/i387_64.h deleted file mode 100644 index 3a4ffba3d6bc..000000000000 --- a/include/asm-x86/i387_64.h +++ /dev/null @@ -1,214 +0,0 @@ -/* - * include/asm-x86_64/i387.h - * - * Copyright (C) 1994 Linus Torvalds - * - * Pentium III FXSR, SSE support - * General FPU state handling cleanups - * Gareth Hughes <gareth@valinux.com>, May 2000 - * x86-64 work by Andi Kleen 2002 - */ - -#ifndef __ASM_X86_64_I387_H -#define __ASM_X86_64_I387_H - -#include <linux/sched.h> -#include <asm/processor.h> -#include <asm/sigcontext.h> -#include <asm/user.h> -#include <asm/thread_info.h> -#include <asm/uaccess.h> - -extern void fpu_init(void); -extern unsigned int mxcsr_feature_mask; -extern void mxcsr_feature_mask_init(void); -extern void init_fpu(struct task_struct *child); -extern int save_i387(struct _fpstate __user *buf); -extern asmlinkage void math_state_restore(void); - -/* - * FPU lazy state save handling... - */ - -#define unlazy_fpu(tsk) do { \ - if (task_thread_info(tsk)->status & TS_USEDFPU) \ - save_init_fpu(tsk); \ - else \ - tsk->fpu_counter = 0; \ -} while (0) - -/* Ignore delayed exceptions from user space */ -static inline void tolerant_fwait(void) -{ - asm volatile("1: fwait\n" - "2:\n" - " .section __ex_table,\"a\"\n" - " .align 8\n" - " .quad 1b,2b\n" - " .previous\n"); -} - -#define clear_fpu(tsk) do { \ - if (task_thread_info(tsk)->status & TS_USEDFPU) { \ - tolerant_fwait(); \ - task_thread_info(tsk)->status &= ~TS_USEDFPU; \ - stts(); \ - } \ -} while (0) - -/* - * ptrace request handers... - */ -extern int get_fpregs(struct user_i387_struct __user *buf, - struct task_struct *tsk); -extern int set_fpregs(struct task_struct *tsk, - struct user_i387_struct __user *buf); - -/* - * i387 state interaction - */ -#define get_fpu_mxcsr(t) ((t)->thread.i387.fxsave.mxcsr) -#define get_fpu_cwd(t) ((t)->thread.i387.fxsave.cwd) -#define get_fpu_fxsr_twd(t) ((t)->thread.i387.fxsave.twd) -#define get_fpu_swd(t) ((t)->thread.i387.fxsave.swd) -#define set_fpu_cwd(t,val) ((t)->thread.i387.fxsave.cwd = (val)) -#define set_fpu_swd(t,val) ((t)->thread.i387.fxsave.swd = (val)) -#define set_fpu_fxsr_twd(t,val) ((t)->thread.i387.fxsave.twd = (val)) - -#define X87_FSW_ES (1 << 7) /* Exception Summary */ - -/* AMD CPUs don't save/restore FDP/FIP/FOP unless an exception - is pending. Clear the x87 state here by setting it to fixed - values. The kernel data segment can be sometimes 0 and sometimes - new user value. Both should be ok. - Use the PDA as safe address because it should be already in L1. */ -static inline void clear_fpu_state(struct i387_fxsave_struct *fx) -{ - if (unlikely(fx->swd & X87_FSW_ES)) - asm volatile("fnclex"); - alternative_input(ASM_NOP8 ASM_NOP2, - " emms\n" /* clear stack tags */ - " fildl %%gs:0", /* load to clear state */ - X86_FEATURE_FXSAVE_LEAK); -} - -static inline int restore_fpu_checking(struct i387_fxsave_struct *fx) -{ - int err; - - asm volatile("1: rex64/fxrstor (%[fx])\n\t" - "2:\n" - ".section .fixup,\"ax\"\n" - "3: movl $-1,%[err]\n" - " jmp 2b\n" - ".previous\n" - ".section __ex_table,\"a\"\n" - " .align 8\n" - " .quad 1b,3b\n" - ".previous" - : [err] "=r" (err) -#if 0 /* See comment in __fxsave_clear() below. */ - : [fx] "r" (fx), "m" (*fx), "0" (0)); -#else - : [fx] "cdaSDb" (fx), "m" (*fx), "0" (0)); -#endif - if (unlikely(err)) - init_fpu(current); - return err; -} - -static inline int save_i387_checking(struct i387_fxsave_struct __user *fx) -{ - int err; - - asm volatile("1: rex64/fxsave (%[fx])\n\t" - "2:\n" - ".section .fixup,\"ax\"\n" - "3: movl $-1,%[err]\n" - " jmp 2b\n" - ".previous\n" - ".section __ex_table,\"a\"\n" - " .align 8\n" - " .quad 1b,3b\n" - ".previous" - : [err] "=r" (err), "=m" (*fx) -#if 0 /* See comment in __fxsave_clear() below. */ - : [fx] "r" (fx), "0" (0)); -#else - : [fx] "cdaSDb" (fx), "0" (0)); -#endif - if (unlikely(err) && __clear_user(fx, sizeof(struct i387_fxsave_struct))) - err = -EFAULT; - /* No need to clear here because the caller clears USED_MATH */ - return err; -} - -static inline void __fxsave_clear(struct task_struct *tsk) -{ - /* Using "rex64; fxsave %0" is broken because, if the memory operand - uses any extended registers for addressing, a second REX prefix - will be generated (to the assembler, rex64 followed by semicolon - is a separate instruction), and hence the 64-bitness is lost. */ -#if 0 - /* Using "fxsaveq %0" would be the ideal choice, but is only supported - starting with gas 2.16. */ - __asm__ __volatile__("fxsaveq %0" - : "=m" (tsk->thread.i387.fxsave)); -#elif 0 - /* Using, as a workaround, the properly prefixed form below isn't - accepted by any binutils version so far released, complaining that - the same type of prefix is used twice if an extended register is - needed for addressing (fix submitted to mainline 2005-11-21). */ - __asm__ __volatile__("rex64/fxsave %0" - : "=m" (tsk->thread.i387.fxsave)); -#else - /* This, however, we can work around by forcing the compiler to select - an addressing mode that doesn't require extended registers. */ - __asm__ __volatile__("rex64/fxsave %P2(%1)" - : "=m" (tsk->thread.i387.fxsave) - : "cdaSDb" (tsk), - "i" (offsetof(__typeof__(*tsk), - thread.i387.fxsave))); -#endif - clear_fpu_state(&tsk->thread.i387.fxsave); -} - -static inline void kernel_fpu_begin(void) -{ - struct thread_info *me = current_thread_info(); - preempt_disable(); - if (me->status & TS_USEDFPU) { - __fxsave_clear(me->task); - me->status &= ~TS_USEDFPU; - return; - } - clts(); -} - -static inline void kernel_fpu_end(void) -{ - stts(); - preempt_enable(); -} - -static inline void save_init_fpu(struct task_struct *tsk) -{ - __fxsave_clear(tsk); - task_thread_info(tsk)->status &= ~TS_USEDFPU; - stts(); -} - -/* - * This restores directly out of user space. Exceptions are handled. - */ -static inline int restore_i387(struct _fpstate __user *buf) -{ - set_used_math(); - if (!(task_thread_info(current)->status & TS_USEDFPU)) { - clts(); - task_thread_info(current)->status |= TS_USEDFPU; - } - return restore_fpu_checking((__force struct i387_fxsave_struct *)buf); -} - -#endif /* __ASM_X86_64_I387_H */ diff --git a/include/asm-x86/i8253.h b/include/asm-x86/i8253.h index 747548ec5d1d..b51c0487fc41 100644 --- a/include/asm-x86/i8253.h +++ b/include/asm-x86/i8253.h @@ -12,4 +12,7 @@ extern struct clock_event_device *global_clock_event; extern void setup_pit_timer(void); +#define inb_pit inb_p +#define outb_pit outb_p + #endif /* __ASM_I8253_H__ */ diff --git a/include/asm-x86/i8259.h b/include/asm-x86/i8259.h index 29d8f9a6b3fc..67c319e0efc7 100644 --- a/include/asm-x86/i8259.h +++ b/include/asm-x86/i8259.h @@ -3,10 +3,25 @@ extern unsigned int cached_irq_mask; -#define __byte(x,y) (((unsigned char *) &(y))[x]) +#define __byte(x,y) (((unsigned char *) &(y))[x]) #define cached_master_mask (__byte(0, cached_irq_mask)) #define cached_slave_mask (__byte(1, cached_irq_mask)) +/* i8259A PIC registers */ +#define PIC_MASTER_CMD 0x20 +#define PIC_MASTER_IMR 0x21 +#define PIC_MASTER_ISR PIC_MASTER_CMD +#define PIC_MASTER_POLL PIC_MASTER_ISR +#define PIC_MASTER_OCW3 PIC_MASTER_ISR +#define PIC_SLAVE_CMD 0xa0 +#define PIC_SLAVE_IMR 0xa1 + +/* i8259A PIC related value */ +#define PIC_CASCADE_IR 2 +#define MASTER_ICW4_DEFAULT 0x01 +#define SLAVE_ICW4_DEFAULT 0x01 +#define PIC_ICW4_AEOI 2 + extern spinlock_t i8259A_lock; extern void init_8259A(int auto_eoi); @@ -14,4 +29,7 @@ extern void enable_8259A_irq(unsigned int irq); extern void disable_8259A_irq(unsigned int irq); extern unsigned int startup_8259A_irq(unsigned int irq); +#define inb_pic inb_p +#define outb_pic outb_p + #endif /* __ASM_I8259_H__ */ diff --git a/include/asm-x86/ia32.h b/include/asm-x86/ia32.h index 0190b7c4e319..aa9733206e29 100644 --- a/include/asm-x86/ia32.h +++ b/include/asm-x86/ia32.h @@ -159,12 +159,6 @@ struct ustat32 { #define IA32_STACK_TOP IA32_PAGE_OFFSET #ifdef __KERNEL__ -struct user_desc; -struct siginfo_t; -int do_get_thread_area(struct thread_struct *t, struct user_desc __user *info); -int do_set_thread_area(struct thread_struct *t, struct user_desc __user *info); -int ia32_child_tls(struct task_struct *p, struct pt_regs *childregs); - struct linux_binprm; extern int ia32_setup_arg_pages(struct linux_binprm *bprm, unsigned long stack_top, int exec_stack); diff --git a/include/asm-x86/ia32_unistd.h b/include/asm-x86/ia32_unistd.h index 5b52ce507338..61cea9e7c5c1 100644 --- a/include/asm-x86/ia32_unistd.h +++ b/include/asm-x86/ia32_unistd.h @@ -5,7 +5,7 @@ * This file contains the system call numbers of the ia32 port, * this is for the kernel only. * Only add syscalls here where some part of the kernel needs to know - * the number. This should be otherwise in sync with asm-i386/unistd.h. -AK + * the number. This should be otherwise in sync with asm-x86/unistd_32.h. -AK */ #define __NR_ia32_restart_syscall 0 diff --git a/include/asm-x86/ide.h b/include/asm-x86/ide.h index 42130adf9c7c..c2552d8bebf7 100644 --- a/include/asm-x86/ide.h +++ b/include/asm-x86/ide.h @@ -1,6 +1,4 @@ /* - * linux/include/asm-i386/ide.h - * * Copyright (C) 1994-1996 Linus Torvalds & authors */ diff --git a/include/asm-x86/idle.h b/include/asm-x86/idle.h index 6bd47dcf2067..d240e5b30a45 100644 --- a/include/asm-x86/idle.h +++ b/include/asm-x86/idle.h @@ -6,7 +6,6 @@ struct notifier_block; void idle_notifier_register(struct notifier_block *n); -void idle_notifier_unregister(struct notifier_block *n); void enter_idle(void); void exit_idle(void); diff --git a/include/asm-x86/io_32.h b/include/asm-x86/io_32.h index fe881cd1e6f4..586d7aa54ceb 100644 --- a/include/asm-x86/io_32.h +++ b/include/asm-x86/io_32.h @@ -100,8 +100,6 @@ static inline void * phys_to_virt(unsigned long address) */ #define page_to_phys(page) ((dma_addr_t)page_to_pfn(page) << PAGE_SHIFT) -extern void __iomem * __ioremap(unsigned long offset, unsigned long size, unsigned long flags); - /** * ioremap - map bus memory into CPU space * @offset: bus address of the memory @@ -111,32 +109,39 @@ extern void __iomem * __ioremap(unsigned long offset, unsigned long size, unsign * make bus memory CPU accessible via the readb/readw/readl/writeb/ * writew/writel functions and the other mmio helpers. The returned * address is not guaranteed to be usable directly as a virtual - * address. + * address. * * If the area you are trying to map is a PCI BAR you should have a * look at pci_iomap(). */ +extern void __iomem *ioremap_nocache(unsigned long offset, unsigned long size); +extern void __iomem *ioremap_cache(unsigned long offset, unsigned long size); -static inline void __iomem * ioremap(unsigned long offset, unsigned long size) +/* + * The default ioremap() behavior is non-cached: + */ +static inline void __iomem *ioremap(unsigned long offset, unsigned long size) { - return __ioremap(offset, size, 0); + return ioremap_nocache(offset, size); } -extern void __iomem * ioremap_nocache(unsigned long offset, unsigned long size); extern void iounmap(volatile void __iomem *addr); /* - * bt_ioremap() and bt_iounmap() are for temporary early boot-time + * early_ioremap() and early_iounmap() are for temporary early boot-time * mappings, before the real ioremap() is functional. * A boot-time mapping is currently limited to at most 16 pages. */ -extern void *bt_ioremap(unsigned long offset, unsigned long size); -extern void bt_iounmap(void *addr, unsigned long size); +extern void early_ioremap_init(void); +extern void early_ioremap_clear(void); +extern void early_ioremap_reset(void); +extern void *early_ioremap(unsigned long offset, unsigned long size); +extern void early_iounmap(void *addr, unsigned long size); extern void __iomem *fix_ioremap(unsigned idx, unsigned long phys); /* Use early IO mappings for DMI because it's initialized early */ -#define dmi_ioremap bt_ioremap -#define dmi_iounmap bt_iounmap +#define dmi_ioremap early_ioremap +#define dmi_iounmap early_iounmap #define dmi_alloc alloc_bootmem /* @@ -250,10 +255,10 @@ static inline void flush_write_buffers(void) #endif /* __KERNEL__ */ -static inline void native_io_delay(void) -{ - asm volatile("outb %%al,$0x80" : : : "memory"); -} +extern void native_io_delay(void); + +extern int io_delay_type; +extern void io_delay_init(void); #if defined(CONFIG_PARAVIRT) #include <asm/paravirt.h> diff --git a/include/asm-x86/io_64.h b/include/asm-x86/io_64.h index a037b0794332..f64a59cc396d 100644 --- a/include/asm-x86/io_64.h +++ b/include/asm-x86/io_64.h @@ -35,12 +35,24 @@ * - Arnaldo Carvalho de Melo <acme@conectiva.com.br> */ -#define __SLOW_DOWN_IO "\noutb %%al,$0x80" +extern void native_io_delay(void); -#ifdef REALLY_SLOW_IO -#define __FULL_SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO +extern int io_delay_type; +extern void io_delay_init(void); + +#if defined(CONFIG_PARAVIRT) +#include <asm/paravirt.h> #else -#define __FULL_SLOW_DOWN_IO __SLOW_DOWN_IO + +static inline void slow_down_io(void) +{ + native_io_delay(); +#ifdef REALLY_SLOW_IO + native_io_delay(); + native_io_delay(); + native_io_delay(); +#endif +} #endif /* @@ -52,9 +64,15 @@ static inline void out##s(unsigned x value, unsigned short port) { #define __OUT2(s,s1,s2) \ __asm__ __volatile__ ("out" #s " %" s1 "0,%" s2 "1" +#ifndef REALLY_SLOW_IO +#define REALLY_SLOW_IO +#define UNSET_REALLY_SLOW_IO +#endif + #define __OUT(s,s1,x) \ __OUT1(s,x) __OUT2(s,s1,"w") : : "a" (value), "Nd" (port)); } \ -__OUT1(s##_p,x) __OUT2(s,s1,"w") __FULL_SLOW_DOWN_IO : : "a" (value), "Nd" (port));} \ +__OUT1(s##_p, x) __OUT2(s, s1, "w") : : "a" (value), "Nd" (port)); \ + slow_down_io(); } #define __IN1(s) \ static inline RETURN_TYPE in##s(unsigned short port) { RETURN_TYPE _v; @@ -63,8 +81,13 @@ static inline RETURN_TYPE in##s(unsigned short port) { RETURN_TYPE _v; __asm__ __volatile__ ("in" #s " %" s2 "1,%" s1 "0" #define __IN(s,s1,i...) \ -__IN1(s) __IN2(s,s1,"w") : "=a" (_v) : "Nd" (port) ,##i ); return _v; } \ -__IN1(s##_p) __IN2(s,s1,"w") __FULL_SLOW_DOWN_IO : "=a" (_v) : "Nd" (port) ,##i ); return _v; } \ +__IN1(s) __IN2(s, s1, "w") : "=a" (_v) : "Nd" (port), ##i); return _v; } \ +__IN1(s##_p) __IN2(s, s1, "w") : "=a" (_v) : "Nd" (port), ##i); \ + slow_down_io(); return _v; } + +#ifdef UNSET_REALLY_SLOW_IO +#undef REALLY_SLOW_IO +#endif #define __INS(s) \ static inline void ins##s(unsigned short port, void * addr, unsigned long count) \ @@ -127,13 +150,6 @@ static inline void * phys_to_virt(unsigned long address) #include <asm-generic/iomap.h> -extern void __iomem *__ioremap(unsigned long offset, unsigned long size, unsigned long flags); - -static inline void __iomem * ioremap (unsigned long offset, unsigned long size) -{ - return __ioremap(offset, size, 0); -} - extern void *early_ioremap(unsigned long addr, unsigned long size); extern void early_iounmap(void *addr, unsigned long size); @@ -142,8 +158,19 @@ extern void early_iounmap(void *addr, unsigned long size); * it's useful if some control registers are in such an area and write combining * or read caching is not desirable: */ -extern void __iomem * ioremap_nocache (unsigned long offset, unsigned long size); +extern void __iomem *ioremap_nocache(unsigned long offset, unsigned long size); +extern void __iomem *ioremap_cache(unsigned long offset, unsigned long size); + +/* + * The default ioremap() behavior is non-cached: + */ +static inline void __iomem *ioremap(unsigned long offset, unsigned long size) +{ + return ioremap_nocache(offset, size); +} + extern void iounmap(volatile void __iomem *addr); + extern void __iomem *fix_ioremap(unsigned idx, unsigned long phys); /* diff --git a/include/asm-x86/io_apic.h b/include/asm-x86/io_apic.h index 88494966beeb..0f5b3fef0b08 100644 --- a/include/asm-x86/io_apic.h +++ b/include/asm-x86/io_apic.h @@ -1,5 +1,159 @@ +#ifndef __ASM_IO_APIC_H +#define __ASM_IO_APIC_H + +#include <asm/types.h> +#include <asm/mpspec.h> +#include <asm/apicdef.h> + +/* + * Intel IO-APIC support for SMP and UP systems. + * + * Copyright (C) 1997, 1998, 1999, 2000 Ingo Molnar + */ + +/* + * The structure of the IO-APIC: + */ +union IO_APIC_reg_00 { + u32 raw; + struct { + u32 __reserved_2 : 14, + LTS : 1, + delivery_type : 1, + __reserved_1 : 8, + ID : 8; + } __attribute__ ((packed)) bits; +}; + +union IO_APIC_reg_01 { + u32 raw; + struct { + u32 version : 8, + __reserved_2 : 7, + PRQ : 1, + entries : 8, + __reserved_1 : 8; + } __attribute__ ((packed)) bits; +}; + +union IO_APIC_reg_02 { + u32 raw; + struct { + u32 __reserved_2 : 24, + arbitration : 4, + __reserved_1 : 4; + } __attribute__ ((packed)) bits; +}; + +union IO_APIC_reg_03 { + u32 raw; + struct { + u32 boot_DT : 1, + __reserved_1 : 31; + } __attribute__ ((packed)) bits; +}; + +enum ioapic_irq_destination_types { + dest_Fixed = 0, + dest_LowestPrio = 1, + dest_SMI = 2, + dest__reserved_1 = 3, + dest_NMI = 4, + dest_INIT = 5, + dest__reserved_2 = 6, + dest_ExtINT = 7 +}; + +struct IO_APIC_route_entry { + __u32 vector : 8, + delivery_mode : 3, /* 000: FIXED + * 001: lowest prio + * 111: ExtINT + */ + dest_mode : 1, /* 0: physical, 1: logical */ + delivery_status : 1, + polarity : 1, + irr : 1, + trigger : 1, /* 0: edge, 1: level */ + mask : 1, /* 0: enabled, 1: disabled */ + __reserved_2 : 15; + #ifdef CONFIG_X86_32 -# include "io_apic_32.h" + union { + struct { + __u32 __reserved_1 : 24, + physical_dest : 4, + __reserved_2 : 4; + } physical; + + struct { + __u32 __reserved_1 : 24, + logical_dest : 8; + } logical; + } dest; #else -# include "io_apic_64.h" + __u32 __reserved_3 : 24, + dest : 8; +#endif + +} __attribute__ ((packed)); + +#ifdef CONFIG_X86_IO_APIC + +/* + * # of IO-APICs and # of IRQ routing registers + */ +extern int nr_ioapics; +extern int nr_ioapic_registers[MAX_IO_APICS]; + +/* + * MP-BIOS irq configuration table structures: + */ + +/* I/O APIC entries */ +extern struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS]; + +/* # of MP IRQ source entries */ +extern int mp_irq_entries; + +/* MP IRQ source entries */ +extern struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES]; + +/* non-0 if default (table-less) MP configuration */ +extern int mpc_default_type; + +/* Older SiS APIC requires we rewrite the index register */ +extern int sis_apic_bug; + +/* 1 if "noapic" boot option passed */ +extern int skip_ioapic_setup; + +static inline void disable_ioapic_setup(void) +{ + skip_ioapic_setup = 1; +} + +/* + * If we use the IO-APIC for IRQ routing, disable automatic + * assignment of PCI IRQ's. + */ +#define io_apic_assign_pci_irqs \ + (mp_irq_entries && !skip_ioapic_setup && io_apic_irqs) + +#ifdef CONFIG_ACPI +extern int io_apic_get_unique_id(int ioapic, int apic_id); +extern int io_apic_get_version(int ioapic); +extern int io_apic_get_redir_entries(int ioapic); +extern int io_apic_set_pci_routing(int ioapic, int pin, int irq, + int edge_level, int active_high_low); +extern int timer_uses_ioapic_pin_0; +#endif /* CONFIG_ACPI */ + +extern int (*ioapic_renumber_irq)(int ioapic, int irq); +extern void ioapic_init_mappings(void); + +#else /* !CONFIG_X86_IO_APIC */ +#define io_apic_assign_pci_irqs 0 +#endif + #endif diff --git a/include/asm-x86/io_apic_32.h b/include/asm-x86/io_apic_32.h deleted file mode 100644 index 3f087883ea48..000000000000 --- a/include/asm-x86/io_apic_32.h +++ /dev/null @@ -1,155 +0,0 @@ -#ifndef __ASM_IO_APIC_H -#define __ASM_IO_APIC_H - -#include <asm/types.h> -#include <asm/mpspec.h> -#include <asm/apicdef.h> - -/* - * Intel IO-APIC support for SMP and UP systems. - * - * Copyright (C) 1997, 1998, 1999, 2000 Ingo Molnar - */ - -/* - * The structure of the IO-APIC: - */ -union IO_APIC_reg_00 { - u32 raw; - struct { - u32 __reserved_2 : 14, - LTS : 1, - delivery_type : 1, - __reserved_1 : 8, - ID : 8; - } __attribute__ ((packed)) bits; -}; - -union IO_APIC_reg_01 { - u32 raw; - struct { - u32 version : 8, - __reserved_2 : 7, - PRQ : 1, - entries : 8, - __reserved_1 : 8; - } __attribute__ ((packed)) bits; -}; - -union IO_APIC_reg_02 { - u32 raw; - struct { - u32 __reserved_2 : 24, - arbitration : 4, - __reserved_1 : 4; - } __attribute__ ((packed)) bits; -}; - -union IO_APIC_reg_03 { - u32 raw; - struct { - u32 boot_DT : 1, - __reserved_1 : 31; - } __attribute__ ((packed)) bits; -}; - -enum ioapic_irq_destination_types { - dest_Fixed = 0, - dest_LowestPrio = 1, - dest_SMI = 2, - dest__reserved_1 = 3, - dest_NMI = 4, - dest_INIT = 5, - dest__reserved_2 = 6, - dest_ExtINT = 7 -}; - -struct IO_APIC_route_entry { - __u32 vector : 8, - delivery_mode : 3, /* 000: FIXED - * 001: lowest prio - * 111: ExtINT - */ - dest_mode : 1, /* 0: physical, 1: logical */ - delivery_status : 1, - polarity : 1, - irr : 1, - trigger : 1, /* 0: edge, 1: level */ - mask : 1, /* 0: enabled, 1: disabled */ - __reserved_2 : 15; - - union { struct { __u32 - __reserved_1 : 24, - physical_dest : 4, - __reserved_2 : 4; - } physical; - - struct { __u32 - __reserved_1 : 24, - logical_dest : 8; - } logical; - } dest; - -} __attribute__ ((packed)); - -#ifdef CONFIG_X86_IO_APIC - -/* - * # of IO-APICs and # of IRQ routing registers - */ -extern int nr_ioapics; -extern int nr_ioapic_registers[MAX_IO_APICS]; - -/* - * MP-BIOS irq configuration table structures: - */ - -/* I/O APIC entries */ -extern struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS]; - -/* # of MP IRQ source entries */ -extern int mp_irq_entries; - -/* MP IRQ source entries */ -extern struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES]; - -/* non-0 if default (table-less) MP configuration */ -extern int mpc_default_type; - -/* Older SiS APIC requires we rewrite the index register */ -extern int sis_apic_bug; - -/* 1 if "noapic" boot option passed */ -extern int skip_ioapic_setup; - -static inline void disable_ioapic_setup(void) -{ - skip_ioapic_setup = 1; -} - -static inline int ioapic_setup_disabled(void) -{ - return skip_ioapic_setup; -} - -/* - * If we use the IO-APIC for IRQ routing, disable automatic - * assignment of PCI IRQ's. - */ -#define io_apic_assign_pci_irqs (mp_irq_entries && !skip_ioapic_setup && io_apic_irqs) - -#ifdef CONFIG_ACPI -extern int io_apic_get_unique_id (int ioapic, int apic_id); -extern int io_apic_get_version (int ioapic); -extern int io_apic_get_redir_entries (int ioapic); -extern int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int active_high_low); -extern int timer_uses_ioapic_pin_0; -#endif /* CONFIG_ACPI */ - -extern int (*ioapic_renumber_irq)(int ioapic, int irq); - -#else /* !CONFIG_X86_IO_APIC */ -#define io_apic_assign_pci_irqs 0 -#endif - -#endif diff --git a/include/asm-x86/io_apic_64.h b/include/asm-x86/io_apic_64.h deleted file mode 100644 index e2c13675ee4e..000000000000 --- a/include/asm-x86/io_apic_64.h +++ /dev/null @@ -1,138 +0,0 @@ -#ifndef __ASM_IO_APIC_H -#define __ASM_IO_APIC_H - -#include <asm/types.h> -#include <asm/mpspec.h> -#include <asm/apicdef.h> - -/* - * Intel IO-APIC support for SMP and UP systems. - * - * Copyright (C) 1997, 1998, 1999, 2000 Ingo Molnar - */ - -#define APIC_MISMATCH_DEBUG - -/* - * The structure of the IO-APIC: - */ -union IO_APIC_reg_00 { - u32 raw; - struct { - u32 __reserved_2 : 14, - LTS : 1, - delivery_type : 1, - __reserved_1 : 8, - ID : 8; - } __attribute__ ((packed)) bits; -}; - -union IO_APIC_reg_01 { - u32 raw; - struct { - u32 version : 8, - __reserved_2 : 7, - PRQ : 1, - entries : 8, - __reserved_1 : 8; - } __attribute__ ((packed)) bits; -}; - -union IO_APIC_reg_02 { - u32 raw; - struct { - u32 __reserved_2 : 24, - arbitration : 4, - __reserved_1 : 4; - } __attribute__ ((packed)) bits; -}; - -union IO_APIC_reg_03 { - u32 raw; - struct { - u32 boot_DT : 1, - __reserved_1 : 31; - } __attribute__ ((packed)) bits; -}; - -/* - * # of IO-APICs and # of IRQ routing registers - */ -extern int nr_ioapics; -extern int nr_ioapic_registers[MAX_IO_APICS]; - -enum ioapic_irq_destination_types { - dest_Fixed = 0, - dest_LowestPrio = 1, - dest_SMI = 2, - dest__reserved_1 = 3, - dest_NMI = 4, - dest_INIT = 5, - dest__reserved_2 = 6, - dest_ExtINT = 7 -}; - -struct IO_APIC_route_entry { - __u32 vector : 8, - delivery_mode : 3, /* 000: FIXED - * 001: lowest prio - * 111: ExtINT - */ - dest_mode : 1, /* 0: physical, 1: logical */ - delivery_status : 1, - polarity : 1, - irr : 1, - trigger : 1, /* 0: edge, 1: level */ - mask : 1, /* 0: enabled, 1: disabled */ - __reserved_2 : 15; - - __u32 __reserved_3 : 24, - dest : 8; -} __attribute__ ((packed)); - -/* - * MP-BIOS irq configuration table structures: - */ - -/* I/O APIC entries */ -extern struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS]; - -/* # of MP IRQ source entries */ -extern int mp_irq_entries; - -/* MP IRQ source entries */ -extern struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES]; - -/* non-0 if default (table-less) MP configuration */ -extern int mpc_default_type; - -/* 1 if "noapic" boot option passed */ -extern int skip_ioapic_setup; - -static inline void disable_ioapic_setup(void) -{ - skip_ioapic_setup = 1; -} - - -/* - * If we use the IO-APIC for IRQ routing, disable automatic - * assignment of PCI IRQ's. - */ -#define io_apic_assign_pci_irqs (mp_irq_entries && !skip_ioapic_setup && io_apic_irqs) - -#ifdef CONFIG_ACPI -extern int io_apic_get_version (int ioapic); -extern int io_apic_get_redir_entries (int ioapic); -extern int io_apic_set_pci_routing (int ioapic, int pin, int irq, int, int); -#endif - -extern int sis_apic_bug; /* dummy */ - -void enable_NMI_through_LVT0 (void * dummy); - -extern spinlock_t i8259A_lock; - -extern int timer_over_8254; - -#endif diff --git a/include/asm-x86/irqflags.h b/include/asm-x86/irqflags.h index 1b695ff52687..92021c1ffa3a 100644 --- a/include/asm-x86/irqflags.h +++ b/include/asm-x86/irqflags.h @@ -1,5 +1,245 @@ -#ifdef CONFIG_X86_32 -# include "irqflags_32.h" +#ifndef _X86_IRQFLAGS_H_ +#define _X86_IRQFLAGS_H_ + +#include <asm/processor-flags.h> + +#ifndef __ASSEMBLY__ +/* + * Interrupt control: + */ + +static inline unsigned long native_save_fl(void) +{ + unsigned long flags; + + __asm__ __volatile__( + "# __raw_save_flags\n\t" + "pushf ; pop %0" + : "=g" (flags) + : /* no input */ + : "memory" + ); + + return flags; +} + +static inline void native_restore_fl(unsigned long flags) +{ + __asm__ __volatile__( + "push %0 ; popf" + : /* no output */ + :"g" (flags) + :"memory", "cc" + ); +} + +static inline void native_irq_disable(void) +{ + asm volatile("cli": : :"memory"); +} + +static inline void native_irq_enable(void) +{ + asm volatile("sti": : :"memory"); +} + +static inline void native_safe_halt(void) +{ + asm volatile("sti; hlt": : :"memory"); +} + +static inline void native_halt(void) +{ + asm volatile("hlt": : :"memory"); +} + +#endif + +#ifdef CONFIG_PARAVIRT +#include <asm/paravirt.h> +#else +#ifndef __ASSEMBLY__ + +static inline unsigned long __raw_local_save_flags(void) +{ + return native_save_fl(); +} + +static inline void raw_local_irq_restore(unsigned long flags) +{ + native_restore_fl(flags); +} + +static inline void raw_local_irq_disable(void) +{ + native_irq_disable(); +} + +static inline void raw_local_irq_enable(void) +{ + native_irq_enable(); +} + +/* + * Used in the idle loop; sti takes one instruction cycle + * to complete: + */ +static inline void raw_safe_halt(void) +{ + native_safe_halt(); +} + +/* + * Used when interrupts are already enabled or to + * shutdown the processor: + */ +static inline void halt(void) +{ + native_halt(); +} + +/* + * For spinlocks, etc: + */ +static inline unsigned long __raw_local_irq_save(void) +{ + unsigned long flags = __raw_local_save_flags(); + + raw_local_irq_disable(); + + return flags; +} +#else + +#define ENABLE_INTERRUPTS(x) sti +#define DISABLE_INTERRUPTS(x) cli + +#ifdef CONFIG_X86_64 +#define INTERRUPT_RETURN iretq +#define ENABLE_INTERRUPTS_SYSCALL_RET \ + movq %gs:pda_oldrsp, %rsp; \ + swapgs; \ + sysretq; +#else +#define INTERRUPT_RETURN iret +#define ENABLE_INTERRUPTS_SYSCALL_RET sti; sysexit +#define GET_CR0_INTO_EAX movl %cr0, %eax +#endif + + +#endif /* __ASSEMBLY__ */ +#endif /* CONFIG_PARAVIRT */ + +#ifndef __ASSEMBLY__ +#define raw_local_save_flags(flags) \ + do { (flags) = __raw_local_save_flags(); } while (0) + +#define raw_local_irq_save(flags) \ + do { (flags) = __raw_local_irq_save(); } while (0) + +static inline int raw_irqs_disabled_flags(unsigned long flags) +{ + return !(flags & X86_EFLAGS_IF); +} + +static inline int raw_irqs_disabled(void) +{ + unsigned long flags = __raw_local_save_flags(); + + return raw_irqs_disabled_flags(flags); +} + +/* + * makes the traced hardirq state match with the machine state + * + * should be a rarely used function, only in places where its + * otherwise impossible to know the irq state, like in traps. + */ +static inline void trace_hardirqs_fixup_flags(unsigned long flags) +{ + if (raw_irqs_disabled_flags(flags)) + trace_hardirqs_off(); + else + trace_hardirqs_on(); +} + +static inline void trace_hardirqs_fixup(void) +{ + unsigned long flags = __raw_local_save_flags(); + + trace_hardirqs_fixup_flags(flags); +} + #else -# include "irqflags_64.h" + +#ifdef CONFIG_X86_64 +/* + * Currently paravirt can't handle swapgs nicely when we + * don't have a stack we can rely on (such as a user space + * stack). So we either find a way around these or just fault + * and emulate if a guest tries to call swapgs directly. + * + * Either way, this is a good way to document that we don't + * have a reliable stack. x86_64 only. + */ +#define SWAPGS_UNSAFE_STACK swapgs +#define ARCH_TRACE_IRQS_ON call trace_hardirqs_on_thunk +#define ARCH_TRACE_IRQS_OFF call trace_hardirqs_off_thunk +#define ARCH_LOCKDEP_SYS_EXIT call lockdep_sys_exit_thunk +#define ARCH_LOCKDEP_SYS_EXIT_IRQ \ + TRACE_IRQS_ON; \ + sti; \ + SAVE_REST; \ + LOCKDEP_SYS_EXIT; \ + RESTORE_REST; \ + cli; \ + TRACE_IRQS_OFF; + +#else +#define ARCH_TRACE_IRQS_ON \ + pushl %eax; \ + pushl %ecx; \ + pushl %edx; \ + call trace_hardirqs_on; \ + popl %edx; \ + popl %ecx; \ + popl %eax; + +#define ARCH_TRACE_IRQS_OFF \ + pushl %eax; \ + pushl %ecx; \ + pushl %edx; \ + call trace_hardirqs_off; \ + popl %edx; \ + popl %ecx; \ + popl %eax; + +#define ARCH_LOCKDEP_SYS_EXIT \ + pushl %eax; \ + pushl %ecx; \ + pushl %edx; \ + call lockdep_sys_exit; \ + popl %edx; \ + popl %ecx; \ + popl %eax; + +#define ARCH_LOCKDEP_SYS_EXIT_IRQ +#endif + +#ifdef CONFIG_TRACE_IRQFLAGS +# define TRACE_IRQS_ON ARCH_TRACE_IRQS_ON +# define TRACE_IRQS_OFF ARCH_TRACE_IRQS_OFF +#else +# define TRACE_IRQS_ON +# define TRACE_IRQS_OFF +#endif +#ifdef CONFIG_DEBUG_LOCK_ALLOC +# define LOCKDEP_SYS_EXIT ARCH_LOCKDEP_SYS_EXIT +# define LOCKDEP_SYS_EXIT_IRQ ARCH_LOCKDEP_SYS_EXIT_IRQ +# else +# define LOCKDEP_SYS_EXIT +# define LOCKDEP_SYS_EXIT_IRQ +# endif + +#endif /* __ASSEMBLY__ */ #endif diff --git a/include/asm-x86/irqflags_32.h b/include/asm-x86/irqflags_32.h deleted file mode 100644 index 4c7720089cb5..000000000000 --- a/include/asm-x86/irqflags_32.h +++ /dev/null @@ -1,197 +0,0 @@ -/* - * include/asm-i386/irqflags.h - * - * IRQ flags handling - * - * This file gets included from lowlevel asm headers too, to provide - * wrapped versions of the local_irq_*() APIs, based on the - * raw_local_irq_*() functions from the lowlevel headers. - */ -#ifndef _ASM_IRQFLAGS_H -#define _ASM_IRQFLAGS_H -#include <asm/processor-flags.h> - -#ifndef __ASSEMBLY__ -static inline unsigned long native_save_fl(void) -{ - unsigned long f; - asm volatile("pushfl ; popl %0":"=g" (f): /* no input */); - return f; -} - -static inline void native_restore_fl(unsigned long f) -{ - asm volatile("pushl %0 ; popfl": /* no output */ - :"g" (f) - :"memory", "cc"); -} - -static inline void native_irq_disable(void) -{ - asm volatile("cli": : :"memory"); -} - -static inline void native_irq_enable(void) -{ - asm volatile("sti": : :"memory"); -} - -static inline void native_safe_halt(void) -{ - asm volatile("sti; hlt": : :"memory"); -} - -static inline void native_halt(void) -{ - asm volatile("hlt": : :"memory"); -} -#endif /* __ASSEMBLY__ */ - -#ifdef CONFIG_PARAVIRT -#include <asm/paravirt.h> -#else -#ifndef __ASSEMBLY__ - -static inline unsigned long __raw_local_save_flags(void) -{ - return native_save_fl(); -} - -static inline void raw_local_irq_restore(unsigned long flags) -{ - native_restore_fl(flags); -} - -static inline void raw_local_irq_disable(void) -{ - native_irq_disable(); -} - -static inline void raw_local_irq_enable(void) -{ - native_irq_enable(); -} - -/* - * Used in the idle loop; sti takes one instruction cycle - * to complete: - */ -static inline void raw_safe_halt(void) -{ - native_safe_halt(); -} - -/* - * Used when interrupts are already enabled or to - * shutdown the processor: - */ -static inline void halt(void) -{ - native_halt(); -} - -/* - * For spinlocks, etc: - */ -static inline unsigned long __raw_local_irq_save(void) -{ - unsigned long flags = __raw_local_save_flags(); - - raw_local_irq_disable(); - - return flags; -} - -#else -#define DISABLE_INTERRUPTS(clobbers) cli -#define ENABLE_INTERRUPTS(clobbers) sti -#define ENABLE_INTERRUPTS_SYSEXIT sti; sysexit -#define INTERRUPT_RETURN iret -#define GET_CR0_INTO_EAX movl %cr0, %eax -#endif /* __ASSEMBLY__ */ -#endif /* CONFIG_PARAVIRT */ - -#ifndef __ASSEMBLY__ -#define raw_local_save_flags(flags) \ - do { (flags) = __raw_local_save_flags(); } while (0) - -#define raw_local_irq_save(flags) \ - do { (flags) = __raw_local_irq_save(); } while (0) - -static inline int raw_irqs_disabled_flags(unsigned long flags) -{ - return !(flags & X86_EFLAGS_IF); -} - -static inline int raw_irqs_disabled(void) -{ - unsigned long flags = __raw_local_save_flags(); - - return raw_irqs_disabled_flags(flags); -} - -/* - * makes the traced hardirq state match with the machine state - * - * should be a rarely used function, only in places where its - * otherwise impossible to know the irq state, like in traps. - */ -static inline void trace_hardirqs_fixup_flags(unsigned long flags) -{ - if (raw_irqs_disabled_flags(flags)) - trace_hardirqs_off(); - else - trace_hardirqs_on(); -} - -static inline void trace_hardirqs_fixup(void) -{ - unsigned long flags = __raw_local_save_flags(); - - trace_hardirqs_fixup_flags(flags); -} -#endif /* __ASSEMBLY__ */ - -/* - * Do the CPU's IRQ-state tracing from assembly code. We call a - * C function, so save all the C-clobbered registers: - */ -#ifdef CONFIG_TRACE_IRQFLAGS - -# define TRACE_IRQS_ON \ - pushl %eax; \ - pushl %ecx; \ - pushl %edx; \ - call trace_hardirqs_on; \ - popl %edx; \ - popl %ecx; \ - popl %eax; - -# define TRACE_IRQS_OFF \ - pushl %eax; \ - pushl %ecx; \ - pushl %edx; \ - call trace_hardirqs_off; \ - popl %edx; \ - popl %ecx; \ - popl %eax; - -#else -# define TRACE_IRQS_ON -# define TRACE_IRQS_OFF -#endif - -#ifdef CONFIG_DEBUG_LOCK_ALLOC -# define LOCKDEP_SYS_EXIT \ - pushl %eax; \ - pushl %ecx; \ - pushl %edx; \ - call lockdep_sys_exit; \ - popl %edx; \ - popl %ecx; \ - popl %eax; -#else -# define LOCKDEP_SYS_EXIT -#endif - -#endif diff --git a/include/asm-x86/irqflags_64.h b/include/asm-x86/irqflags_64.h deleted file mode 100644 index bb9163bb29d1..000000000000 --- a/include/asm-x86/irqflags_64.h +++ /dev/null @@ -1,176 +0,0 @@ -/* - * include/asm-x86_64/irqflags.h - * - * IRQ flags handling - * - * This file gets included from lowlevel asm headers too, to provide - * wrapped versions of the local_irq_*() APIs, based on the - * raw_local_irq_*() functions from the lowlevel headers. - */ -#ifndef _ASM_IRQFLAGS_H -#define _ASM_IRQFLAGS_H -#include <asm/processor-flags.h> - -#ifndef __ASSEMBLY__ -/* - * Interrupt control: - */ - -static inline unsigned long __raw_local_save_flags(void) -{ - unsigned long flags; - - __asm__ __volatile__( - "# __raw_save_flags\n\t" - "pushfq ; popq %q0" - : "=g" (flags) - : /* no input */ - : "memory" - ); - - return flags; -} - -#define raw_local_save_flags(flags) \ - do { (flags) = __raw_local_save_flags(); } while (0) - -static inline void raw_local_irq_restore(unsigned long flags) -{ - __asm__ __volatile__( - "pushq %0 ; popfq" - : /* no output */ - :"g" (flags) - :"memory", "cc" - ); -} - -#ifdef CONFIG_X86_VSMP - -/* - * Interrupt control for the VSMP architecture: - */ - -static inline void raw_local_irq_disable(void) -{ - unsigned long flags = __raw_local_save_flags(); - - raw_local_irq_restore((flags & ~X86_EFLAGS_IF) | X86_EFLAGS_AC); -} - -static inline void raw_local_irq_enable(void) -{ - unsigned long flags = __raw_local_save_flags(); - - raw_local_irq_restore((flags | X86_EFLAGS_IF) & (~X86_EFLAGS_AC)); -} - -static inline int raw_irqs_disabled_flags(unsigned long flags) -{ - return !(flags & X86_EFLAGS_IF) || (flags & X86_EFLAGS_AC); -} - -#else /* CONFIG_X86_VSMP */ - -static inline void raw_local_irq_disable(void) -{ - __asm__ __volatile__("cli" : : : "memory"); -} - -static inline void raw_local_irq_enable(void) -{ - __asm__ __volatile__("sti" : : : "memory"); -} - -static inline int raw_irqs_disabled_flags(unsigned long flags) -{ - return !(flags & X86_EFLAGS_IF); -} - -#endif - -/* - * For spinlocks, etc.: - */ - -static inline unsigned long __raw_local_irq_save(void) -{ - unsigned long flags = __raw_local_save_flags(); - - raw_local_irq_disable(); - - return flags; -} - -#define raw_local_irq_save(flags) \ - do { (flags) = __raw_local_irq_save(); } while (0) - -static inline int raw_irqs_disabled(void) -{ - unsigned long flags = __raw_local_save_flags(); - - return raw_irqs_disabled_flags(flags); -} - -/* - * makes the traced hardirq state match with the machine state - * - * should be a rarely used function, only in places where its - * otherwise impossible to know the irq state, like in traps. - */ -static inline void trace_hardirqs_fixup_flags(unsigned long flags) -{ - if (raw_irqs_disabled_flags(flags)) - trace_hardirqs_off(); - else - trace_hardirqs_on(); -} - -static inline void trace_hardirqs_fixup(void) -{ - unsigned long flags = __raw_local_save_flags(); - - trace_hardirqs_fixup_flags(flags); -} -/* - * Used in the idle loop; sti takes one instruction cycle - * to complete: - */ -static inline void raw_safe_halt(void) -{ - __asm__ __volatile__("sti; hlt" : : : "memory"); -} - -/* - * Used when interrupts are already enabled or to - * shutdown the processor: - */ -static inline void halt(void) -{ - __asm__ __volatile__("hlt": : :"memory"); -} - -#else /* __ASSEMBLY__: */ -# ifdef CONFIG_TRACE_IRQFLAGS -# define TRACE_IRQS_ON call trace_hardirqs_on_thunk -# define TRACE_IRQS_OFF call trace_hardirqs_off_thunk -# else -# define TRACE_IRQS_ON -# define TRACE_IRQS_OFF -# endif -# ifdef CONFIG_DEBUG_LOCK_ALLOC -# define LOCKDEP_SYS_EXIT call lockdep_sys_exit_thunk -# define LOCKDEP_SYS_EXIT_IRQ \ - TRACE_IRQS_ON; \ - sti; \ - SAVE_REST; \ - LOCKDEP_SYS_EXIT; \ - RESTORE_REST; \ - cli; \ - TRACE_IRQS_OFF; -# else -# define LOCKDEP_SYS_EXIT -# define LOCKDEP_SYS_EXIT_IRQ -# endif -#endif - -#endif diff --git a/include/asm-x86/k8.h b/include/asm-x86/k8.h index 699dd6961eda..452e2b696ff4 100644 --- a/include/asm-x86/k8.h +++ b/include/asm-x86/k8.h @@ -10,5 +10,6 @@ extern struct pci_dev **k8_northbridges; extern int num_k8_northbridges; extern int cache_k8_northbridges(void); extern void k8_flush_garts(void); +extern int k8_scan_nodes(unsigned long start, unsigned long end); #endif diff --git a/include/asm-x86/kdebug.h b/include/asm-x86/kdebug.h index e2f9b62e535e..dd442a1632c0 100644 --- a/include/asm-x86/kdebug.h +++ b/include/asm-x86/kdebug.h @@ -22,12 +22,17 @@ enum die_val { DIE_PAGE_FAULT, }; -extern void printk_address(unsigned long address); +extern void printk_address(unsigned long address, int reliable); extern void die(const char *,struct pt_regs *,long); -extern void __die(const char *,struct pt_regs *,long); +extern int __must_check __die(const char *, struct pt_regs *, long); extern void show_registers(struct pt_regs *regs); +extern void __show_registers(struct pt_regs *, int all); +extern void show_trace(struct task_struct *t, struct pt_regs *regs, + unsigned long *sp, unsigned long bp); +extern void __show_regs(struct pt_regs *regs); +extern void show_regs(struct pt_regs *regs); extern void dump_pagetable(unsigned long); extern unsigned long oops_begin(void); -extern void oops_end(unsigned long); +extern void oops_end(unsigned long, struct pt_regs *, int signr); #endif diff --git a/include/asm-x86/kexec.h b/include/asm-x86/kexec.h index 718ddbfb9516..c90d3c77afc2 100644 --- a/include/asm-x86/kexec.h +++ b/include/asm-x86/kexec.h @@ -1,5 +1,170 @@ +#ifndef _KEXEC_H +#define _KEXEC_H + #ifdef CONFIG_X86_32 -# include "kexec_32.h" +# define PA_CONTROL_PAGE 0 +# define VA_CONTROL_PAGE 1 +# define PA_PGD 2 +# define VA_PGD 3 +# define PA_PTE_0 4 +# define VA_PTE_0 5 +# define PA_PTE_1 6 +# define VA_PTE_1 7 +# ifdef CONFIG_X86_PAE +# define PA_PMD_0 8 +# define VA_PMD_0 9 +# define PA_PMD_1 10 +# define VA_PMD_1 11 +# define PAGES_NR 12 +# else +# define PAGES_NR 8 +# endif #else -# include "kexec_64.h" +# define PA_CONTROL_PAGE 0 +# define VA_CONTROL_PAGE 1 +# define PA_PGD 2 +# define VA_PGD 3 +# define PA_PUD_0 4 +# define VA_PUD_0 5 +# define PA_PMD_0 6 +# define VA_PMD_0 7 +# define PA_PTE_0 8 +# define VA_PTE_0 9 +# define PA_PUD_1 10 +# define VA_PUD_1 11 +# define PA_PMD_1 12 +# define VA_PMD_1 13 +# define PA_PTE_1 14 +# define VA_PTE_1 15 +# define PA_TABLE_PAGE 16 +# define PAGES_NR 17 #endif + +#ifndef __ASSEMBLY__ + +#include <linux/string.h> + +#include <asm/page.h> +#include <asm/ptrace.h> + +/* + * KEXEC_SOURCE_MEMORY_LIMIT maximum page get_free_page can return. + * I.e. Maximum page that is mapped directly into kernel memory, + * and kmap is not required. + * + * So far x86_64 is limited to 40 physical address bits. + */ +#ifdef CONFIG_X86_32 +/* Maximum physical address we can use pages from */ +# define KEXEC_SOURCE_MEMORY_LIMIT (-1UL) +/* Maximum address we can reach in physical address mode */ +# define KEXEC_DESTINATION_MEMORY_LIMIT (-1UL) +/* Maximum address we can use for the control code buffer */ +# define KEXEC_CONTROL_MEMORY_LIMIT TASK_SIZE + +# define KEXEC_CONTROL_CODE_SIZE 4096 + +/* The native architecture */ +# define KEXEC_ARCH KEXEC_ARCH_386 + +/* We can also handle crash dumps from 64 bit kernel. */ +# define vmcore_elf_check_arch_cross(x) ((x)->e_machine == EM_X86_64) +#else +/* Maximum physical address we can use pages from */ +# define KEXEC_SOURCE_MEMORY_LIMIT (0xFFFFFFFFFFUL) +/* Maximum address we can reach in physical address mode */ +# define KEXEC_DESTINATION_MEMORY_LIMIT (0xFFFFFFFFFFUL) +/* Maximum address we can use for the control pages */ +# define KEXEC_CONTROL_MEMORY_LIMIT (0xFFFFFFFFFFUL) + +/* Allocate one page for the pdp and the second for the code */ +# define KEXEC_CONTROL_CODE_SIZE (4096UL + 4096UL) + +/* The native architecture */ +# define KEXEC_ARCH KEXEC_ARCH_X86_64 +#endif + +/* + * CPU does not save ss and sp on stack if execution is already + * running in kernel mode at the time of NMI occurrence. This code + * fixes it. + */ +static inline void crash_fixup_ss_esp(struct pt_regs *newregs, + struct pt_regs *oldregs) +{ +#ifdef CONFIG_X86_32 + newregs->sp = (unsigned long)&(oldregs->sp); + __asm__ __volatile__( + "xorl %%eax, %%eax\n\t" + "movw %%ss, %%ax\n\t" + :"=a"(newregs->ss)); +#endif +} + +/* + * This function is responsible for capturing register states if coming + * via panic otherwise just fix up the ss and sp if coming via kernel + * mode exception. + */ +static inline void crash_setup_regs(struct pt_regs *newregs, + struct pt_regs *oldregs) +{ + if (oldregs) { + memcpy(newregs, oldregs, sizeof(*newregs)); + crash_fixup_ss_esp(newregs, oldregs); + } else { +#ifdef CONFIG_X86_32 + __asm__ __volatile__("movl %%ebx,%0" : "=m"(newregs->bx)); + __asm__ __volatile__("movl %%ecx,%0" : "=m"(newregs->cx)); + __asm__ __volatile__("movl %%edx,%0" : "=m"(newregs->dx)); + __asm__ __volatile__("movl %%esi,%0" : "=m"(newregs->si)); + __asm__ __volatile__("movl %%edi,%0" : "=m"(newregs->di)); + __asm__ __volatile__("movl %%ebp,%0" : "=m"(newregs->bp)); + __asm__ __volatile__("movl %%eax,%0" : "=m"(newregs->ax)); + __asm__ __volatile__("movl %%esp,%0" : "=m"(newregs->sp)); + __asm__ __volatile__("movl %%ss, %%eax;" :"=a"(newregs->ss)); + __asm__ __volatile__("movl %%cs, %%eax;" :"=a"(newregs->cs)); + __asm__ __volatile__("movl %%ds, %%eax;" :"=a"(newregs->ds)); + __asm__ __volatile__("movl %%es, %%eax;" :"=a"(newregs->es)); + __asm__ __volatile__("pushfl; popl %0" :"=m"(newregs->flags)); +#else + __asm__ __volatile__("movq %%rbx,%0" : "=m"(newregs->bx)); + __asm__ __volatile__("movq %%rcx,%0" : "=m"(newregs->cx)); + __asm__ __volatile__("movq %%rdx,%0" : "=m"(newregs->dx)); + __asm__ __volatile__("movq %%rsi,%0" : "=m"(newregs->si)); + __asm__ __volatile__("movq %%rdi,%0" : "=m"(newregs->di)); + __asm__ __volatile__("movq %%rbp,%0" : "=m"(newregs->bp)); + __asm__ __volatile__("movq %%rax,%0" : "=m"(newregs->ax)); + __asm__ __volatile__("movq %%rsp,%0" : "=m"(newregs->sp)); + __asm__ __volatile__("movq %%r8,%0" : "=m"(newregs->r8)); + __asm__ __volatile__("movq %%r9,%0" : "=m"(newregs->r9)); + __asm__ __volatile__("movq %%r10,%0" : "=m"(newregs->r10)); + __asm__ __volatile__("movq %%r11,%0" : "=m"(newregs->r11)); + __asm__ __volatile__("movq %%r12,%0" : "=m"(newregs->r12)); + __asm__ __volatile__("movq %%r13,%0" : "=m"(newregs->r13)); + __asm__ __volatile__("movq %%r14,%0" : "=m"(newregs->r14)); + __asm__ __volatile__("movq %%r15,%0" : "=m"(newregs->r15)); + __asm__ __volatile__("movl %%ss, %%eax;" :"=a"(newregs->ss)); + __asm__ __volatile__("movl %%cs, %%eax;" :"=a"(newregs->cs)); + __asm__ __volatile__("pushfq; popq %0" :"=m"(newregs->flags)); +#endif + newregs->ip = (unsigned long)current_text_addr(); + } +} + +#ifdef CONFIG_X86_32 +asmlinkage NORET_TYPE void +relocate_kernel(unsigned long indirection_page, + unsigned long control_page, + unsigned long start_address, + unsigned int has_pae) ATTRIB_NORET; +#else +NORET_TYPE void +relocate_kernel(unsigned long indirection_page, + unsigned long page_list, + unsigned long start_address) ATTRIB_NORET; +#endif + +#endif /* __ASSEMBLY__ */ + +#endif /* _KEXEC_H */ diff --git a/include/asm-x86/kexec_32.h b/include/asm-x86/kexec_32.h deleted file mode 100644 index 4b9dc9e6b701..000000000000 --- a/include/asm-x86/kexec_32.h +++ /dev/null @@ -1,99 +0,0 @@ -#ifndef _I386_KEXEC_H -#define _I386_KEXEC_H - -#define PA_CONTROL_PAGE 0 -#define VA_CONTROL_PAGE 1 -#define PA_PGD 2 -#define VA_PGD 3 -#define PA_PTE_0 4 -#define VA_PTE_0 5 -#define PA_PTE_1 6 -#define VA_PTE_1 7 -#ifdef CONFIG_X86_PAE -#define PA_PMD_0 8 -#define VA_PMD_0 9 -#define PA_PMD_1 10 -#define VA_PMD_1 11 -#define PAGES_NR 12 -#else -#define PAGES_NR 8 -#endif - -#ifndef __ASSEMBLY__ - -#include <asm/ptrace.h> -#include <asm/string.h> - -/* - * KEXEC_SOURCE_MEMORY_LIMIT maximum page get_free_page can return. - * I.e. Maximum page that is mapped directly into kernel memory, - * and kmap is not required. - */ - -/* Maximum physical address we can use pages from */ -#define KEXEC_SOURCE_MEMORY_LIMIT (-1UL) -/* Maximum address we can reach in physical address mode */ -#define KEXEC_DESTINATION_MEMORY_LIMIT (-1UL) -/* Maximum address we can use for the control code buffer */ -#define KEXEC_CONTROL_MEMORY_LIMIT TASK_SIZE - -#define KEXEC_CONTROL_CODE_SIZE 4096 - -/* The native architecture */ -#define KEXEC_ARCH KEXEC_ARCH_386 - -/* We can also handle crash dumps from 64 bit kernel. */ -#define vmcore_elf_check_arch_cross(x) ((x)->e_machine == EM_X86_64) - -/* CPU does not save ss and esp on stack if execution is already - * running in kernel mode at the time of NMI occurrence. This code - * fixes it. - */ -static inline void crash_fixup_ss_esp(struct pt_regs *newregs, - struct pt_regs *oldregs) -{ - memcpy(newregs, oldregs, sizeof(*newregs)); - newregs->esp = (unsigned long)&(oldregs->esp); - __asm__ __volatile__( - "xorl %%eax, %%eax\n\t" - "movw %%ss, %%ax\n\t" - :"=a"(newregs->xss)); -} - -/* - * This function is responsible for capturing register states if coming - * via panic otherwise just fix up the ss and esp if coming via kernel - * mode exception. - */ -static inline void crash_setup_regs(struct pt_regs *newregs, - struct pt_regs *oldregs) -{ - if (oldregs) - crash_fixup_ss_esp(newregs, oldregs); - else { - __asm__ __volatile__("movl %%ebx,%0" : "=m"(newregs->ebx)); - __asm__ __volatile__("movl %%ecx,%0" : "=m"(newregs->ecx)); - __asm__ __volatile__("movl %%edx,%0" : "=m"(newregs->edx)); - __asm__ __volatile__("movl %%esi,%0" : "=m"(newregs->esi)); - __asm__ __volatile__("movl %%edi,%0" : "=m"(newregs->edi)); - __asm__ __volatile__("movl %%ebp,%0" : "=m"(newregs->ebp)); - __asm__ __volatile__("movl %%eax,%0" : "=m"(newregs->eax)); - __asm__ __volatile__("movl %%esp,%0" : "=m"(newregs->esp)); - __asm__ __volatile__("movw %%ss, %%ax;" :"=a"(newregs->xss)); - __asm__ __volatile__("movw %%cs, %%ax;" :"=a"(newregs->xcs)); - __asm__ __volatile__("movw %%ds, %%ax;" :"=a"(newregs->xds)); - __asm__ __volatile__("movw %%es, %%ax;" :"=a"(newregs->xes)); - __asm__ __volatile__("pushfl; popl %0" :"=m"(newregs->eflags)); - - newregs->eip = (unsigned long)current_text_addr(); - } -} -asmlinkage NORET_TYPE void -relocate_kernel(unsigned long indirection_page, - unsigned long control_page, - unsigned long start_address, - unsigned int has_pae) ATTRIB_NORET; - -#endif /* __ASSEMBLY__ */ - -#endif /* _I386_KEXEC_H */ diff --git a/include/asm-x86/kexec_64.h b/include/asm-x86/kexec_64.h deleted file mode 100644 index 738e581b67f8..000000000000 --- a/include/asm-x86/kexec_64.h +++ /dev/null @@ -1,94 +0,0 @@ -#ifndef _X86_64_KEXEC_H -#define _X86_64_KEXEC_H - -#define PA_CONTROL_PAGE 0 -#define VA_CONTROL_PAGE 1 -#define PA_PGD 2 -#define VA_PGD 3 -#define PA_PUD_0 4 -#define VA_PUD_0 5 -#define PA_PMD_0 6 -#define VA_PMD_0 7 -#define PA_PTE_0 8 -#define VA_PTE_0 9 -#define PA_PUD_1 10 -#define VA_PUD_1 11 -#define PA_PMD_1 12 -#define VA_PMD_1 13 -#define PA_PTE_1 14 -#define VA_PTE_1 15 -#define PA_TABLE_PAGE 16 -#define PAGES_NR 17 - -#ifndef __ASSEMBLY__ - -#include <linux/string.h> - -#include <asm/page.h> -#include <asm/ptrace.h> - -/* - * KEXEC_SOURCE_MEMORY_LIMIT maximum page get_free_page can return. - * I.e. Maximum page that is mapped directly into kernel memory, - * and kmap is not required. - * - * So far x86_64 is limited to 40 physical address bits. - */ - -/* Maximum physical address we can use pages from */ -#define KEXEC_SOURCE_MEMORY_LIMIT (0xFFFFFFFFFFUL) -/* Maximum address we can reach in physical address mode */ -#define KEXEC_DESTINATION_MEMORY_LIMIT (0xFFFFFFFFFFUL) -/* Maximum address we can use for the control pages */ -#define KEXEC_CONTROL_MEMORY_LIMIT (0xFFFFFFFFFFUL) - -/* Allocate one page for the pdp and the second for the code */ -#define KEXEC_CONTROL_CODE_SIZE (4096UL + 4096UL) - -/* The native architecture */ -#define KEXEC_ARCH KEXEC_ARCH_X86_64 - -/* - * Saving the registers of the cpu on which panic occured in - * crash_kexec to save a valid sp. The registers of other cpus - * will be saved in machine_crash_shutdown while shooting down them. - */ - -static inline void crash_setup_regs(struct pt_regs *newregs, - struct pt_regs *oldregs) -{ - if (oldregs) - memcpy(newregs, oldregs, sizeof(*newregs)); - else { - __asm__ __volatile__("movq %%rbx,%0" : "=m"(newregs->rbx)); - __asm__ __volatile__("movq %%rcx,%0" : "=m"(newregs->rcx)); - __asm__ __volatile__("movq %%rdx,%0" : "=m"(newregs->rdx)); - __asm__ __volatile__("movq %%rsi,%0" : "=m"(newregs->rsi)); - __asm__ __volatile__("movq %%rdi,%0" : "=m"(newregs->rdi)); - __asm__ __volatile__("movq %%rbp,%0" : "=m"(newregs->rbp)); - __asm__ __volatile__("movq %%rax,%0" : "=m"(newregs->rax)); - __asm__ __volatile__("movq %%rsp,%0" : "=m"(newregs->rsp)); - __asm__ __volatile__("movq %%r8,%0" : "=m"(newregs->r8)); - __asm__ __volatile__("movq %%r9,%0" : "=m"(newregs->r9)); - __asm__ __volatile__("movq %%r10,%0" : "=m"(newregs->r10)); - __asm__ __volatile__("movq %%r11,%0" : "=m"(newregs->r11)); - __asm__ __volatile__("movq %%r12,%0" : "=m"(newregs->r12)); - __asm__ __volatile__("movq %%r13,%0" : "=m"(newregs->r13)); - __asm__ __volatile__("movq %%r14,%0" : "=m"(newregs->r14)); - __asm__ __volatile__("movq %%r15,%0" : "=m"(newregs->r15)); - __asm__ __volatile__("movl %%ss, %%eax;" :"=a"(newregs->ss)); - __asm__ __volatile__("movl %%cs, %%eax;" :"=a"(newregs->cs)); - __asm__ __volatile__("pushfq; popq %0" :"=m"(newregs->eflags)); - - newregs->rip = (unsigned long)current_text_addr(); - } -} - -NORET_TYPE void -relocate_kernel(unsigned long indirection_page, - unsigned long page_list, - unsigned long start_address) ATTRIB_NORET; - -#endif /* __ASSEMBLY__ */ - -#endif /* _X86_64_KEXEC_H */ diff --git a/include/asm-x86/kprobes.h b/include/asm-x86/kprobes.h index b7bbd25ba2a6..143476a3cb52 100644 --- a/include/asm-x86/kprobes.h +++ b/include/asm-x86/kprobes.h @@ -1,5 +1,98 @@ -#ifdef CONFIG_X86_32 -# include "kprobes_32.h" -#else -# include "kprobes_64.h" -#endif +#ifndef _ASM_KPROBES_H +#define _ASM_KPROBES_H +/* + * Kernel Probes (KProbes) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) IBM Corporation, 2002, 2004 + * + * See arch/x86/kernel/kprobes.c for x86 kprobes history. + */ +#include <linux/types.h> +#include <linux/ptrace.h> +#include <linux/percpu.h> + +#define __ARCH_WANT_KPROBES_INSN_SLOT + +struct pt_regs; +struct kprobe; + +typedef u8 kprobe_opcode_t; +#define BREAKPOINT_INSTRUCTION 0xcc +#define RELATIVEJUMP_INSTRUCTION 0xe9 +#define MAX_INSN_SIZE 16 +#define MAX_STACK_SIZE 64 +#define MIN_STACK_SIZE(ADDR) (((MAX_STACK_SIZE) < \ + (((unsigned long)current_thread_info()) + THREAD_SIZE \ + - (unsigned long)(ADDR))) \ + ? (MAX_STACK_SIZE) \ + : (((unsigned long)current_thread_info()) + THREAD_SIZE \ + - (unsigned long)(ADDR))) + +#define ARCH_SUPPORTS_KRETPROBES +#define flush_insn_slot(p) do { } while (0) + +extern const int kretprobe_blacklist_size; + +void arch_remove_kprobe(struct kprobe *p); +void kretprobe_trampoline(void); + +/* Architecture specific copy of original instruction*/ +struct arch_specific_insn { + /* copy of the original instruction */ + kprobe_opcode_t *insn; + /* + * boostable = -1: This instruction type is not boostable. + * boostable = 0: This instruction type is boostable. + * boostable = 1: This instruction has been boosted: we have + * added a relative jump after the instruction copy in insn, + * so no single-step and fixup are needed (unless there's + * a post_handler or break_handler). + */ + int boostable; +}; + +struct prev_kprobe { + struct kprobe *kp; + unsigned long status; + unsigned long old_flags; + unsigned long saved_flags; +}; + +/* per-cpu kprobe control block */ +struct kprobe_ctlblk { + unsigned long kprobe_status; + unsigned long kprobe_old_flags; + unsigned long kprobe_saved_flags; + unsigned long *jprobe_saved_sp; + struct pt_regs jprobe_saved_regs; + kprobe_opcode_t jprobes_stack[MAX_STACK_SIZE]; + struct prev_kprobe prev_kprobe; +}; + +/* trap3/1 are intr gates for kprobes. So, restore the status of IF, + * if necessary, before executing the original int3/1 (trap) handler. + */ +static inline void restore_interrupts(struct pt_regs *regs) +{ + if (regs->flags & X86_EFLAGS_IF) + local_irq_enable(); +} + +extern int kprobe_fault_handler(struct pt_regs *regs, int trapnr); +extern int kprobe_exceptions_notify(struct notifier_block *self, + unsigned long val, void *data); +#endif /* _ASM_KPROBES_H */ diff --git a/include/asm-x86/kprobes_32.h b/include/asm-x86/kprobes_32.h deleted file mode 100644 index 9fe8f3bddfd5..000000000000 --- a/include/asm-x86/kprobes_32.h +++ /dev/null @@ -1,94 +0,0 @@ -#ifndef _ASM_KPROBES_H -#define _ASM_KPROBES_H -/* - * Kernel Probes (KProbes) - * include/asm-i386/kprobes.h - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - * - * Copyright (C) IBM Corporation, 2002, 2004 - * - * 2002-Oct Created by Vamsi Krishna S <vamsi_krishna@in.ibm.com> Kernel - * Probes initial implementation ( includes suggestions from - * Rusty Russell). - */ -#include <linux/types.h> -#include <linux/ptrace.h> - -#define __ARCH_WANT_KPROBES_INSN_SLOT - -struct kprobe; -struct pt_regs; - -typedef u8 kprobe_opcode_t; -#define BREAKPOINT_INSTRUCTION 0xcc -#define RELATIVEJUMP_INSTRUCTION 0xe9 -#define MAX_INSN_SIZE 16 -#define MAX_STACK_SIZE 64 -#define MIN_STACK_SIZE(ADDR) (((MAX_STACK_SIZE) < \ - (((unsigned long)current_thread_info()) + THREAD_SIZE - (ADDR))) \ - ? (MAX_STACK_SIZE) \ - : (((unsigned long)current_thread_info()) + THREAD_SIZE - (ADDR))) - -#define ARCH_SUPPORTS_KRETPROBES -#define flush_insn_slot(p) do { } while (0) - -extern const int kretprobe_blacklist_size; - -void arch_remove_kprobe(struct kprobe *p); -void kretprobe_trampoline(void); - -/* Architecture specific copy of original instruction*/ -struct arch_specific_insn { - /* copy of the original instruction */ - kprobe_opcode_t *insn; - /* - * If this flag is not 0, this kprobe can be boost when its - * post_handler and break_handler is not set. - */ - int boostable; -}; - -struct prev_kprobe { - struct kprobe *kp; - unsigned long status; - unsigned long old_eflags; - unsigned long saved_eflags; -}; - -/* per-cpu kprobe control block */ -struct kprobe_ctlblk { - unsigned long kprobe_status; - unsigned long kprobe_old_eflags; - unsigned long kprobe_saved_eflags; - unsigned long *jprobe_saved_esp; - struct pt_regs jprobe_saved_regs; - kprobe_opcode_t jprobes_stack[MAX_STACK_SIZE]; - struct prev_kprobe prev_kprobe; -}; - -/* trap3/1 are intr gates for kprobes. So, restore the status of IF, - * if necessary, before executing the original int3/1 (trap) handler. - */ -static inline void restore_interrupts(struct pt_regs *regs) -{ - if (regs->eflags & IF_MASK) - local_irq_enable(); -} - -extern int kprobe_exceptions_notify(struct notifier_block *self, - unsigned long val, void *data); -extern int kprobe_fault_handler(struct pt_regs *regs, int trapnr); -#endif /* _ASM_KPROBES_H */ diff --git a/include/asm-x86/kprobes_64.h b/include/asm-x86/kprobes_64.h deleted file mode 100644 index 743d76218fc9..000000000000 --- a/include/asm-x86/kprobes_64.h +++ /dev/null @@ -1,90 +0,0 @@ -#ifndef _ASM_KPROBES_H -#define _ASM_KPROBES_H -/* - * Kernel Probes (KProbes) - * include/asm-x86_64/kprobes.h - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - * - * Copyright (C) IBM Corporation, 2002, 2004 - * - * 2004-Oct Prasanna S Panchamukhi <prasanna@in.ibm.com> and Jim Keniston - * kenistoj@us.ibm.com adopted from i386. - */ -#include <linux/types.h> -#include <linux/ptrace.h> -#include <linux/percpu.h> - -#define __ARCH_WANT_KPROBES_INSN_SLOT - -struct pt_regs; -struct kprobe; - -typedef u8 kprobe_opcode_t; -#define BREAKPOINT_INSTRUCTION 0xcc -#define MAX_INSN_SIZE 15 -#define MAX_STACK_SIZE 64 -#define MIN_STACK_SIZE(ADDR) (((MAX_STACK_SIZE) < \ - (((unsigned long)current_thread_info()) + THREAD_SIZE - (ADDR))) \ - ? (MAX_STACK_SIZE) \ - : (((unsigned long)current_thread_info()) + THREAD_SIZE - (ADDR))) - -#define ARCH_SUPPORTS_KRETPROBES -extern const int kretprobe_blacklist_size; - -void kretprobe_trampoline(void); -extern void arch_remove_kprobe(struct kprobe *p); -#define flush_insn_slot(p) do { } while (0) - -/* Architecture specific copy of original instruction*/ -struct arch_specific_insn { - /* copy of the original instruction */ - kprobe_opcode_t *insn; -}; - -struct prev_kprobe { - struct kprobe *kp; - unsigned long status; - unsigned long old_rflags; - unsigned long saved_rflags; -}; - -/* per-cpu kprobe control block */ -struct kprobe_ctlblk { - unsigned long kprobe_status; - unsigned long kprobe_old_rflags; - unsigned long kprobe_saved_rflags; - unsigned long *jprobe_saved_rsp; - struct pt_regs jprobe_saved_regs; - kprobe_opcode_t jprobes_stack[MAX_STACK_SIZE]; - struct prev_kprobe prev_kprobe; -}; - -/* trap3/1 are intr gates for kprobes. So, restore the status of IF, - * if necessary, before executing the original int3/1 (trap) handler. - */ -static inline void restore_interrupts(struct pt_regs *regs) -{ - if (regs->eflags & IF_MASK) - local_irq_enable(); -} - -extern int post_kprobe_handler(struct pt_regs *regs); -extern int kprobe_fault_handler(struct pt_regs *regs, int trapnr); -extern int kprobe_handler(struct pt_regs *regs); - -extern int kprobe_exceptions_notify(struct notifier_block *self, - unsigned long val, void *data); -#endif /* _ASM_KPROBES_H */ diff --git a/include/asm-x86/kvm.h b/include/asm-x86/kvm.h new file mode 100644 index 000000000000..7a71120426a3 --- /dev/null +++ b/include/asm-x86/kvm.h @@ -0,0 +1,191 @@ +#ifndef __LINUX_KVM_X86_H +#define __LINUX_KVM_X86_H + +/* + * KVM x86 specific structures and definitions + * + */ + +#include <asm/types.h> +#include <linux/ioctl.h> + +/* Architectural interrupt line count. */ +#define KVM_NR_INTERRUPTS 256 + +struct kvm_memory_alias { + __u32 slot; /* this has a different namespace than memory slots */ + __u32 flags; + __u64 guest_phys_addr; + __u64 memory_size; + __u64 target_phys_addr; +}; + +/* for KVM_GET_IRQCHIP and KVM_SET_IRQCHIP */ +struct kvm_pic_state { + __u8 last_irr; /* edge detection */ + __u8 irr; /* interrupt request register */ + __u8 imr; /* interrupt mask register */ + __u8 isr; /* interrupt service register */ + __u8 priority_add; /* highest irq priority */ + __u8 irq_base; + __u8 read_reg_select; + __u8 poll; + __u8 special_mask; + __u8 init_state; + __u8 auto_eoi; + __u8 rotate_on_auto_eoi; + __u8 special_fully_nested_mode; + __u8 init4; /* true if 4 byte init */ + __u8 elcr; /* PIIX edge/trigger selection */ + __u8 elcr_mask; +}; + +#define KVM_IOAPIC_NUM_PINS 24 +struct kvm_ioapic_state { + __u64 base_address; + __u32 ioregsel; + __u32 id; + __u32 irr; + __u32 pad; + union { + __u64 bits; + struct { + __u8 vector; + __u8 delivery_mode:3; + __u8 dest_mode:1; + __u8 delivery_status:1; + __u8 polarity:1; + __u8 remote_irr:1; + __u8 trig_mode:1; + __u8 mask:1; + __u8 reserve:7; + __u8 reserved[4]; + __u8 dest_id; + } fields; + } redirtbl[KVM_IOAPIC_NUM_PINS]; +}; + +#define KVM_IRQCHIP_PIC_MASTER 0 +#define KVM_IRQCHIP_PIC_SLAVE 1 +#define KVM_IRQCHIP_IOAPIC 2 + +/* for KVM_GET_REGS and KVM_SET_REGS */ +struct kvm_regs { + /* out (KVM_GET_REGS) / in (KVM_SET_REGS) */ + __u64 rax, rbx, rcx, rdx; + __u64 rsi, rdi, rsp, rbp; + __u64 r8, r9, r10, r11; + __u64 r12, r13, r14, r15; + __u64 rip, rflags; +}; + +/* for KVM_GET_LAPIC and KVM_SET_LAPIC */ +#define KVM_APIC_REG_SIZE 0x400 +struct kvm_lapic_state { + char regs[KVM_APIC_REG_SIZE]; +}; + +struct kvm_segment { + __u64 base; + __u32 limit; + __u16 selector; + __u8 type; + __u8 present, dpl, db, s, l, g, avl; + __u8 unusable; + __u8 padding; +}; + +struct kvm_dtable { + __u64 base; + __u16 limit; + __u16 padding[3]; +}; + + +/* for KVM_GET_SREGS and KVM_SET_SREGS */ +struct kvm_sregs { + /* out (KVM_GET_SREGS) / in (KVM_SET_SREGS) */ + struct kvm_segment cs, ds, es, fs, gs, ss; + struct kvm_segment tr, ldt; + struct kvm_dtable gdt, idt; + __u64 cr0, cr2, cr3, cr4, cr8; + __u64 efer; + __u64 apic_base; + __u64 interrupt_bitmap[(KVM_NR_INTERRUPTS + 63) / 64]; +}; + +/* for KVM_GET_FPU and KVM_SET_FPU */ +struct kvm_fpu { + __u8 fpr[8][16]; + __u16 fcw; + __u16 fsw; + __u8 ftwx; /* in fxsave format */ + __u8 pad1; + __u16 last_opcode; + __u64 last_ip; + __u64 last_dp; + __u8 xmm[16][16]; + __u32 mxcsr; + __u32 pad2; +}; + +struct kvm_msr_entry { + __u32 index; + __u32 reserved; + __u64 data; +}; + +/* for KVM_GET_MSRS and KVM_SET_MSRS */ +struct kvm_msrs { + __u32 nmsrs; /* number of msrs in entries */ + __u32 pad; + + struct kvm_msr_entry entries[0]; +}; + +/* for KVM_GET_MSR_INDEX_LIST */ +struct kvm_msr_list { + __u32 nmsrs; /* number of msrs in entries */ + __u32 indices[0]; +}; + + +struct kvm_cpuid_entry { + __u32 function; + __u32 eax; + __u32 ebx; + __u32 ecx; + __u32 edx; + __u32 padding; +}; + +/* for KVM_SET_CPUID */ +struct kvm_cpuid { + __u32 nent; + __u32 padding; + struct kvm_cpuid_entry entries[0]; +}; + +struct kvm_cpuid_entry2 { + __u32 function; + __u32 index; + __u32 flags; + __u32 eax; + __u32 ebx; + __u32 ecx; + __u32 edx; + __u32 padding[3]; +}; + +#define KVM_CPUID_FLAG_SIGNIFCANT_INDEX 1 +#define KVM_CPUID_FLAG_STATEFUL_FUNC 2 +#define KVM_CPUID_FLAG_STATE_READ_NEXT 4 + +/* for KVM_SET_CPUID2 */ +struct kvm_cpuid2 { + __u32 nent; + __u32 padding; + struct kvm_cpuid_entry2 entries[0]; +}; + +#endif diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h new file mode 100644 index 000000000000..4702b04b979a --- /dev/null +++ b/include/asm-x86/kvm_host.h @@ -0,0 +1,611 @@ +#/* + * Kernel-based Virtual Machine driver for Linux + * + * This header defines architecture specific interfaces, x86 version + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + */ + +#ifndef ASM_KVM_HOST_H +#define ASM_KVM_HOST_H + +#include <linux/types.h> +#include <linux/mm.h> + +#include <linux/kvm.h> +#include <linux/kvm_para.h> +#include <linux/kvm_types.h> + +#include <asm/desc.h> + +#define CR3_PAE_RESERVED_BITS ((X86_CR3_PWT | X86_CR3_PCD) - 1) +#define CR3_NONPAE_RESERVED_BITS ((PAGE_SIZE-1) & ~(X86_CR3_PWT | X86_CR3_PCD)) +#define CR3_L_MODE_RESERVED_BITS (CR3_NONPAE_RESERVED_BITS|0xFFFFFF0000000000ULL) + +#define KVM_GUEST_CR0_MASK \ + (X86_CR0_PG | X86_CR0_PE | X86_CR0_WP | X86_CR0_NE \ + | X86_CR0_NW | X86_CR0_CD) +#define KVM_VM_CR0_ALWAYS_ON \ + (X86_CR0_PG | X86_CR0_PE | X86_CR0_WP | X86_CR0_NE | X86_CR0_TS \ + | X86_CR0_MP) +#define KVM_GUEST_CR4_MASK \ + (X86_CR4_VME | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_PGE | X86_CR4_VMXE) +#define KVM_PMODE_VM_CR4_ALWAYS_ON (X86_CR4_PAE | X86_CR4_VMXE) +#define KVM_RMODE_VM_CR4_ALWAYS_ON (X86_CR4_VME | X86_CR4_PAE | X86_CR4_VMXE) + +#define INVALID_PAGE (~(hpa_t)0) +#define UNMAPPED_GVA (~(gpa_t)0) + +#define DE_VECTOR 0 +#define UD_VECTOR 6 +#define NM_VECTOR 7 +#define DF_VECTOR 8 +#define TS_VECTOR 10 +#define NP_VECTOR 11 +#define SS_VECTOR 12 +#define GP_VECTOR 13 +#define PF_VECTOR 14 + +#define SELECTOR_TI_MASK (1 << 2) +#define SELECTOR_RPL_MASK 0x03 + +#define IOPL_SHIFT 12 + +#define KVM_ALIAS_SLOTS 4 + +#define KVM_PERMILLE_MMU_PAGES 20 +#define KVM_MIN_ALLOC_MMU_PAGES 64 +#define KVM_NUM_MMU_PAGES 1024 +#define KVM_MIN_FREE_MMU_PAGES 5 +#define KVM_REFILL_PAGES 25 +#define KVM_MAX_CPUID_ENTRIES 40 + +extern spinlock_t kvm_lock; +extern struct list_head vm_list; + +struct kvm_vcpu; +struct kvm; + +enum { + VCPU_REGS_RAX = 0, + VCPU_REGS_RCX = 1, + VCPU_REGS_RDX = 2, + VCPU_REGS_RBX = 3, + VCPU_REGS_RSP = 4, + VCPU_REGS_RBP = 5, + VCPU_REGS_RSI = 6, + VCPU_REGS_RDI = 7, +#ifdef CONFIG_X86_64 + VCPU_REGS_R8 = 8, + VCPU_REGS_R9 = 9, + VCPU_REGS_R10 = 10, + VCPU_REGS_R11 = 11, + VCPU_REGS_R12 = 12, + VCPU_REGS_R13 = 13, + VCPU_REGS_R14 = 14, + VCPU_REGS_R15 = 15, +#endif + NR_VCPU_REGS +}; + +enum { + VCPU_SREG_CS, + VCPU_SREG_DS, + VCPU_SREG_ES, + VCPU_SREG_FS, + VCPU_SREG_GS, + VCPU_SREG_SS, + VCPU_SREG_TR, + VCPU_SREG_LDTR, +}; + +#include <asm/kvm_x86_emulate.h> + +#define KVM_NR_MEM_OBJS 40 + +/* + * We don't want allocation failures within the mmu code, so we preallocate + * enough memory for a single page fault in a cache. + */ +struct kvm_mmu_memory_cache { + int nobjs; + void *objects[KVM_NR_MEM_OBJS]; +}; + +#define NR_PTE_CHAIN_ENTRIES 5 + +struct kvm_pte_chain { + u64 *parent_ptes[NR_PTE_CHAIN_ENTRIES]; + struct hlist_node link; +}; + +/* + * kvm_mmu_page_role, below, is defined as: + * + * bits 0:3 - total guest paging levels (2-4, or zero for real mode) + * bits 4:7 - page table level for this shadow (1-4) + * bits 8:9 - page table quadrant for 2-level guests + * bit 16 - "metaphysical" - gfn is not a real page (huge page/real mode) + * bits 17:19 - common access permissions for all ptes in this shadow page + */ +union kvm_mmu_page_role { + unsigned word; + struct { + unsigned glevels : 4; + unsigned level : 4; + unsigned quadrant : 2; + unsigned pad_for_nice_hex_output : 6; + unsigned metaphysical : 1; + unsigned access : 3; + }; +}; + +struct kvm_mmu_page { + struct list_head link; + struct hlist_node hash_link; + + /* + * The following two entries are used to key the shadow page in the + * hash table. + */ + gfn_t gfn; + union kvm_mmu_page_role role; + + u64 *spt; + /* hold the gfn of each spte inside spt */ + gfn_t *gfns; + unsigned long slot_bitmap; /* One bit set per slot which has memory + * in this shadow page. + */ + int multimapped; /* More than one parent_pte? */ + int root_count; /* Currently serving as active root */ + union { + u64 *parent_pte; /* !multimapped */ + struct hlist_head parent_ptes; /* multimapped, kvm_pte_chain */ + }; +}; + +/* + * x86 supports 3 paging modes (4-level 64-bit, 3-level 64-bit, and 2-level + * 32-bit). The kvm_mmu structure abstracts the details of the current mmu + * mode. + */ +struct kvm_mmu { + void (*new_cr3)(struct kvm_vcpu *vcpu); + int (*page_fault)(struct kvm_vcpu *vcpu, gva_t gva, u32 err); + void (*free)(struct kvm_vcpu *vcpu); + gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t gva); + void (*prefetch_page)(struct kvm_vcpu *vcpu, + struct kvm_mmu_page *page); + hpa_t root_hpa; + int root_level; + int shadow_root_level; + + u64 *pae_root; +}; + +struct kvm_vcpu_arch { + u64 host_tsc; + int interrupt_window_open; + unsigned long irq_summary; /* bit vector: 1 per word in irq_pending */ + DECLARE_BITMAP(irq_pending, KVM_NR_INTERRUPTS); + unsigned long regs[NR_VCPU_REGS]; /* for rsp: vcpu_load_rsp_rip() */ + unsigned long rip; /* needs vcpu_load_rsp_rip() */ + + unsigned long cr0; + unsigned long cr2; + unsigned long cr3; + unsigned long cr4; + unsigned long cr8; + u64 pdptrs[4]; /* pae */ + u64 shadow_efer; + u64 apic_base; + struct kvm_lapic *apic; /* kernel irqchip context */ +#define VCPU_MP_STATE_RUNNABLE 0 +#define VCPU_MP_STATE_UNINITIALIZED 1 +#define VCPU_MP_STATE_INIT_RECEIVED 2 +#define VCPU_MP_STATE_SIPI_RECEIVED 3 +#define VCPU_MP_STATE_HALTED 4 + int mp_state; + int sipi_vector; + u64 ia32_misc_enable_msr; + bool tpr_access_reporting; + + struct kvm_mmu mmu; + + struct kvm_mmu_memory_cache mmu_pte_chain_cache; + struct kvm_mmu_memory_cache mmu_rmap_desc_cache; + struct kvm_mmu_memory_cache mmu_page_cache; + struct kvm_mmu_memory_cache mmu_page_header_cache; + + gfn_t last_pt_write_gfn; + int last_pt_write_count; + u64 *last_pte_updated; + + struct { + gfn_t gfn; /* presumed gfn during guest pte update */ + struct page *page; /* page corresponding to that gfn */ + } update_pte; + + struct i387_fxsave_struct host_fx_image; + struct i387_fxsave_struct guest_fx_image; + + gva_t mmio_fault_cr2; + struct kvm_pio_request pio; + void *pio_data; + + struct kvm_queued_exception { + bool pending; + bool has_error_code; + u8 nr; + u32 error_code; + } exception; + + struct { + int active; + u8 save_iopl; + struct kvm_save_segment { + u16 selector; + unsigned long base; + u32 limit; + u32 ar; + } tr, es, ds, fs, gs; + } rmode; + int halt_request; /* real mode on Intel only */ + + int cpuid_nent; + struct kvm_cpuid_entry2 cpuid_entries[KVM_MAX_CPUID_ENTRIES]; + /* emulate context */ + + struct x86_emulate_ctxt emulate_ctxt; +}; + +struct kvm_mem_alias { + gfn_t base_gfn; + unsigned long npages; + gfn_t target_gfn; +}; + +struct kvm_arch{ + int naliases; + struct kvm_mem_alias aliases[KVM_ALIAS_SLOTS]; + + unsigned int n_free_mmu_pages; + unsigned int n_requested_mmu_pages; + unsigned int n_alloc_mmu_pages; + struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES]; + /* + * Hash table of struct kvm_mmu_page. + */ + struct list_head active_mmu_pages; + struct kvm_pic *vpic; + struct kvm_ioapic *vioapic; + + int round_robin_prev_vcpu; + unsigned int tss_addr; + struct page *apic_access_page; +}; + +struct kvm_vm_stat { + u32 mmu_shadow_zapped; + u32 mmu_pte_write; + u32 mmu_pte_updated; + u32 mmu_pde_zapped; + u32 mmu_flooded; + u32 mmu_recycled; + u32 mmu_cache_miss; + u32 remote_tlb_flush; +}; + +struct kvm_vcpu_stat { + u32 pf_fixed; + u32 pf_guest; + u32 tlb_flush; + u32 invlpg; + + u32 exits; + u32 io_exits; + u32 mmio_exits; + u32 signal_exits; + u32 irq_window_exits; + u32 halt_exits; + u32 halt_wakeup; + u32 request_irq_exits; + u32 irq_exits; + u32 host_state_reload; + u32 efer_reload; + u32 fpu_reload; + u32 insn_emulation; + u32 insn_emulation_fail; +}; + +struct descriptor_table { + u16 limit; + unsigned long base; +} __attribute__((packed)); + +struct kvm_x86_ops { + int (*cpu_has_kvm_support)(void); /* __init */ + int (*disabled_by_bios)(void); /* __init */ + void (*hardware_enable)(void *dummy); /* __init */ + void (*hardware_disable)(void *dummy); + void (*check_processor_compatibility)(void *rtn); + int (*hardware_setup)(void); /* __init */ + void (*hardware_unsetup)(void); /* __exit */ + bool (*cpu_has_accelerated_tpr)(void); + + /* Create, but do not attach this VCPU */ + struct kvm_vcpu *(*vcpu_create)(struct kvm *kvm, unsigned id); + void (*vcpu_free)(struct kvm_vcpu *vcpu); + int (*vcpu_reset)(struct kvm_vcpu *vcpu); + + void (*prepare_guest_switch)(struct kvm_vcpu *vcpu); + void (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu); + void (*vcpu_put)(struct kvm_vcpu *vcpu); + void (*vcpu_decache)(struct kvm_vcpu *vcpu); + + int (*set_guest_debug)(struct kvm_vcpu *vcpu, + struct kvm_debug_guest *dbg); + void (*guest_debug_pre)(struct kvm_vcpu *vcpu); + int (*get_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata); + int (*set_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 data); + u64 (*get_segment_base)(struct kvm_vcpu *vcpu, int seg); + void (*get_segment)(struct kvm_vcpu *vcpu, + struct kvm_segment *var, int seg); + void (*set_segment)(struct kvm_vcpu *vcpu, + struct kvm_segment *var, int seg); + void (*get_cs_db_l_bits)(struct kvm_vcpu *vcpu, int *db, int *l); + void (*decache_cr4_guest_bits)(struct kvm_vcpu *vcpu); + void (*set_cr0)(struct kvm_vcpu *vcpu, unsigned long cr0); + void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3); + void (*set_cr4)(struct kvm_vcpu *vcpu, unsigned long cr4); + void (*set_efer)(struct kvm_vcpu *vcpu, u64 efer); + void (*get_idt)(struct kvm_vcpu *vcpu, struct descriptor_table *dt); + void (*set_idt)(struct kvm_vcpu *vcpu, struct descriptor_table *dt); + void (*get_gdt)(struct kvm_vcpu *vcpu, struct descriptor_table *dt); + void (*set_gdt)(struct kvm_vcpu *vcpu, struct descriptor_table *dt); + unsigned long (*get_dr)(struct kvm_vcpu *vcpu, int dr); + void (*set_dr)(struct kvm_vcpu *vcpu, int dr, unsigned long value, + int *exception); + void (*cache_regs)(struct kvm_vcpu *vcpu); + void (*decache_regs)(struct kvm_vcpu *vcpu); + unsigned long (*get_rflags)(struct kvm_vcpu *vcpu); + void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags); + + void (*tlb_flush)(struct kvm_vcpu *vcpu); + + void (*run)(struct kvm_vcpu *vcpu, struct kvm_run *run); + int (*handle_exit)(struct kvm_run *run, struct kvm_vcpu *vcpu); + void (*skip_emulated_instruction)(struct kvm_vcpu *vcpu); + void (*patch_hypercall)(struct kvm_vcpu *vcpu, + unsigned char *hypercall_addr); + int (*get_irq)(struct kvm_vcpu *vcpu); + void (*set_irq)(struct kvm_vcpu *vcpu, int vec); + void (*queue_exception)(struct kvm_vcpu *vcpu, unsigned nr, + bool has_error_code, u32 error_code); + bool (*exception_injected)(struct kvm_vcpu *vcpu); + void (*inject_pending_irq)(struct kvm_vcpu *vcpu); + void (*inject_pending_vectors)(struct kvm_vcpu *vcpu, + struct kvm_run *run); + + int (*set_tss_addr)(struct kvm *kvm, unsigned int addr); +}; + +extern struct kvm_x86_ops *kvm_x86_ops; + +int kvm_mmu_module_init(void); +void kvm_mmu_module_exit(void); + +void kvm_mmu_destroy(struct kvm_vcpu *vcpu); +int kvm_mmu_create(struct kvm_vcpu *vcpu); +int kvm_mmu_setup(struct kvm_vcpu *vcpu); +void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte); + +int kvm_mmu_reset_context(struct kvm_vcpu *vcpu); +void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot); +void kvm_mmu_zap_all(struct kvm *kvm); +unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm); +void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages); + +enum emulation_result { + EMULATE_DONE, /* no further processing */ + EMULATE_DO_MMIO, /* kvm_run filled with mmio request */ + EMULATE_FAIL, /* can't emulate this instruction */ +}; + +#define EMULTYPE_NO_DECODE (1 << 0) +#define EMULTYPE_TRAP_UD (1 << 1) +int emulate_instruction(struct kvm_vcpu *vcpu, struct kvm_run *run, + unsigned long cr2, u16 error_code, int emulation_type); +void kvm_report_emulation_failure(struct kvm_vcpu *cvpu, const char *context); +void realmode_lgdt(struct kvm_vcpu *vcpu, u16 size, unsigned long address); +void realmode_lidt(struct kvm_vcpu *vcpu, u16 size, unsigned long address); +void realmode_lmsw(struct kvm_vcpu *vcpu, unsigned long msw, + unsigned long *rflags); + +unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr); +void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long value, + unsigned long *rflags); +int kvm_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *data); +int kvm_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data); + +struct x86_emulate_ctxt; + +int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, + int size, unsigned port); +int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, + int size, unsigned long count, int down, + gva_t address, int rep, unsigned port); +void kvm_emulate_cpuid(struct kvm_vcpu *vcpu); +int kvm_emulate_halt(struct kvm_vcpu *vcpu); +int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address); +int emulate_clts(struct kvm_vcpu *vcpu); +int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, + unsigned long *dest); +int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, + unsigned long value); + +void set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0); +void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr0); +void set_cr4(struct kvm_vcpu *vcpu, unsigned long cr0); +void set_cr8(struct kvm_vcpu *vcpu, unsigned long cr0); +unsigned long get_cr8(struct kvm_vcpu *vcpu); +void lmsw(struct kvm_vcpu *vcpu, unsigned long msw); +void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l); + +int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata); +int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data); + +void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr); +void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code); +void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long cr2, + u32 error_code); + +void fx_init(struct kvm_vcpu *vcpu); + +int emulator_read_std(unsigned long addr, + void *val, + unsigned int bytes, + struct kvm_vcpu *vcpu); +int emulator_write_emulated(unsigned long addr, + const void *val, + unsigned int bytes, + struct kvm_vcpu *vcpu); + +unsigned long segment_base(u16 selector); + +void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu); +void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, + const u8 *new, int bytes); +int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva); +void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu); +int kvm_mmu_load(struct kvm_vcpu *vcpu); +void kvm_mmu_unload(struct kvm_vcpu *vcpu); + +int kvm_emulate_hypercall(struct kvm_vcpu *vcpu); + +int kvm_fix_hypercall(struct kvm_vcpu *vcpu); + +int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva, u32 error_code); + +int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3); +int complete_pio(struct kvm_vcpu *vcpu); + +static inline struct kvm_mmu_page *page_header(hpa_t shadow_page) +{ + struct page *page = pfn_to_page(shadow_page >> PAGE_SHIFT); + + return (struct kvm_mmu_page *)page_private(page); +} + +static inline u16 read_fs(void) +{ + u16 seg; + asm("mov %%fs, %0" : "=g"(seg)); + return seg; +} + +static inline u16 read_gs(void) +{ + u16 seg; + asm("mov %%gs, %0" : "=g"(seg)); + return seg; +} + +static inline u16 read_ldt(void) +{ + u16 ldt; + asm("sldt %0" : "=g"(ldt)); + return ldt; +} + +static inline void load_fs(u16 sel) +{ + asm("mov %0, %%fs" : : "rm"(sel)); +} + +static inline void load_gs(u16 sel) +{ + asm("mov %0, %%gs" : : "rm"(sel)); +} + +#ifndef load_ldt +static inline void load_ldt(u16 sel) +{ + asm("lldt %0" : : "rm"(sel)); +} +#endif + +static inline void get_idt(struct descriptor_table *table) +{ + asm("sidt %0" : "=m"(*table)); +} + +static inline void get_gdt(struct descriptor_table *table) +{ + asm("sgdt %0" : "=m"(*table)); +} + +static inline unsigned long read_tr_base(void) +{ + u16 tr; + asm("str %0" : "=g"(tr)); + return segment_base(tr); +} + +#ifdef CONFIG_X86_64 +static inline unsigned long read_msr(unsigned long msr) +{ + u64 value; + + rdmsrl(msr, value); + return value; +} +#endif + +static inline void fx_save(struct i387_fxsave_struct *image) +{ + asm("fxsave (%0)":: "r" (image)); +} + +static inline void fx_restore(struct i387_fxsave_struct *image) +{ + asm("fxrstor (%0)":: "r" (image)); +} + +static inline void fpu_init(void) +{ + asm("finit"); +} + +static inline u32 get_rdx_init_val(void) +{ + return 0x600; /* P6 family */ +} + +static inline void kvm_inject_gp(struct kvm_vcpu *vcpu, u32 error_code) +{ + kvm_queue_exception_e(vcpu, GP_VECTOR, error_code); +} + +#define ASM_VMX_VMCLEAR_RAX ".byte 0x66, 0x0f, 0xc7, 0x30" +#define ASM_VMX_VMLAUNCH ".byte 0x0f, 0x01, 0xc2" +#define ASM_VMX_VMRESUME ".byte 0x0f, 0x01, 0xc3" +#define ASM_VMX_VMPTRLD_RAX ".byte 0x0f, 0xc7, 0x30" +#define ASM_VMX_VMREAD_RDX_RAX ".byte 0x0f, 0x78, 0xd0" +#define ASM_VMX_VMWRITE_RAX_RDX ".byte 0x0f, 0x79, 0xd0" +#define ASM_VMX_VMWRITE_RSP_RDX ".byte 0x0f, 0x79, 0xd4" +#define ASM_VMX_VMXOFF ".byte 0x0f, 0x01, 0xc4" +#define ASM_VMX_VMXON_RAX ".byte 0xf3, 0x0f, 0xc7, 0x30" + +#define MSR_IA32_TIME_STAMP_COUNTER 0x010 + +#define TSS_IOPB_BASE_OFFSET 0x66 +#define TSS_BASE_SIZE 0x68 +#define TSS_IOPB_SIZE (65536 / 8) +#define TSS_REDIRECTION_SIZE (256 / 8) +#define RMODE_TSS_SIZE (TSS_BASE_SIZE + TSS_REDIRECTION_SIZE + TSS_IOPB_SIZE + 1) + +#endif diff --git a/include/asm-x86/kvm_para.h b/include/asm-x86/kvm_para.h new file mode 100644 index 000000000000..c6f3fd8d8c53 --- /dev/null +++ b/include/asm-x86/kvm_para.h @@ -0,0 +1,105 @@ +#ifndef __X86_KVM_PARA_H +#define __X86_KVM_PARA_H + +/* This CPUID returns the signature 'KVMKVMKVM' in ebx, ecx, and edx. It + * should be used to determine that a VM is running under KVM. + */ +#define KVM_CPUID_SIGNATURE 0x40000000 + +/* This CPUID returns a feature bitmap in eax. Before enabling a particular + * paravirtualization, the appropriate feature bit should be checked. + */ +#define KVM_CPUID_FEATURES 0x40000001 + +#ifdef __KERNEL__ +#include <asm/processor.h> + +/* This instruction is vmcall. On non-VT architectures, it will generate a + * trap that we will then rewrite to the appropriate instruction. + */ +#define KVM_HYPERCALL ".byte 0x0f,0x01,0xc1" + +/* For KVM hypercalls, a three-byte sequence of either the vmrun or the vmmrun + * instruction. The hypervisor may replace it with something else but only the + * instructions are guaranteed to be supported. + * + * Up to four arguments may be passed in rbx, rcx, rdx, and rsi respectively. + * The hypercall number should be placed in rax and the return value will be + * placed in rax. No other registers will be clobbered unless explicited + * noted by the particular hypercall. + */ + +static inline long kvm_hypercall0(unsigned int nr) +{ + long ret; + asm volatile(KVM_HYPERCALL + : "=a"(ret) + : "a"(nr)); + return ret; +} + +static inline long kvm_hypercall1(unsigned int nr, unsigned long p1) +{ + long ret; + asm volatile(KVM_HYPERCALL + : "=a"(ret) + : "a"(nr), "b"(p1)); + return ret; +} + +static inline long kvm_hypercall2(unsigned int nr, unsigned long p1, + unsigned long p2) +{ + long ret; + asm volatile(KVM_HYPERCALL + : "=a"(ret) + : "a"(nr), "b"(p1), "c"(p2)); + return ret; +} + +static inline long kvm_hypercall3(unsigned int nr, unsigned long p1, + unsigned long p2, unsigned long p3) +{ + long ret; + asm volatile(KVM_HYPERCALL + : "=a"(ret) + : "a"(nr), "b"(p1), "c"(p2), "d"(p3)); + return ret; +} + +static inline long kvm_hypercall4(unsigned int nr, unsigned long p1, + unsigned long p2, unsigned long p3, + unsigned long p4) +{ + long ret; + asm volatile(KVM_HYPERCALL + : "=a"(ret) + : "a"(nr), "b"(p1), "c"(p2), "d"(p3), "S"(p4)); + return ret; +} + +static inline int kvm_para_available(void) +{ + unsigned int eax, ebx, ecx, edx; + char signature[13]; + + cpuid(KVM_CPUID_SIGNATURE, &eax, &ebx, &ecx, &edx); + memcpy(signature + 0, &ebx, 4); + memcpy(signature + 4, &ecx, 4); + memcpy(signature + 8, &edx, 4); + signature[12] = 0; + + if (strcmp(signature, "KVMKVMKVM") == 0) + return 1; + + return 0; +} + +static inline unsigned int kvm_arch_para_features(void) +{ + return cpuid_eax(KVM_CPUID_FEATURES); +} + +#endif + +#endif diff --git a/include/asm-x86/kvm_x86_emulate.h b/include/asm-x86/kvm_x86_emulate.h new file mode 100644 index 000000000000..7db91b9bdcd4 --- /dev/null +++ b/include/asm-x86/kvm_x86_emulate.h @@ -0,0 +1,186 @@ +/****************************************************************************** + * x86_emulate.h + * + * Generic x86 (32-bit and 64-bit) instruction decoder and emulator. + * + * Copyright (c) 2005 Keir Fraser + * + * From: xen-unstable 10676:af9809f51f81a3c43f276f00c81a52ef558afda4 + */ + +#ifndef __X86_EMULATE_H__ +#define __X86_EMULATE_H__ + +struct x86_emulate_ctxt; + +/* + * x86_emulate_ops: + * + * These operations represent the instruction emulator's interface to memory. + * There are two categories of operation: those that act on ordinary memory + * regions (*_std), and those that act on memory regions known to require + * special treatment or emulation (*_emulated). + * + * The emulator assumes that an instruction accesses only one 'emulated memory' + * location, that this location is the given linear faulting address (cr2), and + * that this is one of the instruction's data operands. Instruction fetches and + * stack operations are assumed never to access emulated memory. The emulator + * automatically deduces which operand of a string-move operation is accessing + * emulated memory, and assumes that the other operand accesses normal memory. + * + * NOTES: + * 1. The emulator isn't very smart about emulated vs. standard memory. + * 'Emulated memory' access addresses should be checked for sanity. + * 'Normal memory' accesses may fault, and the caller must arrange to + * detect and handle reentrancy into the emulator via recursive faults. + * Accesses may be unaligned and may cross page boundaries. + * 2. If the access fails (cannot emulate, or a standard access faults) then + * it is up to the memop to propagate the fault to the guest VM via + * some out-of-band mechanism, unknown to the emulator. The memop signals + * failure by returning X86EMUL_PROPAGATE_FAULT to the emulator, which will + * then immediately bail. + * 3. Valid access sizes are 1, 2, 4 and 8 bytes. On x86/32 systems only + * cmpxchg8b_emulated need support 8-byte accesses. + * 4. The emulator cannot handle 64-bit mode emulation on an x86/32 system. + */ +/* Access completed successfully: continue emulation as normal. */ +#define X86EMUL_CONTINUE 0 +/* Access is unhandleable: bail from emulation and return error to caller. */ +#define X86EMUL_UNHANDLEABLE 1 +/* Terminate emulation but return success to the caller. */ +#define X86EMUL_PROPAGATE_FAULT 2 /* propagate a generated fault to guest */ +#define X86EMUL_RETRY_INSTR 2 /* retry the instruction for some reason */ +#define X86EMUL_CMPXCHG_FAILED 2 /* cmpxchg did not see expected value */ +struct x86_emulate_ops { + /* + * read_std: Read bytes of standard (non-emulated/special) memory. + * Used for instruction fetch, stack operations, and others. + * @addr: [IN ] Linear address from which to read. + * @val: [OUT] Value read from memory, zero-extended to 'u_long'. + * @bytes: [IN ] Number of bytes to read from memory. + */ + int (*read_std)(unsigned long addr, void *val, + unsigned int bytes, struct kvm_vcpu *vcpu); + + /* + * read_emulated: Read bytes from emulated/special memory area. + * @addr: [IN ] Linear address from which to read. + * @val: [OUT] Value read from memory, zero-extended to 'u_long'. + * @bytes: [IN ] Number of bytes to read from memory. + */ + int (*read_emulated) (unsigned long addr, + void *val, + unsigned int bytes, + struct kvm_vcpu *vcpu); + + /* + * write_emulated: Read bytes from emulated/special memory area. + * @addr: [IN ] Linear address to which to write. + * @val: [IN ] Value to write to memory (low-order bytes used as + * required). + * @bytes: [IN ] Number of bytes to write to memory. + */ + int (*write_emulated) (unsigned long addr, + const void *val, + unsigned int bytes, + struct kvm_vcpu *vcpu); + + /* + * cmpxchg_emulated: Emulate an atomic (LOCKed) CMPXCHG operation on an + * emulated/special memory area. + * @addr: [IN ] Linear address to access. + * @old: [IN ] Value expected to be current at @addr. + * @new: [IN ] Value to write to @addr. + * @bytes: [IN ] Number of bytes to access using CMPXCHG. + */ + int (*cmpxchg_emulated) (unsigned long addr, + const void *old, + const void *new, + unsigned int bytes, + struct kvm_vcpu *vcpu); + +}; + +/* Type, address-of, and value of an instruction's operand. */ +struct operand { + enum { OP_REG, OP_MEM, OP_IMM, OP_NONE } type; + unsigned int bytes; + unsigned long val, orig_val, *ptr; +}; + +struct fetch_cache { + u8 data[15]; + unsigned long start; + unsigned long end; +}; + +struct decode_cache { + u8 twobyte; + u8 b; + u8 lock_prefix; + u8 rep_prefix; + u8 op_bytes; + u8 ad_bytes; + u8 rex_prefix; + struct operand src; + struct operand dst; + unsigned long *override_base; + unsigned int d; + unsigned long regs[NR_VCPU_REGS]; + unsigned long eip; + /* modrm */ + u8 modrm; + u8 modrm_mod; + u8 modrm_reg; + u8 modrm_rm; + u8 use_modrm_ea; + unsigned long modrm_ea; + unsigned long modrm_val; + struct fetch_cache fetch; +}; + +struct x86_emulate_ctxt { + /* Register state before/after emulation. */ + struct kvm_vcpu *vcpu; + + /* Linear faulting address (if emulating a page-faulting instruction). */ + unsigned long eflags; + + /* Emulated execution mode, represented by an X86EMUL_MODE value. */ + int mode; + + unsigned long cs_base; + unsigned long ds_base; + unsigned long es_base; + unsigned long ss_base; + unsigned long gs_base; + unsigned long fs_base; + + /* decode cache */ + + struct decode_cache decode; +}; + +/* Repeat String Operation Prefix */ +#define REPE_PREFIX 1 +#define REPNE_PREFIX 2 + +/* Execution mode, passed to the emulator. */ +#define X86EMUL_MODE_REAL 0 /* Real mode. */ +#define X86EMUL_MODE_PROT16 2 /* 16-bit protected mode. */ +#define X86EMUL_MODE_PROT32 4 /* 32-bit protected mode. */ +#define X86EMUL_MODE_PROT64 8 /* 64-bit (long) mode. */ + +/* Host execution mode. */ +#if defined(__i386__) +#define X86EMUL_MODE_HOST X86EMUL_MODE_PROT32 +#elif defined(CONFIG_X86_64) +#define X86EMUL_MODE_HOST X86EMUL_MODE_PROT64 +#endif + +int x86_decode_insn(struct x86_emulate_ctxt *ctxt, + struct x86_emulate_ops *ops); +int x86_emulate_insn(struct x86_emulate_ctxt *ctxt, + struct x86_emulate_ops *ops); + +#endif /* __X86_EMULATE_H__ */ diff --git a/include/asm-x86/lguest.h b/include/asm-x86/lguest.h index b9d003b8005e..4d9367b72976 100644 --- a/include/asm-x86/lguest.h +++ b/include/asm-x86/lguest.h @@ -44,14 +44,14 @@ struct lguest_ro_state { /* Host information we need to restore when we switch back. */ u32 host_cr3; - struct Xgt_desc_struct host_idt_desc; - struct Xgt_desc_struct host_gdt_desc; + struct desc_ptr host_idt_desc; + struct desc_ptr host_gdt_desc; u32 host_sp; /* Fields which are used when guest is running. */ - struct Xgt_desc_struct guest_idt_desc; - struct Xgt_desc_struct guest_gdt_desc; - struct i386_hw_tss guest_tss; + struct desc_ptr guest_idt_desc; + struct desc_ptr guest_gdt_desc; + struct x86_hw_tss guest_tss; struct desc_struct guest_idt[IDT_ENTRIES]; struct desc_struct guest_gdt[GDT_ENTRIES]; }; @@ -78,8 +78,8 @@ static inline void lguest_set_ts(void) } /* Full 4G segment descriptors, suitable for CS and DS. */ -#define FULL_EXEC_SEGMENT ((struct desc_struct){0x0000ffff, 0x00cf9b00}) -#define FULL_SEGMENT ((struct desc_struct){0x0000ffff, 0x00cf9300}) +#define FULL_EXEC_SEGMENT ((struct desc_struct){ { {0x0000ffff, 0x00cf9b00} } }) +#define FULL_SEGMENT ((struct desc_struct){ { {0x0000ffff, 0x00cf9300} } }) #endif /* __ASSEMBLY__ */ diff --git a/include/asm-x86/linkage.h b/include/asm-x86/linkage.h index 94b257fa8701..31739c7d66a9 100644 --- a/include/asm-x86/linkage.h +++ b/include/asm-x86/linkage.h @@ -1,5 +1,25 @@ +#ifndef __ASM_LINKAGE_H +#define __ASM_LINKAGE_H + +#ifdef CONFIG_X86_64 +#define __ALIGN .p2align 4,,15 +#define __ALIGN_STR ".p2align 4,,15" +#endif + #ifdef CONFIG_X86_32 -# include "linkage_32.h" -#else -# include "linkage_64.h" +#define asmlinkage CPP_ASMLINKAGE __attribute__((regparm(0))) +#define prevent_tail_call(ret) __asm__ ("" : "=r" (ret) : "0" (ret)) +/* + * For 32-bit UML - mark functions implemented in assembly that use + * regparm input parameters: + */ +#define asmregparm __attribute__((regparm(3))) +#endif + +#ifdef CONFIG_X86_ALIGNMENT_16 +#define __ALIGN .align 16,0x90 +#define __ALIGN_STR ".align 16,0x90" +#endif + #endif + diff --git a/include/asm-x86/linkage_32.h b/include/asm-x86/linkage_32.h deleted file mode 100644 index f4a6ebac0247..000000000000 --- a/include/asm-x86/linkage_32.h +++ /dev/null @@ -1,15 +0,0 @@ -#ifndef __ASM_LINKAGE_H -#define __ASM_LINKAGE_H - -#define asmlinkage CPP_ASMLINKAGE __attribute__((regparm(0))) -#define FASTCALL(x) x __attribute__((regparm(3))) -#define fastcall __attribute__((regparm(3))) - -#define prevent_tail_call(ret) __asm__ ("" : "=r" (ret) : "0" (ret)) - -#ifdef CONFIG_X86_ALIGNMENT_16 -#define __ALIGN .align 16,0x90 -#define __ALIGN_STR ".align 16,0x90" -#endif - -#endif diff --git a/include/asm-x86/linkage_64.h b/include/asm-x86/linkage_64.h deleted file mode 100644 index b5f39d0189ce..000000000000 --- a/include/asm-x86/linkage_64.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef __ASM_LINKAGE_H -#define __ASM_LINKAGE_H - -#define __ALIGN .p2align 4,,15 - -#endif diff --git a/include/asm-x86/local.h b/include/asm-x86/local.h index c7a1b1c66c96..f852c62b3319 100644 --- a/include/asm-x86/local.h +++ b/include/asm-x86/local.h @@ -1,5 +1,240 @@ -#ifdef CONFIG_X86_32 -# include "local_32.h" -#else -# include "local_64.h" +#ifndef _ARCH_LOCAL_H +#define _ARCH_LOCAL_H + +#include <linux/percpu.h> + +#include <asm/system.h> +#include <asm/atomic.h> +#include <asm/asm.h> + +typedef struct { + atomic_long_t a; +} local_t; + +#define LOCAL_INIT(i) { ATOMIC_LONG_INIT(i) } + +#define local_read(l) atomic_long_read(&(l)->a) +#define local_set(l, i) atomic_long_set(&(l)->a, (i)) + +static inline void local_inc(local_t *l) +{ + __asm__ __volatile__( + _ASM_INC "%0" + :"+m" (l->a.counter)); +} + +static inline void local_dec(local_t *l) +{ + __asm__ __volatile__( + _ASM_DEC "%0" + :"+m" (l->a.counter)); +} + +static inline void local_add(long i, local_t *l) +{ + __asm__ __volatile__( + _ASM_ADD "%1,%0" + :"+m" (l->a.counter) + :"ir" (i)); +} + +static inline void local_sub(long i, local_t *l) +{ + __asm__ __volatile__( + _ASM_SUB "%1,%0" + :"+m" (l->a.counter) + :"ir" (i)); +} + +/** + * local_sub_and_test - subtract value from variable and test result + * @i: integer value to subtract + * @l: pointer to type local_t + * + * Atomically subtracts @i from @l and returns + * true if the result is zero, or false for all + * other cases. + */ +static inline int local_sub_and_test(long i, local_t *l) +{ + unsigned char c; + + __asm__ __volatile__( + _ASM_SUB "%2,%0; sete %1" + :"+m" (l->a.counter), "=qm" (c) + :"ir" (i) : "memory"); + return c; +} + +/** + * local_dec_and_test - decrement and test + * @l: pointer to type local_t + * + * Atomically decrements @l by 1 and + * returns true if the result is 0, or false for all other + * cases. + */ +static inline int local_dec_and_test(local_t *l) +{ + unsigned char c; + + __asm__ __volatile__( + _ASM_DEC "%0; sete %1" + :"+m" (l->a.counter), "=qm" (c) + : : "memory"); + return c != 0; +} + +/** + * local_inc_and_test - increment and test + * @l: pointer to type local_t + * + * Atomically increments @l by 1 + * and returns true if the result is zero, or false for all + * other cases. + */ +static inline int local_inc_and_test(local_t *l) +{ + unsigned char c; + + __asm__ __volatile__( + _ASM_INC "%0; sete %1" + :"+m" (l->a.counter), "=qm" (c) + : : "memory"); + return c != 0; +} + +/** + * local_add_negative - add and test if negative + * @i: integer value to add + * @l: pointer to type local_t + * + * Atomically adds @i to @l and returns true + * if the result is negative, or false when + * result is greater than or equal to zero. + */ +static inline int local_add_negative(long i, local_t *l) +{ + unsigned char c; + + __asm__ __volatile__( + _ASM_ADD "%2,%0; sets %1" + :"+m" (l->a.counter), "=qm" (c) + :"ir" (i) : "memory"); + return c; +} + +/** + * local_add_return - add and return + * @i: integer value to add + * @l: pointer to type local_t + * + * Atomically adds @i to @l and returns @i + @l + */ +static inline long local_add_return(long i, local_t *l) +{ + long __i; +#ifdef CONFIG_M386 + unsigned long flags; + if (unlikely(boot_cpu_data.x86 <= 3)) + goto no_xadd; #endif + /* Modern 486+ processor */ + __i = i; + __asm__ __volatile__( + _ASM_XADD "%0, %1;" + :"+r" (i), "+m" (l->a.counter) + : : "memory"); + return i + __i; + +#ifdef CONFIG_M386 +no_xadd: /* Legacy 386 processor */ + local_irq_save(flags); + __i = local_read(l); + local_set(l, i + __i); + local_irq_restore(flags); + return i + __i; +#endif +} + +static inline long local_sub_return(long i, local_t *l) +{ + return local_add_return(-i, l); +} + +#define local_inc_return(l) (local_add_return(1, l)) +#define local_dec_return(l) (local_sub_return(1, l)) + +#define local_cmpxchg(l, o, n) \ + (cmpxchg_local(&((l)->a.counter), (o), (n))) +/* Always has a lock prefix */ +#define local_xchg(l, n) (xchg(&((l)->a.counter), (n))) + +/** + * local_add_unless - add unless the number is a given value + * @l: pointer of type local_t + * @a: the amount to add to l... + * @u: ...unless l is equal to u. + * + * Atomically adds @a to @l, so long as it was not @u. + * Returns non-zero if @l was not @u, and zero otherwise. + */ +#define local_add_unless(l, a, u) \ +({ \ + long c, old; \ + c = local_read(l); \ + for (;;) { \ + if (unlikely(c == (u))) \ + break; \ + old = local_cmpxchg((l), c, c + (a)); \ + if (likely(old == c)) \ + break; \ + c = old; \ + } \ + c != (u); \ +}) +#define local_inc_not_zero(l) local_add_unless((l), 1, 0) + +/* On x86_32, these are no better than the atomic variants. + * On x86-64 these are better than the atomic variants on SMP kernels + * because they dont use a lock prefix. + */ +#define __local_inc(l) local_inc(l) +#define __local_dec(l) local_dec(l) +#define __local_add(i, l) local_add((i), (l)) +#define __local_sub(i, l) local_sub((i), (l)) + +/* Use these for per-cpu local_t variables: on some archs they are + * much more efficient than these naive implementations. Note they take + * a variable, not an address. + * + * X86_64: This could be done better if we moved the per cpu data directly + * after GS. + */ + +/* Need to disable preemption for the cpu local counters otherwise we could + still access a variable of a previous CPU in a non atomic way. */ +#define cpu_local_wrap_v(l) \ + ({ local_t res__; \ + preempt_disable(); \ + res__ = (l); \ + preempt_enable(); \ + res__; }) +#define cpu_local_wrap(l) \ + ({ preempt_disable(); \ + l; \ + preempt_enable(); }) \ + +#define cpu_local_read(l) cpu_local_wrap_v(local_read(&__get_cpu_var(l))) +#define cpu_local_set(l, i) cpu_local_wrap(local_set(&__get_cpu_var(l), (i))) +#define cpu_local_inc(l) cpu_local_wrap(local_inc(&__get_cpu_var(l))) +#define cpu_local_dec(l) cpu_local_wrap(local_dec(&__get_cpu_var(l))) +#define cpu_local_add(i, l) cpu_local_wrap(local_add((i), &__get_cpu_var(l))) +#define cpu_local_sub(i, l) cpu_local_wrap(local_sub((i), &__get_cpu_var(l))) + +#define __cpu_local_inc(l) cpu_local_inc(l) +#define __cpu_local_dec(l) cpu_local_dec(l) +#define __cpu_local_add(i, l) cpu_local_add((i), (l)) +#define __cpu_local_sub(i, l) cpu_local_sub((i), (l)) + +#endif /* _ARCH_LOCAL_H */ diff --git a/include/asm-x86/local_32.h b/include/asm-x86/local_32.h deleted file mode 100644 index 6e85975b9ed2..000000000000 --- a/include/asm-x86/local_32.h +++ /dev/null @@ -1,233 +0,0 @@ -#ifndef _ARCH_I386_LOCAL_H -#define _ARCH_I386_LOCAL_H - -#include <linux/percpu.h> -#include <asm/system.h> -#include <asm/atomic.h> - -typedef struct -{ - atomic_long_t a; -} local_t; - -#define LOCAL_INIT(i) { ATOMIC_LONG_INIT(i) } - -#define local_read(l) atomic_long_read(&(l)->a) -#define local_set(l,i) atomic_long_set(&(l)->a, (i)) - -static __inline__ void local_inc(local_t *l) -{ - __asm__ __volatile__( - "incl %0" - :"+m" (l->a.counter)); -} - -static __inline__ void local_dec(local_t *l) -{ - __asm__ __volatile__( - "decl %0" - :"+m" (l->a.counter)); -} - -static __inline__ void local_add(long i, local_t *l) -{ - __asm__ __volatile__( - "addl %1,%0" - :"+m" (l->a.counter) - :"ir" (i)); -} - -static __inline__ void local_sub(long i, local_t *l) -{ - __asm__ __volatile__( - "subl %1,%0" - :"+m" (l->a.counter) - :"ir" (i)); -} - -/** - * local_sub_and_test - subtract value from variable and test result - * @i: integer value to subtract - * @l: pointer of type local_t - * - * Atomically subtracts @i from @l and returns - * true if the result is zero, or false for all - * other cases. - */ -static __inline__ int local_sub_and_test(long i, local_t *l) -{ - unsigned char c; - - __asm__ __volatile__( - "subl %2,%0; sete %1" - :"+m" (l->a.counter), "=qm" (c) - :"ir" (i) : "memory"); - return c; -} - -/** - * local_dec_and_test - decrement and test - * @l: pointer of type local_t - * - * Atomically decrements @l by 1 and - * returns true if the result is 0, or false for all other - * cases. - */ -static __inline__ int local_dec_and_test(local_t *l) -{ - unsigned char c; - - __asm__ __volatile__( - "decl %0; sete %1" - :"+m" (l->a.counter), "=qm" (c) - : : "memory"); - return c != 0; -} - -/** - * local_inc_and_test - increment and test - * @l: pointer of type local_t - * - * Atomically increments @l by 1 - * and returns true if the result is zero, or false for all - * other cases. - */ -static __inline__ int local_inc_and_test(local_t *l) -{ - unsigned char c; - - __asm__ __volatile__( - "incl %0; sete %1" - :"+m" (l->a.counter), "=qm" (c) - : : "memory"); - return c != 0; -} - -/** - * local_add_negative - add and test if negative - * @l: pointer of type local_t - * @i: integer value to add - * - * Atomically adds @i to @l and returns true - * if the result is negative, or false when - * result is greater than or equal to zero. - */ -static __inline__ int local_add_negative(long i, local_t *l) -{ - unsigned char c; - - __asm__ __volatile__( - "addl %2,%0; sets %1" - :"+m" (l->a.counter), "=qm" (c) - :"ir" (i) : "memory"); - return c; -} - -/** - * local_add_return - add and return - * @l: pointer of type local_t - * @i: integer value to add - * - * Atomically adds @i to @l and returns @i + @l - */ -static __inline__ long local_add_return(long i, local_t *l) -{ - long __i; -#ifdef CONFIG_M386 - unsigned long flags; - if(unlikely(boot_cpu_data.x86 <= 3)) - goto no_xadd; -#endif - /* Modern 486+ processor */ - __i = i; - __asm__ __volatile__( - "xaddl %0, %1;" - :"+r" (i), "+m" (l->a.counter) - : : "memory"); - return i + __i; - -#ifdef CONFIG_M386 -no_xadd: /* Legacy 386 processor */ - local_irq_save(flags); - __i = local_read(l); - local_set(l, i + __i); - local_irq_restore(flags); - return i + __i; -#endif -} - -static __inline__ long local_sub_return(long i, local_t *l) -{ - return local_add_return(-i,l); -} - -#define local_inc_return(l) (local_add_return(1,l)) -#define local_dec_return(l) (local_sub_return(1,l)) - -#define local_cmpxchg(l, o, n) \ - (cmpxchg_local(&((l)->a.counter), (o), (n))) -/* Always has a lock prefix */ -#define local_xchg(l, n) (xchg(&((l)->a.counter), (n))) - -/** - * local_add_unless - add unless the number is a given value - * @l: pointer of type local_t - * @a: the amount to add to l... - * @u: ...unless l is equal to u. - * - * Atomically adds @a to @l, so long as it was not @u. - * Returns non-zero if @l was not @u, and zero otherwise. - */ -#define local_add_unless(l, a, u) \ -({ \ - long c, old; \ - c = local_read(l); \ - for (;;) { \ - if (unlikely(c == (u))) \ - break; \ - old = local_cmpxchg((l), c, c + (a)); \ - if (likely(old == c)) \ - break; \ - c = old; \ - } \ - c != (u); \ -}) -#define local_inc_not_zero(l) local_add_unless((l), 1, 0) - -/* On x86, these are no better than the atomic variants. */ -#define __local_inc(l) local_inc(l) -#define __local_dec(l) local_dec(l) -#define __local_add(i,l) local_add((i),(l)) -#define __local_sub(i,l) local_sub((i),(l)) - -/* Use these for per-cpu local_t variables: on some archs they are - * much more efficient than these naive implementations. Note they take - * a variable, not an address. - */ - -/* Need to disable preemption for the cpu local counters otherwise we could - still access a variable of a previous CPU in a non atomic way. */ -#define cpu_local_wrap_v(l) \ - ({ local_t res__; \ - preempt_disable(); \ - res__ = (l); \ - preempt_enable(); \ - res__; }) -#define cpu_local_wrap(l) \ - ({ preempt_disable(); \ - l; \ - preempt_enable(); }) \ - -#define cpu_local_read(l) cpu_local_wrap_v(local_read(&__get_cpu_var(l))) -#define cpu_local_set(l, i) cpu_local_wrap(local_set(&__get_cpu_var(l), (i))) -#define cpu_local_inc(l) cpu_local_wrap(local_inc(&__get_cpu_var(l))) -#define cpu_local_dec(l) cpu_local_wrap(local_dec(&__get_cpu_var(l))) -#define cpu_local_add(i, l) cpu_local_wrap(local_add((i), &__get_cpu_var(l))) -#define cpu_local_sub(i, l) cpu_local_wrap(local_sub((i), &__get_cpu_var(l))) - -#define __cpu_local_inc(l) cpu_local_inc(l) -#define __cpu_local_dec(l) cpu_local_dec(l) -#define __cpu_local_add(i, l) cpu_local_add((i), (l)) -#define __cpu_local_sub(i, l) cpu_local_sub((i), (l)) - -#endif /* _ARCH_I386_LOCAL_H */ diff --git a/include/asm-x86/local_64.h b/include/asm-x86/local_64.h deleted file mode 100644 index e87492bb0693..000000000000 --- a/include/asm-x86/local_64.h +++ /dev/null @@ -1,222 +0,0 @@ -#ifndef _ARCH_X8664_LOCAL_H -#define _ARCH_X8664_LOCAL_H - -#include <linux/percpu.h> -#include <asm/atomic.h> - -typedef struct -{ - atomic_long_t a; -} local_t; - -#define LOCAL_INIT(i) { ATOMIC_LONG_INIT(i) } - -#define local_read(l) atomic_long_read(&(l)->a) -#define local_set(l,i) atomic_long_set(&(l)->a, (i)) - -static inline void local_inc(local_t *l) -{ - __asm__ __volatile__( - "incq %0" - :"=m" (l->a.counter) - :"m" (l->a.counter)); -} - -static inline void local_dec(local_t *l) -{ - __asm__ __volatile__( - "decq %0" - :"=m" (l->a.counter) - :"m" (l->a.counter)); -} - -static inline void local_add(long i, local_t *l) -{ - __asm__ __volatile__( - "addq %1,%0" - :"=m" (l->a.counter) - :"ir" (i), "m" (l->a.counter)); -} - -static inline void local_sub(long i, local_t *l) -{ - __asm__ __volatile__( - "subq %1,%0" - :"=m" (l->a.counter) - :"ir" (i), "m" (l->a.counter)); -} - -/** - * local_sub_and_test - subtract value from variable and test result - * @i: integer value to subtract - * @l: pointer to type local_t - * - * Atomically subtracts @i from @l and returns - * true if the result is zero, or false for all - * other cases. - */ -static __inline__ int local_sub_and_test(long i, local_t *l) -{ - unsigned char c; - - __asm__ __volatile__( - "subq %2,%0; sete %1" - :"=m" (l->a.counter), "=qm" (c) - :"ir" (i), "m" (l->a.counter) : "memory"); - return c; -} - -/** - * local_dec_and_test - decrement and test - * @l: pointer to type local_t - * - * Atomically decrements @l by 1 and - * returns true if the result is 0, or false for all other - * cases. - */ -static __inline__ int local_dec_and_test(local_t *l) -{ - unsigned char c; - - __asm__ __volatile__( - "decq %0; sete %1" - :"=m" (l->a.counter), "=qm" (c) - :"m" (l->a.counter) : "memory"); - return c != 0; -} - -/** - * local_inc_and_test - increment and test - * @l: pointer to type local_t - * - * Atomically increments @l by 1 - * and returns true if the result is zero, or false for all - * other cases. - */ -static __inline__ int local_inc_and_test(local_t *l) -{ - unsigned char c; - - __asm__ __volatile__( - "incq %0; sete %1" - :"=m" (l->a.counter), "=qm" (c) - :"m" (l->a.counter) : "memory"); - return c != 0; -} - -/** - * local_add_negative - add and test if negative - * @i: integer value to add - * @l: pointer to type local_t - * - * Atomically adds @i to @l and returns true - * if the result is negative, or false when - * result is greater than or equal to zero. - */ -static __inline__ int local_add_negative(long i, local_t *l) -{ - unsigned char c; - - __asm__ __volatile__( - "addq %2,%0; sets %1" - :"=m" (l->a.counter), "=qm" (c) - :"ir" (i), "m" (l->a.counter) : "memory"); - return c; -} - -/** - * local_add_return - add and return - * @i: integer value to add - * @l: pointer to type local_t - * - * Atomically adds @i to @l and returns @i + @l - */ -static __inline__ long local_add_return(long i, local_t *l) -{ - long __i = i; - __asm__ __volatile__( - "xaddq %0, %1;" - :"+r" (i), "+m" (l->a.counter) - : : "memory"); - return i + __i; -} - -static __inline__ long local_sub_return(long i, local_t *l) -{ - return local_add_return(-i,l); -} - -#define local_inc_return(l) (local_add_return(1,l)) -#define local_dec_return(l) (local_sub_return(1,l)) - -#define local_cmpxchg(l, o, n) \ - (cmpxchg_local(&((l)->a.counter), (o), (n))) -/* Always has a lock prefix */ -#define local_xchg(l, n) (xchg(&((l)->a.counter), (n))) - -/** - * atomic_up_add_unless - add unless the number is a given value - * @l: pointer of type local_t - * @a: the amount to add to l... - * @u: ...unless l is equal to u. - * - * Atomically adds @a to @l, so long as it was not @u. - * Returns non-zero if @l was not @u, and zero otherwise. - */ -#define local_add_unless(l, a, u) \ -({ \ - long c, old; \ - c = local_read(l); \ - for (;;) { \ - if (unlikely(c == (u))) \ - break; \ - old = local_cmpxchg((l), c, c + (a)); \ - if (likely(old == c)) \ - break; \ - c = old; \ - } \ - c != (u); \ -}) -#define local_inc_not_zero(l) local_add_unless((l), 1, 0) - -/* On x86-64 these are better than the atomic variants on SMP kernels - because they dont use a lock prefix. */ -#define __local_inc(l) local_inc(l) -#define __local_dec(l) local_dec(l) -#define __local_add(i,l) local_add((i),(l)) -#define __local_sub(i,l) local_sub((i),(l)) - -/* Use these for per-cpu local_t variables: on some archs they are - * much more efficient than these naive implementations. Note they take - * a variable, not an address. - * - * This could be done better if we moved the per cpu data directly - * after GS. - */ - -/* Need to disable preemption for the cpu local counters otherwise we could - still access a variable of a previous CPU in a non atomic way. */ -#define cpu_local_wrap_v(l) \ - ({ local_t res__; \ - preempt_disable(); \ - res__ = (l); \ - preempt_enable(); \ - res__; }) -#define cpu_local_wrap(l) \ - ({ preempt_disable(); \ - l; \ - preempt_enable(); }) \ - -#define cpu_local_read(l) cpu_local_wrap_v(local_read(&__get_cpu_var(l))) -#define cpu_local_set(l, i) cpu_local_wrap(local_set(&__get_cpu_var(l), (i))) -#define cpu_local_inc(l) cpu_local_wrap(local_inc(&__get_cpu_var(l))) -#define cpu_local_dec(l) cpu_local_wrap(local_dec(&__get_cpu_var(l))) -#define cpu_local_add(i, l) cpu_local_wrap(local_add((i), &__get_cpu_var(l))) -#define cpu_local_sub(i, l) cpu_local_wrap(local_sub((i), &__get_cpu_var(l))) - -#define __cpu_local_inc(l) cpu_local_inc(l) -#define __cpu_local_dec(l) cpu_local_dec(l) -#define __cpu_local_add(i, l) cpu_local_add((i), (l)) -#define __cpu_local_sub(i, l) cpu_local_sub((i), (l)) - -#endif /* _ARCH_X8664_LOCAL_H */ diff --git a/include/asm-x86/mach-bigsmp/mach_apic.h b/include/asm-x86/mach-bigsmp/mach_apic.h index ebd319f838ab..6df235e8ea91 100644 --- a/include/asm-x86/mach-bigsmp/mach_apic.h +++ b/include/asm-x86/mach-bigsmp/mach_apic.h @@ -110,13 +110,13 @@ static inline int cpu_to_logical_apicid(int cpu) } static inline int mpc_apic_id(struct mpc_config_processor *m, - struct mpc_config_translation *translation_record) + struct mpc_config_translation *translation_record) { - printk("Processor #%d %ld:%ld APIC version %d\n", - m->mpc_apicid, - (m->mpc_cpufeature & CPU_FAMILY_MASK) >> 8, - (m->mpc_cpufeature & CPU_MODEL_MASK) >> 4, - m->mpc_apicver); + printk("Processor #%d %u:%u APIC version %d\n", + m->mpc_apicid, + (m->mpc_cpufeature & CPU_FAMILY_MASK) >> 8, + (m->mpc_cpufeature & CPU_MODEL_MASK) >> 4, + m->mpc_apicver); return m->mpc_apicid; } diff --git a/include/asm-x86/mach-default/apm.h b/include/asm-x86/mach-default/apm.h index 1f730b8bd1fd..989f34c37d32 100644 --- a/include/asm-x86/mach-default/apm.h +++ b/include/asm-x86/mach-default/apm.h @@ -1,6 +1,4 @@ /* - * include/asm-i386/mach-default/apm.h - * * Machine specific APM BIOS functions for generic. * Split out from apm.c by Osamu Tomita <tomita@cinet.co.jp> */ diff --git a/include/asm-x86/mach-default/io_ports.h b/include/asm-x86/mach-default/io_ports.h deleted file mode 100644 index 48540ba97166..000000000000 --- a/include/asm-x86/mach-default/io_ports.h +++ /dev/null @@ -1,25 +0,0 @@ -/* - * arch/i386/mach-generic/io_ports.h - * - * Machine specific IO port address definition for generic. - * Written by Osamu Tomita <tomita@cinet.co.jp> - */ -#ifndef _MACH_IO_PORTS_H -#define _MACH_IO_PORTS_H - -/* i8259A PIC registers */ -#define PIC_MASTER_CMD 0x20 -#define PIC_MASTER_IMR 0x21 -#define PIC_MASTER_ISR PIC_MASTER_CMD -#define PIC_MASTER_POLL PIC_MASTER_ISR -#define PIC_MASTER_OCW3 PIC_MASTER_ISR -#define PIC_SLAVE_CMD 0xa0 -#define PIC_SLAVE_IMR 0xa1 - -/* i8259A PIC related value */ -#define PIC_CASCADE_IR 2 -#define MASTER_ICW4_DEFAULT 0x01 -#define SLAVE_ICW4_DEFAULT 0x01 -#define PIC_ICW4_AEOI 2 - -#endif /* !_MACH_IO_PORTS_H */ diff --git a/include/asm-x86/mach-default/mach_apic.h b/include/asm-x86/mach-default/mach_apic.h index 6db1c3babe9a..e3c2c1012c1c 100644 --- a/include/asm-x86/mach-default/mach_apic.h +++ b/include/asm-x86/mach-default/mach_apic.h @@ -89,15 +89,15 @@ static inline physid_mask_t apicid_to_cpu_present(int phys_apicid) return physid_mask_of_physid(phys_apicid); } -static inline int mpc_apic_id(struct mpc_config_processor *m, - struct mpc_config_translation *translation_record) -{ - printk("Processor #%d %ld:%ld APIC version %d\n", - m->mpc_apicid, - (m->mpc_cpufeature & CPU_FAMILY_MASK) >> 8, - (m->mpc_cpufeature & CPU_MODEL_MASK) >> 4, - m->mpc_apicver); - return (m->mpc_apicid); +static inline int mpc_apic_id(struct mpc_config_processor *m, + struct mpc_config_translation *translation_record) +{ + printk("Processor #%d %u:%u APIC version %d\n", + m->mpc_apicid, + (m->mpc_cpufeature & CPU_FAMILY_MASK) >> 8, + (m->mpc_cpufeature & CPU_MODEL_MASK) >> 4, + m->mpc_apicver); + return m->mpc_apicid; } static inline void setup_portio_remap(void) diff --git a/include/asm-x86/mach-default/mach_time.h b/include/asm-x86/mach-default/mach_time.h deleted file mode 100644 index 31eb5de6f3dc..000000000000 --- a/include/asm-x86/mach-default/mach_time.h +++ /dev/null @@ -1,111 +0,0 @@ -/* - * include/asm-i386/mach-default/mach_time.h - * - * Machine specific set RTC function for generic. - * Split out from time.c by Osamu Tomita <tomita@cinet.co.jp> - */ -#ifndef _MACH_TIME_H -#define _MACH_TIME_H - -#include <linux/mc146818rtc.h> - -/* for check timing call set_rtc_mmss() 500ms */ -/* used in arch/i386/time.c::do_timer_interrupt() */ -#define USEC_AFTER 500000 -#define USEC_BEFORE 500000 - -/* - * In order to set the CMOS clock precisely, set_rtc_mmss has to be - * called 500 ms after the second nowtime has started, because when - * nowtime is written into the registers of the CMOS clock, it will - * jump to the next second precisely 500 ms later. Check the Motorola - * MC146818A or Dallas DS12887 data sheet for details. - * - * BUG: This routine does not handle hour overflow properly; it just - * sets the minutes. Usually you'll only notice that after reboot! - */ -static inline int mach_set_rtc_mmss(unsigned long nowtime) -{ - int retval = 0; - int real_seconds, real_minutes, cmos_minutes; - unsigned char save_control, save_freq_select; - - save_control = CMOS_READ(RTC_CONTROL); /* tell the clock it's being set */ - CMOS_WRITE((save_control|RTC_SET), RTC_CONTROL); - - save_freq_select = CMOS_READ(RTC_FREQ_SELECT); /* stop and reset prescaler */ - CMOS_WRITE((save_freq_select|RTC_DIV_RESET2), RTC_FREQ_SELECT); - - cmos_minutes = CMOS_READ(RTC_MINUTES); - if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) - BCD_TO_BIN(cmos_minutes); - - /* - * since we're only adjusting minutes and seconds, - * don't interfere with hour overflow. This avoids - * messing with unknown time zones but requires your - * RTC not to be off by more than 15 minutes - */ - real_seconds = nowtime % 60; - real_minutes = nowtime / 60; - if (((abs(real_minutes - cmos_minutes) + 15)/30) & 1) - real_minutes += 30; /* correct for half hour time zone */ - real_minutes %= 60; - - if (abs(real_minutes - cmos_minutes) < 30) { - if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) { - BIN_TO_BCD(real_seconds); - BIN_TO_BCD(real_minutes); - } - CMOS_WRITE(real_seconds,RTC_SECONDS); - CMOS_WRITE(real_minutes,RTC_MINUTES); - } else { - printk(KERN_WARNING - "set_rtc_mmss: can't update from %d to %d\n", - cmos_minutes, real_minutes); - retval = -1; - } - - /* The following flags have to be released exactly in this order, - * otherwise the DS12887 (popular MC146818A clone with integrated - * battery and quartz) will not reset the oscillator and will not - * update precisely 500 ms later. You won't find this mentioned in - * the Dallas Semiconductor data sheets, but who believes data - * sheets anyway ... -- Markus Kuhn - */ - CMOS_WRITE(save_control, RTC_CONTROL); - CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT); - - return retval; -} - -static inline unsigned long mach_get_cmos_time(void) -{ - unsigned int year, mon, day, hour, min, sec; - - do { - sec = CMOS_READ(RTC_SECONDS); - min = CMOS_READ(RTC_MINUTES); - hour = CMOS_READ(RTC_HOURS); - day = CMOS_READ(RTC_DAY_OF_MONTH); - mon = CMOS_READ(RTC_MONTH); - year = CMOS_READ(RTC_YEAR); - } while (sec != CMOS_READ(RTC_SECONDS)); - - if (!(CMOS_READ(RTC_CONTROL) & RTC_DM_BINARY) || RTC_ALWAYS_BCD) { - BCD_TO_BIN(sec); - BCD_TO_BIN(min); - BCD_TO_BIN(hour); - BCD_TO_BIN(day); - BCD_TO_BIN(mon); - BCD_TO_BIN(year); - } - - year += 1900; - if (year < 1970) - year += 100; - - return mktime(year, mon, day, hour, min, sec); -} - -#endif /* !_MACH_TIME_H */ diff --git a/include/asm-x86/mach-default/mach_timer.h b/include/asm-x86/mach-default/mach_timer.h index 807992fd4171..4b76e536cd98 100644 --- a/include/asm-x86/mach-default/mach_timer.h +++ b/include/asm-x86/mach-default/mach_timer.h @@ -1,6 +1,4 @@ /* - * include/asm-i386/mach-default/mach_timer.h - * * Machine specific calibrate_tsc() for generic. * Split out from timer_tsc.c by Osamu Tomita <tomita@cinet.co.jp> */ diff --git a/include/asm-x86/mach-default/mach_traps.h b/include/asm-x86/mach-default/mach_traps.h index 625438b8a6eb..2fe7705c0484 100644 --- a/include/asm-x86/mach-default/mach_traps.h +++ b/include/asm-x86/mach-default/mach_traps.h @@ -1,6 +1,4 @@ /* - * include/asm-i386/mach-default/mach_traps.h - * * Machine specific NMI handling for generic. * Split out from traps.c by Osamu Tomita <tomita@cinet.co.jp> */ diff --git a/include/asm-x86/mach-es7000/mach_apic.h b/include/asm-x86/mach-es7000/mach_apic.h index caec64be516d..d23011fdf454 100644 --- a/include/asm-x86/mach-es7000/mach_apic.h +++ b/include/asm-x86/mach-es7000/mach_apic.h @@ -131,11 +131,11 @@ static inline int cpu_to_logical_apicid(int cpu) static inline int mpc_apic_id(struct mpc_config_processor *m, struct mpc_config_translation *unused) { - printk("Processor #%d %ld:%ld APIC version %d\n", - m->mpc_apicid, - (m->mpc_cpufeature & CPU_FAMILY_MASK) >> 8, - (m->mpc_cpufeature & CPU_MODEL_MASK) >> 4, - m->mpc_apicver); + printk("Processor #%d %u:%u APIC version %d\n", + m->mpc_apicid, + (m->mpc_cpufeature & CPU_FAMILY_MASK) >> 8, + (m->mpc_cpufeature & CPU_MODEL_MASK) >> 4, + m->mpc_apicver); return (m->mpc_apicid); } diff --git a/include/asm-x86/mach-generic/gpio.h b/include/asm-x86/mach-generic/gpio.h new file mode 100644 index 000000000000..5305dcb96df2 --- /dev/null +++ b/include/asm-x86/mach-generic/gpio.h @@ -0,0 +1,15 @@ +#ifndef __ASM_MACH_GENERIC_GPIO_H +#define __ASM_MACH_GENERIC_GPIO_H + +int gpio_request(unsigned gpio, const char *label); +void gpio_free(unsigned gpio); +int gpio_direction_input(unsigned gpio); +int gpio_direction_output(unsigned gpio, int value); +int gpio_get_value(unsigned gpio); +void gpio_set_value(unsigned gpio, int value); +int gpio_to_irq(unsigned gpio); +int irq_to_gpio(unsigned irq); + +#include <asm-generic/gpio.h> /* cansleep wrappers */ + +#endif /* __ASM_MACH_GENERIC_GPIO_H */ diff --git a/include/asm-x86/mach-numaq/mach_apic.h b/include/asm-x86/mach-numaq/mach_apic.h index 5e5e7dd2692e..17e183bd39c1 100644 --- a/include/asm-x86/mach-numaq/mach_apic.h +++ b/include/asm-x86/mach-numaq/mach_apic.h @@ -101,11 +101,11 @@ static inline int mpc_apic_id(struct mpc_config_processor *m, int quad = translation_record->trans_quad; int logical_apicid = generate_logical_apicid(quad, m->mpc_apicid); - printk("Processor #%d %ld:%ld APIC version %d (quad %d, apic %d)\n", - m->mpc_apicid, - (m->mpc_cpufeature & CPU_FAMILY_MASK) >> 8, - (m->mpc_cpufeature & CPU_MODEL_MASK) >> 4, - m->mpc_apicver, quad, logical_apicid); + printk("Processor #%d %u:%u APIC version %d (quad %d, apic %d)\n", + m->mpc_apicid, + (m->mpc_cpufeature & CPU_FAMILY_MASK) >> 8, + (m->mpc_cpufeature & CPU_MODEL_MASK) >> 4, + m->mpc_apicver, quad, logical_apicid); return logical_apicid; } diff --git a/include/asm-x86/mach-rdc321x/gpio.h b/include/asm-x86/mach-rdc321x/gpio.h new file mode 100644 index 000000000000..db31b929b990 --- /dev/null +++ b/include/asm-x86/mach-rdc321x/gpio.h @@ -0,0 +1,56 @@ +#ifndef _RDC321X_GPIO_H +#define _RDC321X_GPIO_H + +extern int rdc_gpio_get_value(unsigned gpio); +extern void rdc_gpio_set_value(unsigned gpio, int value); +extern int rdc_gpio_direction_input(unsigned gpio); +extern int rdc_gpio_direction_output(unsigned gpio, int value); + + +/* Wrappers for the arch-neutral GPIO API */ + +static inline int gpio_request(unsigned gpio, const char *label) +{ + /* Not yet implemented */ + return 0; +} + +static inline void gpio_free(unsigned gpio) +{ + /* Not yet implemented */ +} + +static inline int gpio_direction_input(unsigned gpio) +{ + return rdc_gpio_direction_input(gpio); +} + +static inline int gpio_direction_output(unsigned gpio, int value) +{ + return rdc_gpio_direction_output(gpio, value); +} + +static inline int gpio_get_value(unsigned gpio) +{ + return rdc_gpio_get_value(gpio); +} + +static inline void gpio_set_value(unsigned gpio, int value) +{ + rdc_gpio_set_value(gpio, value); +} + +static inline int gpio_to_irq(unsigned gpio) +{ + return gpio; +} + +static inline int irq_to_gpio(unsigned irq) +{ + return irq; +} + +/* For cansleep */ +#include <asm-generic/gpio.h> + +#endif /* _RDC321X_GPIO_H_ */ diff --git a/include/asm-x86/mach-rdc321x/rdc321x_defs.h b/include/asm-x86/mach-rdc321x/rdc321x_defs.h new file mode 100644 index 000000000000..838ba8f64fd3 --- /dev/null +++ b/include/asm-x86/mach-rdc321x/rdc321x_defs.h @@ -0,0 +1,6 @@ +#define PFX "rdc321x: " + +/* General purpose configuration and data registers */ +#define RDC3210_CFGREG_ADDR 0x0CF8 +#define RDC3210_CFGREG_DATA 0x0CFC +#define RDC_MAX_GPIO 0x3A diff --git a/include/asm-x86/mach-summit/mach_apic.h b/include/asm-x86/mach-summit/mach_apic.h index 732f776aab8e..062c97f6100b 100644 --- a/include/asm-x86/mach-summit/mach_apic.h +++ b/include/asm-x86/mach-summit/mach_apic.h @@ -126,15 +126,15 @@ static inline physid_mask_t apicid_to_cpu_present(int apicid) return physid_mask_of_physid(0); } -static inline int mpc_apic_id(struct mpc_config_processor *m, - struct mpc_config_translation *translation_record) -{ - printk("Processor #%d %ld:%ld APIC version %d\n", - m->mpc_apicid, - (m->mpc_cpufeature & CPU_FAMILY_MASK) >> 8, - (m->mpc_cpufeature & CPU_MODEL_MASK) >> 4, - m->mpc_apicver); - return (m->mpc_apicid); +static inline int mpc_apic_id(struct mpc_config_processor *m, + struct mpc_config_translation *translation_record) +{ + printk("Processor #%d %u:%u APIC version %d\n", + m->mpc_apicid, + (m->mpc_cpufeature & CPU_FAMILY_MASK) >> 8, + (m->mpc_cpufeature & CPU_MODEL_MASK) >> 4, + m->mpc_apicver); + return m->mpc_apicid; } static inline void setup_portio_remap(void) diff --git a/include/asm-x86/math_emu.h b/include/asm-x86/math_emu.h index a4b0aa3320e6..9bf4ae93ab10 100644 --- a/include/asm-x86/math_emu.h +++ b/include/asm-x86/math_emu.h @@ -1,11 +1,6 @@ #ifndef _I386_MATH_EMU_H #define _I386_MATH_EMU_H -#include <asm/sigcontext.h> - -int restore_i387_soft(void *s387, struct _fpstate __user *buf); -int save_i387_soft(void *s387, struct _fpstate __user *buf); - /* This structure matches the layout of the data saved to the stack following a device-not-present interrupt, part of it saved automatically by the 80386/80486. diff --git a/include/asm-x86/mc146818rtc.h b/include/asm-x86/mc146818rtc.h index 5c2bb66caf17..cdd9f965835a 100644 --- a/include/asm-x86/mc146818rtc.h +++ b/include/asm-x86/mc146818rtc.h @@ -1,5 +1,100 @@ -#ifdef CONFIG_X86_32 -# include "mc146818rtc_32.h" +/* + * Machine dependent access functions for RTC registers. + */ +#ifndef _ASM_MC146818RTC_H +#define _ASM_MC146818RTC_H + +#include <asm/io.h> +#include <asm/system.h> +#include <asm/processor.h> +#include <linux/mc146818rtc.h> + +#ifndef RTC_PORT +#define RTC_PORT(x) (0x70 + (x)) +#define RTC_ALWAYS_BCD 1 /* RTC operates in binary mode */ +#endif + +#if defined(CONFIG_X86_32) && defined(__HAVE_ARCH_CMPXCHG) +/* + * This lock provides nmi access to the CMOS/RTC registers. It has some + * special properties. It is owned by a CPU and stores the index register + * currently being accessed (if owned). The idea here is that it works + * like a normal lock (normally). However, in an NMI, the NMI code will + * first check to see if its CPU owns the lock, meaning that the NMI + * interrupted during the read/write of the device. If it does, it goes ahead + * and performs the access and then restores the index register. If it does + * not, it locks normally. + * + * Note that since we are working with NMIs, we need this lock even in + * a non-SMP machine just to mark that the lock is owned. + * + * This only works with compare-and-swap. There is no other way to + * atomically claim the lock and set the owner. + */ +#include <linux/smp.h> +extern volatile unsigned long cmos_lock; + +/* + * All of these below must be called with interrupts off, preempt + * disabled, etc. + */ + +static inline void lock_cmos(unsigned char reg) +{ + unsigned long new; + new = ((smp_processor_id()+1) << 8) | reg; + for (;;) { + if (cmos_lock) { + cpu_relax(); + continue; + } + if (__cmpxchg(&cmos_lock, 0, new, sizeof(cmos_lock)) == 0) + return; + } +} + +static inline void unlock_cmos(void) +{ + cmos_lock = 0; +} +static inline int do_i_have_lock_cmos(void) +{ + return (cmos_lock >> 8) == (smp_processor_id()+1); +} +static inline unsigned char current_lock_cmos_reg(void) +{ + return cmos_lock & 0xff; +} +#define lock_cmos_prefix(reg) \ + do { \ + unsigned long cmos_flags; \ + local_irq_save(cmos_flags); \ + lock_cmos(reg) +#define lock_cmos_suffix(reg) \ + unlock_cmos(); \ + local_irq_restore(cmos_flags); \ + } while (0) #else -# include "mc146818rtc_64.h" +#define lock_cmos_prefix(reg) do {} while (0) +#define lock_cmos_suffix(reg) do {} while (0) +#define lock_cmos(reg) +#define unlock_cmos() +#define do_i_have_lock_cmos() 0 +#define current_lock_cmos_reg() 0 #endif + +/* + * The yet supported machines all access the RTC index register via + * an ISA port access but the way to access the date register differs ... + */ +#define CMOS_READ(addr) rtc_cmos_read(addr) +#define CMOS_WRITE(val, addr) rtc_cmos_write(val, addr) +unsigned char rtc_cmos_read(unsigned char addr); +void rtc_cmos_write(unsigned char val, unsigned char addr); + +extern int mach_set_rtc_mmss(unsigned long nowtime); +extern unsigned long mach_get_cmos_time(void); + +#define RTC_IRQ 8 + +#endif /* _ASM_MC146818RTC_H */ diff --git a/include/asm-x86/mc146818rtc_32.h b/include/asm-x86/mc146818rtc_32.h deleted file mode 100644 index 1613b42eaf58..000000000000 --- a/include/asm-x86/mc146818rtc_32.h +++ /dev/null @@ -1,97 +0,0 @@ -/* - * Machine dependent access functions for RTC registers. - */ -#ifndef _ASM_MC146818RTC_H -#define _ASM_MC146818RTC_H - -#include <asm/io.h> -#include <asm/system.h> -#include <asm/processor.h> -#include <linux/mc146818rtc.h> - -#ifndef RTC_PORT -#define RTC_PORT(x) (0x70 + (x)) -#define RTC_ALWAYS_BCD 1 /* RTC operates in binary mode */ -#endif - -#ifdef __HAVE_ARCH_CMPXCHG -/* - * This lock provides nmi access to the CMOS/RTC registers. It has some - * special properties. It is owned by a CPU and stores the index register - * currently being accessed (if owned). The idea here is that it works - * like a normal lock (normally). However, in an NMI, the NMI code will - * first check to see if its CPU owns the lock, meaning that the NMI - * interrupted during the read/write of the device. If it does, it goes ahead - * and performs the access and then restores the index register. If it does - * not, it locks normally. - * - * Note that since we are working with NMIs, we need this lock even in - * a non-SMP machine just to mark that the lock is owned. - * - * This only works with compare-and-swap. There is no other way to - * atomically claim the lock and set the owner. - */ -#include <linux/smp.h> -extern volatile unsigned long cmos_lock; - -/* - * All of these below must be called with interrupts off, preempt - * disabled, etc. - */ - -static inline void lock_cmos(unsigned char reg) -{ - unsigned long new; - new = ((smp_processor_id()+1) << 8) | reg; - for (;;) { - if (cmos_lock) { - cpu_relax(); - continue; - } - if (__cmpxchg(&cmos_lock, 0, new, sizeof(cmos_lock)) == 0) - return; - } -} - -static inline void unlock_cmos(void) -{ - cmos_lock = 0; -} -static inline int do_i_have_lock_cmos(void) -{ - return (cmos_lock >> 8) == (smp_processor_id()+1); -} -static inline unsigned char current_lock_cmos_reg(void) -{ - return cmos_lock & 0xff; -} -#define lock_cmos_prefix(reg) \ - do { \ - unsigned long cmos_flags; \ - local_irq_save(cmos_flags); \ - lock_cmos(reg) -#define lock_cmos_suffix(reg) \ - unlock_cmos(); \ - local_irq_restore(cmos_flags); \ - } while (0) -#else -#define lock_cmos_prefix(reg) do {} while (0) -#define lock_cmos_suffix(reg) do {} while (0) -#define lock_cmos(reg) -#define unlock_cmos() -#define do_i_have_lock_cmos() 0 -#define current_lock_cmos_reg() 0 -#endif - -/* - * The yet supported machines all access the RTC index register via - * an ISA port access but the way to access the date register differs ... - */ -#define CMOS_READ(addr) rtc_cmos_read(addr) -#define CMOS_WRITE(val, addr) rtc_cmos_write(val, addr) -unsigned char rtc_cmos_read(unsigned char addr); -void rtc_cmos_write(unsigned char val, unsigned char addr); - -#define RTC_IRQ 8 - -#endif /* _ASM_MC146818RTC_H */ diff --git a/include/asm-x86/mc146818rtc_64.h b/include/asm-x86/mc146818rtc_64.h deleted file mode 100644 index d6e3009430c1..000000000000 --- a/include/asm-x86/mc146818rtc_64.h +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Machine dependent access functions for RTC registers. - */ -#ifndef _ASM_MC146818RTC_H -#define _ASM_MC146818RTC_H - -#include <asm/io.h> - -#ifndef RTC_PORT -#define RTC_PORT(x) (0x70 + (x)) -#define RTC_ALWAYS_BCD 1 /* RTC operates in binary mode */ -#endif - -/* - * The yet supported machines all access the RTC index register via - * an ISA port access but the way to access the date register differs ... - */ -#define CMOS_READ(addr) ({ \ -outb_p((addr),RTC_PORT(0)); \ -inb_p(RTC_PORT(1)); \ -}) -#define CMOS_WRITE(val, addr) ({ \ -outb_p((addr),RTC_PORT(0)); \ -outb_p((val),RTC_PORT(1)); \ -}) - -#define RTC_IRQ 8 - -#endif /* _ASM_MC146818RTC_H */ diff --git a/include/asm-x86/mce.h b/include/asm-x86/mce.h index df304fd89c27..94f1fd79e22a 100644 --- a/include/asm-x86/mce.h +++ b/include/asm-x86/mce.h @@ -13,7 +13,7 @@ #define MCG_CTL_P (1UL<<8) /* MCG_CAP register available */ #define MCG_STATUS_RIPV (1UL<<0) /* restart ip valid */ -#define MCG_STATUS_EIPV (1UL<<1) /* eip points to correct instruction */ +#define MCG_STATUS_EIPV (1UL<<1) /* ip points to correct instruction */ #define MCG_STATUS_MCIP (1UL<<2) /* machine check in progress */ #define MCI_STATUS_VAL (1UL<<63) /* valid error */ @@ -30,7 +30,7 @@ struct mce { __u64 misc; __u64 addr; __u64 mcgstatus; - __u64 rip; + __u64 ip; __u64 tsc; /* cpu time stamp counter */ __u64 res1; /* for future extension */ __u64 res2; /* dito. */ @@ -85,14 +85,7 @@ struct mce_log { #ifdef __KERNEL__ #ifdef CONFIG_X86_32 -#ifdef CONFIG_X86_MCE -extern void mcheck_init(struct cpuinfo_x86 *c); -#else -#define mcheck_init(c) do {} while(0) -#endif - extern int mce_disabled; - #else /* CONFIG_X86_32 */ #include <asm/atomic.h> @@ -121,6 +114,13 @@ extern int mce_notify_user(void); #endif /* !CONFIG_X86_32 */ + + +#ifdef CONFIG_X86_MCE +extern void mcheck_init(struct cpuinfo_x86 *c); +#else +#define mcheck_init(c) do { } while (0) +#endif extern void stop_mce(void); extern void restart_mce(void); diff --git a/include/asm-x86/mmsegment.h b/include/asm-x86/mmsegment.h deleted file mode 100644 index d3f80c996330..000000000000 --- a/include/asm-x86/mmsegment.h +++ /dev/null @@ -1,8 +0,0 @@ -#ifndef _ASM_MMSEGMENT_H -#define _ASM_MMSEGMENT_H 1 - -typedef struct { - unsigned long seg; -} mm_segment_t; - -#endif diff --git a/include/asm-x86/mmu.h b/include/asm-x86/mmu.h index 3f922c8e1c88..efa962c38897 100644 --- a/include/asm-x86/mmu.h +++ b/include/asm-x86/mmu.h @@ -20,4 +20,12 @@ typedef struct { void *vdso; } mm_context_t; +#ifdef CONFIG_SMP +void leave_mm(int cpu); +#else +static inline void leave_mm(int cpu) +{ +} +#endif + #endif /* _ASM_X86_MMU_H */ diff --git a/include/asm-x86/mmu_context_32.h b/include/asm-x86/mmu_context_32.h index 7eb0b0b1fb3c..8198d1cca1f3 100644 --- a/include/asm-x86/mmu_context_32.h +++ b/include/asm-x86/mmu_context_32.h @@ -32,8 +32,6 @@ static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) #endif } -void leave_mm(unsigned long cpu); - static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk) diff --git a/include/asm-x86/mmu_context_64.h b/include/asm-x86/mmu_context_64.h index 0cce83a78378..ad6dc821ef9e 100644 --- a/include/asm-x86/mmu_context_64.h +++ b/include/asm-x86/mmu_context_64.h @@ -7,7 +7,9 @@ #include <asm/pda.h> #include <asm/pgtable.h> #include <asm/tlbflush.h> +#ifndef CONFIG_PARAVIRT #include <asm-generic/mm_hooks.h> +#endif /* * possibly do the LDT unload here? @@ -23,11 +25,6 @@ static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) #endif } -static inline void load_cr3(pgd_t *pgd) -{ - asm volatile("movq %0,%%cr3" :: "r" (__pa(pgd)) : "memory"); -} - static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk) { @@ -43,20 +40,20 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, load_cr3(next->pgd); if (unlikely(next->context.ldt != prev->context.ldt)) - load_LDT_nolock(&next->context, cpu); + load_LDT_nolock(&next->context); } #ifdef CONFIG_SMP else { write_pda(mmu_state, TLBSTATE_OK); if (read_pda(active_mm) != next) - out_of_line_bug(); + BUG(); if (!cpu_test_and_set(cpu, next->cpu_vm_mask)) { /* We were in lazy tlb mode and leave_mm disabled * tlb flush IPI delivery. We must reload CR3 * to make sure to use no freed page tables. */ load_cr3(next->pgd); - load_LDT_nolock(&next->context, cpu); + load_LDT_nolock(&next->context); } } #endif diff --git a/include/asm-x86/mmzone_32.h b/include/asm-x86/mmzone_32.h index 118e9812778f..5d6f4ce6e6d6 100644 --- a/include/asm-x86/mmzone_32.h +++ b/include/asm-x86/mmzone_32.h @@ -87,9 +87,6 @@ static inline int pfn_to_nid(unsigned long pfn) __pgdat->node_start_pfn + __pgdat->node_spanned_pages; \ }) -/* XXX: FIXME -- wli */ -#define kern_addr_valid(kaddr) (0) - #ifdef CONFIG_X86_NUMAQ /* we have contiguous memory on NUMA-Q */ #define pfn_valid(pfn) ((pfn) < num_physpages) #else diff --git a/include/asm-x86/mmzone_64.h b/include/asm-x86/mmzone_64.h index 19a89377b123..ebaf9663aa8a 100644 --- a/include/asm-x86/mmzone_64.h +++ b/include/asm-x86/mmzone_64.h @@ -15,9 +15,9 @@ struct memnode { int shift; unsigned int mapsize; - u8 *map; - u8 embedded_map[64-16]; -} ____cacheline_aligned; /* total size = 64 bytes */ + s16 *map; + s16 embedded_map[64-8]; +} ____cacheline_aligned; /* total size = 128 bytes */ extern struct memnode memnode; #define memnode_shift memnode.shift #define memnodemap memnode.map @@ -41,11 +41,7 @@ static inline __attribute__((pure)) int phys_to_nid(unsigned long addr) #define node_end_pfn(nid) (NODE_DATA(nid)->node_start_pfn + \ NODE_DATA(nid)->node_spanned_pages) -#ifdef CONFIG_DISCONTIGMEM -#define pfn_to_nid(pfn) phys_to_nid((unsigned long)(pfn) << PAGE_SHIFT) - -extern int pfn_valid(unsigned long pfn); -#endif +extern int early_pfn_to_nid(unsigned long pfn); #ifdef CONFIG_NUMA_EMU #define FAKE_NODE_MIN_SIZE (64*1024*1024) diff --git a/include/asm-x86/module.h b/include/asm-x86/module.h index 2b2f18d8a531..bfedb247871c 100644 --- a/include/asm-x86/module.h +++ b/include/asm-x86/module.h @@ -1,5 +1,82 @@ +#ifndef _ASM_MODULE_H +#define _ASM_MODULE_H + +/* x86_32/64 are simple */ +struct mod_arch_specific {}; + #ifdef CONFIG_X86_32 -# include "module_32.h" +# define Elf_Shdr Elf32_Shdr +# define Elf_Sym Elf32_Sym +# define Elf_Ehdr Elf32_Ehdr #else -# include "module_64.h" +# define Elf_Shdr Elf64_Shdr +# define Elf_Sym Elf64_Sym +# define Elf_Ehdr Elf64_Ehdr #endif + +#ifdef CONFIG_X86_64 +/* X86_64 does not define MODULE_PROC_FAMILY */ +#elif defined CONFIG_M386 +#define MODULE_PROC_FAMILY "386 " +#elif defined CONFIG_M486 +#define MODULE_PROC_FAMILY "486 " +#elif defined CONFIG_M586 +#define MODULE_PROC_FAMILY "586 " +#elif defined CONFIG_M586TSC +#define MODULE_PROC_FAMILY "586TSC " +#elif defined CONFIG_M586MMX +#define MODULE_PROC_FAMILY "586MMX " +#elif defined CONFIG_MCORE2 +#define MODULE_PROC_FAMILY "CORE2 " +#elif defined CONFIG_M686 +#define MODULE_PROC_FAMILY "686 " +#elif defined CONFIG_MPENTIUMII +#define MODULE_PROC_FAMILY "PENTIUMII " +#elif defined CONFIG_MPENTIUMIII +#define MODULE_PROC_FAMILY "PENTIUMIII " +#elif defined CONFIG_MPENTIUMM +#define MODULE_PROC_FAMILY "PENTIUMM " +#elif defined CONFIG_MPENTIUM4 +#define MODULE_PROC_FAMILY "PENTIUM4 " +#elif defined CONFIG_MK6 +#define MODULE_PROC_FAMILY "K6 " +#elif defined CONFIG_MK7 +#define MODULE_PROC_FAMILY "K7 " +#elif defined CONFIG_MK8 +#define MODULE_PROC_FAMILY "K8 " +#elif defined CONFIG_X86_ELAN +#define MODULE_PROC_FAMILY "ELAN " +#elif defined CONFIG_MCRUSOE +#define MODULE_PROC_FAMILY "CRUSOE " +#elif defined CONFIG_MEFFICEON +#define MODULE_PROC_FAMILY "EFFICEON " +#elif defined CONFIG_MWINCHIPC6 +#define MODULE_PROC_FAMILY "WINCHIPC6 " +#elif defined CONFIG_MWINCHIP2 +#define MODULE_PROC_FAMILY "WINCHIP2 " +#elif defined CONFIG_MWINCHIP3D +#define MODULE_PROC_FAMILY "WINCHIP3D " +#elif defined CONFIG_MCYRIXIII +#define MODULE_PROC_FAMILY "CYRIXIII " +#elif defined CONFIG_MVIAC3_2 +#define MODULE_PROC_FAMILY "VIAC3-2 " +#elif defined CONFIG_MVIAC7 +#define MODULE_PROC_FAMILY "VIAC7 " +#elif defined CONFIG_MGEODEGX1 +#define MODULE_PROC_FAMILY "GEODEGX1 " +#elif defined CONFIG_MGEODE_LX +#define MODULE_PROC_FAMILY "GEODE " +#else +#error unknown processor family +#endif + +#ifdef CONFIG_X86_32 +# ifdef CONFIG_4KSTACKS +# define MODULE_STACKSIZE "4KSTACKS " +# else +# define MODULE_STACKSIZE "" +# endif +# define MODULE_ARCH_VERMAGIC MODULE_PROC_FAMILY MODULE_STACKSIZE +#endif + +#endif /* _ASM_MODULE_H */ diff --git a/include/asm-x86/module_32.h b/include/asm-x86/module_32.h deleted file mode 100644 index 7e5fda6c3976..000000000000 --- a/include/asm-x86/module_32.h +++ /dev/null @@ -1,75 +0,0 @@ -#ifndef _ASM_I386_MODULE_H -#define _ASM_I386_MODULE_H - -/* x86 is simple */ -struct mod_arch_specific -{ -}; - -#define Elf_Shdr Elf32_Shdr -#define Elf_Sym Elf32_Sym -#define Elf_Ehdr Elf32_Ehdr - -#ifdef CONFIG_M386 -#define MODULE_PROC_FAMILY "386 " -#elif defined CONFIG_M486 -#define MODULE_PROC_FAMILY "486 " -#elif defined CONFIG_M586 -#define MODULE_PROC_FAMILY "586 " -#elif defined CONFIG_M586TSC -#define MODULE_PROC_FAMILY "586TSC " -#elif defined CONFIG_M586MMX -#define MODULE_PROC_FAMILY "586MMX " -#elif defined CONFIG_MCORE2 -#define MODULE_PROC_FAMILY "CORE2 " -#elif defined CONFIG_M686 -#define MODULE_PROC_FAMILY "686 " -#elif defined CONFIG_MPENTIUMII -#define MODULE_PROC_FAMILY "PENTIUMII " -#elif defined CONFIG_MPENTIUMIII -#define MODULE_PROC_FAMILY "PENTIUMIII " -#elif defined CONFIG_MPENTIUMM -#define MODULE_PROC_FAMILY "PENTIUMM " -#elif defined CONFIG_MPENTIUM4 -#define MODULE_PROC_FAMILY "PENTIUM4 " -#elif defined CONFIG_MK6 -#define MODULE_PROC_FAMILY "K6 " -#elif defined CONFIG_MK7 -#define MODULE_PROC_FAMILY "K7 " -#elif defined CONFIG_MK8 -#define MODULE_PROC_FAMILY "K8 " -#elif defined CONFIG_X86_ELAN -#define MODULE_PROC_FAMILY "ELAN " -#elif defined CONFIG_MCRUSOE -#define MODULE_PROC_FAMILY "CRUSOE " -#elif defined CONFIG_MEFFICEON -#define MODULE_PROC_FAMILY "EFFICEON " -#elif defined CONFIG_MWINCHIPC6 -#define MODULE_PROC_FAMILY "WINCHIPC6 " -#elif defined CONFIG_MWINCHIP2 -#define MODULE_PROC_FAMILY "WINCHIP2 " -#elif defined CONFIG_MWINCHIP3D -#define MODULE_PROC_FAMILY "WINCHIP3D " -#elif defined CONFIG_MCYRIXIII -#define MODULE_PROC_FAMILY "CYRIXIII " -#elif defined CONFIG_MVIAC3_2 -#define MODULE_PROC_FAMILY "VIAC3-2 " -#elif defined CONFIG_MVIAC7 -#define MODULE_PROC_FAMILY "VIAC7 " -#elif defined CONFIG_MGEODEGX1 -#define MODULE_PROC_FAMILY "GEODEGX1 " -#elif defined CONFIG_MGEODE_LX -#define MODULE_PROC_FAMILY "GEODE " -#else -#error unknown processor family -#endif - -#ifdef CONFIG_4KSTACKS -#define MODULE_STACKSIZE "4KSTACKS " -#else -#define MODULE_STACKSIZE "" -#endif - -#define MODULE_ARCH_VERMAGIC MODULE_PROC_FAMILY MODULE_STACKSIZE - -#endif /* _ASM_I386_MODULE_H */ diff --git a/include/asm-x86/module_64.h b/include/asm-x86/module_64.h deleted file mode 100644 index 67f8f69fa7b1..000000000000 --- a/include/asm-x86/module_64.h +++ /dev/null @@ -1,10 +0,0 @@ -#ifndef _ASM_X8664_MODULE_H -#define _ASM_X8664_MODULE_H - -struct mod_arch_specific {}; - -#define Elf_Shdr Elf64_Shdr -#define Elf_Sym Elf64_Sym -#define Elf_Ehdr Elf64_Ehdr - -#endif diff --git a/include/asm-x86/mpspec.h b/include/asm-x86/mpspec.h index 8f268e8fd2e9..781ad74ab9e9 100644 --- a/include/asm-x86/mpspec.h +++ b/include/asm-x86/mpspec.h @@ -1,5 +1,117 @@ +#ifndef _AM_X86_MPSPEC_H +#define _AM_X86_MPSPEC_H + +#include <asm/mpspec_def.h> + #ifdef CONFIG_X86_32 -# include "mpspec_32.h" +#include <mach_mpspec.h> + +extern int mp_bus_id_to_type[MAX_MP_BUSSES]; +extern int mp_bus_id_to_node[MAX_MP_BUSSES]; +extern int mp_bus_id_to_local[MAX_MP_BUSSES]; +extern int quad_local_to_mp_bus_id[NR_CPUS/4][4]; + +extern unsigned int def_to_bigsmp; +extern int apic_version[MAX_APICS]; +extern u8 apicid_2_node[]; +extern int pic_mode; + +#define MAX_APICID 256 + #else -# include "mpspec_64.h" + +#define MAX_MP_BUSSES 256 +/* Each PCI slot may be a combo card with its own bus. 4 IRQ pins per slot. */ +#define MAX_IRQ_SOURCES (MAX_MP_BUSSES * 4) + +extern DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES); + +#endif + +extern int mp_bus_id_to_pci_bus[MAX_MP_BUSSES]; + +extern unsigned int boot_cpu_physical_apicid; +extern int smp_found_config; +extern int nr_ioapics; +extern int mp_irq_entries; +extern struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES]; +extern int mpc_default_type; +extern unsigned long mp_lapic_addr; + +extern void find_smp_config(void); +extern void get_smp_config(void); + +#ifdef CONFIG_ACPI +extern void mp_register_lapic(u8 id, u8 enabled); +extern void mp_register_lapic_address(u64 address); +extern void mp_register_ioapic(u8 id, u32 address, u32 gsi_base); +extern void mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, + u32 gsi); +extern void mp_config_acpi_legacy_irqs(void); +extern int mp_register_gsi(u32 gsi, int edge_level, int active_high_low); +#endif /* CONFIG_ACPI */ + +#define PHYSID_ARRAY_SIZE BITS_TO_LONGS(MAX_APICS) + +struct physid_mask +{ + unsigned long mask[PHYSID_ARRAY_SIZE]; +}; + +typedef struct physid_mask physid_mask_t; + +#define physid_set(physid, map) set_bit(physid, (map).mask) +#define physid_clear(physid, map) clear_bit(physid, (map).mask) +#define physid_isset(physid, map) test_bit(physid, (map).mask) +#define physid_test_and_set(physid, map) \ + test_and_set_bit(physid, (map).mask) + +#define physids_and(dst, src1, src2) \ + bitmap_and((dst).mask, (src1).mask, (src2).mask, MAX_APICS) + +#define physids_or(dst, src1, src2) \ + bitmap_or((dst).mask, (src1).mask, (src2).mask, MAX_APICS) + +#define physids_clear(map) \ + bitmap_zero((map).mask, MAX_APICS) + +#define physids_complement(dst, src) \ + bitmap_complement((dst).mask, (src).mask, MAX_APICS) + +#define physids_empty(map) \ + bitmap_empty((map).mask, MAX_APICS) + +#define physids_equal(map1, map2) \ + bitmap_equal((map1).mask, (map2).mask, MAX_APICS) + +#define physids_weight(map) \ + bitmap_weight((map).mask, MAX_APICS) + +#define physids_shift_right(d, s, n) \ + bitmap_shift_right((d).mask, (s).mask, n, MAX_APICS) + +#define physids_shift_left(d, s, n) \ + bitmap_shift_left((d).mask, (s).mask, n, MAX_APICS) + +#define physids_coerce(map) ((map).mask[0]) + +#define physids_promote(physids) \ + ({ \ + physid_mask_t __physid_mask = PHYSID_MASK_NONE; \ + __physid_mask.mask[0] = physids; \ + __physid_mask; \ + }) + +#define physid_mask_of_physid(physid) \ + ({ \ + physid_mask_t __physid_mask = PHYSID_MASK_NONE; \ + physid_set(physid, __physid_mask); \ + __physid_mask; \ + }) + +#define PHYSID_MASK_ALL { {[0 ... PHYSID_ARRAY_SIZE-1] = ~0UL} } +#define PHYSID_MASK_NONE { {[0 ... PHYSID_ARRAY_SIZE-1] = 0UL} } + +extern physid_mask_t phys_cpu_present_map; + #endif diff --git a/include/asm-x86/mpspec_32.h b/include/asm-x86/mpspec_32.h deleted file mode 100644 index f21349399d14..000000000000 --- a/include/asm-x86/mpspec_32.h +++ /dev/null @@ -1,81 +0,0 @@ -#ifndef __ASM_MPSPEC_H -#define __ASM_MPSPEC_H - -#include <linux/cpumask.h> -#include <asm/mpspec_def.h> -#include <mach_mpspec.h> - -extern int mp_bus_id_to_type [MAX_MP_BUSSES]; -extern int mp_bus_id_to_node [MAX_MP_BUSSES]; -extern int mp_bus_id_to_local [MAX_MP_BUSSES]; -extern int quad_local_to_mp_bus_id [NR_CPUS/4][4]; -extern int mp_bus_id_to_pci_bus [MAX_MP_BUSSES]; - -extern unsigned int def_to_bigsmp; -extern unsigned int boot_cpu_physical_apicid; -extern int smp_found_config; -extern void find_smp_config (void); -extern void get_smp_config (void); -extern int nr_ioapics; -extern int apic_version [MAX_APICS]; -extern int mp_irq_entries; -extern struct mpc_config_intsrc mp_irqs [MAX_IRQ_SOURCES]; -extern int mpc_default_type; -extern unsigned long mp_lapic_addr; -extern int pic_mode; - -#ifdef CONFIG_ACPI -extern void mp_register_lapic (u8 id, u8 enabled); -extern void mp_register_lapic_address (u64 address); -extern void mp_register_ioapic (u8 id, u32 address, u32 gsi_base); -extern void mp_override_legacy_irq (u8 bus_irq, u8 polarity, u8 trigger, u32 gsi); -extern void mp_config_acpi_legacy_irqs (void); -extern int mp_register_gsi (u32 gsi, int edge_level, int active_high_low); -#endif /* CONFIG_ACPI */ - -#define PHYSID_ARRAY_SIZE BITS_TO_LONGS(MAX_APICS) - -struct physid_mask -{ - unsigned long mask[PHYSID_ARRAY_SIZE]; -}; - -typedef struct physid_mask physid_mask_t; - -#define physid_set(physid, map) set_bit(physid, (map).mask) -#define physid_clear(physid, map) clear_bit(physid, (map).mask) -#define physid_isset(physid, map) test_bit(physid, (map).mask) -#define physid_test_and_set(physid, map) test_and_set_bit(physid, (map).mask) - -#define physids_and(dst, src1, src2) bitmap_and((dst).mask, (src1).mask, (src2).mask, MAX_APICS) -#define physids_or(dst, src1, src2) bitmap_or((dst).mask, (src1).mask, (src2).mask, MAX_APICS) -#define physids_clear(map) bitmap_zero((map).mask, MAX_APICS) -#define physids_complement(dst, src) bitmap_complement((dst).mask,(src).mask, MAX_APICS) -#define physids_empty(map) bitmap_empty((map).mask, MAX_APICS) -#define physids_equal(map1, map2) bitmap_equal((map1).mask, (map2).mask, MAX_APICS) -#define physids_weight(map) bitmap_weight((map).mask, MAX_APICS) -#define physids_shift_right(d, s, n) bitmap_shift_right((d).mask, (s).mask, n, MAX_APICS) -#define physids_shift_left(d, s, n) bitmap_shift_left((d).mask, (s).mask, n, MAX_APICS) -#define physids_coerce(map) ((map).mask[0]) - -#define physids_promote(physids) \ - ({ \ - physid_mask_t __physid_mask = PHYSID_MASK_NONE; \ - __physid_mask.mask[0] = physids; \ - __physid_mask; \ - }) - -#define physid_mask_of_physid(physid) \ - ({ \ - physid_mask_t __physid_mask = PHYSID_MASK_NONE; \ - physid_set(physid, __physid_mask); \ - __physid_mask; \ - }) - -#define PHYSID_MASK_ALL { {[0 ... PHYSID_ARRAY_SIZE-1] = ~0UL} } -#define PHYSID_MASK_NONE { {[0 ... PHYSID_ARRAY_SIZE-1] = 0UL} } - -extern physid_mask_t phys_cpu_present_map; - -#endif - diff --git a/include/asm-x86/mpspec_64.h b/include/asm-x86/mpspec_64.h deleted file mode 100644 index 017fddb61dc5..000000000000 --- a/include/asm-x86/mpspec_64.h +++ /dev/null @@ -1,233 +0,0 @@ -#ifndef __ASM_MPSPEC_H -#define __ASM_MPSPEC_H - -/* - * Structure definitions for SMP machines following the - * Intel Multiprocessing Specification 1.1 and 1.4. - */ - -/* - * This tag identifies where the SMP configuration - * information is. - */ - -#define SMP_MAGIC_IDENT (('_'<<24)|('P'<<16)|('M'<<8)|'_') - -/* - * A maximum of 255 APICs with the current APIC ID architecture. - */ -#define MAX_APICS 255 - -struct intel_mp_floating -{ - char mpf_signature[4]; /* "_MP_" */ - unsigned int mpf_physptr; /* Configuration table address */ - unsigned char mpf_length; /* Our length (paragraphs) */ - unsigned char mpf_specification;/* Specification version */ - unsigned char mpf_checksum; /* Checksum (makes sum 0) */ - unsigned char mpf_feature1; /* Standard or configuration ? */ - unsigned char mpf_feature2; /* Bit7 set for IMCR|PIC */ - unsigned char mpf_feature3; /* Unused (0) */ - unsigned char mpf_feature4; /* Unused (0) */ - unsigned char mpf_feature5; /* Unused (0) */ -}; - -struct mp_config_table -{ - char mpc_signature[4]; -#define MPC_SIGNATURE "PCMP" - unsigned short mpc_length; /* Size of table */ - char mpc_spec; /* 0x01 */ - char mpc_checksum; - char mpc_oem[8]; - char mpc_productid[12]; - unsigned int mpc_oemptr; /* 0 if not present */ - unsigned short mpc_oemsize; /* 0 if not present */ - unsigned short mpc_oemcount; - unsigned int mpc_lapic; /* APIC address */ - unsigned int reserved; -}; - -/* Followed by entries */ - -#define MP_PROCESSOR 0 -#define MP_BUS 1 -#define MP_IOAPIC 2 -#define MP_INTSRC 3 -#define MP_LINTSRC 4 - -struct mpc_config_processor -{ - unsigned char mpc_type; - unsigned char mpc_apicid; /* Local APIC number */ - unsigned char mpc_apicver; /* Its versions */ - unsigned char mpc_cpuflag; -#define CPU_ENABLED 1 /* Processor is available */ -#define CPU_BOOTPROCESSOR 2 /* Processor is the BP */ - unsigned int mpc_cpufeature; -#define CPU_STEPPING_MASK 0x0F -#define CPU_MODEL_MASK 0xF0 -#define CPU_FAMILY_MASK 0xF00 - unsigned int mpc_featureflag; /* CPUID feature value */ - unsigned int mpc_reserved[2]; -}; - -struct mpc_config_bus -{ - unsigned char mpc_type; - unsigned char mpc_busid; - unsigned char mpc_bustype[6]; -}; - -/* List of Bus Type string values, Intel MP Spec. */ -#define BUSTYPE_EISA "EISA" -#define BUSTYPE_ISA "ISA" -#define BUSTYPE_INTERN "INTERN" /* Internal BUS */ -#define BUSTYPE_MCA "MCA" -#define BUSTYPE_VL "VL" /* Local bus */ -#define BUSTYPE_PCI "PCI" -#define BUSTYPE_PCMCIA "PCMCIA" -#define BUSTYPE_CBUS "CBUS" -#define BUSTYPE_CBUSII "CBUSII" -#define BUSTYPE_FUTURE "FUTURE" -#define BUSTYPE_MBI "MBI" -#define BUSTYPE_MBII "MBII" -#define BUSTYPE_MPI "MPI" -#define BUSTYPE_MPSA "MPSA" -#define BUSTYPE_NUBUS "NUBUS" -#define BUSTYPE_TC "TC" -#define BUSTYPE_VME "VME" -#define BUSTYPE_XPRESS "XPRESS" - -struct mpc_config_ioapic -{ - unsigned char mpc_type; - unsigned char mpc_apicid; - unsigned char mpc_apicver; - unsigned char mpc_flags; -#define MPC_APIC_USABLE 0x01 - unsigned int mpc_apicaddr; -}; - -struct mpc_config_intsrc -{ - unsigned char mpc_type; - unsigned char mpc_irqtype; - unsigned short mpc_irqflag; - unsigned char mpc_srcbus; - unsigned char mpc_srcbusirq; - unsigned char mpc_dstapic; - unsigned char mpc_dstirq; -}; - -enum mp_irq_source_types { - mp_INT = 0, - mp_NMI = 1, - mp_SMI = 2, - mp_ExtINT = 3 -}; - -#define MP_IRQDIR_DEFAULT 0 -#define MP_IRQDIR_HIGH 1 -#define MP_IRQDIR_LOW 3 - - -struct mpc_config_lintsrc -{ - unsigned char mpc_type; - unsigned char mpc_irqtype; - unsigned short mpc_irqflag; - unsigned char mpc_srcbusid; - unsigned char mpc_srcbusirq; - unsigned char mpc_destapic; -#define MP_APIC_ALL 0xFF - unsigned char mpc_destapiclint; -}; - -/* - * Default configurations - * - * 1 2 CPU ISA 82489DX - * 2 2 CPU EISA 82489DX neither IRQ 0 timer nor IRQ 13 DMA chaining - * 3 2 CPU EISA 82489DX - * 4 2 CPU MCA 82489DX - * 5 2 CPU ISA+PCI - * 6 2 CPU EISA+PCI - * 7 2 CPU MCA+PCI - */ - -#define MAX_MP_BUSSES 256 -/* Each PCI slot may be a combo card with its own bus. 4 IRQ pins per slot. */ -#define MAX_IRQ_SOURCES (MAX_MP_BUSSES * 4) -extern DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES); -extern int mp_bus_id_to_pci_bus [MAX_MP_BUSSES]; - -extern unsigned int boot_cpu_physical_apicid; -extern int smp_found_config; -extern void find_smp_config (void); -extern void get_smp_config (void); -extern int nr_ioapics; -extern unsigned char apic_version [MAX_APICS]; -extern int mp_irq_entries; -extern struct mpc_config_intsrc mp_irqs [MAX_IRQ_SOURCES]; -extern int mpc_default_type; -extern unsigned long mp_lapic_addr; - -#ifdef CONFIG_ACPI -extern void mp_register_lapic (u8 id, u8 enabled); -extern void mp_register_lapic_address (u64 address); - -extern void mp_register_ioapic (u8 id, u32 address, u32 gsi_base); -extern void mp_override_legacy_irq (u8 bus_irq, u8 polarity, u8 trigger, u32 gsi); -extern void mp_config_acpi_legacy_irqs (void); -extern int mp_register_gsi (u32 gsi, int triggering, int polarity); -#endif - -extern int using_apic_timer; - -#define PHYSID_ARRAY_SIZE BITS_TO_LONGS(MAX_APICS) - -struct physid_mask -{ - unsigned long mask[PHYSID_ARRAY_SIZE]; -}; - -typedef struct physid_mask physid_mask_t; - -#define physid_set(physid, map) set_bit(physid, (map).mask) -#define physid_clear(physid, map) clear_bit(physid, (map).mask) -#define physid_isset(physid, map) test_bit(physid, (map).mask) -#define physid_test_and_set(physid, map) test_and_set_bit(physid, (map).mask) - -#define physids_and(dst, src1, src2) bitmap_and((dst).mask, (src1).mask, (src2).mask, MAX_APICS) -#define physids_or(dst, src1, src2) bitmap_or((dst).mask, (src1).mask, (src2).mask, MAX_APICS) -#define physids_clear(map) bitmap_zero((map).mask, MAX_APICS) -#define physids_complement(dst, src) bitmap_complement((dst).mask, (src).mask, MAX_APICS) -#define physids_empty(map) bitmap_empty((map).mask, MAX_APICS) -#define physids_equal(map1, map2) bitmap_equal((map1).mask, (map2).mask, MAX_APICS) -#define physids_weight(map) bitmap_weight((map).mask, MAX_APICS) -#define physids_shift_right(d, s, n) bitmap_shift_right((d).mask, (s).mask, n, MAX_APICS) -#define physids_shift_left(d, s, n) bitmap_shift_left((d).mask, (s).mask, n, MAX_APICS) -#define physids_coerce(map) ((map).mask[0]) - -#define physids_promote(physids) \ - ({ \ - physid_mask_t __physid_mask = PHYSID_MASK_NONE; \ - __physid_mask.mask[0] = physids; \ - __physid_mask; \ - }) - -#define physid_mask_of_physid(physid) \ - ({ \ - physid_mask_t __physid_mask = PHYSID_MASK_NONE; \ - physid_set(physid, __physid_mask); \ - __physid_mask; \ - }) - -#define PHYSID_MASK_ALL { {[0 ... PHYSID_ARRAY_SIZE-1] = ~0UL} } -#define PHYSID_MASK_NONE { {[0 ... PHYSID_ARRAY_SIZE-1] = 0UL} } - -extern physid_mask_t phys_cpu_present_map; - -#endif - diff --git a/include/asm-x86/mpspec_def.h b/include/asm-x86/mpspec_def.h index 13bafb16e7af..3504617fe648 100644 --- a/include/asm-x86/mpspec_def.h +++ b/include/asm-x86/mpspec_def.h @@ -8,52 +8,68 @@ /* * This tag identifies where the SMP configuration - * information is. + * information is. */ - + #define SMP_MAGIC_IDENT (('_'<<24)|('P'<<16)|('M'<<8)|'_') -#define MAX_MPC_ENTRY 1024 -#define MAX_APICS 256 +#ifdef CONFIG_X86_32 +# define MAX_MPC_ENTRY 1024 +# define MAX_APICS 256 +#else +/* + * A maximum of 255 APICs with the current APIC ID architecture. + */ +# define MAX_APICS 255 +#endif struct intel_mp_floating { - char mpf_signature[4]; /* "_MP_" */ - unsigned long mpf_physptr; /* Configuration table address */ + char mpf_signature[4]; /* "_MP_" */ + unsigned int mpf_physptr; /* Configuration table address */ unsigned char mpf_length; /* Our length (paragraphs) */ unsigned char mpf_specification;/* Specification version */ unsigned char mpf_checksum; /* Checksum (makes sum 0) */ - unsigned char mpf_feature1; /* Standard or configuration ? */ + unsigned char mpf_feature1; /* Standard or configuration ? */ unsigned char mpf_feature2; /* Bit7 set for IMCR|PIC */ unsigned char mpf_feature3; /* Unused (0) */ unsigned char mpf_feature4; /* Unused (0) */ unsigned char mpf_feature5; /* Unused (0) */ }; +#define MPC_SIGNATURE "PCMP" + struct mp_config_table { char mpc_signature[4]; -#define MPC_SIGNATURE "PCMP" unsigned short mpc_length; /* Size of table */ char mpc_spec; /* 0x01 */ char mpc_checksum; char mpc_oem[8]; char mpc_productid[12]; - unsigned long mpc_oemptr; /* 0 if not present */ + unsigned int mpc_oemptr; /* 0 if not present */ unsigned short mpc_oemsize; /* 0 if not present */ unsigned short mpc_oemcount; - unsigned long mpc_lapic; /* APIC address */ - unsigned long reserved; + unsigned int mpc_lapic; /* APIC address */ + unsigned int reserved; }; /* Followed by entries */ -#define MP_PROCESSOR 0 -#define MP_BUS 1 -#define MP_IOAPIC 2 -#define MP_INTSRC 3 -#define MP_LINTSRC 4 -#define MP_TRANSLATION 192 /* Used by IBM NUMA-Q to describe node locality */ +#define MP_PROCESSOR 0 +#define MP_BUS 1 +#define MP_IOAPIC 2 +#define MP_INTSRC 3 +#define MP_LINTSRC 4 +/* Used by IBM NUMA-Q to describe node locality */ +#define MP_TRANSLATION 192 + +#define CPU_ENABLED 1 /* Processor is available */ +#define CPU_BOOTPROCESSOR 2 /* Processor is the BP */ + +#define CPU_STEPPING_MASK 0x000F +#define CPU_MODEL_MASK 0x00F0 +#define CPU_FAMILY_MASK 0x0F00 struct mpc_config_processor { @@ -61,14 +77,9 @@ struct mpc_config_processor unsigned char mpc_apicid; /* Local APIC number */ unsigned char mpc_apicver; /* Its versions */ unsigned char mpc_cpuflag; -#define CPU_ENABLED 1 /* Processor is available */ -#define CPU_BOOTPROCESSOR 2 /* Processor is the BP */ - unsigned long mpc_cpufeature; -#define CPU_STEPPING_MASK 0x0F -#define CPU_MODEL_MASK 0xF0 -#define CPU_FAMILY_MASK 0xF00 - unsigned long mpc_featureflag; /* CPUID feature value */ - unsigned long mpc_reserved[2]; + unsigned int mpc_cpufeature; + unsigned int mpc_featureflag; /* CPUID feature value */ + unsigned int mpc_reserved[2]; }; struct mpc_config_bus @@ -98,14 +109,15 @@ struct mpc_config_bus #define BUSTYPE_VME "VME" #define BUSTYPE_XPRESS "XPRESS" +#define MPC_APIC_USABLE 0x01 + struct mpc_config_ioapic { unsigned char mpc_type; unsigned char mpc_apicid; unsigned char mpc_apicver; unsigned char mpc_flags; -#define MPC_APIC_USABLE 0x01 - unsigned long mpc_apicaddr; + unsigned int mpc_apicaddr; }; struct mpc_config_intsrc @@ -130,6 +142,7 @@ enum mp_irq_source_types { #define MP_IRQDIR_HIGH 1 #define MP_IRQDIR_LOW 3 +#define MP_APIC_ALL 0xFF struct mpc_config_lintsrc { @@ -138,15 +151,15 @@ struct mpc_config_lintsrc unsigned short mpc_irqflag; unsigned char mpc_srcbusid; unsigned char mpc_srcbusirq; - unsigned char mpc_destapic; -#define MP_APIC_ALL 0xFF + unsigned char mpc_destapic; unsigned char mpc_destapiclint; }; +#define MPC_OEM_SIGNATURE "_OEM" + struct mp_config_oemtable { char oem_signature[4]; -#define MPC_OEM_SIGNATURE "_OEM" unsigned short oem_length; /* Size of table */ char oem_rev; /* 0x01 */ char oem_checksum; @@ -155,13 +168,13 @@ struct mp_config_oemtable struct mpc_config_translation { - unsigned char mpc_type; - unsigned char trans_len; - unsigned char trans_type; - unsigned char trans_quad; - unsigned char trans_global; - unsigned char trans_local; - unsigned short trans_reserved; + unsigned char mpc_type; + unsigned char trans_len; + unsigned char trans_type; + unsigned char trans_quad; + unsigned char trans_global; + unsigned char trans_local; + unsigned short trans_reserved; }; /* diff --git a/include/asm-x86/msr-index.h b/include/asm-x86/msr-index.h index a4944732be04..fae118a25278 100644 --- a/include/asm-x86/msr-index.h +++ b/include/asm-x86/msr-index.h @@ -63,6 +63,13 @@ #define MSR_IA32_LASTINTFROMIP 0x000001dd #define MSR_IA32_LASTINTTOIP 0x000001de +/* DEBUGCTLMSR bits (others vary by model): */ +#define _DEBUGCTLMSR_LBR 0 /* last branch recording */ +#define _DEBUGCTLMSR_BTF 1 /* single-step on branches */ + +#define DEBUGCTLMSR_LBR (1UL << _DEBUGCTLMSR_LBR) +#define DEBUGCTLMSR_BTF (1UL << _DEBUGCTLMSR_BTF) + #define MSR_IA32_MC0_CTL 0x00000400 #define MSR_IA32_MC0_STATUS 0x00000401 #define MSR_IA32_MC0_ADDR 0x00000402 @@ -88,6 +95,14 @@ #define MSR_AMD64_IBSDCPHYSAD 0xc0011039 #define MSR_AMD64_IBSCTL 0xc001103a +/* Fam 10h MSRs */ +#define MSR_FAM10H_MMIO_CONF_BASE 0xc0010058 +#define FAM10H_MMIO_CONF_ENABLE (1<<0) +#define FAM10H_MMIO_CONF_BUSRANGE_MASK 0xf +#define FAM10H_MMIO_CONF_BUSRANGE_SHIFT 2 +#define FAM10H_MMIO_CONF_BASE_MASK 0xfffffff +#define FAM10H_MMIO_CONF_BASE_SHIFT 20 + /* K8 MSRs */ #define MSR_K8_TOP_MEM1 0xc001001a #define MSR_K8_TOP_MEM2 0xc001001d diff --git a/include/asm-x86/msr.h b/include/asm-x86/msr.h index 80b027081b3c..204a8a30fecf 100644 --- a/include/asm-x86/msr.h +++ b/include/asm-x86/msr.h @@ -7,77 +7,109 @@ # include <linux/types.h> #endif -#ifdef __i386__ - #ifdef __KERNEL__ #ifndef __ASSEMBLY__ +#include <asm/asm.h> #include <asm/errno.h> +static inline unsigned long long native_read_tscp(unsigned int *aux) +{ + unsigned long low, high; + asm volatile (".byte 0x0f,0x01,0xf9" + : "=a" (low), "=d" (high), "=c" (*aux)); + return low | ((u64)high >> 32); +} + +/* + * i386 calling convention returns 64-bit value in edx:eax, while + * x86_64 returns at rax. Also, the "A" constraint does not really + * mean rdx:rax in x86_64, so we need specialized behaviour for each + * architecture + */ +#ifdef CONFIG_X86_64 +#define DECLARE_ARGS(val, low, high) unsigned low, high +#define EAX_EDX_VAL(val, low, high) (low | ((u64)(high) << 32)) +#define EAX_EDX_ARGS(val, low, high) "a" (low), "d" (high) +#define EAX_EDX_RET(val, low, high) "=a" (low), "=d" (high) +#else +#define DECLARE_ARGS(val, low, high) unsigned long long val +#define EAX_EDX_VAL(val, low, high) (val) +#define EAX_EDX_ARGS(val, low, high) "A" (val) +#define EAX_EDX_RET(val, low, high) "=A" (val) +#endif + static inline unsigned long long native_read_msr(unsigned int msr) { - unsigned long long val; + DECLARE_ARGS(val, low, high); - asm volatile("rdmsr" : "=A" (val) : "c" (msr)); - return val; + asm volatile("rdmsr" : EAX_EDX_RET(val, low, high) : "c" (msr)); + return EAX_EDX_VAL(val, low, high); } static inline unsigned long long native_read_msr_safe(unsigned int msr, int *err) { - unsigned long long val; + DECLARE_ARGS(val, low, high); - asm volatile("2: rdmsr ; xorl %0,%0\n" + asm volatile("2: rdmsr ; xor %0,%0\n" "1:\n\t" ".section .fixup,\"ax\"\n\t" - "3: movl %3,%0 ; jmp 1b\n\t" + "3: mov %3,%0 ; jmp 1b\n\t" ".previous\n\t" ".section __ex_table,\"a\"\n" - " .align 4\n\t" - " .long 2b,3b\n\t" + _ASM_ALIGN "\n\t" + _ASM_PTR " 2b,3b\n\t" ".previous" - : "=r" (*err), "=A" (val) + : "=r" (*err), EAX_EDX_RET(val, low, high) : "c" (msr), "i" (-EFAULT)); - - return val; + return EAX_EDX_VAL(val, low, high); } -static inline void native_write_msr(unsigned int msr, unsigned long long val) +static inline void native_write_msr(unsigned int msr, + unsigned low, unsigned high) { - asm volatile("wrmsr" : : "c" (msr), "A"(val)); + asm volatile("wrmsr" : : "c" (msr), "a"(low), "d" (high)); } static inline int native_write_msr_safe(unsigned int msr, - unsigned long long val) + unsigned low, unsigned high) { int err; - asm volatile("2: wrmsr ; xorl %0,%0\n" + asm volatile("2: wrmsr ; xor %0,%0\n" "1:\n\t" ".section .fixup,\"ax\"\n\t" - "3: movl %4,%0 ; jmp 1b\n\t" + "3: mov %4,%0 ; jmp 1b\n\t" ".previous\n\t" ".section __ex_table,\"a\"\n" - " .align 4\n\t" - " .long 2b,3b\n\t" + _ASM_ALIGN "\n\t" + _ASM_PTR " 2b,3b\n\t" ".previous" : "=a" (err) - : "c" (msr), "0" ((u32)val), "d" ((u32)(val>>32)), + : "c" (msr), "0" (low), "d" (high), "i" (-EFAULT)); return err; } -static inline unsigned long long native_read_tsc(void) +extern unsigned long long native_read_tsc(void); + +static __always_inline unsigned long long __native_read_tsc(void) { - unsigned long long val; - asm volatile("rdtsc" : "=A" (val)); - return val; + DECLARE_ARGS(val, low, high); + + rdtsc_barrier(); + asm volatile("rdtsc" : EAX_EDX_RET(val, low, high)); + rdtsc_barrier(); + + return EAX_EDX_VAL(val, low, high); } -static inline unsigned long long native_read_pmc(void) +static inline unsigned long long native_read_pmc(int counter) { - unsigned long long val; - asm volatile("rdpmc" : "=A" (val)); - return val; + DECLARE_ARGS(val, low, high); + + asm volatile("rdpmc" : EAX_EDX_RET(val, low, high) : "c" (counter)); + return EAX_EDX_VAL(val, low, high); } #ifdef CONFIG_PARAVIRT @@ -97,20 +129,21 @@ static inline unsigned long long native_read_pmc(void) (val2) = (u32)(__val >> 32); \ } while(0) -static inline void wrmsr(u32 __msr, u32 __low, u32 __high) +static inline void wrmsr(unsigned msr, unsigned low, unsigned high) { - native_write_msr(__msr, ((u64)__high << 32) | __low); + native_write_msr(msr, low, high); } #define rdmsrl(msr,val) \ ((val) = native_read_msr(msr)) -#define wrmsrl(msr,val) native_write_msr(msr, val) +#define wrmsrl(msr, val) \ + native_write_msr(msr, (u32)((u64)(val)), (u32)((u64)(val) >> 32)) /* wrmsr with exception handling */ -static inline int wrmsr_safe(u32 __msr, u32 __low, u32 __high) +static inline int wrmsr_safe(unsigned msr, unsigned low, unsigned high) { - return native_write_msr_safe(__msr, ((u64)__high << 32) | __low); + return native_write_msr_safe(msr, low, high); } /* rdmsr with exception handling */ @@ -129,204 +162,31 @@ static inline int wrmsr_safe(u32 __msr, u32 __low, u32 __high) #define rdtscll(val) \ ((val) = native_read_tsc()) -#define write_tsc(val1,val2) wrmsr(0x10, val1, val2) - #define rdpmc(counter,low,high) \ do { \ - u64 _l = native_read_pmc(); \ + u64 _l = native_read_pmc(counter); \ (low) = (u32)_l; \ (high) = (u32)(_l >> 32); \ } while(0) -#endif /* !CONFIG_PARAVIRT */ - -#ifdef CONFIG_SMP -void rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h); -void wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h); -int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h); -int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h); -#else /* CONFIG_SMP */ -static inline void rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h) -{ - rdmsr(msr_no, *l, *h); -} -static inline void wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h) -{ - wrmsr(msr_no, l, h); -} -static inline int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h) -{ - return rdmsr_safe(msr_no, l, h); -} -static inline int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h) -{ - return wrmsr_safe(msr_no, l, h); -} -#endif /* CONFIG_SMP */ -#endif /* ! __ASSEMBLY__ */ -#endif /* __KERNEL__ */ - -#else /* __i386__ */ - -#ifndef __ASSEMBLY__ -#include <linux/errno.h> -/* - * Access to machine-specific registers (available on 586 and better only) - * Note: the rd* operations modify the parameters directly (without using - * pointer indirection), this allows gcc to optimize better - */ - -#define rdmsr(msr,val1,val2) \ - __asm__ __volatile__("rdmsr" \ - : "=a" (val1), "=d" (val2) \ - : "c" (msr)) - - -#define rdmsrl(msr,val) do { unsigned long a__,b__; \ - __asm__ __volatile__("rdmsr" \ - : "=a" (a__), "=d" (b__) \ - : "c" (msr)); \ - val = a__ | (b__<<32); \ -} while(0) - -#define wrmsr(msr,val1,val2) \ - __asm__ __volatile__("wrmsr" \ - : /* no outputs */ \ - : "c" (msr), "a" (val1), "d" (val2)) - -#define wrmsrl(msr,val) wrmsr(msr,(__u32)((__u64)(val)),((__u64)(val))>>32) -#define rdtsc(low,high) \ - __asm__ __volatile__("rdtsc" : "=a" (low), "=d" (high)) +#define rdtscp(low, high, aux) \ + do { \ + unsigned long long _val = native_read_tscp(&(aux)); \ + (low) = (u32)_val; \ + (high) = (u32)(_val >> 32); \ + } while (0) -#define rdtscl(low) \ - __asm__ __volatile__ ("rdtsc" : "=a" (low) : : "edx") +#define rdtscpll(val, aux) (val) = native_read_tscp(&(aux)) -#define rdtscp(low,high,aux) \ - __asm__ __volatile__ (".byte 0x0f,0x01,0xf9" : "=a" (low), "=d" (high), "=c" (aux)) +#endif /* !CONFIG_PARAVIRT */ -#define rdtscll(val) do { \ - unsigned int __a,__d; \ - __asm__ __volatile__("rdtsc" : "=a" (__a), "=d" (__d)); \ - (val) = ((unsigned long)__a) | (((unsigned long)__d)<<32); \ -} while(0) -#define rdtscpll(val, aux) do { \ - unsigned long __a, __d; \ - __asm__ __volatile__ (".byte 0x0f,0x01,0xf9" : "=a" (__a), "=d" (__d), "=c" (aux)); \ - (val) = (__d << 32) | __a; \ -} while (0) +#define checking_wrmsrl(msr,val) wrmsr_safe(msr,(u32)(val),(u32)((val)>>32)) #define write_tsc(val1,val2) wrmsr(0x10, val1, val2) #define write_rdtscp_aux(val) wrmsr(0xc0000103, val, 0) -#define rdpmc(counter,low,high) \ - __asm__ __volatile__("rdpmc" \ - : "=a" (low), "=d" (high) \ - : "c" (counter)) - - -static inline void cpuid(int op, unsigned int *eax, unsigned int *ebx, - unsigned int *ecx, unsigned int *edx) -{ - __asm__("cpuid" - : "=a" (*eax), - "=b" (*ebx), - "=c" (*ecx), - "=d" (*edx) - : "0" (op)); -} - -/* Some CPUID calls want 'count' to be placed in ecx */ -static inline void cpuid_count(int op, int count, int *eax, int *ebx, int *ecx, - int *edx) -{ - __asm__("cpuid" - : "=a" (*eax), - "=b" (*ebx), - "=c" (*ecx), - "=d" (*edx) - : "0" (op), "c" (count)); -} - -/* - * CPUID functions returning a single datum - */ -static inline unsigned int cpuid_eax(unsigned int op) -{ - unsigned int eax; - - __asm__("cpuid" - : "=a" (eax) - : "0" (op) - : "bx", "cx", "dx"); - return eax; -} -static inline unsigned int cpuid_ebx(unsigned int op) -{ - unsigned int eax, ebx; - - __asm__("cpuid" - : "=a" (eax), "=b" (ebx) - : "0" (op) - : "cx", "dx" ); - return ebx; -} -static inline unsigned int cpuid_ecx(unsigned int op) -{ - unsigned int eax, ecx; - - __asm__("cpuid" - : "=a" (eax), "=c" (ecx) - : "0" (op) - : "bx", "dx" ); - return ecx; -} -static inline unsigned int cpuid_edx(unsigned int op) -{ - unsigned int eax, edx; - - __asm__("cpuid" - : "=a" (eax), "=d" (edx) - : "0" (op) - : "bx", "cx"); - return edx; -} - -#ifdef __KERNEL__ - -/* wrmsr with exception handling */ -#define wrmsr_safe(msr,a,b) ({ int ret__; \ - asm volatile("2: wrmsr ; xorl %0,%0\n" \ - "1:\n\t" \ - ".section .fixup,\"ax\"\n\t" \ - "3: movl %4,%0 ; jmp 1b\n\t" \ - ".previous\n\t" \ - ".section __ex_table,\"a\"\n" \ - " .align 8\n\t" \ - " .quad 2b,3b\n\t" \ - ".previous" \ - : "=a" (ret__) \ - : "c" (msr), "0" (a), "d" (b), "i" (-EFAULT)); \ - ret__; }) - -#define checking_wrmsrl(msr,val) wrmsr_safe(msr,(u32)(val),(u32)((val)>>32)) - -#define rdmsr_safe(msr,a,b) \ - ({ int ret__; \ - asm volatile ("1: rdmsr\n" \ - "2:\n" \ - ".section .fixup,\"ax\"\n" \ - "3: movl %4,%0\n" \ - " jmp 2b\n" \ - ".previous\n" \ - ".section __ex_table,\"a\"\n" \ - " .align 8\n" \ - " .quad 1b,3b\n" \ - ".previous":"=&bDS" (ret__), "=a"(*(a)), "=d"(*(b)) \ - :"c"(msr), "i"(-EIO), "0"(0)); \ - ret__; }) - #ifdef CONFIG_SMP void rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h); void wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h); @@ -350,9 +210,8 @@ static inline int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h) return wrmsr_safe(msr_no, l, h); } #endif /* CONFIG_SMP */ -#endif /* __KERNEL__ */ -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLY__ */ +#endif /* __KERNEL__ */ -#endif /* !__i386__ */ #endif diff --git a/include/asm-x86/mtrr.h b/include/asm-x86/mtrr.h index e8320e4e6ca2..319d065800be 100644 --- a/include/asm-x86/mtrr.h +++ b/include/asm-x86/mtrr.h @@ -89,24 +89,25 @@ struct mtrr_gentry extern void mtrr_save_fixed_ranges(void *); extern void mtrr_save_state(void); extern int mtrr_add (unsigned long base, unsigned long size, - unsigned int type, char increment); + unsigned int type, bool increment); extern int mtrr_add_page (unsigned long base, unsigned long size, - unsigned int type, char increment); + unsigned int type, bool increment); extern int mtrr_del (int reg, unsigned long base, unsigned long size); extern int mtrr_del_page (int reg, unsigned long base, unsigned long size); extern void mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi); extern void mtrr_ap_init(void); extern void mtrr_bp_init(void); +extern int mtrr_trim_uncached_memory(unsigned long end_pfn); # else #define mtrr_save_fixed_ranges(arg) do {} while (0) #define mtrr_save_state() do {} while (0) static __inline__ int mtrr_add (unsigned long base, unsigned long size, - unsigned int type, char increment) + unsigned int type, bool increment) { return -ENODEV; } static __inline__ int mtrr_add_page (unsigned long base, unsigned long size, - unsigned int type, char increment) + unsigned int type, bool increment) { return -ENODEV; } @@ -120,7 +121,10 @@ static __inline__ int mtrr_del_page (int reg, unsigned long base, { return -ENODEV; } - +static inline int mtrr_trim_uncached_memory(unsigned long end_pfn) +{ + return 0; +} static __inline__ void mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi) {;} #define mtrr_ap_init() do {} while (0) diff --git a/include/asm-x86/mutex_32.h b/include/asm-x86/mutex_32.h index 7a17d9e58ad6..bbeefb96ddfd 100644 --- a/include/asm-x86/mutex_32.h +++ b/include/asm-x86/mutex_32.h @@ -26,7 +26,7 @@ do { \ unsigned int dummy; \ \ typecheck(atomic_t *, count); \ - typecheck_fn(fastcall void (*)(atomic_t *), fail_fn); \ + typecheck_fn(void (*)(atomic_t *), fail_fn); \ \ __asm__ __volatile__( \ LOCK_PREFIX " decl (%%eax) \n" \ @@ -51,8 +51,7 @@ do { \ * or anything the slow path function returns */ static inline int -__mutex_fastpath_lock_retval(atomic_t *count, - int fastcall (*fail_fn)(atomic_t *)) +__mutex_fastpath_lock_retval(atomic_t *count, int (*fail_fn)(atomic_t *)) { if (unlikely(atomic_dec_return(count) < 0)) return fail_fn(count); @@ -78,7 +77,7 @@ do { \ unsigned int dummy; \ \ typecheck(atomic_t *, count); \ - typecheck_fn(fastcall void (*)(atomic_t *), fail_fn); \ + typecheck_fn(void (*)(atomic_t *), fail_fn); \ \ __asm__ __volatile__( \ LOCK_PREFIX " incl (%%eax) \n" \ diff --git a/include/asm-x86/nmi_32.h b/include/asm-x86/nmi_32.h index 70a958a8e381..7206c7e8a388 100644 --- a/include/asm-x86/nmi_32.h +++ b/include/asm-x86/nmi_32.h @@ -1,6 +1,3 @@ -/* - * linux/include/asm-i386/nmi.h - */ #ifndef ASM_NMI_H #define ASM_NMI_H diff --git a/include/asm-x86/nmi_64.h b/include/asm-x86/nmi_64.h index 65b6acf3bb59..2eeb74e5f3ff 100644 --- a/include/asm-x86/nmi_64.h +++ b/include/asm-x86/nmi_64.h @@ -1,6 +1,3 @@ -/* - * linux/include/asm-i386/nmi.h - */ #ifndef ASM_NMI_H #define ASM_NMI_H @@ -41,7 +38,6 @@ extern void die_nmi(char *str, struct pt_regs *regs, int do_panic); #define get_nmi_reason() inb(0x61) -extern int panic_on_timeout; extern int unknown_nmi_panic; extern int nmi_watchdog_enabled; @@ -60,7 +56,6 @@ extern void enable_timer_nmi_watchdog(void); extern int nmi_watchdog_tick (struct pt_regs * regs, unsigned reason); extern void nmi_watchdog_default(void); -extern int setup_nmi_watchdog(char *); extern atomic_t nmi_active; extern unsigned int nmi_watchdog; diff --git a/include/asm-x86/nops.h b/include/asm-x86/nops.h new file mode 100644 index 000000000000..fec025c7f58c --- /dev/null +++ b/include/asm-x86/nops.h @@ -0,0 +1,90 @@ +#ifndef _ASM_NOPS_H +#define _ASM_NOPS_H 1 + +/* Define nops for use with alternative() */ + +/* generic versions from gas */ +#define GENERIC_NOP1 ".byte 0x90\n" +#define GENERIC_NOP2 ".byte 0x89,0xf6\n" +#define GENERIC_NOP3 ".byte 0x8d,0x76,0x00\n" +#define GENERIC_NOP4 ".byte 0x8d,0x74,0x26,0x00\n" +#define GENERIC_NOP5 GENERIC_NOP1 GENERIC_NOP4 +#define GENERIC_NOP6 ".byte 0x8d,0xb6,0x00,0x00,0x00,0x00\n" +#define GENERIC_NOP7 ".byte 0x8d,0xb4,0x26,0x00,0x00,0x00,0x00\n" +#define GENERIC_NOP8 GENERIC_NOP1 GENERIC_NOP7 + +/* Opteron 64bit nops */ +#define K8_NOP1 GENERIC_NOP1 +#define K8_NOP2 ".byte 0x66,0x90\n" +#define K8_NOP3 ".byte 0x66,0x66,0x90\n" +#define K8_NOP4 ".byte 0x66,0x66,0x66,0x90\n" +#define K8_NOP5 K8_NOP3 K8_NOP2 +#define K8_NOP6 K8_NOP3 K8_NOP3 +#define K8_NOP7 K8_NOP4 K8_NOP3 +#define K8_NOP8 K8_NOP4 K8_NOP4 + +/* K7 nops */ +/* uses eax dependencies (arbitary choice) */ +#define K7_NOP1 GENERIC_NOP1 +#define K7_NOP2 ".byte 0x8b,0xc0\n" +#define K7_NOP3 ".byte 0x8d,0x04,0x20\n" +#define K7_NOP4 ".byte 0x8d,0x44,0x20,0x00\n" +#define K7_NOP5 K7_NOP4 ASM_NOP1 +#define K7_NOP6 ".byte 0x8d,0x80,0,0,0,0\n" +#define K7_NOP7 ".byte 0x8D,0x04,0x05,0,0,0,0\n" +#define K7_NOP8 K7_NOP7 ASM_NOP1 + +/* P6 nops */ +/* uses eax dependencies (Intel-recommended choice) */ +#define P6_NOP1 GENERIC_NOP1 +#define P6_NOP2 ".byte 0x66,0x90\n" +#define P6_NOP3 ".byte 0x0f,0x1f,0x00\n" +#define P6_NOP4 ".byte 0x0f,0x1f,0x40,0\n" +#define P6_NOP5 ".byte 0x0f,0x1f,0x44,0x00,0\n" +#define P6_NOP6 ".byte 0x66,0x0f,0x1f,0x44,0x00,0\n" +#define P6_NOP7 ".byte 0x0f,0x1f,0x80,0,0,0,0\n" +#define P6_NOP8 ".byte 0x0f,0x1f,0x84,0x00,0,0,0,0\n" + +#if defined(CONFIG_MK8) +#define ASM_NOP1 K8_NOP1 +#define ASM_NOP2 K8_NOP2 +#define ASM_NOP3 K8_NOP3 +#define ASM_NOP4 K8_NOP4 +#define ASM_NOP5 K8_NOP5 +#define ASM_NOP6 K8_NOP6 +#define ASM_NOP7 K8_NOP7 +#define ASM_NOP8 K8_NOP8 +#elif defined(CONFIG_MK7) +#define ASM_NOP1 K7_NOP1 +#define ASM_NOP2 K7_NOP2 +#define ASM_NOP3 K7_NOP3 +#define ASM_NOP4 K7_NOP4 +#define ASM_NOP5 K7_NOP5 +#define ASM_NOP6 K7_NOP6 +#define ASM_NOP7 K7_NOP7 +#define ASM_NOP8 K7_NOP8 +#elif defined(CONFIG_M686) || defined(CONFIG_MPENTIUMII) || \ + defined(CONFIG_MPENTIUMIII) || defined(CONFIG_MPENTIUMM) || \ + defined(CONFIG_MCORE2) || defined(CONFIG_PENTIUM4) +#define ASM_NOP1 P6_NOP1 +#define ASM_NOP2 P6_NOP2 +#define ASM_NOP3 P6_NOP3 +#define ASM_NOP4 P6_NOP4 +#define ASM_NOP5 P6_NOP5 +#define ASM_NOP6 P6_NOP6 +#define ASM_NOP7 P6_NOP7 +#define ASM_NOP8 P6_NOP8 +#else +#define ASM_NOP1 GENERIC_NOP1 +#define ASM_NOP2 GENERIC_NOP2 +#define ASM_NOP3 GENERIC_NOP3 +#define ASM_NOP4 GENERIC_NOP4 +#define ASM_NOP5 GENERIC_NOP5 +#define ASM_NOP6 GENERIC_NOP6 +#define ASM_NOP7 GENERIC_NOP7 +#define ASM_NOP8 GENERIC_NOP8 +#endif + +#define ASM_NOP_MAX 8 + +#endif diff --git a/include/asm-x86/numa_32.h b/include/asm-x86/numa_32.h index 96fcb157db1d..03d0f7a9bf02 100644 --- a/include/asm-x86/numa_32.h +++ b/include/asm-x86/numa_32.h @@ -1,3 +1,15 @@ +#ifndef _ASM_X86_32_NUMA_H +#define _ASM_X86_32_NUMA_H 1 -int pxm_to_nid(int pxm); +extern int pxm_to_nid(int pxm); +#ifdef CONFIG_NUMA +extern void __init remap_numa_kva(void); +extern void set_highmem_pages_init(int); +#else +static inline void remap_numa_kva(void) +{ +} +#endif + +#endif /* _ASM_X86_32_NUMA_H */ diff --git a/include/asm-x86/numa_64.h b/include/asm-x86/numa_64.h index 0cc5c97a7fc9..15fe07cde586 100644 --- a/include/asm-x86/numa_64.h +++ b/include/asm-x86/numa_64.h @@ -20,13 +20,19 @@ extern void numa_set_node(int cpu, int node); extern void srat_reserve_add_area(int nodeid); extern int hotadd_percent; -extern unsigned char apicid_to_node[MAX_LOCAL_APIC]; +extern s16 apicid_to_node[MAX_LOCAL_APIC]; + +extern void numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn); +extern unsigned long numa_free_all_bootmem(void); +extern void setup_node_bootmem(int nodeid, unsigned long start, + unsigned long end); + #ifdef CONFIG_NUMA extern void __init init_cpu_to_node(void); static inline void clear_node_cpumask(int cpu) { - clear_bit(cpu, &node_to_cpumask[cpu_to_node(cpu)]); + clear_bit(cpu, (unsigned long *)&node_to_cpumask_map[cpu_to_node(cpu)]); } #else @@ -34,6 +40,4 @@ static inline void clear_node_cpumask(int cpu) #define clear_node_cpumask(cpu) do {} while (0) #endif -#define NUMA_NO_NODE 0xff - #endif diff --git a/include/asm-x86/page.h b/include/asm-x86/page.h index a757eb26141d..c8b30efeed85 100644 --- a/include/asm-x86/page.h +++ b/include/asm-x86/page.h @@ -1,13 +1,183 @@ +#ifndef _ASM_X86_PAGE_H +#define _ASM_X86_PAGE_H + +#include <linux/const.h> + +/* PAGE_SHIFT determines the page size */ +#define PAGE_SHIFT 12 +#define PAGE_SIZE (_AC(1,UL) << PAGE_SHIFT) +#define PAGE_MASK (~(PAGE_SIZE-1)) + #ifdef __KERNEL__ -# ifdef CONFIG_X86_32 -# include "page_32.h" -# else -# include "page_64.h" -# endif + +#define PHYSICAL_PAGE_MASK (PAGE_MASK & __PHYSICAL_MASK) +#define PTE_MASK (_AT(long, PHYSICAL_PAGE_MASK)) + +#define LARGE_PAGE_SIZE (_AC(1,UL) << PMD_SHIFT) +#define LARGE_PAGE_MASK (~(LARGE_PAGE_SIZE-1)) + +#define HPAGE_SHIFT PMD_SHIFT +#define HPAGE_SIZE (_AC(1,UL) << HPAGE_SHIFT) +#define HPAGE_MASK (~(HPAGE_SIZE - 1)) +#define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT) + +/* to align the pointer to the (next) page boundary */ +#define PAGE_ALIGN(addr) (((addr)+PAGE_SIZE-1)&PAGE_MASK) + +#define __PHYSICAL_MASK _AT(phys_addr_t, (_AC(1,ULL) << __PHYSICAL_MASK_SHIFT) - 1) +#define __VIRTUAL_MASK ((_AC(1,UL) << __VIRTUAL_MASK_SHIFT) - 1) + +#ifndef __ASSEMBLY__ +#include <linux/types.h> +#endif + +#ifdef CONFIG_X86_64 +#include <asm/page_64.h> +#define max_pfn_mapped end_pfn_map #else -# ifdef __i386__ -# include "page_32.h" -# else -# include "page_64.h" -# endif +#include <asm/page_32.h> +#define max_pfn_mapped max_low_pfn +#endif /* CONFIG_X86_64 */ + +#define PAGE_OFFSET ((unsigned long)__PAGE_OFFSET) + +#define VM_DATA_DEFAULT_FLAGS \ + (((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0 ) | \ + VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) + + +#ifndef __ASSEMBLY__ + +extern int page_is_ram(unsigned long pagenr); + +struct page; + +static void inline clear_user_page(void *page, unsigned long vaddr, + struct page *pg) +{ + clear_page(page); +} + +static void inline copy_user_page(void *to, void *from, unsigned long vaddr, + struct page *topage) +{ + copy_page(to, from); +} + +#define __alloc_zeroed_user_highpage(movableflags, vma, vaddr) \ + alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr) +#define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE + +typedef struct { pgdval_t pgd; } pgd_t; +typedef struct { pgprotval_t pgprot; } pgprot_t; + +static inline pgd_t native_make_pgd(pgdval_t val) +{ + return (pgd_t) { val }; +} + +static inline pgdval_t native_pgd_val(pgd_t pgd) +{ + return pgd.pgd; +} + +#if PAGETABLE_LEVELS >= 3 +#if PAGETABLE_LEVELS == 4 +typedef struct { pudval_t pud; } pud_t; + +static inline pud_t native_make_pud(pmdval_t val) +{ + return (pud_t) { val }; +} + +static inline pudval_t native_pud_val(pud_t pud) +{ + return pud.pud; +} +#else /* PAGETABLE_LEVELS == 3 */ +#include <asm-generic/pgtable-nopud.h> + +static inline pudval_t native_pud_val(pud_t pud) +{ + return native_pgd_val(pud.pgd); +} +#endif /* PAGETABLE_LEVELS == 4 */ + +typedef struct { pmdval_t pmd; } pmd_t; + +static inline pmd_t native_make_pmd(pmdval_t val) +{ + return (pmd_t) { val }; +} + +static inline pmdval_t native_pmd_val(pmd_t pmd) +{ + return pmd.pmd; +} +#else /* PAGETABLE_LEVELS == 2 */ +#include <asm-generic/pgtable-nopmd.h> + +static inline pmdval_t native_pmd_val(pmd_t pmd) +{ + return native_pgd_val(pmd.pud.pgd); +} +#endif /* PAGETABLE_LEVELS >= 3 */ + +static inline pte_t native_make_pte(pteval_t val) +{ + return (pte_t) { .pte = val }; +} + +static inline pteval_t native_pte_val(pte_t pte) +{ + return pte.pte; +} + +#define pgprot_val(x) ((x).pgprot) +#define __pgprot(x) ((pgprot_t) { (x) } ) + +#ifdef CONFIG_PARAVIRT +#include <asm/paravirt.h> +#else /* !CONFIG_PARAVIRT */ + +#define pgd_val(x) native_pgd_val(x) +#define __pgd(x) native_make_pgd(x) + +#ifndef __PAGETABLE_PUD_FOLDED +#define pud_val(x) native_pud_val(x) +#define __pud(x) native_make_pud(x) +#endif + +#ifndef __PAGETABLE_PMD_FOLDED +#define pmd_val(x) native_pmd_val(x) +#define __pmd(x) native_make_pmd(x) #endif + +#define pte_val(x) native_pte_val(x) +#define __pte(x) native_make_pte(x) + +#endif /* CONFIG_PARAVIRT */ + +#define __pa(x) __phys_addr((unsigned long)(x)) +/* __pa_symbol should be used for C visible symbols. + This seems to be the official gcc blessed way to do such arithmetic. */ +#define __pa_symbol(x) __pa(__phys_reloc_hide((unsigned long)(x))) + +#define __va(x) ((void *)((unsigned long)(x)+PAGE_OFFSET)) + +#define __boot_va(x) __va(x) +#define __boot_pa(x) __pa(x) + +#define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT) +#define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT) +#define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT) + +#endif /* __ASSEMBLY__ */ + +#include <asm-generic/memory_model.h> +#include <asm-generic/page.h> + +#define __HAVE_ARCH_GATE_AREA 1 + +#endif /* __KERNEL__ */ +#endif /* _ASM_X86_PAGE_H */ diff --git a/include/asm-x86/page_32.h b/include/asm-x86/page_32.h index 80ecc66b6d86..a6fd10f230d2 100644 --- a/include/asm-x86/page_32.h +++ b/include/asm-x86/page_32.h @@ -1,206 +1,107 @@ -#ifndef _I386_PAGE_H -#define _I386_PAGE_H - -/* PAGE_SHIFT determines the page size */ -#define PAGE_SHIFT 12 -#define PAGE_SIZE (1UL << PAGE_SHIFT) -#define PAGE_MASK (~(PAGE_SIZE-1)) - -#define LARGE_PAGE_MASK (~(LARGE_PAGE_SIZE-1)) -#define LARGE_PAGE_SIZE (1UL << PMD_SHIFT) - -#ifdef __KERNEL__ -#ifndef __ASSEMBLY__ - -#ifdef CONFIG_X86_USE_3DNOW - -#include <asm/mmx.h> - -#define clear_page(page) mmx_clear_page((void *)(page)) -#define copy_page(to,from) mmx_copy_page(to,from) - -#else +#ifndef _ASM_X86_PAGE_32_H +#define _ASM_X86_PAGE_32_H /* - * On older X86 processors it's not a win to use MMX here it seems. - * Maybe the K6-III ? - */ - -#define clear_page(page) memset((void *)(page), 0, PAGE_SIZE) -#define copy_page(to,from) memcpy((void *)(to), (void *)(from), PAGE_SIZE) - -#endif - -#define clear_user_page(page, vaddr, pg) clear_page(page) -#define copy_user_page(to, from, vaddr, pg) copy_page(to, from) - -#define __alloc_zeroed_user_highpage(movableflags, vma, vaddr) \ - alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr) -#define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE - -/* - * These are used to make use of C type-checking.. + * This handles the memory map. + * + * A __PAGE_OFFSET of 0xC0000000 means that the kernel has + * a virtual address space of one gigabyte, which limits the + * amount of physical memory you can use to about 950MB. + * + * If you want more physical memory than this then see the CONFIG_HIGHMEM4G + * and CONFIG_HIGHMEM64G options in the kernel configuration. */ -extern int nx_enabled; +#define __PAGE_OFFSET _AC(CONFIG_PAGE_OFFSET, UL) #ifdef CONFIG_X86_PAE -typedef struct { unsigned long pte_low, pte_high; } pte_t; -typedef struct { unsigned long long pmd; } pmd_t; -typedef struct { unsigned long long pgd; } pgd_t; -typedef struct { unsigned long long pgprot; } pgprot_t; +#define __PHYSICAL_MASK_SHIFT 36 +#define __VIRTUAL_MASK_SHIFT 32 +#define PAGETABLE_LEVELS 3 -static inline unsigned long long native_pgd_val(pgd_t pgd) -{ - return pgd.pgd; -} - -static inline unsigned long long native_pmd_val(pmd_t pmd) -{ - return pmd.pmd; -} - -static inline unsigned long long native_pte_val(pte_t pte) -{ - return pte.pte_low | ((unsigned long long)pte.pte_high << 32); -} - -static inline pgd_t native_make_pgd(unsigned long long val) -{ - return (pgd_t) { val }; -} - -static inline pmd_t native_make_pmd(unsigned long long val) -{ - return (pmd_t) { val }; -} - -static inline pte_t native_make_pte(unsigned long long val) -{ - return (pte_t) { .pte_low = val, .pte_high = (val >> 32) } ; -} - -#ifndef CONFIG_PARAVIRT -#define pmd_val(x) native_pmd_val(x) -#define __pmd(x) native_make_pmd(x) -#endif - -#define HPAGE_SHIFT 21 -#include <asm-generic/pgtable-nopud.h> +#ifndef __ASSEMBLY__ +typedef u64 pteval_t; +typedef u64 pmdval_t; +typedef u64 pudval_t; +typedef u64 pgdval_t; +typedef u64 pgprotval_t; +typedef u64 phys_addr_t; + +typedef union { + struct { + unsigned long pte_low, pte_high; + }; + pteval_t pte; +} pte_t; +#endif /* __ASSEMBLY__ + */ #else /* !CONFIG_X86_PAE */ -typedef struct { unsigned long pte_low; } pte_t; -typedef struct { unsigned long pgd; } pgd_t; -typedef struct { unsigned long pgprot; } pgprot_t; -#define boot_pte_t pte_t /* or would you rather have a typedef */ - -static inline unsigned long native_pgd_val(pgd_t pgd) -{ - return pgd.pgd; -} +#define __PHYSICAL_MASK_SHIFT 32 +#define __VIRTUAL_MASK_SHIFT 32 +#define PAGETABLE_LEVELS 2 -static inline unsigned long native_pte_val(pte_t pte) -{ - return pte.pte_low; -} - -static inline pgd_t native_make_pgd(unsigned long val) -{ - return (pgd_t) { val }; -} +#ifndef __ASSEMBLY__ +typedef unsigned long pteval_t; +typedef unsigned long pmdval_t; +typedef unsigned long pudval_t; +typedef unsigned long pgdval_t; +typedef unsigned long pgprotval_t; +typedef unsigned long phys_addr_t; -static inline pte_t native_make_pte(unsigned long val) -{ - return (pte_t) { .pte_low = val }; -} +typedef union { pteval_t pte, pte_low; } pte_t; +typedef pte_t boot_pte_t; -#define HPAGE_SHIFT 22 -#include <asm-generic/pgtable-nopmd.h> +#endif /* __ASSEMBLY__ */ #endif /* CONFIG_X86_PAE */ -#define PTE_MASK PAGE_MASK - #ifdef CONFIG_HUGETLB_PAGE -#define HPAGE_SIZE ((1UL) << HPAGE_SHIFT) -#define HPAGE_MASK (~(HPAGE_SIZE - 1)) -#define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT) #define HAVE_ARCH_HUGETLB_UNMAPPED_AREA #endif -#define pgprot_val(x) ((x).pgprot) -#define __pgprot(x) ((pgprot_t) { (x) } ) - -#ifndef CONFIG_PARAVIRT -#define pgd_val(x) native_pgd_val(x) -#define __pgd(x) native_make_pgd(x) -#define pte_val(x) native_pte_val(x) -#define __pte(x) native_make_pte(x) -#endif - -#endif /* !__ASSEMBLY__ */ - -/* to align the pointer to the (next) page boundary */ -#define PAGE_ALIGN(addr) (((addr)+PAGE_SIZE-1)&PAGE_MASK) - -/* - * This handles the memory map.. We could make this a config - * option, but too many people screw it up, and too few need - * it. - * - * A __PAGE_OFFSET of 0xC0000000 means that the kernel has - * a virtual address space of one gigabyte, which limits the - * amount of physical memory you can use to about 950MB. - * - * If you want more physical memory than this then see the CONFIG_HIGHMEM4G - * and CONFIG_HIGHMEM64G options in the kernel configuration. - */ - #ifndef __ASSEMBLY__ +#define __phys_addr(x) ((x)-PAGE_OFFSET) +#define __phys_reloc_hide(x) RELOC_HIDE((x), 0) + +#ifdef CONFIG_FLATMEM +#define pfn_valid(pfn) ((pfn) < max_mapnr) +#endif /* CONFIG_FLATMEM */ -struct vm_area_struct; +extern int nx_enabled; /* * This much address space is reserved for vmalloc() and iomap() * as well as fixmap mappings. */ extern unsigned int __VMALLOC_RESERVE; - extern int sysctl_legacy_va_layout; -extern int page_is_ram(unsigned long pagenr); - -#endif /* __ASSEMBLY__ */ - -#ifdef __ASSEMBLY__ -#define __PAGE_OFFSET CONFIG_PAGE_OFFSET -#else -#define __PAGE_OFFSET ((unsigned long)CONFIG_PAGE_OFFSET) -#endif - - -#define PAGE_OFFSET ((unsigned long)__PAGE_OFFSET) #define VMALLOC_RESERVE ((unsigned long)__VMALLOC_RESERVE) #define MAXMEM (-__PAGE_OFFSET-__VMALLOC_RESERVE) -#define __pa(x) ((unsigned long)(x)-PAGE_OFFSET) -/* __pa_symbol should be used for C visible symbols. - This seems to be the official gcc blessed way to do such arithmetic. */ -#define __pa_symbol(x) __pa(RELOC_HIDE((unsigned long)(x),0)) -#define __va(x) ((void *)((unsigned long)(x)+PAGE_OFFSET)) -#define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT) -#ifdef CONFIG_FLATMEM -#define pfn_valid(pfn) ((pfn) < max_mapnr) -#endif /* CONFIG_FLATMEM */ -#define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT) -#define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT) +#ifdef CONFIG_X86_USE_3DNOW +#include <asm/mmx.h> + +static inline void clear_page(void *page) +{ + mmx_clear_page(page); +} -#define VM_DATA_DEFAULT_FLAGS \ - (VM_READ | VM_WRITE | \ - ((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0 ) | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) +static inline void copy_page(void *to, void *from) +{ + mmx_copy_page(to, from); +} +#else /* !CONFIG_X86_USE_3DNOW */ +#include <linux/string.h> -#include <asm-generic/memory_model.h> -#include <asm-generic/page.h> +static inline void clear_page(void *page) +{ + memset(page, 0, PAGE_SIZE); +} -#define __HAVE_ARCH_GATE_AREA 1 -#endif /* __KERNEL__ */ +static inline void copy_page(void *to, void *from) +{ + memcpy(to, from, PAGE_SIZE); +} +#endif /* CONFIG_X86_3DNOW */ +#endif /* !__ASSEMBLY__ */ -#endif /* _I386_PAGE_H */ +#endif /* _ASM_X86_PAGE_32_H */ diff --git a/include/asm-x86/page_64.h b/include/asm-x86/page_64.h index c3b52bcb171e..c1ac42d8707f 100644 --- a/include/asm-x86/page_64.h +++ b/include/asm-x86/page_64.h @@ -1,15 +1,9 @@ #ifndef _X86_64_PAGE_H #define _X86_64_PAGE_H -#include <linux/const.h> +#define PAGETABLE_LEVELS 4 -/* PAGE_SHIFT determines the page size */ -#define PAGE_SHIFT 12 -#define PAGE_SIZE (_AC(1,UL) << PAGE_SHIFT) -#define PAGE_MASK (~(PAGE_SIZE-1)) -#define PHYSICAL_PAGE_MASK (~(PAGE_SIZE-1) & __PHYSICAL_MASK) - -#define THREAD_ORDER 1 +#define THREAD_ORDER 1 #define THREAD_SIZE (PAGE_SIZE << THREAD_ORDER) #define CURRENT_MASK (~(THREAD_SIZE-1)) @@ -29,54 +23,7 @@ #define MCE_STACK 5 #define N_EXCEPTION_STACKS 5 /* hw limit: 7 */ -#define LARGE_PAGE_MASK (~(LARGE_PAGE_SIZE-1)) -#define LARGE_PAGE_SIZE (_AC(1,UL) << PMD_SHIFT) - -#define HPAGE_SHIFT PMD_SHIFT -#define HPAGE_SIZE (_AC(1,UL) << HPAGE_SHIFT) -#define HPAGE_MASK (~(HPAGE_SIZE - 1)) -#define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT) - -#ifdef __KERNEL__ -#ifndef __ASSEMBLY__ - -extern unsigned long end_pfn; - -void clear_page(void *); -void copy_page(void *, void *); - -#define clear_user_page(page, vaddr, pg) clear_page(page) -#define copy_user_page(to, from, vaddr, pg) copy_page(to, from) - -#define __alloc_zeroed_user_highpage(movableflags, vma, vaddr) \ - alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr) -#define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE -/* - * These are used to make use of C type-checking.. - */ -typedef struct { unsigned long pte; } pte_t; -typedef struct { unsigned long pmd; } pmd_t; -typedef struct { unsigned long pud; } pud_t; -typedef struct { unsigned long pgd; } pgd_t; -#define PTE_MASK PHYSICAL_PAGE_MASK - -typedef struct { unsigned long pgprot; } pgprot_t; - -extern unsigned long phys_base; - -#define pte_val(x) ((x).pte) -#define pmd_val(x) ((x).pmd) -#define pud_val(x) ((x).pud) -#define pgd_val(x) ((x).pgd) -#define pgprot_val(x) ((x).pgprot) - -#define __pte(x) ((pte_t) { (x) } ) -#define __pmd(x) ((pmd_t) { (x) } ) -#define __pud(x) ((pud_t) { (x) } ) -#define __pgd(x) ((pgd_t) { (x) } ) -#define __pgprot(x) ((pgprot_t) { (x) } ) - -#endif /* !__ASSEMBLY__ */ +#define __PAGE_OFFSET _AC(0xffff810000000000, UL) #define __PHYSICAL_START CONFIG_PHYSICAL_START #define __KERNEL_ALIGN 0x200000 @@ -92,53 +39,44 @@ extern unsigned long phys_base; #define __START_KERNEL (__START_KERNEL_map + __PHYSICAL_START) #define __START_KERNEL_map _AC(0xffffffff80000000, UL) -#define __PAGE_OFFSET _AC(0xffff810000000000, UL) - -/* to align the pointer to the (next) page boundary */ -#define PAGE_ALIGN(addr) (((addr)+PAGE_SIZE-1)&PAGE_MASK) /* See Documentation/x86_64/mm.txt for a description of the memory map. */ #define __PHYSICAL_MASK_SHIFT 46 -#define __PHYSICAL_MASK ((_AC(1,UL) << __PHYSICAL_MASK_SHIFT) - 1) #define __VIRTUAL_MASK_SHIFT 48 -#define __VIRTUAL_MASK ((_AC(1,UL) << __VIRTUAL_MASK_SHIFT) - 1) #define KERNEL_TEXT_SIZE (40*1024*1024) #define KERNEL_TEXT_START _AC(0xffffffff80000000, UL) -#define PAGE_OFFSET __PAGE_OFFSET #ifndef __ASSEMBLY__ +void clear_page(void *page); +void copy_page(void *to, void *from); -#include <asm/bug.h> +extern unsigned long end_pfn; +extern unsigned long end_pfn_map; +extern unsigned long phys_base; extern unsigned long __phys_addr(unsigned long); +#define __phys_reloc_hide(x) (x) -#endif /* __ASSEMBLY__ */ - -#define __pa(x) __phys_addr((unsigned long)(x)) -#define __pa_symbol(x) __phys_addr((unsigned long)(x)) - -#define __va(x) ((void *)((unsigned long)(x)+PAGE_OFFSET)) -#define __boot_va(x) __va(x) -#define __boot_pa(x) __pa(x) -#ifdef CONFIG_FLATMEM -#define pfn_valid(pfn) ((pfn) < end_pfn) -#endif - -#define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT) -#define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT) -#define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT) +/* + * These are used to make use of C type-checking.. + */ +typedef unsigned long pteval_t; +typedef unsigned long pmdval_t; +typedef unsigned long pudval_t; +typedef unsigned long pgdval_t; +typedef unsigned long pgprotval_t; +typedef unsigned long phys_addr_t; -#define VM_DATA_DEFAULT_FLAGS \ - (((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0 ) | \ - VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) +typedef struct { pteval_t pte; } pte_t; -#define __HAVE_ARCH_GATE_AREA 1 #define vmemmap ((struct page *)VMEMMAP_START) -#include <asm-generic/memory_model.h> -#include <asm-generic/page.h> +#endif /* !__ASSEMBLY__ */ + +#ifdef CONFIG_FLATMEM +#define pfn_valid(pfn) ((pfn) < end_pfn) +#endif -#endif /* __KERNEL__ */ #endif /* _X86_64_PAGE_H */ diff --git a/include/asm-x86/paravirt.h b/include/asm-x86/paravirt.h index f59d370c5df4..d6236eb46466 100644 --- a/include/asm-x86/paravirt.h +++ b/include/asm-x86/paravirt.h @@ -5,22 +5,37 @@ #ifdef CONFIG_PARAVIRT #include <asm/page.h> +#include <asm/asm.h> /* Bitmask of what can be clobbered: usually at least eax. */ -#define CLBR_NONE 0x0 -#define CLBR_EAX 0x1 -#define CLBR_ECX 0x2 -#define CLBR_EDX 0x4 -#define CLBR_ANY 0x7 +#define CLBR_NONE 0 +#define CLBR_EAX (1 << 0) +#define CLBR_ECX (1 << 1) +#define CLBR_EDX (1 << 2) + +#ifdef CONFIG_X86_64 +#define CLBR_RSI (1 << 3) +#define CLBR_RDI (1 << 4) +#define CLBR_R8 (1 << 5) +#define CLBR_R9 (1 << 6) +#define CLBR_R10 (1 << 7) +#define CLBR_R11 (1 << 8) +#define CLBR_ANY ((1 << 9) - 1) +#include <asm/desc_defs.h> +#else +/* CLBR_ANY should match all regs platform has. For i386, that's just it */ +#define CLBR_ANY ((1 << 3) - 1) +#endif /* X86_64 */ #ifndef __ASSEMBLY__ #include <linux/types.h> #include <linux/cpumask.h> #include <asm/kmap_types.h> +#include <asm/desc_defs.h> struct page; struct thread_struct; -struct Xgt_desc_struct; +struct desc_ptr; struct tss_struct; struct mm_struct; struct desc_struct; @@ -86,22 +101,27 @@ struct pv_cpu_ops { unsigned long (*read_cr4)(void); void (*write_cr4)(unsigned long); +#ifdef CONFIG_X86_64 + unsigned long (*read_cr8)(void); + void (*write_cr8)(unsigned long); +#endif + /* Segment descriptor handling */ void (*load_tr_desc)(void); - void (*load_gdt)(const struct Xgt_desc_struct *); - void (*load_idt)(const struct Xgt_desc_struct *); - void (*store_gdt)(struct Xgt_desc_struct *); - void (*store_idt)(struct Xgt_desc_struct *); + void (*load_gdt)(const struct desc_ptr *); + void (*load_idt)(const struct desc_ptr *); + void (*store_gdt)(struct desc_ptr *); + void (*store_idt)(struct desc_ptr *); void (*set_ldt)(const void *desc, unsigned entries); unsigned long (*store_tr)(void); void (*load_tls)(struct thread_struct *t, unsigned int cpu); - void (*write_ldt_entry)(struct desc_struct *, - int entrynum, u32 low, u32 high); + void (*write_ldt_entry)(struct desc_struct *ldt, int entrynum, + const void *desc); void (*write_gdt_entry)(struct desc_struct *, - int entrynum, u32 low, u32 high); - void (*write_idt_entry)(struct desc_struct *, - int entrynum, u32 low, u32 high); - void (*load_esp0)(struct tss_struct *tss, struct thread_struct *t); + int entrynum, const void *desc, int size); + void (*write_idt_entry)(gate_desc *, + int entrynum, const gate_desc *gate); + void (*load_sp0)(struct tss_struct *tss, struct thread_struct *t); void (*set_iopl_mask)(unsigned mask); @@ -115,15 +135,18 @@ struct pv_cpu_ops { /* MSR, PMC and TSR operations. err = 0/-EFAULT. wrmsr returns 0/-EFAULT. */ u64 (*read_msr)(unsigned int msr, int *err); - int (*write_msr)(unsigned int msr, u64 val); + int (*write_msr)(unsigned int msr, unsigned low, unsigned high); u64 (*read_tsc)(void); - u64 (*read_pmc)(void); + u64 (*read_pmc)(int counter); + unsigned long long (*read_tscp)(unsigned int *aux); /* These two are jmp to, not actually called. */ - void (*irq_enable_sysexit)(void); + void (*irq_enable_syscall_ret)(void); void (*iret)(void); + void (*swapgs)(void); + struct pv_lazy_ops lazy_mode; }; @@ -150,9 +173,9 @@ struct pv_apic_ops { * Direct APIC operations, principally for VMI. Ideally * these shouldn't be in this interface. */ - void (*apic_write)(unsigned long reg, unsigned long v); - void (*apic_write_atomic)(unsigned long reg, unsigned long v); - unsigned long (*apic_read)(unsigned long reg); + void (*apic_write)(unsigned long reg, u32 v); + void (*apic_write_atomic)(unsigned long reg, u32 v); + u32 (*apic_read)(unsigned long reg); void (*setup_boot_clock)(void); void (*setup_secondary_clock)(void); @@ -198,7 +221,7 @@ struct pv_mmu_ops { /* Hooks for allocating/releasing pagetable pages */ void (*alloc_pt)(struct mm_struct *mm, u32 pfn); - void (*alloc_pd)(u32 pfn); + void (*alloc_pd)(struct mm_struct *mm, u32 pfn); void (*alloc_pd_clone)(u32 pfn, u32 clonepfn, u32 start, u32 count); void (*release_pt)(u32 pfn); void (*release_pd)(u32 pfn); @@ -212,28 +235,34 @@ struct pv_mmu_ops { void (*pte_update_defer)(struct mm_struct *mm, unsigned long addr, pte_t *ptep); + pteval_t (*pte_val)(pte_t); + pte_t (*make_pte)(pteval_t pte); + + pgdval_t (*pgd_val)(pgd_t); + pgd_t (*make_pgd)(pgdval_t pgd); + +#if PAGETABLE_LEVELS >= 3 #ifdef CONFIG_X86_PAE void (*set_pte_atomic)(pte_t *ptep, pte_t pteval); void (*set_pte_present)(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte); - void (*set_pud)(pud_t *pudp, pud_t pudval); void (*pte_clear)(struct mm_struct *mm, unsigned long addr, pte_t *ptep); void (*pmd_clear)(pmd_t *pmdp); - unsigned long long (*pte_val)(pte_t); - unsigned long long (*pmd_val)(pmd_t); - unsigned long long (*pgd_val)(pgd_t); +#endif /* CONFIG_X86_PAE */ - pte_t (*make_pte)(unsigned long long pte); - pmd_t (*make_pmd)(unsigned long long pmd); - pgd_t (*make_pgd)(unsigned long long pgd); -#else - unsigned long (*pte_val)(pte_t); - unsigned long (*pgd_val)(pgd_t); + void (*set_pud)(pud_t *pudp, pud_t pudval); - pte_t (*make_pte)(unsigned long pte); - pgd_t (*make_pgd)(unsigned long pgd); -#endif + pmdval_t (*pmd_val)(pmd_t); + pmd_t (*make_pmd)(pmdval_t pmd); + +#if PAGETABLE_LEVELS == 4 + pudval_t (*pud_val)(pud_t); + pud_t (*make_pud)(pudval_t pud); + + void (*set_pgd)(pgd_t *pudp, pgd_t pgdval); +#endif /* PAGETABLE_LEVELS == 4 */ +#endif /* PAGETABLE_LEVELS >= 3 */ #ifdef CONFIG_HIGHPTE void *(*kmap_atomic_pte)(struct page *page, enum km_type type); @@ -279,7 +308,8 @@ extern struct pv_mmu_ops pv_mmu_ops; #define _paravirt_alt(insn_string, type, clobber) \ "771:\n\t" insn_string "\n" "772:\n" \ ".pushsection .parainstructions,\"a\"\n" \ - " .long 771b\n" \ + _ASM_ALIGN "\n" \ + _ASM_PTR " 771b\n" \ " .byte " type "\n" \ " .byte 772b-771b\n" \ " .short " clobber "\n" \ @@ -289,6 +319,11 @@ extern struct pv_mmu_ops pv_mmu_ops; #define paravirt_alt(insn_string) \ _paravirt_alt(insn_string, "%c[paravirt_typenum]", "%c[paravirt_clobber]") +/* Simple instruction patching code. */ +#define DEF_NATIVE(ops, name, code) \ + extern const char start_##ops##_##name[], end_##ops##_##name[]; \ + asm("start_" #ops "_" #name ": " code "; end_" #ops "_" #name ":") + unsigned paravirt_patch_nop(void); unsigned paravirt_patch_ignore(unsigned len); unsigned paravirt_patch_call(void *insnbuf, @@ -303,6 +338,9 @@ unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf, unsigned paravirt_patch_insns(void *insnbuf, unsigned len, const char *start, const char *end); +unsigned native_patch(u8 type, u16 clobbers, void *ibuf, + unsigned long addr, unsigned len); + int paravirt_disable_iospace(void); /* @@ -319,7 +357,7 @@ int paravirt_disable_iospace(void); * runtime. * * Normally, a call to a pv_op function is a simple indirect call: - * (paravirt_ops.operations)(args...). + * (pv_op_struct.operations)(args...). * * Unfortunately, this is a relatively slow operation for modern CPUs, * because it cannot necessarily determine what the destination @@ -329,11 +367,17 @@ int paravirt_disable_iospace(void); * calls are essentially free, because the call and return addresses * are completely predictable.) * - * These macros rely on the standard gcc "regparm(3)" calling + * For i386, these macros rely on the standard gcc "regparm(3)" calling * convention, in which the first three arguments are placed in %eax, * %edx, %ecx (in that order), and the remaining arguments are placed * on the stack. All caller-save registers (eax,edx,ecx) are expected * to be modified (either clobbered or used for return values). + * X86_64, on the other hand, already specifies a register-based calling + * conventions, returning at %rax, with parameteres going on %rdi, %rsi, + * %rdx, and %rcx. Note that for this reason, x86_64 does not need any + * special handling for dealing with 4 arguments, unlike i386. + * However, x86_64 also have to clobber all caller saved registers, which + * unfortunately, are quite a bit (r8 - r11) * * The call instruction itself is marked by placing its start address * and size into the .parainstructions section, so that @@ -356,10 +400,12 @@ int paravirt_disable_iospace(void); * the return type. The macro then uses sizeof() on that type to * determine whether its a 32 or 64 bit value, and places the return * in the right register(s) (just %eax for 32-bit, and %edx:%eax for - * 64-bit). + * 64-bit). For x86_64 machines, it just returns at %rax regardless of + * the return value size. * * 64-bit arguments are passed as a pair of adjacent 32-bit arguments - * in low,high order. + * i386 also passes 64-bit arguments as a pair of adjacent 32-bit arguments + * in low,high order * * Small structures are passed and returned in registers. The macro * calling convention can't directly deal with this, so the wrapper @@ -369,46 +415,67 @@ int paravirt_disable_iospace(void); * means that all uses must be wrapped in inline functions. This also * makes sure the incoming and outgoing types are always correct. */ +#ifdef CONFIG_X86_32 +#define PVOP_VCALL_ARGS unsigned long __eax, __edx, __ecx +#define PVOP_CALL_ARGS PVOP_VCALL_ARGS +#define PVOP_VCALL_CLOBBERS "=a" (__eax), "=d" (__edx), \ + "=c" (__ecx) +#define PVOP_CALL_CLOBBERS PVOP_VCALL_CLOBBERS +#define EXTRA_CLOBBERS +#define VEXTRA_CLOBBERS +#else +#define PVOP_VCALL_ARGS unsigned long __edi, __esi, __edx, __ecx +#define PVOP_CALL_ARGS PVOP_VCALL_ARGS, __eax +#define PVOP_VCALL_CLOBBERS "=D" (__edi), \ + "=S" (__esi), "=d" (__edx), \ + "=c" (__ecx) + +#define PVOP_CALL_CLOBBERS PVOP_VCALL_CLOBBERS, "=a" (__eax) + +#define EXTRA_CLOBBERS , "r8", "r9", "r10", "r11" +#define VEXTRA_CLOBBERS , "rax", "r8", "r9", "r10", "r11" +#endif + #define __PVOP_CALL(rettype, op, pre, post, ...) \ ({ \ rettype __ret; \ - unsigned long __eax, __edx, __ecx; \ + PVOP_CALL_ARGS; \ + /* This is 32-bit specific, but is okay in 64-bit */ \ + /* since this condition will never hold */ \ if (sizeof(rettype) > sizeof(unsigned long)) { \ asm volatile(pre \ paravirt_alt(PARAVIRT_CALL) \ post \ - : "=a" (__eax), "=d" (__edx), \ - "=c" (__ecx) \ + : PVOP_CALL_CLOBBERS \ : paravirt_type(op), \ paravirt_clobber(CLBR_ANY), \ ##__VA_ARGS__ \ - : "memory", "cc"); \ + : "memory", "cc" EXTRA_CLOBBERS); \ __ret = (rettype)((((u64)__edx) << 32) | __eax); \ } else { \ asm volatile(pre \ paravirt_alt(PARAVIRT_CALL) \ post \ - : "=a" (__eax), "=d" (__edx), \ - "=c" (__ecx) \ + : PVOP_CALL_CLOBBERS \ : paravirt_type(op), \ paravirt_clobber(CLBR_ANY), \ ##__VA_ARGS__ \ - : "memory", "cc"); \ + : "memory", "cc" EXTRA_CLOBBERS); \ __ret = (rettype)__eax; \ } \ __ret; \ }) #define __PVOP_VCALL(op, pre, post, ...) \ ({ \ - unsigned long __eax, __edx, __ecx; \ + PVOP_VCALL_ARGS; \ asm volatile(pre \ paravirt_alt(PARAVIRT_CALL) \ post \ - : "=a" (__eax), "=d" (__edx), "=c" (__ecx) \ + : PVOP_VCALL_CLOBBERS \ : paravirt_type(op), \ paravirt_clobber(CLBR_ANY), \ ##__VA_ARGS__ \ - : "memory", "cc"); \ + : "memory", "cc" VEXTRA_CLOBBERS); \ }) #define PVOP_CALL0(rettype, op) \ @@ -417,22 +484,26 @@ int paravirt_disable_iospace(void); __PVOP_VCALL(op, "", "") #define PVOP_CALL1(rettype, op, arg1) \ - __PVOP_CALL(rettype, op, "", "", "0" ((u32)(arg1))) + __PVOP_CALL(rettype, op, "", "", "0" ((unsigned long)(arg1))) #define PVOP_VCALL1(op, arg1) \ - __PVOP_VCALL(op, "", "", "0" ((u32)(arg1))) + __PVOP_VCALL(op, "", "", "0" ((unsigned long)(arg1))) #define PVOP_CALL2(rettype, op, arg1, arg2) \ - __PVOP_CALL(rettype, op, "", "", "0" ((u32)(arg1)), "1" ((u32)(arg2))) + __PVOP_CALL(rettype, op, "", "", "0" ((unsigned long)(arg1)), \ + "1" ((unsigned long)(arg2))) #define PVOP_VCALL2(op, arg1, arg2) \ - __PVOP_VCALL(op, "", "", "0" ((u32)(arg1)), "1" ((u32)(arg2))) + __PVOP_VCALL(op, "", "", "0" ((unsigned long)(arg1)), \ + "1" ((unsigned long)(arg2))) #define PVOP_CALL3(rettype, op, arg1, arg2, arg3) \ - __PVOP_CALL(rettype, op, "", "", "0" ((u32)(arg1)), \ - "1"((u32)(arg2)), "2"((u32)(arg3))) + __PVOP_CALL(rettype, op, "", "", "0" ((unsigned long)(arg1)), \ + "1"((unsigned long)(arg2)), "2"((unsigned long)(arg3))) #define PVOP_VCALL3(op, arg1, arg2, arg3) \ - __PVOP_VCALL(op, "", "", "0" ((u32)(arg1)), "1"((u32)(arg2)), \ - "2"((u32)(arg3))) + __PVOP_VCALL(op, "", "", "0" ((unsigned long)(arg1)), \ + "1"((unsigned long)(arg2)), "2"((unsigned long)(arg3))) +/* This is the only difference in x86_64. We can make it much simpler */ +#ifdef CONFIG_X86_32 #define PVOP_CALL4(rettype, op, arg1, arg2, arg3, arg4) \ __PVOP_CALL(rettype, op, \ "push %[_arg4];", "lea 4(%%esp),%%esp;", \ @@ -443,16 +514,26 @@ int paravirt_disable_iospace(void); "push %[_arg4];", "lea 4(%%esp),%%esp;", \ "0" ((u32)(arg1)), "1" ((u32)(arg2)), \ "2" ((u32)(arg3)), [_arg4] "mr" ((u32)(arg4))) +#else +#define PVOP_CALL4(rettype, op, arg1, arg2, arg3, arg4) \ + __PVOP_CALL(rettype, op, "", "", "0" ((unsigned long)(arg1)), \ + "1"((unsigned long)(arg2)), "2"((unsigned long)(arg3)), \ + "3"((unsigned long)(arg4))) +#define PVOP_VCALL4(op, arg1, arg2, arg3, arg4) \ + __PVOP_VCALL(op, "", "", "0" ((unsigned long)(arg1)), \ + "1"((unsigned long)(arg2)), "2"((unsigned long)(arg3)), \ + "3"((unsigned long)(arg4))) +#endif static inline int paravirt_enabled(void) { return pv_info.paravirt_enabled; } -static inline void load_esp0(struct tss_struct *tss, +static inline void load_sp0(struct tss_struct *tss, struct thread_struct *thread) { - PVOP_VCALL2(pv_cpu_ops.load_esp0, tss, thread); + PVOP_VCALL2(pv_cpu_ops.load_sp0, tss, thread); } #define ARCH_SETUP pv_init_ops.arch_setup(); @@ -540,6 +621,18 @@ static inline void write_cr4(unsigned long x) PVOP_VCALL1(pv_cpu_ops.write_cr4, x); } +#ifdef CONFIG_X86_64 +static inline unsigned long read_cr8(void) +{ + return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr8); +} + +static inline void write_cr8(unsigned long x) +{ + PVOP_VCALL1(pv_cpu_ops.write_cr8, x); +} +#endif + static inline void raw_safe_halt(void) { PVOP_VCALL0(pv_irq_ops.safe_halt); @@ -613,8 +706,6 @@ static inline unsigned long long paravirt_sched_clock(void) } #define calculate_cpu_khz() (pv_time_ops.get_cpu_khz()) -#define write_tsc(val1,val2) wrmsr(0x10, val1, val2) - static inline unsigned long long paravirt_read_pmc(int counter) { return PVOP_CALL1(u64, pv_cpu_ops.read_pmc, counter); @@ -626,15 +717,36 @@ static inline unsigned long long paravirt_read_pmc(int counter) high = _l >> 32; \ } while(0) +static inline unsigned long long paravirt_rdtscp(unsigned int *aux) +{ + return PVOP_CALL1(u64, pv_cpu_ops.read_tscp, aux); +} + +#define rdtscp(low, high, aux) \ +do { \ + int __aux; \ + unsigned long __val = paravirt_rdtscp(&__aux); \ + (low) = (u32)__val; \ + (high) = (u32)(__val >> 32); \ + (aux) = __aux; \ +} while (0) + +#define rdtscpll(val, aux) \ +do { \ + unsigned long __aux; \ + val = paravirt_rdtscp(&__aux); \ + (aux) = __aux; \ +} while (0) + static inline void load_TR_desc(void) { PVOP_VCALL0(pv_cpu_ops.load_tr_desc); } -static inline void load_gdt(const struct Xgt_desc_struct *dtr) +static inline void load_gdt(const struct desc_ptr *dtr) { PVOP_VCALL1(pv_cpu_ops.load_gdt, dtr); } -static inline void load_idt(const struct Xgt_desc_struct *dtr) +static inline void load_idt(const struct desc_ptr *dtr) { PVOP_VCALL1(pv_cpu_ops.load_idt, dtr); } @@ -642,11 +754,11 @@ static inline void set_ldt(const void *addr, unsigned entries) { PVOP_VCALL2(pv_cpu_ops.set_ldt, addr, entries); } -static inline void store_gdt(struct Xgt_desc_struct *dtr) +static inline void store_gdt(struct desc_ptr *dtr) { PVOP_VCALL1(pv_cpu_ops.store_gdt, dtr); } -static inline void store_idt(struct Xgt_desc_struct *dtr) +static inline void store_idt(struct desc_ptr *dtr) { PVOP_VCALL1(pv_cpu_ops.store_idt, dtr); } @@ -659,17 +771,22 @@ static inline void load_TLS(struct thread_struct *t, unsigned cpu) { PVOP_VCALL2(pv_cpu_ops.load_tls, t, cpu); } -static inline void write_ldt_entry(void *dt, int entry, u32 low, u32 high) + +static inline void write_ldt_entry(struct desc_struct *dt, int entry, + const void *desc) { - PVOP_VCALL4(pv_cpu_ops.write_ldt_entry, dt, entry, low, high); + PVOP_VCALL3(pv_cpu_ops.write_ldt_entry, dt, entry, desc); } -static inline void write_gdt_entry(void *dt, int entry, u32 low, u32 high) + +static inline void write_gdt_entry(struct desc_struct *dt, int entry, + void *desc, int type) { - PVOP_VCALL4(pv_cpu_ops.write_gdt_entry, dt, entry, low, high); + PVOP_VCALL4(pv_cpu_ops.write_gdt_entry, dt, entry, desc, type); } -static inline void write_idt_entry(void *dt, int entry, u32 low, u32 high) + +static inline void write_idt_entry(gate_desc *dt, int entry, const gate_desc *g) { - PVOP_VCALL4(pv_cpu_ops.write_idt_entry, dt, entry, low, high); + PVOP_VCALL3(pv_cpu_ops.write_idt_entry, dt, entry, g); } static inline void set_iopl_mask(unsigned mask) { @@ -690,17 +807,17 @@ static inline void slow_down_io(void) { /* * Basic functions accessing APICs. */ -static inline void apic_write(unsigned long reg, unsigned long v) +static inline void apic_write(unsigned long reg, u32 v) { PVOP_VCALL2(pv_apic_ops.apic_write, reg, v); } -static inline void apic_write_atomic(unsigned long reg, unsigned long v) +static inline void apic_write_atomic(unsigned long reg, u32 v) { PVOP_VCALL2(pv_apic_ops.apic_write_atomic, reg, v); } -static inline unsigned long apic_read(unsigned long reg) +static inline u32 apic_read(unsigned long reg) { return PVOP_CALL1(unsigned long, pv_apic_ops.apic_read, reg); } @@ -786,9 +903,9 @@ static inline void paravirt_release_pt(unsigned pfn) PVOP_VCALL1(pv_mmu_ops.release_pt, pfn); } -static inline void paravirt_alloc_pd(unsigned pfn) +static inline void paravirt_alloc_pd(struct mm_struct *mm, unsigned pfn) { - PVOP_VCALL1(pv_mmu_ops.alloc_pd, pfn); + PVOP_VCALL2(pv_mmu_ops.alloc_pd, mm, pfn); } static inline void paravirt_alloc_pd_clone(unsigned pfn, unsigned clonepfn, @@ -822,128 +939,236 @@ static inline void pte_update_defer(struct mm_struct *mm, unsigned long addr, PVOP_VCALL3(pv_mmu_ops.pte_update_defer, mm, addr, ptep); } -#ifdef CONFIG_X86_PAE -static inline pte_t __pte(unsigned long long val) +static inline pte_t __pte(pteval_t val) { - unsigned long long ret = PVOP_CALL2(unsigned long long, - pv_mmu_ops.make_pte, - val, val >> 32); - return (pte_t) { ret, ret >> 32 }; + pteval_t ret; + + if (sizeof(pteval_t) > sizeof(long)) + ret = PVOP_CALL2(pteval_t, + pv_mmu_ops.make_pte, + val, (u64)val >> 32); + else + ret = PVOP_CALL1(pteval_t, + pv_mmu_ops.make_pte, + val); + + return (pte_t) { .pte = ret }; } -static inline pmd_t __pmd(unsigned long long val) +static inline pteval_t pte_val(pte_t pte) { - return (pmd_t) { PVOP_CALL2(unsigned long long, pv_mmu_ops.make_pmd, - val, val >> 32) }; + pteval_t ret; + + if (sizeof(pteval_t) > sizeof(long)) + ret = PVOP_CALL2(pteval_t, pv_mmu_ops.pte_val, + pte.pte, (u64)pte.pte >> 32); + else + ret = PVOP_CALL1(pteval_t, pv_mmu_ops.pte_val, + pte.pte); + + return ret; } -static inline pgd_t __pgd(unsigned long long val) +static inline pgd_t __pgd(pgdval_t val) { - return (pgd_t) { PVOP_CALL2(unsigned long long, pv_mmu_ops.make_pgd, - val, val >> 32) }; + pgdval_t ret; + + if (sizeof(pgdval_t) > sizeof(long)) + ret = PVOP_CALL2(pgdval_t, pv_mmu_ops.make_pgd, + val, (u64)val >> 32); + else + ret = PVOP_CALL1(pgdval_t, pv_mmu_ops.make_pgd, + val); + + return (pgd_t) { ret }; } -static inline unsigned long long pte_val(pte_t x) +static inline pgdval_t pgd_val(pgd_t pgd) { - return PVOP_CALL2(unsigned long long, pv_mmu_ops.pte_val, - x.pte_low, x.pte_high); + pgdval_t ret; + + if (sizeof(pgdval_t) > sizeof(long)) + ret = PVOP_CALL2(pgdval_t, pv_mmu_ops.pgd_val, + pgd.pgd, (u64)pgd.pgd >> 32); + else + ret = PVOP_CALL1(pgdval_t, pv_mmu_ops.pgd_val, + pgd.pgd); + + return ret; } -static inline unsigned long long pmd_val(pmd_t x) +static inline void set_pte(pte_t *ptep, pte_t pte) { - return PVOP_CALL2(unsigned long long, pv_mmu_ops.pmd_val, - x.pmd, x.pmd >> 32); + if (sizeof(pteval_t) > sizeof(long)) + PVOP_VCALL3(pv_mmu_ops.set_pte, ptep, + pte.pte, (u64)pte.pte >> 32); + else + PVOP_VCALL2(pv_mmu_ops.set_pte, ptep, + pte.pte); } -static inline unsigned long long pgd_val(pgd_t x) +static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte) { - return PVOP_CALL2(unsigned long long, pv_mmu_ops.pgd_val, - x.pgd, x.pgd >> 32); + if (sizeof(pteval_t) > sizeof(long)) + /* 5 arg words */ + pv_mmu_ops.set_pte_at(mm, addr, ptep, pte); + else + PVOP_VCALL4(pv_mmu_ops.set_pte_at, mm, addr, ptep, pte.pte); } -static inline void set_pte(pte_t *ptep, pte_t pteval) +static inline void set_pmd(pmd_t *pmdp, pmd_t pmd) { - PVOP_VCALL3(pv_mmu_ops.set_pte, ptep, pteval.pte_low, pteval.pte_high); + pmdval_t val = native_pmd_val(pmd); + + if (sizeof(pmdval_t) > sizeof(long)) + PVOP_VCALL3(pv_mmu_ops.set_pmd, pmdp, val, (u64)val >> 32); + else + PVOP_VCALL2(pv_mmu_ops.set_pmd, pmdp, val); } -static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, - pte_t *ptep, pte_t pteval) +#if PAGETABLE_LEVELS >= 3 +static inline pmd_t __pmd(pmdval_t val) { - /* 5 arg words */ - pv_mmu_ops.set_pte_at(mm, addr, ptep, pteval); + pmdval_t ret; + + if (sizeof(pmdval_t) > sizeof(long)) + ret = PVOP_CALL2(pmdval_t, pv_mmu_ops.make_pmd, + val, (u64)val >> 32); + else + ret = PVOP_CALL1(pmdval_t, pv_mmu_ops.make_pmd, + val); + + return (pmd_t) { ret }; } -static inline void set_pte_atomic(pte_t *ptep, pte_t pteval) +static inline pmdval_t pmd_val(pmd_t pmd) { - PVOP_VCALL3(pv_mmu_ops.set_pte_atomic, ptep, - pteval.pte_low, pteval.pte_high); + pmdval_t ret; + + if (sizeof(pmdval_t) > sizeof(long)) + ret = PVOP_CALL2(pmdval_t, pv_mmu_ops.pmd_val, + pmd.pmd, (u64)pmd.pmd >> 32); + else + ret = PVOP_CALL1(pmdval_t, pv_mmu_ops.pmd_val, + pmd.pmd); + + return ret; } -static inline void set_pte_present(struct mm_struct *mm, unsigned long addr, - pte_t *ptep, pte_t pte) +static inline void set_pud(pud_t *pudp, pud_t pud) { - /* 5 arg words */ - pv_mmu_ops.set_pte_present(mm, addr, ptep, pte); + pudval_t val = native_pud_val(pud); + + if (sizeof(pudval_t) > sizeof(long)) + PVOP_VCALL3(pv_mmu_ops.set_pud, pudp, + val, (u64)val >> 32); + else + PVOP_VCALL2(pv_mmu_ops.set_pud, pudp, + val); +} +#if PAGETABLE_LEVELS == 4 +static inline pud_t __pud(pudval_t val) +{ + pudval_t ret; + + if (sizeof(pudval_t) > sizeof(long)) + ret = PVOP_CALL2(pudval_t, pv_mmu_ops.make_pud, + val, (u64)val >> 32); + else + ret = PVOP_CALL1(pudval_t, pv_mmu_ops.make_pud, + val); + + return (pud_t) { ret }; } -static inline void set_pmd(pmd_t *pmdp, pmd_t pmdval) +static inline pudval_t pud_val(pud_t pud) { - PVOP_VCALL3(pv_mmu_ops.set_pmd, pmdp, - pmdval.pmd, pmdval.pmd >> 32); + pudval_t ret; + + if (sizeof(pudval_t) > sizeof(long)) + ret = PVOP_CALL2(pudval_t, pv_mmu_ops.pud_val, + pud.pud, (u64)pud.pud >> 32); + else + ret = PVOP_CALL1(pudval_t, pv_mmu_ops.pud_val, + pud.pud); + + return ret; } -static inline void set_pud(pud_t *pudp, pud_t pudval) +static inline void set_pgd(pgd_t *pgdp, pgd_t pgd) { - PVOP_VCALL3(pv_mmu_ops.set_pud, pudp, - pudval.pgd.pgd, pudval.pgd.pgd >> 32); + pgdval_t val = native_pgd_val(pgd); + + if (sizeof(pgdval_t) > sizeof(long)) + PVOP_VCALL3(pv_mmu_ops.set_pgd, pgdp, + val, (u64)val >> 32); + else + PVOP_VCALL2(pv_mmu_ops.set_pgd, pgdp, + val); } -static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) +static inline void pgd_clear(pgd_t *pgdp) { - PVOP_VCALL3(pv_mmu_ops.pte_clear, mm, addr, ptep); + set_pgd(pgdp, __pgd(0)); } -static inline void pmd_clear(pmd_t *pmdp) +static inline void pud_clear(pud_t *pudp) { - PVOP_VCALL1(pv_mmu_ops.pmd_clear, pmdp); + set_pud(pudp, __pud(0)); } -#else /* !CONFIG_X86_PAE */ +#endif /* PAGETABLE_LEVELS == 4 */ -static inline pte_t __pte(unsigned long val) +#endif /* PAGETABLE_LEVELS >= 3 */ + +#ifdef CONFIG_X86_PAE +/* Special-case pte-setting operations for PAE, which can't update a + 64-bit pte atomically */ +static inline void set_pte_atomic(pte_t *ptep, pte_t pte) { - return (pte_t) { PVOP_CALL1(unsigned long, pv_mmu_ops.make_pte, val) }; + PVOP_VCALL3(pv_mmu_ops.set_pte_atomic, ptep, + pte.pte, pte.pte >> 32); } -static inline pgd_t __pgd(unsigned long val) +static inline void set_pte_present(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte) { - return (pgd_t) { PVOP_CALL1(unsigned long, pv_mmu_ops.make_pgd, val) }; + /* 5 arg words */ + pv_mmu_ops.set_pte_present(mm, addr, ptep, pte); } -static inline unsigned long pte_val(pte_t x) +static inline void pte_clear(struct mm_struct *mm, unsigned long addr, + pte_t *ptep) { - return PVOP_CALL1(unsigned long, pv_mmu_ops.pte_val, x.pte_low); + PVOP_VCALL3(pv_mmu_ops.pte_clear, mm, addr, ptep); } -static inline unsigned long pgd_val(pgd_t x) +static inline void pmd_clear(pmd_t *pmdp) +{ + PVOP_VCALL1(pv_mmu_ops.pmd_clear, pmdp); +} +#else /* !CONFIG_X86_PAE */ +static inline void set_pte_atomic(pte_t *ptep, pte_t pte) { - return PVOP_CALL1(unsigned long, pv_mmu_ops.pgd_val, x.pgd); + set_pte(ptep, pte); } -static inline void set_pte(pte_t *ptep, pte_t pteval) +static inline void set_pte_present(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte) { - PVOP_VCALL2(pv_mmu_ops.set_pte, ptep, pteval.pte_low); + set_pte(ptep, pte); } -static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, - pte_t *ptep, pte_t pteval) +static inline void pte_clear(struct mm_struct *mm, unsigned long addr, + pte_t *ptep) { - PVOP_VCALL4(pv_mmu_ops.set_pte_at, mm, addr, ptep, pteval.pte_low); + set_pte_at(mm, addr, ptep, __pte(0)); } -static inline void set_pmd(pmd_t *pmdp, pmd_t pmdval) +static inline void pmd_clear(pmd_t *pmdp) { - PVOP_VCALL2(pv_mmu_ops.set_pmd, pmdp, pmdval.pud.pgd.pgd); + set_pmd(pmdp, __pmd(0)); } #endif /* CONFIG_X86_PAE */ @@ -1014,52 +1239,68 @@ struct paravirt_patch_site { extern struct paravirt_patch_site __parainstructions[], __parainstructions_end[]; +#ifdef CONFIG_X86_32 +#define PV_SAVE_REGS "pushl %%ecx; pushl %%edx;" +#define PV_RESTORE_REGS "popl %%edx; popl %%ecx" +#define PV_FLAGS_ARG "0" +#define PV_EXTRA_CLOBBERS +#define PV_VEXTRA_CLOBBERS +#else +/* We save some registers, but all of them, that's too much. We clobber all + * caller saved registers but the argument parameter */ +#define PV_SAVE_REGS "pushq %%rdi;" +#define PV_RESTORE_REGS "popq %%rdi;" +#define PV_EXTRA_CLOBBERS EXTRA_CLOBBERS, "rcx" , "rdx" +#define PV_VEXTRA_CLOBBERS EXTRA_CLOBBERS, "rdi", "rcx" , "rdx" +#define PV_FLAGS_ARG "D" +#endif + static inline unsigned long __raw_local_save_flags(void) { unsigned long f; - asm volatile(paravirt_alt("pushl %%ecx; pushl %%edx;" + asm volatile(paravirt_alt(PV_SAVE_REGS PARAVIRT_CALL - "popl %%edx; popl %%ecx") + PV_RESTORE_REGS) : "=a"(f) : paravirt_type(pv_irq_ops.save_fl), paravirt_clobber(CLBR_EAX) - : "memory", "cc"); + : "memory", "cc" PV_VEXTRA_CLOBBERS); return f; } static inline void raw_local_irq_restore(unsigned long f) { - asm volatile(paravirt_alt("pushl %%ecx; pushl %%edx;" + asm volatile(paravirt_alt(PV_SAVE_REGS PARAVIRT_CALL - "popl %%edx; popl %%ecx") + PV_RESTORE_REGS) : "=a"(f) - : "0"(f), + : PV_FLAGS_ARG(f), paravirt_type(pv_irq_ops.restore_fl), paravirt_clobber(CLBR_EAX) - : "memory", "cc"); + : "memory", "cc" PV_EXTRA_CLOBBERS); } static inline void raw_local_irq_disable(void) { - asm volatile(paravirt_alt("pushl %%ecx; pushl %%edx;" + asm volatile(paravirt_alt(PV_SAVE_REGS PARAVIRT_CALL - "popl %%edx; popl %%ecx") + PV_RESTORE_REGS) : : paravirt_type(pv_irq_ops.irq_disable), paravirt_clobber(CLBR_EAX) - : "memory", "eax", "cc"); + : "memory", "eax", "cc" PV_EXTRA_CLOBBERS); } static inline void raw_local_irq_enable(void) { - asm volatile(paravirt_alt("pushl %%ecx; pushl %%edx;" + asm volatile(paravirt_alt(PV_SAVE_REGS PARAVIRT_CALL - "popl %%edx; popl %%ecx") + PV_RESTORE_REGS) : : paravirt_type(pv_irq_ops.irq_enable), paravirt_clobber(CLBR_EAX) - : "memory", "eax", "cc"); + : "memory", "eax", "cc" PV_EXTRA_CLOBBERS); } static inline unsigned long __raw_local_irq_save(void) @@ -1071,27 +1312,6 @@ static inline unsigned long __raw_local_irq_save(void) return f; } -#define CLI_STRING \ - _paravirt_alt("pushl %%ecx; pushl %%edx;" \ - "call *%[paravirt_cli_opptr];" \ - "popl %%edx; popl %%ecx", \ - "%c[paravirt_cli_type]", "%c[paravirt_clobber]") - -#define STI_STRING \ - _paravirt_alt("pushl %%ecx; pushl %%edx;" \ - "call *%[paravirt_sti_opptr];" \ - "popl %%edx; popl %%ecx", \ - "%c[paravirt_sti_type]", "%c[paravirt_clobber]") - -#define CLI_STI_CLOBBERS , "%eax" -#define CLI_STI_INPUT_ARGS \ - , \ - [paravirt_cli_type] "i" (PARAVIRT_PATCH(pv_irq_ops.irq_disable)), \ - [paravirt_cli_opptr] "m" (pv_irq_ops.irq_disable), \ - [paravirt_sti_type] "i" (PARAVIRT_PATCH(pv_irq_ops.irq_enable)), \ - [paravirt_sti_opptr] "m" (pv_irq_ops.irq_enable), \ - paravirt_clobber(CLBR_EAX) - /* Make sure as little as possible of this mess escapes. */ #undef PARAVIRT_CALL #undef __PVOP_CALL @@ -1109,43 +1329,72 @@ static inline unsigned long __raw_local_irq_save(void) #else /* __ASSEMBLY__ */ -#define PARA_PATCH(struct, off) ((PARAVIRT_PATCH_##struct + (off)) / 4) - -#define PARA_SITE(ptype, clobbers, ops) \ +#define _PVSITE(ptype, clobbers, ops, word, algn) \ 771:; \ ops; \ 772:; \ .pushsection .parainstructions,"a"; \ - .long 771b; \ + .align algn; \ + word 771b; \ .byte ptype; \ .byte 772b-771b; \ .short clobbers; \ .popsection + +#ifdef CONFIG_X86_64 +#define PV_SAVE_REGS pushq %rax; pushq %rdi; pushq %rcx; pushq %rdx +#define PV_RESTORE_REGS popq %rdx; popq %rcx; popq %rdi; popq %rax +#define PARA_PATCH(struct, off) ((PARAVIRT_PATCH_##struct + (off)) / 8) +#define PARA_SITE(ptype, clobbers, ops) _PVSITE(ptype, clobbers, ops, .quad, 8) +#else +#define PV_SAVE_REGS pushl %eax; pushl %edi; pushl %ecx; pushl %edx +#define PV_RESTORE_REGS popl %edx; popl %ecx; popl %edi; popl %eax +#define PARA_PATCH(struct, off) ((PARAVIRT_PATCH_##struct + (off)) / 4) +#define PARA_SITE(ptype, clobbers, ops) _PVSITE(ptype, clobbers, ops, .long, 4) +#endif + #define INTERRUPT_RETURN \ PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_iret), CLBR_NONE, \ jmp *%cs:pv_cpu_ops+PV_CPU_iret) #define DISABLE_INTERRUPTS(clobbers) \ PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_disable), clobbers, \ - pushl %eax; pushl %ecx; pushl %edx; \ + PV_SAVE_REGS; \ call *%cs:pv_irq_ops+PV_IRQ_irq_disable; \ - popl %edx; popl %ecx; popl %eax) \ + PV_RESTORE_REGS;) \ #define ENABLE_INTERRUPTS(clobbers) \ PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_enable), clobbers, \ - pushl %eax; pushl %ecx; pushl %edx; \ + PV_SAVE_REGS; \ call *%cs:pv_irq_ops+PV_IRQ_irq_enable; \ - popl %edx; popl %ecx; popl %eax) + PV_RESTORE_REGS;) + +#define ENABLE_INTERRUPTS_SYSCALL_RET \ + PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_irq_enable_syscall_ret),\ + CLBR_NONE, \ + jmp *%cs:pv_cpu_ops+PV_CPU_irq_enable_syscall_ret) -#define ENABLE_INTERRUPTS_SYSEXIT \ - PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_irq_enable_sysexit), CLBR_NONE,\ - jmp *%cs:pv_cpu_ops+PV_CPU_irq_enable_sysexit) +#ifdef CONFIG_X86_32 #define GET_CR0_INTO_EAX \ push %ecx; push %edx; \ call *pv_cpu_ops+PV_CPU_read_cr0; \ pop %edx; pop %ecx +#else +#define SWAPGS \ + PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_swapgs), CLBR_NONE, \ + PV_SAVE_REGS; \ + call *pv_cpu_ops+PV_CPU_swapgs; \ + PV_RESTORE_REGS \ + ) + +#define GET_CR2_INTO_RCX \ + call *pv_mmu_ops+PV_MMU_read_cr2; \ + movq %rax, %rcx; \ + xorq %rax, %rax; + +#endif #endif /* __ASSEMBLY__ */ #endif /* CONFIG_PARAVIRT */ diff --git a/include/asm-x86/pci.h b/include/asm-x86/pci.h index e88361966347..c61190cb9e12 100644 --- a/include/asm-x86/pci.h +++ b/include/asm-x86/pci.h @@ -66,6 +66,7 @@ extern int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma, #ifdef CONFIG_PCI +extern void early_quirks(void); static inline void pci_dma_burst_advice(struct pci_dev *pdev, enum pci_dma_burst_strategy *strat, unsigned long *strategy_parameter) @@ -73,9 +74,10 @@ static inline void pci_dma_burst_advice(struct pci_dev *pdev, *strat = PCI_DMA_BURST_INFINITY; *strategy_parameter = ~0UL; } +#else +static inline void early_quirks(void) { } #endif - #endif /* __KERNEL__ */ #ifdef CONFIG_X86_32 @@ -90,6 +92,19 @@ static inline void pci_dma_burst_advice(struct pci_dev *pdev, /* generic pci stuff */ #include <asm-generic/pci.h> +#ifdef CONFIG_NUMA +/* Returns the node based on pci bus */ +static inline int __pcibus_to_node(struct pci_bus *bus) +{ + struct pci_sysdata *sd = bus->sysdata; + return sd->node; +} + +static inline cpumask_t __pcibus_to_cpumask(struct pci_bus *bus) +{ + return node_to_cpumask(__pcibus_to_node(bus)); +} +#endif #endif diff --git a/include/asm-x86/pci_64.h b/include/asm-x86/pci_64.h index ef54226a9325..374690314539 100644 --- a/include/asm-x86/pci_64.h +++ b/include/asm-x86/pci_64.h @@ -26,7 +26,6 @@ extern int (*pci_config_write)(int seg, int bus, int dev, int fn, int reg, int l extern void pci_iommu_alloc(void); -extern int iommu_setup(char *opt); /* The PCI address space does equal the physical memory * address space. The networking and block device layers use diff --git a/include/asm-x86/pda.h b/include/asm-x86/pda.h index 35962bbe5e72..c0305bff0f19 100644 --- a/include/asm-x86/pda.h +++ b/include/asm-x86/pda.h @@ -7,22 +7,22 @@ #include <linux/cache.h> #include <asm/page.h> -/* Per processor datastructure. %gs points to it while the kernel runs */ +/* Per processor datastructure. %gs points to it while the kernel runs */ struct x8664_pda { struct task_struct *pcurrent; /* 0 Current process */ unsigned long data_offset; /* 8 Per cpu data offset from linker address */ - unsigned long kernelstack; /* 16 top of kernel stack for current */ - unsigned long oldrsp; /* 24 user rsp for system call */ - int irqcount; /* 32 Irq nesting counter. Starts with -1 */ - int cpunumber; /* 36 Logical CPU number */ + unsigned long kernelstack; /* 16 top of kernel stack for current */ + unsigned long oldrsp; /* 24 user rsp for system call */ + int irqcount; /* 32 Irq nesting counter. Starts -1 */ + unsigned int cpunumber; /* 36 Logical CPU number */ #ifdef CONFIG_CC_STACKPROTECTOR unsigned long stack_canary; /* 40 stack canary value */ /* gcc-ABI: this canary MUST be at offset 40!!! */ #endif char *irqstackptr; - int nodenumber; /* number of current node */ + unsigned int nodenumber; /* number of current node */ unsigned int __softirq_pending; unsigned int __nmi_count; /* number of NMI on this CPUs */ short mmu_state; @@ -40,13 +40,14 @@ struct x8664_pda { extern struct x8664_pda *_cpu_pda[]; extern struct x8664_pda boot_cpu_pda[]; +extern void pda_init(int); #define cpu_pda(i) (_cpu_pda[i]) -/* +/* * There is no fast way to get the base address of the PDA, all the accesses * have to mention %fs/%gs. So it needs to be done this Torvaldian way. - */ + */ extern void __bad_pda_field(void) __attribute__((noreturn)); /* @@ -57,70 +58,70 @@ extern struct x8664_pda _proxy_pda; #define pda_offset(field) offsetof(struct x8664_pda, field) -#define pda_to_op(op,field,val) do { \ +#define pda_to_op(op, field, val) do { \ typedef typeof(_proxy_pda.field) T__; \ if (0) { T__ tmp__; tmp__ = (val); } /* type checking */ \ switch (sizeof(_proxy_pda.field)) { \ case 2: \ - asm(op "w %1,%%gs:%c2" : \ + asm(op "w %1,%%gs:%c2" : \ "+m" (_proxy_pda.field) : \ "ri" ((T__)val), \ - "i"(pda_offset(field))); \ - break; \ + "i"(pda_offset(field))); \ + break; \ case 4: \ - asm(op "l %1,%%gs:%c2" : \ + asm(op "l %1,%%gs:%c2" : \ "+m" (_proxy_pda.field) : \ "ri" ((T__)val), \ - "i" (pda_offset(field))); \ + "i" (pda_offset(field))); \ break; \ case 8: \ - asm(op "q %1,%%gs:%c2": \ + asm(op "q %1,%%gs:%c2": \ "+m" (_proxy_pda.field) : \ "ri" ((T__)val), \ - "i"(pda_offset(field))); \ + "i"(pda_offset(field))); \ break; \ - default: \ + default: \ __bad_pda_field(); \ - } \ - } while (0) + } \ + } while (0) #define pda_from_op(op,field) ({ \ typeof(_proxy_pda.field) ret__; \ switch (sizeof(_proxy_pda.field)) { \ - case 2: \ - asm(op "w %%gs:%c1,%0" : \ + case 2: \ + asm(op "w %%gs:%c1,%0" : \ "=r" (ret__) : \ - "i" (pda_offset(field)), \ - "m" (_proxy_pda.field)); \ + "i" (pda_offset(field)), \ + "m" (_proxy_pda.field)); \ break; \ case 4: \ asm(op "l %%gs:%c1,%0": \ "=r" (ret__): \ - "i" (pda_offset(field)), \ - "m" (_proxy_pda.field)); \ + "i" (pda_offset(field)), \ + "m" (_proxy_pda.field)); \ break; \ - case 8: \ + case 8: \ asm(op "q %%gs:%c1,%0": \ "=r" (ret__) : \ - "i" (pda_offset(field)), \ - "m" (_proxy_pda.field)); \ + "i" (pda_offset(field)), \ + "m" (_proxy_pda.field)); \ break; \ - default: \ + default: \ __bad_pda_field(); \ } \ ret__; }) -#define read_pda(field) pda_from_op("mov",field) -#define write_pda(field,val) pda_to_op("mov",field,val) -#define add_pda(field,val) pda_to_op("add",field,val) -#define sub_pda(field,val) pda_to_op("sub",field,val) -#define or_pda(field,val) pda_to_op("or",field,val) +#define read_pda(field) pda_from_op("mov", field) +#define write_pda(field, val) pda_to_op("mov", field, val) +#define add_pda(field, val) pda_to_op("add", field, val) +#define sub_pda(field, val) pda_to_op("sub", field, val) +#define or_pda(field, val) pda_to_op("or", field, val) /* This is not atomic against other CPUs -- CPU preemption needs to be off */ -#define test_and_clear_bit_pda(bit,field) ({ \ +#define test_and_clear_bit_pda(bit, field) ({ \ int old__; \ asm volatile("btr %2,%%gs:%c3\n\tsbbl %0,%0" \ - : "=r" (old__), "+m" (_proxy_pda.field) \ + : "=r" (old__), "+m" (_proxy_pda.field) \ : "dIr" (bit), "i" (pda_offset(field)) : "memory"); \ old__; \ }) diff --git a/include/asm-x86/percpu.h b/include/asm-x86/percpu.h index a1aaad274cca..0dec00f27eb4 100644 --- a/include/asm-x86/percpu.h +++ b/include/asm-x86/percpu.h @@ -1,5 +1,142 @@ -#ifdef CONFIG_X86_32 -# include "percpu_32.h" -#else -# include "percpu_64.h" +#ifndef _ASM_X86_PERCPU_H_ +#define _ASM_X86_PERCPU_H_ + +#ifdef CONFIG_X86_64 +#include <linux/compiler.h> + +/* Same as asm-generic/percpu.h, except that we store the per cpu offset + in the PDA. Longer term the PDA and every per cpu variable + should be just put into a single section and referenced directly + from %gs */ + +#ifdef CONFIG_SMP +#include <asm/pda.h> + +#define __per_cpu_offset(cpu) (cpu_pda(cpu)->data_offset) +#define __my_cpu_offset read_pda(data_offset) + +#define per_cpu_offset(x) (__per_cpu_offset(x)) + #endif +#include <asm-generic/percpu.h> + +DECLARE_PER_CPU(struct x8664_pda, pda); + +#else /* CONFIG_X86_64 */ + +#ifdef __ASSEMBLY__ + +/* + * PER_CPU finds an address of a per-cpu variable. + * + * Args: + * var - variable name + * reg - 32bit register + * + * The resulting address is stored in the "reg" argument. + * + * Example: + * PER_CPU(cpu_gdt_descr, %ebx) + */ +#ifdef CONFIG_SMP +#define PER_CPU(var, reg) \ + movl %fs:per_cpu__##this_cpu_off, reg; \ + lea per_cpu__##var(reg), reg +#define PER_CPU_VAR(var) %fs:per_cpu__##var +#else /* ! SMP */ +#define PER_CPU(var, reg) \ + movl $per_cpu__##var, reg +#define PER_CPU_VAR(var) per_cpu__##var +#endif /* SMP */ + +#else /* ...!ASSEMBLY */ + +/* + * PER_CPU finds an address of a per-cpu variable. + * + * Args: + * var - variable name + * cpu - 32bit register containing the current CPU number + * + * The resulting address is stored in the "cpu" argument. + * + * Example: + * PER_CPU(cpu_gdt_descr, %ebx) + */ +#ifdef CONFIG_SMP + +#define __my_cpu_offset x86_read_percpu(this_cpu_off) + +/* fs segment starts at (positive) offset == __per_cpu_offset[cpu] */ +#define __percpu_seg "%%fs:" + +#else /* !SMP */ + +#define __percpu_seg "" + +#endif /* SMP */ + +#include <asm-generic/percpu.h> + +/* We can use this directly for local CPU (faster). */ +DECLARE_PER_CPU(unsigned long, this_cpu_off); + +/* For arch-specific code, we can use direct single-insn ops (they + * don't give an lvalue though). */ +extern void __bad_percpu_size(void); + +#define percpu_to_op(op,var,val) \ + do { \ + typedef typeof(var) T__; \ + if (0) { T__ tmp__; tmp__ = (val); } \ + switch (sizeof(var)) { \ + case 1: \ + asm(op "b %1,"__percpu_seg"%0" \ + : "+m" (var) \ + :"ri" ((T__)val)); \ + break; \ + case 2: \ + asm(op "w %1,"__percpu_seg"%0" \ + : "+m" (var) \ + :"ri" ((T__)val)); \ + break; \ + case 4: \ + asm(op "l %1,"__percpu_seg"%0" \ + : "+m" (var) \ + :"ri" ((T__)val)); \ + break; \ + default: __bad_percpu_size(); \ + } \ + } while (0) + +#define percpu_from_op(op,var) \ + ({ \ + typeof(var) ret__; \ + switch (sizeof(var)) { \ + case 1: \ + asm(op "b "__percpu_seg"%1,%0" \ + : "=r" (ret__) \ + : "m" (var)); \ + break; \ + case 2: \ + asm(op "w "__percpu_seg"%1,%0" \ + : "=r" (ret__) \ + : "m" (var)); \ + break; \ + case 4: \ + asm(op "l "__percpu_seg"%1,%0" \ + : "=r" (ret__) \ + : "m" (var)); \ + break; \ + default: __bad_percpu_size(); \ + } \ + ret__; }) + +#define x86_read_percpu(var) percpu_from_op("mov", per_cpu__##var) +#define x86_write_percpu(var,val) percpu_to_op("mov", per_cpu__##var, val) +#define x86_add_percpu(var,val) percpu_to_op("add", per_cpu__##var, val) +#define x86_sub_percpu(var,val) percpu_to_op("sub", per_cpu__##var, val) +#define x86_or_percpu(var,val) percpu_to_op("or", per_cpu__##var, val) +#endif /* !__ASSEMBLY__ */ +#endif /* !CONFIG_X86_64 */ +#endif /* _ASM_X86_PERCPU_H_ */ diff --git a/include/asm-x86/percpu_32.h b/include/asm-x86/percpu_32.h deleted file mode 100644 index a7ebd436f3cc..000000000000 --- a/include/asm-x86/percpu_32.h +++ /dev/null @@ -1,154 +0,0 @@ -#ifndef __ARCH_I386_PERCPU__ -#define __ARCH_I386_PERCPU__ - -#ifdef __ASSEMBLY__ - -/* - * PER_CPU finds an address of a per-cpu variable. - * - * Args: - * var - variable name - * reg - 32bit register - * - * The resulting address is stored in the "reg" argument. - * - * Example: - * PER_CPU(cpu_gdt_descr, %ebx) - */ -#ifdef CONFIG_SMP -#define PER_CPU(var, reg) \ - movl %fs:per_cpu__##this_cpu_off, reg; \ - lea per_cpu__##var(reg), reg -#define PER_CPU_VAR(var) %fs:per_cpu__##var -#else /* ! SMP */ -#define PER_CPU(var, reg) \ - movl $per_cpu__##var, reg -#define PER_CPU_VAR(var) per_cpu__##var -#endif /* SMP */ - -#else /* ...!ASSEMBLY */ - -/* - * PER_CPU finds an address of a per-cpu variable. - * - * Args: - * var - variable name - * cpu - 32bit register containing the current CPU number - * - * The resulting address is stored in the "cpu" argument. - * - * Example: - * PER_CPU(cpu_gdt_descr, %ebx) - */ -#ifdef CONFIG_SMP -/* Same as generic implementation except for optimized local access. */ -#define __GENERIC_PER_CPU - -/* This is used for other cpus to find our section. */ -extern unsigned long __per_cpu_offset[]; - -#define per_cpu_offset(x) (__per_cpu_offset[x]) - -/* Separate out the type, so (int[3], foo) works. */ -#define DECLARE_PER_CPU(type, name) extern __typeof__(type) per_cpu__##name -#define DEFINE_PER_CPU(type, name) \ - __attribute__((__section__(".data.percpu"))) __typeof__(type) per_cpu__##name - -#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name) \ - __attribute__((__section__(".data.percpu.shared_aligned"))) \ - __typeof__(type) per_cpu__##name \ - ____cacheline_aligned_in_smp - -/* We can use this directly for local CPU (faster). */ -DECLARE_PER_CPU(unsigned long, this_cpu_off); - -/* var is in discarded region: offset to particular copy we want */ -#define per_cpu(var, cpu) (*({ \ - extern int simple_indentifier_##var(void); \ - RELOC_HIDE(&per_cpu__##var, __per_cpu_offset[cpu]); })) - -#define __raw_get_cpu_var(var) (*({ \ - extern int simple_indentifier_##var(void); \ - RELOC_HIDE(&per_cpu__##var, x86_read_percpu(this_cpu_off)); \ -})) - -#define __get_cpu_var(var) __raw_get_cpu_var(var) - -/* A macro to avoid #include hell... */ -#define percpu_modcopy(pcpudst, src, size) \ -do { \ - unsigned int __i; \ - for_each_possible_cpu(__i) \ - memcpy((pcpudst)+__per_cpu_offset[__i], \ - (src), (size)); \ -} while (0) - -#define EXPORT_PER_CPU_SYMBOL(var) EXPORT_SYMBOL(per_cpu__##var) -#define EXPORT_PER_CPU_SYMBOL_GPL(var) EXPORT_SYMBOL_GPL(per_cpu__##var) - -/* fs segment starts at (positive) offset == __per_cpu_offset[cpu] */ -#define __percpu_seg "%%fs:" -#else /* !SMP */ -#include <asm-generic/percpu.h> -#define __percpu_seg "" -#endif /* SMP */ - -/* For arch-specific code, we can use direct single-insn ops (they - * don't give an lvalue though). */ -extern void __bad_percpu_size(void); - -#define percpu_to_op(op,var,val) \ - do { \ - typedef typeof(var) T__; \ - if (0) { T__ tmp__; tmp__ = (val); } \ - switch (sizeof(var)) { \ - case 1: \ - asm(op "b %1,"__percpu_seg"%0" \ - : "+m" (var) \ - :"ri" ((T__)val)); \ - break; \ - case 2: \ - asm(op "w %1,"__percpu_seg"%0" \ - : "+m" (var) \ - :"ri" ((T__)val)); \ - break; \ - case 4: \ - asm(op "l %1,"__percpu_seg"%0" \ - : "+m" (var) \ - :"ri" ((T__)val)); \ - break; \ - default: __bad_percpu_size(); \ - } \ - } while (0) - -#define percpu_from_op(op,var) \ - ({ \ - typeof(var) ret__; \ - switch (sizeof(var)) { \ - case 1: \ - asm(op "b "__percpu_seg"%1,%0" \ - : "=r" (ret__) \ - : "m" (var)); \ - break; \ - case 2: \ - asm(op "w "__percpu_seg"%1,%0" \ - : "=r" (ret__) \ - : "m" (var)); \ - break; \ - case 4: \ - asm(op "l "__percpu_seg"%1,%0" \ - : "=r" (ret__) \ - : "m" (var)); \ - break; \ - default: __bad_percpu_size(); \ - } \ - ret__; }) - -#define x86_read_percpu(var) percpu_from_op("mov", per_cpu__##var) -#define x86_write_percpu(var,val) percpu_to_op("mov", per_cpu__##var, val) -#define x86_add_percpu(var,val) percpu_to_op("add", per_cpu__##var, val) -#define x86_sub_percpu(var,val) percpu_to_op("sub", per_cpu__##var, val) -#define x86_or_percpu(var,val) percpu_to_op("or", per_cpu__##var, val) -#endif /* !__ASSEMBLY__ */ - -#endif /* __ARCH_I386_PERCPU__ */ diff --git a/include/asm-x86/percpu_64.h b/include/asm-x86/percpu_64.h deleted file mode 100644 index 5abd48270101..000000000000 --- a/include/asm-x86/percpu_64.h +++ /dev/null @@ -1,68 +0,0 @@ -#ifndef _ASM_X8664_PERCPU_H_ -#define _ASM_X8664_PERCPU_H_ -#include <linux/compiler.h> - -/* Same as asm-generic/percpu.h, except that we store the per cpu offset - in the PDA. Longer term the PDA and every per cpu variable - should be just put into a single section and referenced directly - from %gs */ - -#ifdef CONFIG_SMP - -#include <asm/pda.h> - -#define __per_cpu_offset(cpu) (cpu_pda(cpu)->data_offset) -#define __my_cpu_offset() read_pda(data_offset) - -#define per_cpu_offset(x) (__per_cpu_offset(x)) - -/* Separate out the type, so (int[3], foo) works. */ -#define DEFINE_PER_CPU(type, name) \ - __attribute__((__section__(".data.percpu"))) __typeof__(type) per_cpu__##name - -#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name) \ - __attribute__((__section__(".data.percpu.shared_aligned"))) \ - __typeof__(type) per_cpu__##name \ - ____cacheline_internodealigned_in_smp - -/* var is in discarded region: offset to particular copy we want */ -#define per_cpu(var, cpu) (*({ \ - extern int simple_identifier_##var(void); \ - RELOC_HIDE(&per_cpu__##var, __per_cpu_offset(cpu)); })) -#define __get_cpu_var(var) (*({ \ - extern int simple_identifier_##var(void); \ - RELOC_HIDE(&per_cpu__##var, __my_cpu_offset()); })) -#define __raw_get_cpu_var(var) (*({ \ - extern int simple_identifier_##var(void); \ - RELOC_HIDE(&per_cpu__##var, __my_cpu_offset()); })) - -/* A macro to avoid #include hell... */ -#define percpu_modcopy(pcpudst, src, size) \ -do { \ - unsigned int __i; \ - for_each_possible_cpu(__i) \ - memcpy((pcpudst)+__per_cpu_offset(__i), \ - (src), (size)); \ -} while (0) - -extern void setup_per_cpu_areas(void); - -#else /* ! SMP */ - -#define DEFINE_PER_CPU(type, name) \ - __typeof__(type) per_cpu__##name -#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name) \ - DEFINE_PER_CPU(type, name) - -#define per_cpu(var, cpu) (*((void)(cpu), &per_cpu__##var)) -#define __get_cpu_var(var) per_cpu__##var -#define __raw_get_cpu_var(var) per_cpu__##var - -#endif /* SMP */ - -#define DECLARE_PER_CPU(type, name) extern __typeof__(type) per_cpu__##name - -#define EXPORT_PER_CPU_SYMBOL(var) EXPORT_SYMBOL(per_cpu__##var) -#define EXPORT_PER_CPU_SYMBOL_GPL(var) EXPORT_SYMBOL_GPL(per_cpu__##var) - -#endif /* _ASM_X8664_PERCPU_H_ */ diff --git a/include/asm-x86/pgalloc_32.h b/include/asm-x86/pgalloc_32.h index f2fc33ceb9f2..10c2b452e64c 100644 --- a/include/asm-x86/pgalloc_32.h +++ b/include/asm-x86/pgalloc_32.h @@ -3,31 +3,33 @@ #include <linux/threads.h> #include <linux/mm.h> /* for struct page */ +#include <asm/tlb.h> +#include <asm-generic/tlb.h> #ifdef CONFIG_PARAVIRT #include <asm/paravirt.h> #else #define paravirt_alloc_pt(mm, pfn) do { } while (0) -#define paravirt_alloc_pd(pfn) do { } while (0) -#define paravirt_alloc_pd(pfn) do { } while (0) +#define paravirt_alloc_pd(mm, pfn) do { } while (0) #define paravirt_alloc_pd_clone(pfn, clonepfn, start, count) do { } while (0) #define paravirt_release_pt(pfn) do { } while (0) #define paravirt_release_pd(pfn) do { } while (0) #endif -#define pmd_populate_kernel(mm, pmd, pte) \ -do { \ - paravirt_alloc_pt(mm, __pa(pte) >> PAGE_SHIFT); \ - set_pmd(pmd, __pmd(_PAGE_TABLE + __pa(pte))); \ -} while (0) +static inline void pmd_populate_kernel(struct mm_struct *mm, + pmd_t *pmd, pte_t *pte) +{ + paravirt_alloc_pt(mm, __pa(pte) >> PAGE_SHIFT); + set_pmd(pmd, __pmd(__pa(pte) | _PAGE_TABLE)); +} + +static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, struct page *pte) +{ + unsigned long pfn = page_to_pfn(pte); -#define pmd_populate(mm, pmd, pte) \ -do { \ - paravirt_alloc_pt(mm, page_to_pfn(pte)); \ - set_pmd(pmd, __pmd(_PAGE_TABLE + \ - ((unsigned long long)page_to_pfn(pte) << \ - (unsigned long long) PAGE_SHIFT))); \ -} while (0) + paravirt_alloc_pt(mm, pfn); + set_pmd(pmd, __pmd(((pteval_t)pfn << PAGE_SHIFT) | _PAGE_TABLE)); +} /* * Allocate and free page tables. @@ -49,20 +51,55 @@ static inline void pte_free(struct page *pte) } -#define __pte_free_tlb(tlb,pte) \ -do { \ - paravirt_release_pt(page_to_pfn(pte)); \ - tlb_remove_page((tlb),(pte)); \ -} while (0) +static inline void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte) +{ + paravirt_release_pt(page_to_pfn(pte)); + tlb_remove_page(tlb, pte); +} #ifdef CONFIG_X86_PAE /* * In the PAE case we free the pmds as part of the pgd. */ -#define pmd_alloc_one(mm, addr) ({ BUG(); ((pmd_t *)2); }) -#define pmd_free(x) do { } while (0) -#define __pmd_free_tlb(tlb,x) do { } while (0) -#define pud_populate(mm, pmd, pte) BUG() -#endif +static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) +{ + return (pmd_t *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT); +} + +static inline void pmd_free(pmd_t *pmd) +{ + BUG_ON((unsigned long)pmd & (PAGE_SIZE-1)); + free_page((unsigned long)pmd); +} + +static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd) +{ + /* This is called just after the pmd has been detached from + the pgd, which requires a full tlb flush to be recognized + by the CPU. Rather than incurring multiple tlb flushes + while the address space is being pulled down, make the tlb + gathering machinery do a full flush when we're done. */ + tlb->fullmm = 1; + + paravirt_release_pd(__pa(pmd) >> PAGE_SHIFT); + tlb_remove_page(tlb, virt_to_page(pmd)); +} + +static inline void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd) +{ + paravirt_alloc_pd(mm, __pa(pmd) >> PAGE_SHIFT); + + /* Note: almost everything apart from _PAGE_PRESENT is + reserved at the pmd (PDPT) level. */ + set_pud(pudp, __pud(__pa(pmd) | _PAGE_PRESENT)); + + /* + * Pentium-II erratum A13: in PAE mode we explicitly have to flush + * the TLB via cr3 if the top-level pgd is changed... + */ + if (mm == current->active_mm) + write_cr3(read_cr3()); +} +#endif /* CONFIG_X86_PAE */ #endif /* _I386_PGALLOC_H */ diff --git a/include/asm-x86/pgtable-2level.h b/include/asm-x86/pgtable-2level.h index 84b03cf56a79..701404fab308 100644 --- a/include/asm-x86/pgtable-2level.h +++ b/include/asm-x86/pgtable-2level.h @@ -15,30 +15,31 @@ static inline void native_set_pte(pte_t *ptep , pte_t pte) { *ptep = pte; } -static inline void native_set_pte_at(struct mm_struct *mm, unsigned long addr, - pte_t *ptep , pte_t pte) -{ - native_set_pte(ptep, pte); -} + static inline void native_set_pmd(pmd_t *pmdp, pmd_t pmd) { *pmdp = pmd; } -#ifndef CONFIG_PARAVIRT -#define set_pte(pteptr, pteval) native_set_pte(pteptr, pteval) -#define set_pte_at(mm,addr,ptep,pteval) native_set_pte_at(mm, addr, ptep, pteval) -#define set_pmd(pmdptr, pmdval) native_set_pmd(pmdptr, pmdval) -#endif -#define set_pte_atomic(pteptr, pteval) set_pte(pteptr,pteval) -#define set_pte_present(mm,addr,ptep,pteval) set_pte_at(mm,addr,ptep,pteval) +static inline void native_set_pte_atomic(pte_t *ptep, pte_t pte) +{ + native_set_pte(ptep, pte); +} -#define pte_clear(mm,addr,xp) do { set_pte_at(mm, addr, xp, __pte(0)); } while (0) -#define pmd_clear(xp) do { set_pmd(xp, __pmd(0)); } while (0) +static inline void native_set_pte_present(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte) +{ + native_set_pte(ptep, pte); +} + +static inline void native_pmd_clear(pmd_t *pmdp) +{ + native_set_pmd(pmdp, __pmd(0)); +} static inline void native_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *xp) { - *xp = __pte(0); + *xp = native_make_pte(0); } #ifdef CONFIG_SMP @@ -53,16 +54,6 @@ static inline pte_t native_ptep_get_and_clear(pte_t *xp) #define pte_page(x) pfn_to_page(pte_pfn(x)) #define pte_none(x) (!(x).pte_low) #define pte_pfn(x) (pte_val(x) >> PAGE_SHIFT) -#define pfn_pte(pfn, prot) __pte(((pfn) << PAGE_SHIFT) | pgprot_val(prot)) -#define pfn_pmd(pfn, prot) __pmd(((pfn) << PAGE_SHIFT) | pgprot_val(prot)) - -/* - * All present pages are kernel-executable: - */ -static inline int pte_exec_kernel(pte_t pte) -{ - return 1; -} /* * Bits 0, 6 and 7 are taken, split up the 29 bits of offset @@ -74,13 +65,13 @@ static inline int pte_exec_kernel(pte_t pte) ((((pte).pte_low >> 1) & 0x1f ) + (((pte).pte_low >> 8) << 5 )) #define pgoff_to_pte(off) \ - ((pte_t) { (((off) & 0x1f) << 1) + (((off) >> 5) << 8) + _PAGE_FILE }) + ((pte_t) { .pte_low = (((off) & 0x1f) << 1) + (((off) >> 5) << 8) + _PAGE_FILE }) /* Encode and de-code a swap entry */ #define __swp_type(x) (((x).val >> 1) & 0x1f) #define __swp_offset(x) ((x).val >> 8) #define __swp_entry(type, offset) ((swp_entry_t) { ((type) << 1) | ((offset) << 8) }) #define __pte_to_swp_entry(pte) ((swp_entry_t) { (pte).pte_low }) -#define __swp_entry_to_pte(x) ((pte_t) { (x).val }) +#define __swp_entry_to_pte(x) ((pte_t) { .pte = (x).val }) #endif /* _I386_PGTABLE_2LEVEL_H */ diff --git a/include/asm-x86/pgtable-3level.h b/include/asm-x86/pgtable-3level.h index 948a33414118..a195c3e757b9 100644 --- a/include/asm-x86/pgtable-3level.h +++ b/include/asm-x86/pgtable-3level.h @@ -15,16 +15,18 @@ #define pgd_ERROR(e) \ printk("%s:%d: bad pgd %p(%016Lx).\n", __FILE__, __LINE__, &(e), pgd_val(e)) -#define pud_none(pud) 0 -#define pud_bad(pud) 0 -#define pud_present(pud) 1 -/* - * All present pages with !NX bit are kernel-executable: - */ -static inline int pte_exec_kernel(pte_t pte) +static inline int pud_none(pud_t pud) +{ + return pud_val(pud) == 0; +} +static inline int pud_bad(pud_t pud) +{ + return (pud_val(pud) & ~(PTE_MASK | _KERNPG_TABLE | _PAGE_USER)) != 0; +} +static inline int pud_present(pud_t pud) { - return !(pte_val(pte) & _PAGE_NX); + return pud_val(pud) & _PAGE_PRESENT; } /* Rules for using set_pte: the pte being assigned *must* be @@ -39,11 +41,6 @@ static inline void native_set_pte(pte_t *ptep, pte_t pte) smp_wmb(); ptep->pte_low = pte.pte_low; } -static inline void native_set_pte_at(struct mm_struct *mm, unsigned long addr, - pte_t *ptep , pte_t pte) -{ - native_set_pte(ptep, pte); -} /* * Since this is only called on user PTEs, and the page fault handler @@ -71,7 +68,7 @@ static inline void native_set_pmd(pmd_t *pmdp, pmd_t pmd) } static inline void native_set_pud(pud_t *pudp, pud_t pud) { - *pudp = pud; + set_64bit((unsigned long long *)(pudp),native_pud_val(pud)); } /* @@ -94,24 +91,29 @@ static inline void native_pmd_clear(pmd_t *pmd) *(tmp + 1) = 0; } -#ifndef CONFIG_PARAVIRT -#define set_pte(ptep, pte) native_set_pte(ptep, pte) -#define set_pte_at(mm, addr, ptep, pte) native_set_pte_at(mm, addr, ptep, pte) -#define set_pte_present(mm, addr, ptep, pte) native_set_pte_present(mm, addr, ptep, pte) -#define set_pte_atomic(ptep, pte) native_set_pte_atomic(ptep, pte) -#define set_pmd(pmdp, pmd) native_set_pmd(pmdp, pmd) -#define set_pud(pudp, pud) native_set_pud(pudp, pud) -#define pte_clear(mm, addr, ptep) native_pte_clear(mm, addr, ptep) -#define pmd_clear(pmd) native_pmd_clear(pmd) -#endif - -/* - * Pentium-II erratum A13: in PAE mode we explicitly have to flush - * the TLB via cr3 if the top-level pgd is changed... - * We do not let the generic code free and clear pgd entries due to - * this erratum. - */ -static inline void pud_clear (pud_t * pud) { } +static inline void pud_clear(pud_t *pudp) +{ + set_pud(pudp, __pud(0)); + + /* + * In principle we need to do a cr3 reload here to make sure + * the processor recognizes the changed pgd. In practice, all + * the places where pud_clear() gets called are followed by + * full tlb flushes anyway, so we can defer the cost here. + * + * Specifically: + * + * mm/memory.c:free_pmd_range() - immediately after the + * pud_clear() it does a pmd_free_tlb(). We change the + * mmu_gather structure to do a full tlb flush (which has the + * effect of reloading cr3) when the pagetable free is + * complete. + * + * arch/x86/mm/hugetlbpage.c:huge_pmd_unshare() - the call to + * this is followed by a flush_tlb_range, which on x86 does a + * full tlb flush. + */ +} #define pud_page(pud) \ ((struct page *) __va(pud_val(pud) & PAGE_MASK)) @@ -155,21 +157,7 @@ static inline int pte_none(pte_t pte) static inline unsigned long pte_pfn(pte_t pte) { - return pte_val(pte) >> PAGE_SHIFT; -} - -extern unsigned long long __supported_pte_mask; - -static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot) -{ - return __pte((((unsigned long long)page_nr << PAGE_SHIFT) | - pgprot_val(pgprot)) & __supported_pte_mask); -} - -static inline pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot) -{ - return __pmd((((unsigned long long)page_nr << PAGE_SHIFT) | - pgprot_val(pgprot)) & __supported_pte_mask); + return (pte_val(pte) & ~_PAGE_NX) >> PAGE_SHIFT; } /* @@ -177,7 +165,7 @@ static inline pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot) * put the 32 bits of offset into the high part. */ #define pte_to_pgoff(pte) ((pte).pte_high) -#define pgoff_to_pte(off) ((pte_t) { _PAGE_FILE, (off) }) +#define pgoff_to_pte(off) ((pte_t) { { .pte_low = _PAGE_FILE, .pte_high = (off) } }) #define PTE_FILE_MAX_BITS 32 /* Encode and de-code a swap entry */ @@ -185,8 +173,6 @@ static inline pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot) #define __swp_offset(x) ((x).val >> 5) #define __swp_entry(type, offset) ((swp_entry_t){(type) | (offset) << 5}) #define __pte_to_swp_entry(pte) ((swp_entry_t){ (pte).pte_high }) -#define __swp_entry_to_pte(x) ((pte_t){ 0, (x).val }) - -#define __pmd_free_tlb(tlb, x) do { } while (0) +#define __swp_entry_to_pte(x) ((pte_t){ { .pte_high = (x).val } }) #endif /* _I386_PGTABLE_3LEVEL_H */ diff --git a/include/asm-x86/pgtable.h b/include/asm-x86/pgtable.h index 1039140652af..cd2524f07452 100644 --- a/include/asm-x86/pgtable.h +++ b/include/asm-x86/pgtable.h @@ -1,5 +1,364 @@ +#ifndef _ASM_X86_PGTABLE_H +#define _ASM_X86_PGTABLE_H + +#define USER_PTRS_PER_PGD ((TASK_SIZE-1)/PGDIR_SIZE+1) +#define FIRST_USER_ADDRESS 0 + +#define _PAGE_BIT_PRESENT 0 +#define _PAGE_BIT_RW 1 +#define _PAGE_BIT_USER 2 +#define _PAGE_BIT_PWT 3 +#define _PAGE_BIT_PCD 4 +#define _PAGE_BIT_ACCESSED 5 +#define _PAGE_BIT_DIRTY 6 +#define _PAGE_BIT_FILE 6 +#define _PAGE_BIT_PSE 7 /* 4 MB (or 2MB) page */ +#define _PAGE_BIT_GLOBAL 8 /* Global TLB entry PPro+ */ +#define _PAGE_BIT_UNUSED1 9 /* available for programmer */ +#define _PAGE_BIT_UNUSED2 10 +#define _PAGE_BIT_UNUSED3 11 +#define _PAGE_BIT_NX 63 /* No execute: only valid after cpuid check */ + +/* + * Note: we use _AC(1, L) instead of _AC(1, UL) so that we get a + * sign-extended value on 32-bit with all 1's in the upper word, + * which preserves the upper pte values on 64-bit ptes: + */ +#define _PAGE_PRESENT (_AC(1, L)<<_PAGE_BIT_PRESENT) +#define _PAGE_RW (_AC(1, L)<<_PAGE_BIT_RW) +#define _PAGE_USER (_AC(1, L)<<_PAGE_BIT_USER) +#define _PAGE_PWT (_AC(1, L)<<_PAGE_BIT_PWT) +#define _PAGE_PCD (_AC(1, L)<<_PAGE_BIT_PCD) +#define _PAGE_ACCESSED (_AC(1, L)<<_PAGE_BIT_ACCESSED) +#define _PAGE_DIRTY (_AC(1, L)<<_PAGE_BIT_DIRTY) +#define _PAGE_PSE (_AC(1, L)<<_PAGE_BIT_PSE) /* 2MB page */ +#define _PAGE_GLOBAL (_AC(1, L)<<_PAGE_BIT_GLOBAL) /* Global TLB entry */ +#define _PAGE_UNUSED1 (_AC(1, L)<<_PAGE_BIT_UNUSED1) +#define _PAGE_UNUSED2 (_AC(1, L)<<_PAGE_BIT_UNUSED2) +#define _PAGE_UNUSED3 (_AC(1, L)<<_PAGE_BIT_UNUSED3) + +#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) +#define _PAGE_NX (_AC(1, ULL) << _PAGE_BIT_NX) +#else +#define _PAGE_NX 0 +#endif + +/* If _PAGE_PRESENT is clear, we use these: */ +#define _PAGE_FILE _PAGE_DIRTY /* nonlinear file mapping, saved PTE; unset:swap */ +#define _PAGE_PROTNONE _PAGE_PSE /* if the user mapped it with PROT_NONE; + pte_present gives true */ + +#define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED | _PAGE_DIRTY) +#define _KERNPG_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY) + +#define _PAGE_CHG_MASK (PTE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY) + +#define PAGE_NONE __pgprot(_PAGE_PROTNONE | _PAGE_ACCESSED) +#define PAGE_SHARED __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED | _PAGE_NX) + +#define PAGE_SHARED_EXEC __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED) +#define PAGE_COPY_NOEXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED | _PAGE_NX) +#define PAGE_COPY_EXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED) +#define PAGE_COPY PAGE_COPY_NOEXEC +#define PAGE_READONLY __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED | _PAGE_NX) +#define PAGE_READONLY_EXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED) + +#ifdef CONFIG_X86_32 +#define _PAGE_KERNEL_EXEC \ + (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED) +#define _PAGE_KERNEL (_PAGE_KERNEL_EXEC | _PAGE_NX) + +#ifndef __ASSEMBLY__ +extern pteval_t __PAGE_KERNEL, __PAGE_KERNEL_EXEC; +#endif /* __ASSEMBLY__ */ +#else +#define __PAGE_KERNEL_EXEC \ + (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED) +#define __PAGE_KERNEL (__PAGE_KERNEL_EXEC | _PAGE_NX) +#endif + +#define __PAGE_KERNEL_RO (__PAGE_KERNEL & ~_PAGE_RW) +#define __PAGE_KERNEL_RX (__PAGE_KERNEL_EXEC & ~_PAGE_RW) +#define __PAGE_KERNEL_EXEC_NOCACHE (__PAGE_KERNEL_EXEC | _PAGE_PCD | _PAGE_PWT) +#define __PAGE_KERNEL_NOCACHE (__PAGE_KERNEL | _PAGE_PCD | _PAGE_PWT) +#define __PAGE_KERNEL_VSYSCALL (__PAGE_KERNEL_RX | _PAGE_USER) +#define __PAGE_KERNEL_VSYSCALL_NOCACHE (__PAGE_KERNEL_VSYSCALL | _PAGE_PCD | _PAGE_PWT) +#define __PAGE_KERNEL_LARGE (__PAGE_KERNEL | _PAGE_PSE) +#define __PAGE_KERNEL_LARGE_EXEC (__PAGE_KERNEL_EXEC | _PAGE_PSE) + +#ifdef CONFIG_X86_32 +# define MAKE_GLOBAL(x) __pgprot((x)) +#else +# define MAKE_GLOBAL(x) __pgprot((x) | _PAGE_GLOBAL) +#endif + +#define PAGE_KERNEL MAKE_GLOBAL(__PAGE_KERNEL) +#define PAGE_KERNEL_RO MAKE_GLOBAL(__PAGE_KERNEL_RO) +#define PAGE_KERNEL_EXEC MAKE_GLOBAL(__PAGE_KERNEL_EXEC) +#define PAGE_KERNEL_RX MAKE_GLOBAL(__PAGE_KERNEL_RX) +#define PAGE_KERNEL_NOCACHE MAKE_GLOBAL(__PAGE_KERNEL_NOCACHE) +#define PAGE_KERNEL_EXEC_NOCACHE MAKE_GLOBAL(__PAGE_KERNEL_EXEC_NOCACHE) +#define PAGE_KERNEL_LARGE MAKE_GLOBAL(__PAGE_KERNEL_LARGE) +#define PAGE_KERNEL_LARGE_EXEC MAKE_GLOBAL(__PAGE_KERNEL_LARGE_EXEC) +#define PAGE_KERNEL_VSYSCALL MAKE_GLOBAL(__PAGE_KERNEL_VSYSCALL) +#define PAGE_KERNEL_VSYSCALL_NOCACHE MAKE_GLOBAL(__PAGE_KERNEL_VSYSCALL_NOCACHE) + +/* xwr */ +#define __P000 PAGE_NONE +#define __P001 PAGE_READONLY +#define __P010 PAGE_COPY +#define __P011 PAGE_COPY +#define __P100 PAGE_READONLY_EXEC +#define __P101 PAGE_READONLY_EXEC +#define __P110 PAGE_COPY_EXEC +#define __P111 PAGE_COPY_EXEC + +#define __S000 PAGE_NONE +#define __S001 PAGE_READONLY +#define __S010 PAGE_SHARED +#define __S011 PAGE_SHARED +#define __S100 PAGE_READONLY_EXEC +#define __S101 PAGE_READONLY_EXEC +#define __S110 PAGE_SHARED_EXEC +#define __S111 PAGE_SHARED_EXEC + +#ifndef __ASSEMBLY__ + +/* + * ZERO_PAGE is a global shared page that is always zero: used + * for zero-mapped memory areas etc.. + */ +extern unsigned long empty_zero_page[PAGE_SIZE/sizeof(unsigned long)]; +#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page)) + +extern spinlock_t pgd_lock; +extern struct list_head pgd_list; + +/* + * The following only work if pte_present() is true. + * Undefined behaviour if not.. + */ +static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; } +static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; } +static inline int pte_write(pte_t pte) { return pte_val(pte) & _PAGE_RW; } +static inline int pte_file(pte_t pte) { return pte_val(pte) & _PAGE_FILE; } +static inline int pte_huge(pte_t pte) { return pte_val(pte) & _PAGE_PSE; } +static inline int pte_global(pte_t pte) { return pte_val(pte) & _PAGE_GLOBAL; } +static inline int pte_exec(pte_t pte) { return !(pte_val(pte) & _PAGE_NX); } + +static inline int pmd_large(pmd_t pte) { + return (pmd_val(pte) & (_PAGE_PSE|_PAGE_PRESENT)) == + (_PAGE_PSE|_PAGE_PRESENT); +} + +static inline pte_t pte_mkclean(pte_t pte) { return __pte(pte_val(pte) & ~(pteval_t)_PAGE_DIRTY); } +static inline pte_t pte_mkold(pte_t pte) { return __pte(pte_val(pte) & ~(pteval_t)_PAGE_ACCESSED); } +static inline pte_t pte_wrprotect(pte_t pte) { return __pte(pte_val(pte) & ~(pteval_t)_PAGE_RW); } +static inline pte_t pte_mkexec(pte_t pte) { return __pte(pte_val(pte) & ~(pteval_t)_PAGE_NX); } +static inline pte_t pte_mkdirty(pte_t pte) { return __pte(pte_val(pte) | _PAGE_DIRTY); } +static inline pte_t pte_mkyoung(pte_t pte) { return __pte(pte_val(pte) | _PAGE_ACCESSED); } +static inline pte_t pte_mkwrite(pte_t pte) { return __pte(pte_val(pte) | _PAGE_RW); } +static inline pte_t pte_mkhuge(pte_t pte) { return __pte(pte_val(pte) | _PAGE_PSE); } +static inline pte_t pte_clrhuge(pte_t pte) { return __pte(pte_val(pte) & ~(pteval_t)_PAGE_PSE); } +static inline pte_t pte_mkglobal(pte_t pte) { return __pte(pte_val(pte) | _PAGE_GLOBAL); } +static inline pte_t pte_clrglobal(pte_t pte) { return __pte(pte_val(pte) & ~(pteval_t)_PAGE_GLOBAL); } + +extern pteval_t __supported_pte_mask; + +static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot) +{ + return __pte((((phys_addr_t)page_nr << PAGE_SHIFT) | + pgprot_val(pgprot)) & __supported_pte_mask); +} + +static inline pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot) +{ + return __pmd((((phys_addr_t)page_nr << PAGE_SHIFT) | + pgprot_val(pgprot)) & __supported_pte_mask); +} + +static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) +{ + pteval_t val = pte_val(pte); + + /* + * Chop off the NX bit (if present), and add the NX portion of + * the newprot (if present): + */ + val &= _PAGE_CHG_MASK & ~_PAGE_NX; + val |= pgprot_val(newprot) & __supported_pte_mask; + + return __pte(val); +} + +#define pte_pgprot(x) __pgprot(pte_val(x) & (0xfff | _PAGE_NX)) + +#define canon_pgprot(p) __pgprot(pgprot_val(p) & __supported_pte_mask) + +#ifdef CONFIG_PARAVIRT +#include <asm/paravirt.h> +#else /* !CONFIG_PARAVIRT */ +#define set_pte(ptep, pte) native_set_pte(ptep, pte) +#define set_pte_at(mm, addr, ptep, pte) native_set_pte_at(mm, addr, ptep, pte) + +#define set_pte_present(mm, addr, ptep, pte) \ + native_set_pte_present(mm, addr, ptep, pte) +#define set_pte_atomic(ptep, pte) \ + native_set_pte_atomic(ptep, pte) + +#define set_pmd(pmdp, pmd) native_set_pmd(pmdp, pmd) + +#ifndef __PAGETABLE_PUD_FOLDED +#define set_pgd(pgdp, pgd) native_set_pgd(pgdp, pgd) +#define pgd_clear(pgd) native_pgd_clear(pgd) +#endif + +#ifndef set_pud +# define set_pud(pudp, pud) native_set_pud(pudp, pud) +#endif + +#ifndef __PAGETABLE_PMD_FOLDED +#define pud_clear(pud) native_pud_clear(pud) +#endif + +#define pte_clear(mm, addr, ptep) native_pte_clear(mm, addr, ptep) +#define pmd_clear(pmd) native_pmd_clear(pmd) + +#define pte_update(mm, addr, ptep) do { } while (0) +#define pte_update_defer(mm, addr, ptep) do { } while (0) +#endif /* CONFIG_PARAVIRT */ + +#endif /* __ASSEMBLY__ */ + #ifdef CONFIG_X86_32 # include "pgtable_32.h" #else # include "pgtable_64.h" #endif + +#ifndef __ASSEMBLY__ + +enum { + PG_LEVEL_NONE, + PG_LEVEL_4K, + PG_LEVEL_2M, + PG_LEVEL_1G, +}; + +/* + * Helper function that returns the kernel pagetable entry controlling + * the virtual address 'address'. NULL means no pagetable entry present. + * NOTE: the return type is pte_t but if the pmd is PSE then we return it + * as a pte too. + */ +extern pte_t *lookup_address(unsigned long address, int *level); + +/* local pte updates need not use xchg for locking */ +static inline pte_t native_local_ptep_get_and_clear(pte_t *ptep) +{ + pte_t res = *ptep; + + /* Pure native function needs no input for mm, addr */ + native_pte_clear(NULL, 0, ptep); + return res; +} + +static inline void native_set_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep , pte_t pte) +{ + native_set_pte(ptep, pte); +} + +#ifndef CONFIG_PARAVIRT +/* + * Rules for using pte_update - it must be called after any PTE update which + * has not been done using the set_pte / clear_pte interfaces. It is used by + * shadow mode hypervisors to resynchronize the shadow page tables. Kernel PTE + * updates should either be sets, clears, or set_pte_atomic for P->P + * transitions, which means this hook should only be called for user PTEs. + * This hook implies a P->P protection or access change has taken place, which + * requires a subsequent TLB flush. The notification can optionally be delayed + * until the TLB flush event by using the pte_update_defer form of the + * interface, but care must be taken to assure that the flush happens while + * still holding the same page table lock so that the shadow and primary pages + * do not become out of sync on SMP. + */ +#define pte_update(mm, addr, ptep) do { } while (0) +#define pte_update_defer(mm, addr, ptep) do { } while (0) +#endif + +/* + * We only update the dirty/accessed state if we set + * the dirty bit by hand in the kernel, since the hardware + * will do the accessed bit for us, and we don't want to + * race with other CPU's that might be updating the dirty + * bit at the same time. + */ +#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS +#define ptep_set_access_flags(vma, address, ptep, entry, dirty) \ +({ \ + int __changed = !pte_same(*(ptep), entry); \ + if (__changed && dirty) { \ + *ptep = entry; \ + pte_update_defer((vma)->vm_mm, (address), (ptep)); \ + flush_tlb_page(vma, address); \ + } \ + __changed; \ +}) + +#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG +#define ptep_test_and_clear_young(vma, addr, ptep) ({ \ + int __ret = 0; \ + if (pte_young(*(ptep))) \ + __ret = test_and_clear_bit(_PAGE_BIT_ACCESSED, \ + &(ptep)->pte); \ + if (__ret) \ + pte_update((vma)->vm_mm, addr, ptep); \ + __ret; \ +}) + +#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH +#define ptep_clear_flush_young(vma, address, ptep) \ +({ \ + int __young; \ + __young = ptep_test_and_clear_young((vma), (address), (ptep)); \ + if (__young) \ + flush_tlb_page(vma, address); \ + __young; \ +}) + +#define __HAVE_ARCH_PTEP_GET_AND_CLEAR +static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) +{ + pte_t pte = native_ptep_get_and_clear(ptep); + pte_update(mm, addr, ptep); + return pte; +} + +#define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL +static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, unsigned long addr, pte_t *ptep, int full) +{ + pte_t pte; + if (full) { + /* + * Full address destruction in progress; paravirt does not + * care about updates and native needs no locking + */ + pte = native_local_ptep_get_and_clear(ptep); + } else { + pte = ptep_get_and_clear(mm, addr, ptep); + } + return pte; +} + +#define __HAVE_ARCH_PTEP_SET_WRPROTECT +static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) +{ + clear_bit(_PAGE_BIT_RW, (unsigned long *)&ptep->pte); + pte_update(mm, addr, ptep); +} + +#include <asm-generic/pgtable.h> +#endif /* __ASSEMBLY__ */ + +#endif /* _ASM_X86_PGTABLE_H */ diff --git a/include/asm-x86/pgtable_32.h b/include/asm-x86/pgtable_32.h index ed3e70d8d04b..21e70fbf1dae 100644 --- a/include/asm-x86/pgtable_32.h +++ b/include/asm-x86/pgtable_32.h @@ -25,20 +25,11 @@ struct mm_struct; struct vm_area_struct; -/* - * ZERO_PAGE is a global shared page that is always zero: used - * for zero-mapped memory areas etc.. - */ -#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page)) -extern unsigned long empty_zero_page[1024]; extern pgd_t swapper_pg_dir[1024]; extern struct kmem_cache *pmd_cache; -extern spinlock_t pgd_lock; -extern struct page *pgd_list; void check_pgt_cache(void); -void pmd_ctor(struct kmem_cache *, void *); -void pgtable_cache_init(void); +static inline void pgtable_cache_init(void) {} void paging_init(void); @@ -58,9 +49,6 @@ void paging_init(void); #define PGDIR_SIZE (1UL << PGDIR_SHIFT) #define PGDIR_MASK (~(PGDIR_SIZE-1)) -#define USER_PTRS_PER_PGD (TASK_SIZE/PGDIR_SIZE) -#define FIRST_USER_ADDRESS 0 - #define USER_PGD_PTRS (PAGE_OFFSET >> PGDIR_SHIFT) #define KERNEL_PGD_PTRS (PTRS_PER_PGD-USER_PGD_PTRS) @@ -85,113 +73,6 @@ void paging_init(void); #endif /* - * _PAGE_PSE set in the page directory entry just means that - * the page directory entry points directly to a 4MB-aligned block of - * memory. - */ -#define _PAGE_BIT_PRESENT 0 -#define _PAGE_BIT_RW 1 -#define _PAGE_BIT_USER 2 -#define _PAGE_BIT_PWT 3 -#define _PAGE_BIT_PCD 4 -#define _PAGE_BIT_ACCESSED 5 -#define _PAGE_BIT_DIRTY 6 -#define _PAGE_BIT_PSE 7 /* 4 MB (or 2MB) page, Pentium+, if present.. */ -#define _PAGE_BIT_GLOBAL 8 /* Global TLB entry PPro+ */ -#define _PAGE_BIT_UNUSED1 9 /* available for programmer */ -#define _PAGE_BIT_UNUSED2 10 -#define _PAGE_BIT_UNUSED3 11 -#define _PAGE_BIT_NX 63 - -#define _PAGE_PRESENT 0x001 -#define _PAGE_RW 0x002 -#define _PAGE_USER 0x004 -#define _PAGE_PWT 0x008 -#define _PAGE_PCD 0x010 -#define _PAGE_ACCESSED 0x020 -#define _PAGE_DIRTY 0x040 -#define _PAGE_PSE 0x080 /* 4 MB (or 2MB) page, Pentium+, if present.. */ -#define _PAGE_GLOBAL 0x100 /* Global TLB entry PPro+ */ -#define _PAGE_UNUSED1 0x200 /* available for programmer */ -#define _PAGE_UNUSED2 0x400 -#define _PAGE_UNUSED3 0x800 - -/* If _PAGE_PRESENT is clear, we use these: */ -#define _PAGE_FILE 0x040 /* nonlinear file mapping, saved PTE; unset:swap */ -#define _PAGE_PROTNONE 0x080 /* if the user mapped it with PROT_NONE; - pte_present gives true */ -#ifdef CONFIG_X86_PAE -#define _PAGE_NX (1ULL<<_PAGE_BIT_NX) -#else -#define _PAGE_NX 0 -#endif - -#define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED | _PAGE_DIRTY) -#define _KERNPG_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY) -#define _PAGE_CHG_MASK (PTE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY) - -#define PAGE_NONE \ - __pgprot(_PAGE_PROTNONE | _PAGE_ACCESSED) -#define PAGE_SHARED \ - __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED) - -#define PAGE_SHARED_EXEC \ - __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED) -#define PAGE_COPY_NOEXEC \ - __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED | _PAGE_NX) -#define PAGE_COPY_EXEC \ - __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED) -#define PAGE_COPY \ - PAGE_COPY_NOEXEC -#define PAGE_READONLY \ - __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED | _PAGE_NX) -#define PAGE_READONLY_EXEC \ - __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED) - -#define _PAGE_KERNEL \ - (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_NX) -#define _PAGE_KERNEL_EXEC \ - (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED) - -extern unsigned long long __PAGE_KERNEL, __PAGE_KERNEL_EXEC; -#define __PAGE_KERNEL_RO (__PAGE_KERNEL & ~_PAGE_RW) -#define __PAGE_KERNEL_RX (__PAGE_KERNEL_EXEC & ~_PAGE_RW) -#define __PAGE_KERNEL_NOCACHE (__PAGE_KERNEL | _PAGE_PCD) -#define __PAGE_KERNEL_LARGE (__PAGE_KERNEL | _PAGE_PSE) -#define __PAGE_KERNEL_LARGE_EXEC (__PAGE_KERNEL_EXEC | _PAGE_PSE) - -#define PAGE_KERNEL __pgprot(__PAGE_KERNEL) -#define PAGE_KERNEL_RO __pgprot(__PAGE_KERNEL_RO) -#define PAGE_KERNEL_EXEC __pgprot(__PAGE_KERNEL_EXEC) -#define PAGE_KERNEL_RX __pgprot(__PAGE_KERNEL_RX) -#define PAGE_KERNEL_NOCACHE __pgprot(__PAGE_KERNEL_NOCACHE) -#define PAGE_KERNEL_LARGE __pgprot(__PAGE_KERNEL_LARGE) -#define PAGE_KERNEL_LARGE_EXEC __pgprot(__PAGE_KERNEL_LARGE_EXEC) - -/* - * The i386 can't do page protection for execute, and considers that - * the same are read. Also, write permissions imply read permissions. - * This is the closest we can get.. - */ -#define __P000 PAGE_NONE -#define __P001 PAGE_READONLY -#define __P010 PAGE_COPY -#define __P011 PAGE_COPY -#define __P100 PAGE_READONLY_EXEC -#define __P101 PAGE_READONLY_EXEC -#define __P110 PAGE_COPY_EXEC -#define __P111 PAGE_COPY_EXEC - -#define __S000 PAGE_NONE -#define __S001 PAGE_READONLY -#define __S010 PAGE_SHARED -#define __S011 PAGE_SHARED -#define __S100 PAGE_READONLY_EXEC -#define __S101 PAGE_READONLY_EXEC -#define __S110 PAGE_SHARED_EXEC -#define __S111 PAGE_SHARED_EXEC - -/* * Define this if things work differently on an i386 and an i486: * it will (on an i486) warn about kernel memory accesses that are * done without a 'access_ok(VERIFY_WRITE,..)' @@ -211,133 +92,12 @@ extern unsigned long pg0[]; #define pages_to_mb(x) ((x) >> (20-PAGE_SHIFT)) -/* - * The following only work if pte_present() is true. - * Undefined behaviour if not.. - */ -static inline int pte_dirty(pte_t pte) { return (pte).pte_low & _PAGE_DIRTY; } -static inline int pte_young(pte_t pte) { return (pte).pte_low & _PAGE_ACCESSED; } -static inline int pte_write(pte_t pte) { return (pte).pte_low & _PAGE_RW; } -static inline int pte_huge(pte_t pte) { return (pte).pte_low & _PAGE_PSE; } - -/* - * The following only works if pte_present() is not true. - */ -static inline int pte_file(pte_t pte) { return (pte).pte_low & _PAGE_FILE; } - -static inline pte_t pte_mkclean(pte_t pte) { (pte).pte_low &= ~_PAGE_DIRTY; return pte; } -static inline pte_t pte_mkold(pte_t pte) { (pte).pte_low &= ~_PAGE_ACCESSED; return pte; } -static inline pte_t pte_wrprotect(pte_t pte) { (pte).pte_low &= ~_PAGE_RW; return pte; } -static inline pte_t pte_mkdirty(pte_t pte) { (pte).pte_low |= _PAGE_DIRTY; return pte; } -static inline pte_t pte_mkyoung(pte_t pte) { (pte).pte_low |= _PAGE_ACCESSED; return pte; } -static inline pte_t pte_mkwrite(pte_t pte) { (pte).pte_low |= _PAGE_RW; return pte; } -static inline pte_t pte_mkhuge(pte_t pte) { (pte).pte_low |= _PAGE_PSE; return pte; } - #ifdef CONFIG_X86_PAE # include <asm/pgtable-3level.h> #else # include <asm/pgtable-2level.h> #endif -#ifndef CONFIG_PARAVIRT -/* - * Rules for using pte_update - it must be called after any PTE update which - * has not been done using the set_pte / clear_pte interfaces. It is used by - * shadow mode hypervisors to resynchronize the shadow page tables. Kernel PTE - * updates should either be sets, clears, or set_pte_atomic for P->P - * transitions, which means this hook should only be called for user PTEs. - * This hook implies a P->P protection or access change has taken place, which - * requires a subsequent TLB flush. The notification can optionally be delayed - * until the TLB flush event by using the pte_update_defer form of the - * interface, but care must be taken to assure that the flush happens while - * still holding the same page table lock so that the shadow and primary pages - * do not become out of sync on SMP. - */ -#define pte_update(mm, addr, ptep) do { } while (0) -#define pte_update_defer(mm, addr, ptep) do { } while (0) -#endif - -/* local pte updates need not use xchg for locking */ -static inline pte_t native_local_ptep_get_and_clear(pte_t *ptep) -{ - pte_t res = *ptep; - - /* Pure native function needs no input for mm, addr */ - native_pte_clear(NULL, 0, ptep); - return res; -} - -/* - * We only update the dirty/accessed state if we set - * the dirty bit by hand in the kernel, since the hardware - * will do the accessed bit for us, and we don't want to - * race with other CPU's that might be updating the dirty - * bit at the same time. - */ -#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS -#define ptep_set_access_flags(vma, address, ptep, entry, dirty) \ -({ \ - int __changed = !pte_same(*(ptep), entry); \ - if (__changed && dirty) { \ - (ptep)->pte_low = (entry).pte_low; \ - pte_update_defer((vma)->vm_mm, (address), (ptep)); \ - flush_tlb_page(vma, address); \ - } \ - __changed; \ -}) - -#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG -#define ptep_test_and_clear_young(vma, addr, ptep) ({ \ - int __ret = 0; \ - if (pte_young(*(ptep))) \ - __ret = test_and_clear_bit(_PAGE_BIT_ACCESSED, \ - &(ptep)->pte_low); \ - if (__ret) \ - pte_update((vma)->vm_mm, addr, ptep); \ - __ret; \ -}) - -#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH -#define ptep_clear_flush_young(vma, address, ptep) \ -({ \ - int __young; \ - __young = ptep_test_and_clear_young((vma), (address), (ptep)); \ - if (__young) \ - flush_tlb_page(vma, address); \ - __young; \ -}) - -#define __HAVE_ARCH_PTEP_GET_AND_CLEAR -static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) -{ - pte_t pte = native_ptep_get_and_clear(ptep); - pte_update(mm, addr, ptep); - return pte; -} - -#define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL -static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, unsigned long addr, pte_t *ptep, int full) -{ - pte_t pte; - if (full) { - /* - * Full address destruction in progress; paravirt does not - * care about updates and native needs no locking - */ - pte = native_local_ptep_get_and_clear(ptep); - } else { - pte = ptep_get_and_clear(mm, addr, ptep); - } - return pte; -} - -#define __HAVE_ARCH_PTEP_SET_WRPROTECT -static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) -{ - clear_bit(_PAGE_BIT_RW, &ptep->pte_low); - pte_update(mm, addr, ptep); -} - /* * clone_pgd_range(pgd_t *dst, pgd_t *src, int count); * @@ -367,25 +127,6 @@ static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count) #define mk_pte(page, pgprot) pfn_pte(page_to_pfn(page), (pgprot)) -static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) -{ - pte.pte_low &= _PAGE_CHG_MASK; - pte.pte_low |= pgprot_val(newprot); -#ifdef CONFIG_X86_PAE - /* - * Chop off the NX bit (if present), and add the NX portion of - * the newprot (if present): - */ - pte.pte_high &= ~(1 << (_PAGE_BIT_NX - 32)); - pte.pte_high |= (pgprot_val(newprot) >> 32) & \ - (__supported_pte_mask >> 32); -#endif - return pte; -} - -#define pmd_large(pmd) \ -((pmd_val(pmd) & (_PAGE_PSE|_PAGE_PRESENT)) == (_PAGE_PSE|_PAGE_PRESENT)) - /* * the pgd page can be thought of an array like this: pgd_t[PTRS_PER_PGD] * @@ -432,26 +173,6 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) #define pmd_page_vaddr(pmd) \ ((unsigned long) __va(pmd_val(pmd) & PAGE_MASK)) -/* - * Helper function that returns the kernel pagetable entry controlling - * the virtual address 'address'. NULL means no pagetable entry present. - * NOTE: the return type is pte_t but if the pmd is PSE then we return it - * as a pte too. - */ -extern pte_t *lookup_address(unsigned long address); - -/* - * Make a given kernel text page executable/non-executable. - * Returns the previous executability setting of that page (which - * is used to restore the previous state). Used by the SMP bootup code. - * NOTE: this is an __init function for security reasons. - */ -#ifdef CONFIG_X86_PAE - extern int set_kernel_exec(unsigned long vaddr, int enable); -#else - static inline int set_kernel_exec(unsigned long vaddr, int enable) { return 0;} -#endif - #if defined(CONFIG_HIGHPTE) #define pte_offset_map(dir, address) \ ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)),KM_PTE0) + pte_index(address)) @@ -497,13 +218,17 @@ static inline void paravirt_pagetable_setup_done(pgd_t *base) #endif /* !__ASSEMBLY__ */ +/* + * kern_addr_valid() is (1) for FLATMEM and (0) for + * SPARSEMEM and DISCONTIGMEM + */ #ifdef CONFIG_FLATMEM #define kern_addr_valid(addr) (1) -#endif /* CONFIG_FLATMEM */ +#else +#define kern_addr_valid(kaddr) (0) +#endif #define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \ remap_pfn_range(vma, vaddr, pfn, size, prot) -#include <asm-generic/pgtable.h> - #endif /* _I386_PGTABLE_H */ diff --git a/include/asm-x86/pgtable_64.h b/include/asm-x86/pgtable_64.h index 9b0ff477b39e..6e615a103c2f 100644 --- a/include/asm-x86/pgtable_64.h +++ b/include/asm-x86/pgtable_64.h @@ -17,22 +17,16 @@ extern pud_t level3_kernel_pgt[512]; extern pud_t level3_ident_pgt[512]; extern pmd_t level2_kernel_pgt[512]; extern pgd_t init_level4_pgt[]; -extern unsigned long __supported_pte_mask; #define swapper_pg_dir init_level4_pgt extern void paging_init(void); extern void clear_kernel_mapping(unsigned long addr, unsigned long size); -/* - * ZERO_PAGE is a global shared page that is always zero: used - * for zero-mapped memory areas etc.. - */ -extern unsigned long empty_zero_page[PAGE_SIZE/sizeof(unsigned long)]; -#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page)) - #endif /* !__ASSEMBLY__ */ +#define SHARED_KERNEL_PMD 1 + /* * PGDIR_SHIFT determines what a top-level page table entry can map */ @@ -71,57 +65,68 @@ extern unsigned long empty_zero_page[PAGE_SIZE/sizeof(unsigned long)]; #define pgd_none(x) (!pgd_val(x)) #define pud_none(x) (!pud_val(x)) -static inline void set_pte(pte_t *dst, pte_t val) +struct mm_struct; + +static inline void native_pte_clear(struct mm_struct *mm, unsigned long addr, + pte_t *ptep) +{ + *ptep = native_make_pte(0); +} + +static inline void native_set_pte(pte_t *ptep, pte_t pte) { - pte_val(*dst) = pte_val(val); -} -#define set_pte_at(mm,addr,ptep,pteval) set_pte(ptep,pteval) + *ptep = pte; +} -static inline void set_pmd(pmd_t *dst, pmd_t val) +static inline void native_set_pte_atomic(pte_t *ptep, pte_t pte) { - pmd_val(*dst) = pmd_val(val); -} + native_set_pte(ptep, pte); +} -static inline void set_pud(pud_t *dst, pud_t val) +static inline pte_t native_ptep_get_and_clear(pte_t *xp) { - pud_val(*dst) = pud_val(val); +#ifdef CONFIG_SMP + return native_make_pte(xchg(&xp->pte, 0)); +#else + /* native_local_ptep_get_and_clear, but duplicated because of cyclic dependency */ + pte_t ret = *xp; + native_pte_clear(NULL, 0, xp); + return ret; +#endif } -static inline void pud_clear (pud_t *pud) +static inline void native_set_pmd(pmd_t *pmdp, pmd_t pmd) { - set_pud(pud, __pud(0)); + *pmdp = pmd; } -static inline void set_pgd(pgd_t *dst, pgd_t val) +static inline void native_pmd_clear(pmd_t *pmd) { - pgd_val(*dst) = pgd_val(val); -} + native_set_pmd(pmd, native_make_pmd(0)); +} -static inline void pgd_clear (pgd_t * pgd) +static inline void native_set_pud(pud_t *pudp, pud_t pud) { - set_pgd(pgd, __pgd(0)); + *pudp = pud; } -#define ptep_get_and_clear(mm,addr,xp) __pte(xchg(&(xp)->pte, 0)) +static inline void native_pud_clear(pud_t *pud) +{ + native_set_pud(pud, native_make_pud(0)); +} -struct mm_struct; +static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd) +{ + *pgdp = pgd; +} -static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, unsigned long addr, pte_t *ptep, int full) +static inline void native_pgd_clear(pgd_t * pgd) { - pte_t pte; - if (full) { - pte = *ptep; - *ptep = __pte(0); - } else { - pte = ptep_get_and_clear(mm, addr, ptep); - } - return pte; + native_set_pgd(pgd, native_make_pgd(0)); } #define pte_same(a, b) ((a).pte == (b).pte) -#define pte_pgprot(a) (__pgprot((a).pte & ~PHYSICAL_PAGE_MASK)) - #endif /* !__ASSEMBLY__ */ #define PMD_SIZE (_AC(1,UL) << PMD_SHIFT) @@ -131,8 +136,6 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, unsigned long #define PGDIR_SIZE (_AC(1,UL) << PGDIR_SHIFT) #define PGDIR_MASK (~(PGDIR_SIZE-1)) -#define USER_PTRS_PER_PGD ((TASK_SIZE-1)/PGDIR_SIZE+1) -#define FIRST_USER_ADDRESS 0 #define MAXMEM _AC(0x3fffffffffff, UL) #define VMALLOC_START _AC(0xffffc20000000000, UL) @@ -142,91 +145,6 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, unsigned long #define MODULES_END _AC(0xfffffffffff00000, UL) #define MODULES_LEN (MODULES_END - MODULES_VADDR) -#define _PAGE_BIT_PRESENT 0 -#define _PAGE_BIT_RW 1 -#define _PAGE_BIT_USER 2 -#define _PAGE_BIT_PWT 3 -#define _PAGE_BIT_PCD 4 -#define _PAGE_BIT_ACCESSED 5 -#define _PAGE_BIT_DIRTY 6 -#define _PAGE_BIT_PSE 7 /* 4 MB (or 2MB) page */ -#define _PAGE_BIT_GLOBAL 8 /* Global TLB entry PPro+ */ -#define _PAGE_BIT_NX 63 /* No execute: only valid after cpuid check */ - -#define _PAGE_PRESENT 0x001 -#define _PAGE_RW 0x002 -#define _PAGE_USER 0x004 -#define _PAGE_PWT 0x008 -#define _PAGE_PCD 0x010 -#define _PAGE_ACCESSED 0x020 -#define _PAGE_DIRTY 0x040 -#define _PAGE_PSE 0x080 /* 2MB page */ -#define _PAGE_FILE 0x040 /* nonlinear file mapping, saved PTE; unset:swap */ -#define _PAGE_GLOBAL 0x100 /* Global TLB entry */ - -#define _PAGE_PROTNONE 0x080 /* If not present */ -#define _PAGE_NX (_AC(1,UL)<<_PAGE_BIT_NX) - -#define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED | _PAGE_DIRTY) -#define _KERNPG_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY) - -#define _PAGE_CHG_MASK (PTE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY) - -#define PAGE_NONE __pgprot(_PAGE_PROTNONE | _PAGE_ACCESSED) -#define PAGE_SHARED __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED | _PAGE_NX) -#define PAGE_SHARED_EXEC __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED) -#define PAGE_COPY_NOEXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED | _PAGE_NX) -#define PAGE_COPY PAGE_COPY_NOEXEC -#define PAGE_COPY_EXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED) -#define PAGE_READONLY __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED | _PAGE_NX) -#define PAGE_READONLY_EXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED) -#define __PAGE_KERNEL \ - (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_NX) -#define __PAGE_KERNEL_EXEC \ - (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED) -#define __PAGE_KERNEL_NOCACHE \ - (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_PCD | _PAGE_ACCESSED | _PAGE_NX) -#define __PAGE_KERNEL_RO \ - (_PAGE_PRESENT | _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_NX) -#define __PAGE_KERNEL_VSYSCALL \ - (_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED) -#define __PAGE_KERNEL_VSYSCALL_NOCACHE \ - (_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED | _PAGE_PCD) -#define __PAGE_KERNEL_LARGE \ - (__PAGE_KERNEL | _PAGE_PSE) -#define __PAGE_KERNEL_LARGE_EXEC \ - (__PAGE_KERNEL_EXEC | _PAGE_PSE) - -#define MAKE_GLOBAL(x) __pgprot((x) | _PAGE_GLOBAL) - -#define PAGE_KERNEL MAKE_GLOBAL(__PAGE_KERNEL) -#define PAGE_KERNEL_EXEC MAKE_GLOBAL(__PAGE_KERNEL_EXEC) -#define PAGE_KERNEL_RO MAKE_GLOBAL(__PAGE_KERNEL_RO) -#define PAGE_KERNEL_NOCACHE MAKE_GLOBAL(__PAGE_KERNEL_NOCACHE) -#define PAGE_KERNEL_VSYSCALL32 __pgprot(__PAGE_KERNEL_VSYSCALL) -#define PAGE_KERNEL_VSYSCALL MAKE_GLOBAL(__PAGE_KERNEL_VSYSCALL) -#define PAGE_KERNEL_LARGE MAKE_GLOBAL(__PAGE_KERNEL_LARGE) -#define PAGE_KERNEL_VSYSCALL_NOCACHE MAKE_GLOBAL(__PAGE_KERNEL_VSYSCALL_NOCACHE) - -/* xwr */ -#define __P000 PAGE_NONE -#define __P001 PAGE_READONLY -#define __P010 PAGE_COPY -#define __P011 PAGE_COPY -#define __P100 PAGE_READONLY_EXEC -#define __P101 PAGE_READONLY_EXEC -#define __P110 PAGE_COPY_EXEC -#define __P111 PAGE_COPY_EXEC - -#define __S000 PAGE_NONE -#define __S001 PAGE_READONLY -#define __S010 PAGE_SHARED -#define __S011 PAGE_SHARED -#define __S100 PAGE_READONLY_EXEC -#define __S101 PAGE_READONLY_EXEC -#define __S110 PAGE_SHARED_EXEC -#define __S111 PAGE_SHARED_EXEC - #ifndef __ASSEMBLY__ static inline unsigned long pgd_bad(pgd_t pgd) @@ -246,66 +164,16 @@ static inline unsigned long pmd_bad(pmd_t pmd) #define pte_none(x) (!pte_val(x)) #define pte_present(x) (pte_val(x) & (_PAGE_PRESENT | _PAGE_PROTNONE)) -#define pte_clear(mm,addr,xp) do { set_pte_at(mm, addr, xp, __pte(0)); } while (0) -#define pages_to_mb(x) ((x) >> (20-PAGE_SHIFT)) /* FIXME: is this - right? */ +#define pages_to_mb(x) ((x) >> (20-PAGE_SHIFT)) /* FIXME: is this right? */ #define pte_page(x) pfn_to_page(pte_pfn(x)) #define pte_pfn(x) ((pte_val(x) & __PHYSICAL_MASK) >> PAGE_SHIFT) -static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot) -{ - pte_t pte; - pte_val(pte) = (page_nr << PAGE_SHIFT); - pte_val(pte) |= pgprot_val(pgprot); - pte_val(pte) &= __supported_pte_mask; - return pte; -} - -/* - * The following only work if pte_present() is true. - * Undefined behaviour if not.. - */ -#define __LARGE_PTE (_PAGE_PSE|_PAGE_PRESENT) -static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; } -static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; } -static inline int pte_write(pte_t pte) { return pte_val(pte) & _PAGE_RW; } -static inline int pte_file(pte_t pte) { return pte_val(pte) & _PAGE_FILE; } -static inline int pte_huge(pte_t pte) { return pte_val(pte) & _PAGE_PSE; } - -static inline pte_t pte_mkclean(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) & ~_PAGE_DIRTY)); return pte; } -static inline pte_t pte_mkold(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) & ~_PAGE_ACCESSED)); return pte; } -static inline pte_t pte_wrprotect(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) & ~_PAGE_RW)); return pte; } -static inline pte_t pte_mkexec(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) & ~_PAGE_NX)); return pte; } -static inline pte_t pte_mkdirty(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) | _PAGE_DIRTY)); return pte; } -static inline pte_t pte_mkyoung(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) | _PAGE_ACCESSED)); return pte; } -static inline pte_t pte_mkwrite(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) | _PAGE_RW)); return pte; } -static inline pte_t pte_mkhuge(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) | _PAGE_PSE)); return pte; } -static inline pte_t pte_clrhuge(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) & ~_PAGE_PSE)); return pte; } - -struct vm_area_struct; - -static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) -{ - if (!pte_young(*ptep)) - return 0; - return test_and_clear_bit(_PAGE_BIT_ACCESSED, &ptep->pte); -} - -static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) -{ - clear_bit(_PAGE_BIT_RW, &ptep->pte); -} - /* * Macro to mark a page protection value as "uncacheable". */ #define pgprot_noncached(prot) (__pgprot(pgprot_val(prot) | _PAGE_PCD | _PAGE_PWT)) -static inline int pmd_large(pmd_t pte) { - return (pmd_val(pte) & __LARGE_PTE) == __LARGE_PTE; -} - /* * Conversion functions: convert a page and protection to a page entry, @@ -340,29 +208,18 @@ static inline int pmd_large(pmd_t pte) { pmd_index(address)) #define pmd_none(x) (!pmd_val(x)) #define pmd_present(x) (pmd_val(x) & _PAGE_PRESENT) -#define pmd_clear(xp) do { set_pmd(xp, __pmd(0)); } while (0) #define pfn_pmd(nr,prot) (__pmd(((nr) << PAGE_SHIFT) | pgprot_val(prot))) #define pmd_pfn(x) ((pmd_val(x) & __PHYSICAL_MASK) >> PAGE_SHIFT) #define pte_to_pgoff(pte) ((pte_val(pte) & PHYSICAL_PAGE_MASK) >> PAGE_SHIFT) -#define pgoff_to_pte(off) ((pte_t) { ((off) << PAGE_SHIFT) | _PAGE_FILE }) +#define pgoff_to_pte(off) ((pte_t) { .pte = ((off) << PAGE_SHIFT) | _PAGE_FILE }) #define PTE_FILE_MAX_BITS __PHYSICAL_MASK_SHIFT /* PTE - Level 1 access. */ /* page, protection -> pte */ #define mk_pte(page, pgprot) pfn_pte(page_to_pfn(page), (pgprot)) -#define mk_pte_huge(entry) (pte_val(entry) |= _PAGE_PRESENT | _PAGE_PSE) -/* Change flags of a PTE */ -static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) -{ - pte_val(pte) &= _PAGE_CHG_MASK; - pte_val(pte) |= pgprot_val(newprot); - pte_val(pte) &= __supported_pte_mask; - return pte; -} - #define pte_index(address) \ (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) #define pte_offset_kernel(dir, address) ((pte_t *) pmd_page_vaddr(*(dir)) + \ @@ -376,40 +233,20 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) #define update_mmu_cache(vma,address,pte) do { } while (0) -/* We only update the dirty/accessed state if we set - * the dirty bit by hand in the kernel, since the hardware - * will do the accessed bit for us, and we don't want to - * race with other CPU's that might be updating the dirty - * bit at the same time. */ -#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS -#define ptep_set_access_flags(__vma, __address, __ptep, __entry, __dirty) \ -({ \ - int __changed = !pte_same(*(__ptep), __entry); \ - if (__changed && __dirty) { \ - set_pte(__ptep, __entry); \ - flush_tlb_page(__vma, __address); \ - } \ - __changed; \ -}) - /* Encode and de-code a swap entry */ #define __swp_type(x) (((x).val >> 1) & 0x3f) #define __swp_offset(x) ((x).val >> 8) #define __swp_entry(type, offset) ((swp_entry_t) { ((type) << 1) | ((offset) << 8) }) #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) -#define __swp_entry_to_pte(x) ((pte_t) { (x).val }) - -extern spinlock_t pgd_lock; -extern struct list_head pgd_list; +#define __swp_entry_to_pte(x) ((pte_t) { .pte = (x).val }) extern int kern_addr_valid(unsigned long addr); -pte_t *lookup_address(unsigned long addr); - #define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \ remap_pfn_range(vma, vaddr, pfn, size, prot) #define HAVE_ARCH_UNMAPPED_AREA +#define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN #define pgtable_cache_init() do { } while (0) #define check_pgt_cache() do { } while (0) @@ -422,12 +259,7 @@ pte_t *lookup_address(unsigned long addr); #define kc_offset_to_vaddr(o) \ (((o) & (1UL << (__VIRTUAL_MASK_SHIFT-1))) ? ((o) | (~__VIRTUAL_MASK)) : (o)) -#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG -#define __HAVE_ARCH_PTEP_GET_AND_CLEAR -#define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL -#define __HAVE_ARCH_PTEP_SET_WRPROTECT #define __HAVE_ARCH_PTE_SAME -#include <asm-generic/pgtable.h> #endif /* !__ASSEMBLY__ */ #endif /* _X86_64_PGTABLE_H */ diff --git a/include/asm-x86/processor.h b/include/asm-x86/processor.h index 46e1c04e309c..ab4d0c2a3f8f 100644 --- a/include/asm-x86/processor.h +++ b/include/asm-x86/processor.h @@ -1,5 +1,842 @@ +#ifndef __ASM_X86_PROCESSOR_H +#define __ASM_X86_PROCESSOR_H + +#include <asm/processor-flags.h> + +/* migration helpers, for KVM - will be removed in 2.6.25: */ +#include <asm/vm86.h> +#define Xgt_desc_struct desc_ptr + +/* Forward declaration, a strange C thing */ +struct task_struct; +struct mm_struct; + +#include <asm/vm86.h> +#include <asm/math_emu.h> +#include <asm/segment.h> +#include <asm/types.h> +#include <asm/sigcontext.h> +#include <asm/current.h> +#include <asm/cpufeature.h> +#include <asm/system.h> +#include <asm/page.h> +#include <asm/percpu.h> +#include <asm/msr.h> +#include <asm/desc_defs.h> +#include <asm/nops.h> +#include <linux/personality.h> +#include <linux/cpumask.h> +#include <linux/cache.h> +#include <linux/threads.h> +#include <linux/init.h> + +/* + * Default implementation of macro that returns current + * instruction pointer ("program counter"). + */ +static inline void *current_text_addr(void) +{ + void *pc; + asm volatile("mov $1f,%0\n1:":"=r" (pc)); + return pc; +} + +#ifdef CONFIG_X86_VSMP +#define ARCH_MIN_TASKALIGN (1 << INTERNODE_CACHE_SHIFT) +#define ARCH_MIN_MMSTRUCT_ALIGN (1 << INTERNODE_CACHE_SHIFT) +#else +#define ARCH_MIN_TASKALIGN 16 +#define ARCH_MIN_MMSTRUCT_ALIGN 0 +#endif + +/* + * CPU type and hardware bug flags. Kept separately for each CPU. + * Members of this structure are referenced in head.S, so think twice + * before touching them. [mj] + */ + +struct cpuinfo_x86 { + __u8 x86; /* CPU family */ + __u8 x86_vendor; /* CPU vendor */ + __u8 x86_model; + __u8 x86_mask; +#ifdef CONFIG_X86_32 + char wp_works_ok; /* It doesn't on 386's */ + char hlt_works_ok; /* Problems on some 486Dx4's and old 386's */ + char hard_math; + char rfu; + char fdiv_bug; + char f00f_bug; + char coma_bug; + char pad0; +#else + /* number of 4K pages in DTLB/ITLB combined(in pages)*/ + int x86_tlbsize; + __u8 x86_virt_bits, x86_phys_bits; + /* cpuid returned core id bits */ + __u8 x86_coreid_bits; + /* Max extended CPUID function supported */ + __u32 extended_cpuid_level; +#endif + int cpuid_level; /* Maximum supported CPUID level, -1=no CPUID */ + __u32 x86_capability[NCAPINTS]; + char x86_vendor_id[16]; + char x86_model_id[64]; + int x86_cache_size; /* in KB - valid for CPUS which support this + call */ + int x86_cache_alignment; /* In bytes */ + int x86_power; + unsigned long loops_per_jiffy; +#ifdef CONFIG_SMP + cpumask_t llc_shared_map; /* cpus sharing the last level cache */ +#endif + u16 x86_max_cores; /* cpuid returned max cores value */ + u16 apicid; + u16 x86_clflush_size; +#ifdef CONFIG_SMP + u16 booted_cores; /* number of cores as seen by OS */ + u16 phys_proc_id; /* Physical processor id. */ + u16 cpu_core_id; /* Core id */ + u16 cpu_index; /* index into per_cpu list */ +#endif +} __attribute__((__aligned__(SMP_CACHE_BYTES))); + +#define X86_VENDOR_INTEL 0 +#define X86_VENDOR_CYRIX 1 +#define X86_VENDOR_AMD 2 +#define X86_VENDOR_UMC 3 +#define X86_VENDOR_NEXGEN 4 +#define X86_VENDOR_CENTAUR 5 +#define X86_VENDOR_TRANSMETA 7 +#define X86_VENDOR_NSC 8 +#define X86_VENDOR_NUM 9 +#define X86_VENDOR_UNKNOWN 0xff + +/* + * capabilities of CPUs + */ +extern struct cpuinfo_x86 boot_cpu_data; +extern struct cpuinfo_x86 new_cpu_data; +extern struct tss_struct doublefault_tss; +extern __u32 cleared_cpu_caps[NCAPINTS]; + +#ifdef CONFIG_SMP +DECLARE_PER_CPU(struct cpuinfo_x86, cpu_info); +#define cpu_data(cpu) per_cpu(cpu_info, cpu) +#define current_cpu_data cpu_data(smp_processor_id()) +#else +#define cpu_data(cpu) boot_cpu_data +#define current_cpu_data boot_cpu_data +#endif + +void cpu_detect(struct cpuinfo_x86 *c); + +extern void identify_cpu(struct cpuinfo_x86 *); +extern void identify_boot_cpu(void); +extern void identify_secondary_cpu(struct cpuinfo_x86 *); +extern void print_cpu_info(struct cpuinfo_x86 *); +extern void init_scattered_cpuid_features(struct cpuinfo_x86 *c); +extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c); +extern unsigned short num_cache_leaves; + +#if defined(CONFIG_X86_HT) || defined(CONFIG_X86_64) +extern void detect_ht(struct cpuinfo_x86 *c); +#else +static inline void detect_ht(struct cpuinfo_x86 *c) {} +#endif + +static inline void native_cpuid(unsigned int *eax, unsigned int *ebx, + unsigned int *ecx, unsigned int *edx) +{ + /* ecx is often an input as well as an output. */ + __asm__("cpuid" + : "=a" (*eax), + "=b" (*ebx), + "=c" (*ecx), + "=d" (*edx) + : "0" (*eax), "2" (*ecx)); +} + +static inline void load_cr3(pgd_t *pgdir) +{ + write_cr3(__pa(pgdir)); +} + +#ifdef CONFIG_X86_32 +/* This is the TSS defined by the hardware. */ +struct x86_hw_tss { + unsigned short back_link, __blh; + unsigned long sp0; + unsigned short ss0, __ss0h; + unsigned long sp1; + unsigned short ss1, __ss1h; /* ss1 caches MSR_IA32_SYSENTER_CS */ + unsigned long sp2; + unsigned short ss2, __ss2h; + unsigned long __cr3; + unsigned long ip; + unsigned long flags; + unsigned long ax, cx, dx, bx; + unsigned long sp, bp, si, di; + unsigned short es, __esh; + unsigned short cs, __csh; + unsigned short ss, __ssh; + unsigned short ds, __dsh; + unsigned short fs, __fsh; + unsigned short gs, __gsh; + unsigned short ldt, __ldth; + unsigned short trace, io_bitmap_base; +} __attribute__((packed)); +#else +struct x86_hw_tss { + u32 reserved1; + u64 sp0; + u64 sp1; + u64 sp2; + u64 reserved2; + u64 ist[7]; + u32 reserved3; + u32 reserved4; + u16 reserved5; + u16 io_bitmap_base; +} __attribute__((packed)) ____cacheline_aligned; +#endif + +/* + * Size of io_bitmap. + */ +#define IO_BITMAP_BITS 65536 +#define IO_BITMAP_BYTES (IO_BITMAP_BITS/8) +#define IO_BITMAP_LONGS (IO_BITMAP_BYTES/sizeof(long)) +#define IO_BITMAP_OFFSET offsetof(struct tss_struct, io_bitmap) +#define INVALID_IO_BITMAP_OFFSET 0x8000 +#define INVALID_IO_BITMAP_OFFSET_LAZY 0x9000 + +struct tss_struct { + struct x86_hw_tss x86_tss; + + /* + * The extra 1 is there because the CPU will access an + * additional byte beyond the end of the IO permission + * bitmap. The extra byte must be all 1 bits, and must + * be within the limit. + */ + unsigned long io_bitmap[IO_BITMAP_LONGS + 1]; + /* + * Cache the current maximum and the last task that used the bitmap: + */ + unsigned long io_bitmap_max; + struct thread_struct *io_bitmap_owner; + /* + * pads the TSS to be cacheline-aligned (size is 0x100) + */ + unsigned long __cacheline_filler[35]; + /* + * .. and then another 0x100 bytes for emergency kernel stack + */ + unsigned long stack[64]; +} __attribute__((packed)); + +DECLARE_PER_CPU(struct tss_struct, init_tss); + +/* Save the original ist values for checking stack pointers during debugging */ +struct orig_ist { + unsigned long ist[7]; +}; + +#define MXCSR_DEFAULT 0x1f80 + +struct i387_fsave_struct { + u32 cwd; + u32 swd; + u32 twd; + u32 fip; + u32 fcs; + u32 foo; + u32 fos; + u32 st_space[20]; /* 8*10 bytes for each FP-reg = 80 bytes */ + u32 status; /* software status information */ +}; + +struct i387_fxsave_struct { + u16 cwd; + u16 swd; + u16 twd; + u16 fop; + union { + struct { + u64 rip; + u64 rdp; + }; + struct { + u32 fip; + u32 fcs; + u32 foo; + u32 fos; + }; + }; + u32 mxcsr; + u32 mxcsr_mask; + u32 st_space[32]; /* 8*16 bytes for each FP-reg = 128 bytes */ + u32 xmm_space[64]; /* 16*16 bytes for each XMM-reg = 256 bytes */ + u32 padding[24]; +} __attribute__((aligned(16))); + +struct i387_soft_struct { + u32 cwd; + u32 swd; + u32 twd; + u32 fip; + u32 fcs; + u32 foo; + u32 fos; + u32 st_space[20]; /* 8*10 bytes for each FP-reg = 80 bytes */ + u8 ftop, changed, lookahead, no_update, rm, alimit; + struct info *info; + u32 entry_eip; +}; + +union i387_union { + struct i387_fsave_struct fsave; + struct i387_fxsave_struct fxsave; + struct i387_soft_struct soft; +}; + +#ifdef CONFIG_X86_32 +/* + * the following now lives in the per cpu area: + * extern int cpu_llc_id[NR_CPUS]; + */ +DECLARE_PER_CPU(u8, cpu_llc_id); +#else +DECLARE_PER_CPU(struct orig_ist, orig_ist); +#endif + +extern void print_cpu_info(struct cpuinfo_x86 *); +extern void init_scattered_cpuid_features(struct cpuinfo_x86 *c); +extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c); +extern unsigned short num_cache_leaves; + +struct thread_struct { +/* cached TLS descriptors. */ + struct desc_struct tls_array[GDT_ENTRY_TLS_ENTRIES]; + unsigned long sp0; + unsigned long sp; +#ifdef CONFIG_X86_32 + unsigned long sysenter_cs; +#else + unsigned long usersp; /* Copy from PDA */ + unsigned short es, ds, fsindex, gsindex; +#endif + unsigned long ip; + unsigned long fs; + unsigned long gs; +/* Hardware debugging registers */ + unsigned long debugreg0; + unsigned long debugreg1; + unsigned long debugreg2; + unsigned long debugreg3; + unsigned long debugreg6; + unsigned long debugreg7; +/* fault info */ + unsigned long cr2, trap_no, error_code; +/* floating point info */ + union i387_union i387 __attribute__((aligned(16)));; +#ifdef CONFIG_X86_32 +/* virtual 86 mode info */ + struct vm86_struct __user *vm86_info; + unsigned long screen_bitmap; + unsigned long v86flags, v86mask, saved_sp0; + unsigned int saved_fs, saved_gs; +#endif +/* IO permissions */ + unsigned long *io_bitmap_ptr; + unsigned long iopl; +/* max allowed port in the bitmap, in bytes: */ + unsigned io_bitmap_max; +/* MSR_IA32_DEBUGCTLMSR value to switch in if TIF_DEBUGCTLMSR is set. */ + unsigned long debugctlmsr; +/* Debug Store - if not 0 points to a DS Save Area configuration; + * goes into MSR_IA32_DS_AREA */ + unsigned long ds_area_msr; +}; + +static inline unsigned long native_get_debugreg(int regno) +{ + unsigned long val = 0; /* Damn you, gcc! */ + + switch (regno) { + case 0: + asm("mov %%db0, %0" :"=r" (val)); break; + case 1: + asm("mov %%db1, %0" :"=r" (val)); break; + case 2: + asm("mov %%db2, %0" :"=r" (val)); break; + case 3: + asm("mov %%db3, %0" :"=r" (val)); break; + case 6: + asm("mov %%db6, %0" :"=r" (val)); break; + case 7: + asm("mov %%db7, %0" :"=r" (val)); break; + default: + BUG(); + } + return val; +} + +static inline void native_set_debugreg(int regno, unsigned long value) +{ + switch (regno) { + case 0: + asm("mov %0,%%db0" : /* no output */ :"r" (value)); + break; + case 1: + asm("mov %0,%%db1" : /* no output */ :"r" (value)); + break; + case 2: + asm("mov %0,%%db2" : /* no output */ :"r" (value)); + break; + case 3: + asm("mov %0,%%db3" : /* no output */ :"r" (value)); + break; + case 6: + asm("mov %0,%%db6" : /* no output */ :"r" (value)); + break; + case 7: + asm("mov %0,%%db7" : /* no output */ :"r" (value)); + break; + default: + BUG(); + } +} + +/* + * Set IOPL bits in EFLAGS from given mask + */ +static inline void native_set_iopl_mask(unsigned mask) +{ +#ifdef CONFIG_X86_32 + unsigned int reg; + __asm__ __volatile__ ("pushfl;" + "popl %0;" + "andl %1, %0;" + "orl %2, %0;" + "pushl %0;" + "popfl" + : "=&r" (reg) + : "i" (~X86_EFLAGS_IOPL), "r" (mask)); +#endif +} + +static inline void native_load_sp0(struct tss_struct *tss, + struct thread_struct *thread) +{ + tss->x86_tss.sp0 = thread->sp0; +#ifdef CONFIG_X86_32 + /* Only happens when SEP is enabled, no need to test "SEP"arately */ + if (unlikely(tss->x86_tss.ss1 != thread->sysenter_cs)) { + tss->x86_tss.ss1 = thread->sysenter_cs; + wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0); + } +#endif +} + +static inline void native_swapgs(void) +{ +#ifdef CONFIG_X86_64 + asm volatile("swapgs" ::: "memory"); +#endif +} + +#ifdef CONFIG_PARAVIRT +#include <asm/paravirt.h> +#else +#define __cpuid native_cpuid +#define paravirt_enabled() 0 + +/* + * These special macros can be used to get or set a debugging register + */ +#define get_debugreg(var, register) \ + (var) = native_get_debugreg(register) +#define set_debugreg(value, register) \ + native_set_debugreg(register, value) + +static inline void load_sp0(struct tss_struct *tss, + struct thread_struct *thread) +{ + native_load_sp0(tss, thread); +} + +#define set_iopl_mask native_set_iopl_mask +#define SWAPGS swapgs +#endif /* CONFIG_PARAVIRT */ + +/* + * Save the cr4 feature set we're using (ie + * Pentium 4MB enable and PPro Global page + * enable), so that any CPU's that boot up + * after us can get the correct flags. + */ +extern unsigned long mmu_cr4_features; + +static inline void set_in_cr4(unsigned long mask) +{ + unsigned cr4; + mmu_cr4_features |= mask; + cr4 = read_cr4(); + cr4 |= mask; + write_cr4(cr4); +} + +static inline void clear_in_cr4(unsigned long mask) +{ + unsigned cr4; + mmu_cr4_features &= ~mask; + cr4 = read_cr4(); + cr4 &= ~mask; + write_cr4(cr4); +} + +struct microcode_header { + unsigned int hdrver; + unsigned int rev; + unsigned int date; + unsigned int sig; + unsigned int cksum; + unsigned int ldrver; + unsigned int pf; + unsigned int datasize; + unsigned int totalsize; + unsigned int reserved[3]; +}; + +struct microcode { + struct microcode_header hdr; + unsigned int bits[0]; +}; + +typedef struct microcode microcode_t; +typedef struct microcode_header microcode_header_t; + +/* microcode format is extended from prescott processors */ +struct extended_signature { + unsigned int sig; + unsigned int pf; + unsigned int cksum; +}; + +struct extended_sigtable { + unsigned int count; + unsigned int cksum; + unsigned int reserved[3]; + struct extended_signature sigs[0]; +}; + +typedef struct { + unsigned long seg; +} mm_segment_t; + + +/* + * create a kernel thread without removing it from tasklists + */ +extern int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags); + +/* Free all resources held by a thread. */ +extern void release_thread(struct task_struct *); + +/* Prepare to copy thread state - unlazy all lazy status */ +extern void prepare_to_copy(struct task_struct *tsk); + +unsigned long get_wchan(struct task_struct *p); + +/* + * Generic CPUID function + * clear %ecx since some cpus (Cyrix MII) do not set or clear %ecx + * resulting in stale register contents being returned. + */ +static inline void cpuid(unsigned int op, + unsigned int *eax, unsigned int *ebx, + unsigned int *ecx, unsigned int *edx) +{ + *eax = op; + *ecx = 0; + __cpuid(eax, ebx, ecx, edx); +} + +/* Some CPUID calls want 'count' to be placed in ecx */ +static inline void cpuid_count(unsigned int op, int count, + unsigned int *eax, unsigned int *ebx, + unsigned int *ecx, unsigned int *edx) +{ + *eax = op; + *ecx = count; + __cpuid(eax, ebx, ecx, edx); +} + +/* + * CPUID functions returning a single datum + */ +static inline unsigned int cpuid_eax(unsigned int op) +{ + unsigned int eax, ebx, ecx, edx; + + cpuid(op, &eax, &ebx, &ecx, &edx); + return eax; +} +static inline unsigned int cpuid_ebx(unsigned int op) +{ + unsigned int eax, ebx, ecx, edx; + + cpuid(op, &eax, &ebx, &ecx, &edx); + return ebx; +} +static inline unsigned int cpuid_ecx(unsigned int op) +{ + unsigned int eax, ebx, ecx, edx; + + cpuid(op, &eax, &ebx, &ecx, &edx); + return ecx; +} +static inline unsigned int cpuid_edx(unsigned int op) +{ + unsigned int eax, ebx, ecx, edx; + + cpuid(op, &eax, &ebx, &ecx, &edx); + return edx; +} + +/* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */ +static inline void rep_nop(void) +{ + __asm__ __volatile__("rep;nop": : :"memory"); +} + +/* Stop speculative execution */ +static inline void sync_core(void) +{ + int tmp; + asm volatile("cpuid" : "=a" (tmp) : "0" (1) + : "ebx", "ecx", "edx", "memory"); +} + +#define cpu_relax() rep_nop() + +static inline void __monitor(const void *eax, unsigned long ecx, + unsigned long edx) +{ + /* "monitor %eax,%ecx,%edx;" */ + asm volatile( + ".byte 0x0f,0x01,0xc8;" + : :"a" (eax), "c" (ecx), "d"(edx)); +} + +static inline void __mwait(unsigned long eax, unsigned long ecx) +{ + /* "mwait %eax,%ecx;" */ + asm volatile( + ".byte 0x0f,0x01,0xc9;" + : :"a" (eax), "c" (ecx)); +} + +static inline void __sti_mwait(unsigned long eax, unsigned long ecx) +{ + /* "mwait %eax,%ecx;" */ + asm volatile( + "sti; .byte 0x0f,0x01,0xc9;" + : :"a" (eax), "c" (ecx)); +} + +extern void mwait_idle_with_hints(unsigned long eax, unsigned long ecx); + +extern int force_mwait; + +extern void select_idle_routine(const struct cpuinfo_x86 *c); + +extern unsigned long boot_option_idle_override; + +extern void enable_sep_cpu(void); +extern int sysenter_setup(void); + +/* Defined in head.S */ +extern struct desc_ptr early_gdt_descr; + +extern void cpu_set_gdt(int); +extern void switch_to_new_gdt(void); +extern void cpu_init(void); +extern void init_gdt(int cpu); + +/* from system description table in BIOS. Mostly for MCA use, but + * others may find it useful. */ +extern unsigned int machine_id; +extern unsigned int machine_submodel_id; +extern unsigned int BIOS_revision; +extern unsigned int mca_pentium_flag; + +/* Boot loader type from the setup header */ +extern int bootloader_type; + +extern char ignore_fpu_irq; +#define cache_line_size() (boot_cpu_data.x86_cache_alignment) + +#define HAVE_ARCH_PICK_MMAP_LAYOUT 1 +#define ARCH_HAS_PREFETCHW +#define ARCH_HAS_SPINLOCK_PREFETCH + +#ifdef CONFIG_X86_32 +#define BASE_PREFETCH ASM_NOP4 +#define ARCH_HAS_PREFETCH +#else +#define BASE_PREFETCH "prefetcht0 (%1)" +#endif + +/* Prefetch instructions for Pentium III and AMD Athlon */ +/* It's not worth to care about 3dnow! prefetches for the K6 + because they are microcoded there and very slow. + However we don't do prefetches for pre XP Athlons currently + That should be fixed. */ +static inline void prefetch(const void *x) +{ + alternative_input(BASE_PREFETCH, + "prefetchnta (%1)", + X86_FEATURE_XMM, + "r" (x)); +} + +/* 3dnow! prefetch to get an exclusive cache line. Useful for + spinlocks to avoid one state transition in the cache coherency protocol. */ +static inline void prefetchw(const void *x) +{ + alternative_input(BASE_PREFETCH, + "prefetchw (%1)", + X86_FEATURE_3DNOW, + "r" (x)); +} + +#define spin_lock_prefetch(x) prefetchw(x) #ifdef CONFIG_X86_32 -# include "processor_32.h" +/* + * User space process size: 3GB (default). + */ +#define TASK_SIZE (PAGE_OFFSET) + +#define INIT_THREAD { \ + .sp0 = sizeof(init_stack) + (long)&init_stack, \ + .vm86_info = NULL, \ + .sysenter_cs = __KERNEL_CS, \ + .io_bitmap_ptr = NULL, \ + .fs = __KERNEL_PERCPU, \ +} + +/* + * Note that the .io_bitmap member must be extra-big. This is because + * the CPU will access an additional byte beyond the end of the IO + * permission bitmap. The extra byte must be all 1 bits, and must + * be within the limit. + */ +#define INIT_TSS { \ + .x86_tss = { \ + .sp0 = sizeof(init_stack) + (long)&init_stack, \ + .ss0 = __KERNEL_DS, \ + .ss1 = __KERNEL_CS, \ + .io_bitmap_base = INVALID_IO_BITMAP_OFFSET, \ + }, \ + .io_bitmap = { [0 ... IO_BITMAP_LONGS] = ~0 }, \ +} + +#define start_thread(regs, new_eip, new_esp) do { \ + __asm__("movl %0,%%gs": :"r" (0)); \ + regs->fs = 0; \ + set_fs(USER_DS); \ + regs->ds = __USER_DS; \ + regs->es = __USER_DS; \ + regs->ss = __USER_DS; \ + regs->cs = __USER_CS; \ + regs->ip = new_eip; \ + regs->sp = new_esp; \ +} while (0) + + +extern unsigned long thread_saved_pc(struct task_struct *tsk); + +#define THREAD_SIZE_LONGS (THREAD_SIZE/sizeof(unsigned long)) +#define KSTK_TOP(info) \ +({ \ + unsigned long *__ptr = (unsigned long *)(info); \ + (unsigned long)(&__ptr[THREAD_SIZE_LONGS]); \ +}) + +/* + * The below -8 is to reserve 8 bytes on top of the ring0 stack. + * This is necessary to guarantee that the entire "struct pt_regs" + * is accessable even if the CPU haven't stored the SS/ESP registers + * on the stack (interrupt gate does not save these registers + * when switching to the same priv ring). + * Therefore beware: accessing the ss/esp fields of the + * "struct pt_regs" is possible, but they may contain the + * completely wrong values. + */ +#define task_pt_regs(task) \ +({ \ + struct pt_regs *__regs__; \ + __regs__ = (struct pt_regs *)(KSTK_TOP(task_stack_page(task))-8); \ + __regs__ - 1; \ +}) + +#define KSTK_ESP(task) (task_pt_regs(task)->sp) + #else -# include "processor_64.h" +/* + * User space process size. 47bits minus one guard page. + */ +#define TASK_SIZE64 (0x800000000000UL - 4096) + +/* This decides where the kernel will search for a free chunk of vm + * space during mmap's. + */ +#define IA32_PAGE_OFFSET ((current->personality & ADDR_LIMIT_3GB) ? \ + 0xc0000000 : 0xFFFFe000) + +#define TASK_SIZE (test_thread_flag(TIF_IA32) ? \ + IA32_PAGE_OFFSET : TASK_SIZE64) +#define TASK_SIZE_OF(child) ((test_tsk_thread_flag(child, TIF_IA32)) ? \ + IA32_PAGE_OFFSET : TASK_SIZE64) + +#define INIT_THREAD { \ + .sp0 = (unsigned long)&init_stack + sizeof(init_stack) \ +} + +#define INIT_TSS { \ + .x86_tss.sp0 = (unsigned long)&init_stack + sizeof(init_stack) \ +} + +#define start_thread(regs, new_rip, new_rsp) do { \ + asm volatile("movl %0,%%fs; movl %0,%%es; movl %0,%%ds": :"r" (0)); \ + load_gs_index(0); \ + (regs)->ip = (new_rip); \ + (regs)->sp = (new_rsp); \ + write_pda(oldrsp, (new_rsp)); \ + (regs)->cs = __USER_CS; \ + (regs)->ss = __USER_DS; \ + (regs)->flags = 0x200; \ + set_fs(USER_DS); \ +} while (0) + +/* + * Return saved PC of a blocked thread. + * What is this good for? it will be always the scheduler or ret_from_fork. + */ +#define thread_saved_pc(t) (*(unsigned long *)((t)->thread.sp - 8)) + +#define task_pt_regs(tsk) ((struct pt_regs *)(tsk)->thread.sp0 - 1) +#define KSTK_ESP(tsk) -1 /* sorry. doesn't work for syscall. */ +#endif /* CONFIG_X86_64 */ + +/* This decides where the kernel will search for a free chunk of vm + * space during mmap's. + */ +#define TASK_UNMAPPED_BASE (PAGE_ALIGN(TASK_SIZE / 3)) + +#define KSTK_EIP(task) (task_pt_regs(task)->ip) + #endif diff --git a/include/asm-x86/processor_32.h b/include/asm-x86/processor_32.h deleted file mode 100644 index 13976b086837..000000000000 --- a/include/asm-x86/processor_32.h +++ /dev/null @@ -1,786 +0,0 @@ -/* - * include/asm-i386/processor.h - * - * Copyright (C) 1994 Linus Torvalds - */ - -#ifndef __ASM_I386_PROCESSOR_H -#define __ASM_I386_PROCESSOR_H - -#include <asm/vm86.h> -#include <asm/math_emu.h> -#include <asm/segment.h> -#include <asm/page.h> -#include <asm/types.h> -#include <asm/sigcontext.h> -#include <asm/cpufeature.h> -#include <asm/msr.h> -#include <asm/system.h> -#include <linux/cache.h> -#include <linux/threads.h> -#include <asm/percpu.h> -#include <linux/cpumask.h> -#include <linux/init.h> -#include <asm/processor-flags.h> - -/* flag for disabling the tsc */ -extern int tsc_disable; - -struct desc_struct { - unsigned long a,b; -}; - -#define desc_empty(desc) \ - (!((desc)->a | (desc)->b)) - -#define desc_equal(desc1, desc2) \ - (((desc1)->a == (desc2)->a) && ((desc1)->b == (desc2)->b)) -/* - * Default implementation of macro that returns current - * instruction pointer ("program counter"). - */ -#define current_text_addr() ({ void *pc; __asm__("movl $1f,%0\n1:":"=g" (pc)); pc; }) - -/* - * CPU type and hardware bug flags. Kept separately for each CPU. - * Members of this structure are referenced in head.S, so think twice - * before touching them. [mj] - */ - -struct cpuinfo_x86 { - __u8 x86; /* CPU family */ - __u8 x86_vendor; /* CPU vendor */ - __u8 x86_model; - __u8 x86_mask; - char wp_works_ok; /* It doesn't on 386's */ - char hlt_works_ok; /* Problems on some 486Dx4's and old 386's */ - char hard_math; - char rfu; - int cpuid_level; /* Maximum supported CPUID level, -1=no CPUID */ - unsigned long x86_capability[NCAPINTS]; - char x86_vendor_id[16]; - char x86_model_id[64]; - int x86_cache_size; /* in KB - valid for CPUS which support this - call */ - int x86_cache_alignment; /* In bytes */ - char fdiv_bug; - char f00f_bug; - char coma_bug; - char pad0; - int x86_power; - unsigned long loops_per_jiffy; -#ifdef CONFIG_SMP - cpumask_t llc_shared_map; /* cpus sharing the last level cache */ -#endif - unsigned char x86_max_cores; /* cpuid returned max cores value */ - unsigned char apicid; - unsigned short x86_clflush_size; -#ifdef CONFIG_SMP - unsigned char booted_cores; /* number of cores as seen by OS */ - __u8 phys_proc_id; /* Physical processor id. */ - __u8 cpu_core_id; /* Core id */ - __u8 cpu_index; /* index into per_cpu list */ -#endif -} __attribute__((__aligned__(SMP_CACHE_BYTES))); - -#define X86_VENDOR_INTEL 0 -#define X86_VENDOR_CYRIX 1 -#define X86_VENDOR_AMD 2 -#define X86_VENDOR_UMC 3 -#define X86_VENDOR_NEXGEN 4 -#define X86_VENDOR_CENTAUR 5 -#define X86_VENDOR_TRANSMETA 7 -#define X86_VENDOR_NSC 8 -#define X86_VENDOR_NUM 9 -#define X86_VENDOR_UNKNOWN 0xff - -/* - * capabilities of CPUs - */ - -extern struct cpuinfo_x86 boot_cpu_data; -extern struct cpuinfo_x86 new_cpu_data; -extern struct tss_struct doublefault_tss; -DECLARE_PER_CPU(struct tss_struct, init_tss); - -#ifdef CONFIG_SMP -DECLARE_PER_CPU(struct cpuinfo_x86, cpu_info); -#define cpu_data(cpu) per_cpu(cpu_info, cpu) -#define current_cpu_data cpu_data(smp_processor_id()) -#else -#define cpu_data(cpu) boot_cpu_data -#define current_cpu_data boot_cpu_data -#endif - -/* - * the following now lives in the per cpu area: - * extern int cpu_llc_id[NR_CPUS]; - */ -DECLARE_PER_CPU(u8, cpu_llc_id); -extern char ignore_fpu_irq; - -void __init cpu_detect(struct cpuinfo_x86 *c); - -extern void identify_boot_cpu(void); -extern void identify_secondary_cpu(struct cpuinfo_x86 *); -extern void print_cpu_info(struct cpuinfo_x86 *); -extern void init_scattered_cpuid_features(struct cpuinfo_x86 *c); -extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c); -extern unsigned short num_cache_leaves; - -#ifdef CONFIG_X86_HT -extern void detect_ht(struct cpuinfo_x86 *c); -#else -static inline void detect_ht(struct cpuinfo_x86 *c) {} -#endif - -static inline void native_cpuid(unsigned int *eax, unsigned int *ebx, - unsigned int *ecx, unsigned int *edx) -{ - /* ecx is often an input as well as an output. */ - __asm__("cpuid" - : "=a" (*eax), - "=b" (*ebx), - "=c" (*ecx), - "=d" (*edx) - : "0" (*eax), "2" (*ecx)); -} - -#define load_cr3(pgdir) write_cr3(__pa(pgdir)) - -/* - * Save the cr4 feature set we're using (ie - * Pentium 4MB enable and PPro Global page - * enable), so that any CPU's that boot up - * after us can get the correct flags. - */ -extern unsigned long mmu_cr4_features; - -static inline void set_in_cr4 (unsigned long mask) -{ - unsigned cr4; - mmu_cr4_features |= mask; - cr4 = read_cr4(); - cr4 |= mask; - write_cr4(cr4); -} - -static inline void clear_in_cr4 (unsigned long mask) -{ - unsigned cr4; - mmu_cr4_features &= ~mask; - cr4 = read_cr4(); - cr4 &= ~mask; - write_cr4(cr4); -} - -/* Stop speculative execution */ -static inline void sync_core(void) -{ - int tmp; - asm volatile("cpuid" : "=a" (tmp) : "0" (1) : "ebx","ecx","edx","memory"); -} - -static inline void __monitor(const void *eax, unsigned long ecx, - unsigned long edx) -{ - /* "monitor %eax,%ecx,%edx;" */ - asm volatile( - ".byte 0x0f,0x01,0xc8;" - : :"a" (eax), "c" (ecx), "d"(edx)); -} - -static inline void __mwait(unsigned long eax, unsigned long ecx) -{ - /* "mwait %eax,%ecx;" */ - asm volatile( - ".byte 0x0f,0x01,0xc9;" - : :"a" (eax), "c" (ecx)); -} - -extern void mwait_idle_with_hints(unsigned long eax, unsigned long ecx); - -/* from system description table in BIOS. Mostly for MCA use, but -others may find it useful. */ -extern unsigned int machine_id; -extern unsigned int machine_submodel_id; -extern unsigned int BIOS_revision; -extern unsigned int mca_pentium_flag; - -/* Boot loader type from the setup header */ -extern int bootloader_type; - -/* - * User space process size: 3GB (default). - */ -#define TASK_SIZE (PAGE_OFFSET) - -/* This decides where the kernel will search for a free chunk of vm - * space during mmap's. - */ -#define TASK_UNMAPPED_BASE (PAGE_ALIGN(TASK_SIZE / 3)) - -#define HAVE_ARCH_PICK_MMAP_LAYOUT - -extern void hard_disable_TSC(void); -extern void disable_TSC(void); -extern void hard_enable_TSC(void); - -/* - * Size of io_bitmap. - */ -#define IO_BITMAP_BITS 65536 -#define IO_BITMAP_BYTES (IO_BITMAP_BITS/8) -#define IO_BITMAP_LONGS (IO_BITMAP_BYTES/sizeof(long)) -#define IO_BITMAP_OFFSET offsetof(struct tss_struct,io_bitmap) -#define INVALID_IO_BITMAP_OFFSET 0x8000 -#define INVALID_IO_BITMAP_OFFSET_LAZY 0x9000 - -struct i387_fsave_struct { - long cwd; - long swd; - long twd; - long fip; - long fcs; - long foo; - long fos; - long st_space[20]; /* 8*10 bytes for each FP-reg = 80 bytes */ - long status; /* software status information */ -}; - -struct i387_fxsave_struct { - unsigned short cwd; - unsigned short swd; - unsigned short twd; - unsigned short fop; - long fip; - long fcs; - long foo; - long fos; - long mxcsr; - long mxcsr_mask; - long st_space[32]; /* 8*16 bytes for each FP-reg = 128 bytes */ - long xmm_space[32]; /* 8*16 bytes for each XMM-reg = 128 bytes */ - long padding[56]; -} __attribute__ ((aligned (16))); - -struct i387_soft_struct { - long cwd; - long swd; - long twd; - long fip; - long fcs; - long foo; - long fos; - long st_space[20]; /* 8*10 bytes for each FP-reg = 80 bytes */ - unsigned char ftop, changed, lookahead, no_update, rm, alimit; - struct info *info; - unsigned long entry_eip; -}; - -union i387_union { - struct i387_fsave_struct fsave; - struct i387_fxsave_struct fxsave; - struct i387_soft_struct soft; -}; - -typedef struct { - unsigned long seg; -} mm_segment_t; - -struct thread_struct; - -/* This is the TSS defined by the hardware. */ -struct i386_hw_tss { - unsigned short back_link,__blh; - unsigned long esp0; - unsigned short ss0,__ss0h; - unsigned long esp1; - unsigned short ss1,__ss1h; /* ss1 is used to cache MSR_IA32_SYSENTER_CS */ - unsigned long esp2; - unsigned short ss2,__ss2h; - unsigned long __cr3; - unsigned long eip; - unsigned long eflags; - unsigned long eax,ecx,edx,ebx; - unsigned long esp; - unsigned long ebp; - unsigned long esi; - unsigned long edi; - unsigned short es, __esh; - unsigned short cs, __csh; - unsigned short ss, __ssh; - unsigned short ds, __dsh; - unsigned short fs, __fsh; - unsigned short gs, __gsh; - unsigned short ldt, __ldth; - unsigned short trace, io_bitmap_base; -} __attribute__((packed)); - -struct tss_struct { - struct i386_hw_tss x86_tss; - - /* - * The extra 1 is there because the CPU will access an - * additional byte beyond the end of the IO permission - * bitmap. The extra byte must be all 1 bits, and must - * be within the limit. - */ - unsigned long io_bitmap[IO_BITMAP_LONGS + 1]; - /* - * Cache the current maximum and the last task that used the bitmap: - */ - unsigned long io_bitmap_max; - struct thread_struct *io_bitmap_owner; - /* - * pads the TSS to be cacheline-aligned (size is 0x100) - */ - unsigned long __cacheline_filler[35]; - /* - * .. and then another 0x100 bytes for emergency kernel stack - */ - unsigned long stack[64]; -} __attribute__((packed)); - -#define ARCH_MIN_TASKALIGN 16 - -struct thread_struct { -/* cached TLS descriptors. */ - struct desc_struct tls_array[GDT_ENTRY_TLS_ENTRIES]; - unsigned long esp0; - unsigned long sysenter_cs; - unsigned long eip; - unsigned long esp; - unsigned long fs; - unsigned long gs; -/* Hardware debugging registers */ - unsigned long debugreg[8]; /* %%db0-7 debug registers */ -/* fault info */ - unsigned long cr2, trap_no, error_code; -/* floating point info */ - union i387_union i387; -/* virtual 86 mode info */ - struct vm86_struct __user * vm86_info; - unsigned long screen_bitmap; - unsigned long v86flags, v86mask, saved_esp0; - unsigned int saved_fs, saved_gs; -/* IO permissions */ - unsigned long *io_bitmap_ptr; - unsigned long iopl; -/* max allowed port in the bitmap, in bytes: */ - unsigned long io_bitmap_max; -}; - -#define INIT_THREAD { \ - .esp0 = sizeof(init_stack) + (long)&init_stack, \ - .vm86_info = NULL, \ - .sysenter_cs = __KERNEL_CS, \ - .io_bitmap_ptr = NULL, \ - .fs = __KERNEL_PERCPU, \ -} - -/* - * Note that the .io_bitmap member must be extra-big. This is because - * the CPU will access an additional byte beyond the end of the IO - * permission bitmap. The extra byte must be all 1 bits, and must - * be within the limit. - */ -#define INIT_TSS { \ - .x86_tss = { \ - .esp0 = sizeof(init_stack) + (long)&init_stack, \ - .ss0 = __KERNEL_DS, \ - .ss1 = __KERNEL_CS, \ - .io_bitmap_base = INVALID_IO_BITMAP_OFFSET, \ - }, \ - .io_bitmap = { [ 0 ... IO_BITMAP_LONGS] = ~0 }, \ -} - -#define start_thread(regs, new_eip, new_esp) do { \ - __asm__("movl %0,%%gs": :"r" (0)); \ - regs->xfs = 0; \ - set_fs(USER_DS); \ - regs->xds = __USER_DS; \ - regs->xes = __USER_DS; \ - regs->xss = __USER_DS; \ - regs->xcs = __USER_CS; \ - regs->eip = new_eip; \ - regs->esp = new_esp; \ -} while (0) - -/* Forward declaration, a strange C thing */ -struct task_struct; -struct mm_struct; - -/* Free all resources held by a thread. */ -extern void release_thread(struct task_struct *); - -/* Prepare to copy thread state - unlazy all lazy status */ -extern void prepare_to_copy(struct task_struct *tsk); - -/* - * create a kernel thread without removing it from tasklists - */ -extern int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags); - -extern unsigned long thread_saved_pc(struct task_struct *tsk); -void show_trace(struct task_struct *task, struct pt_regs *regs, unsigned long *stack); - -unsigned long get_wchan(struct task_struct *p); - -#define THREAD_SIZE_LONGS (THREAD_SIZE/sizeof(unsigned long)) -#define KSTK_TOP(info) \ -({ \ - unsigned long *__ptr = (unsigned long *)(info); \ - (unsigned long)(&__ptr[THREAD_SIZE_LONGS]); \ -}) - -/* - * The below -8 is to reserve 8 bytes on top of the ring0 stack. - * This is necessary to guarantee that the entire "struct pt_regs" - * is accessable even if the CPU haven't stored the SS/ESP registers - * on the stack (interrupt gate does not save these registers - * when switching to the same priv ring). - * Therefore beware: accessing the xss/esp fields of the - * "struct pt_regs" is possible, but they may contain the - * completely wrong values. - */ -#define task_pt_regs(task) \ -({ \ - struct pt_regs *__regs__; \ - __regs__ = (struct pt_regs *)(KSTK_TOP(task_stack_page(task))-8); \ - __regs__ - 1; \ -}) - -#define KSTK_EIP(task) (task_pt_regs(task)->eip) -#define KSTK_ESP(task) (task_pt_regs(task)->esp) - - -struct microcode_header { - unsigned int hdrver; - unsigned int rev; - unsigned int date; - unsigned int sig; - unsigned int cksum; - unsigned int ldrver; - unsigned int pf; - unsigned int datasize; - unsigned int totalsize; - unsigned int reserved[3]; -}; - -struct microcode { - struct microcode_header hdr; - unsigned int bits[0]; -}; - -typedef struct microcode microcode_t; -typedef struct microcode_header microcode_header_t; - -/* microcode format is extended from prescott processors */ -struct extended_signature { - unsigned int sig; - unsigned int pf; - unsigned int cksum; -}; - -struct extended_sigtable { - unsigned int count; - unsigned int cksum; - unsigned int reserved[3]; - struct extended_signature sigs[0]; -}; - -/* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */ -static inline void rep_nop(void) -{ - __asm__ __volatile__("rep;nop": : :"memory"); -} - -#define cpu_relax() rep_nop() - -static inline void native_load_esp0(struct tss_struct *tss, struct thread_struct *thread) -{ - tss->x86_tss.esp0 = thread->esp0; - /* This can only happen when SEP is enabled, no need to test "SEP"arately */ - if (unlikely(tss->x86_tss.ss1 != thread->sysenter_cs)) { - tss->x86_tss.ss1 = thread->sysenter_cs; - wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0); - } -} - - -static inline unsigned long native_get_debugreg(int regno) -{ - unsigned long val = 0; /* Damn you, gcc! */ - - switch (regno) { - case 0: - asm("movl %%db0, %0" :"=r" (val)); break; - case 1: - asm("movl %%db1, %0" :"=r" (val)); break; - case 2: - asm("movl %%db2, %0" :"=r" (val)); break; - case 3: - asm("movl %%db3, %0" :"=r" (val)); break; - case 6: - asm("movl %%db6, %0" :"=r" (val)); break; - case 7: - asm("movl %%db7, %0" :"=r" (val)); break; - default: - BUG(); - } - return val; -} - -static inline void native_set_debugreg(int regno, unsigned long value) -{ - switch (regno) { - case 0: - asm("movl %0,%%db0" : /* no output */ :"r" (value)); - break; - case 1: - asm("movl %0,%%db1" : /* no output */ :"r" (value)); - break; - case 2: - asm("movl %0,%%db2" : /* no output */ :"r" (value)); - break; - case 3: - asm("movl %0,%%db3" : /* no output */ :"r" (value)); - break; - case 6: - asm("movl %0,%%db6" : /* no output */ :"r" (value)); - break; - case 7: - asm("movl %0,%%db7" : /* no output */ :"r" (value)); - break; - default: - BUG(); - } -} - -/* - * Set IOPL bits in EFLAGS from given mask - */ -static inline void native_set_iopl_mask(unsigned mask) -{ - unsigned int reg; - __asm__ __volatile__ ("pushfl;" - "popl %0;" - "andl %1, %0;" - "orl %2, %0;" - "pushl %0;" - "popfl" - : "=&r" (reg) - : "i" (~X86_EFLAGS_IOPL), "r" (mask)); -} - -#ifdef CONFIG_PARAVIRT -#include <asm/paravirt.h> -#else -#define paravirt_enabled() 0 -#define __cpuid native_cpuid - -static inline void load_esp0(struct tss_struct *tss, struct thread_struct *thread) -{ - native_load_esp0(tss, thread); -} - -/* - * These special macros can be used to get or set a debugging register - */ -#define get_debugreg(var, register) \ - (var) = native_get_debugreg(register) -#define set_debugreg(value, register) \ - native_set_debugreg(register, value) - -#define set_iopl_mask native_set_iopl_mask -#endif /* CONFIG_PARAVIRT */ - -/* - * Generic CPUID function - * clear %ecx since some cpus (Cyrix MII) do not set or clear %ecx - * resulting in stale register contents being returned. - */ -static inline void cpuid(unsigned int op, - unsigned int *eax, unsigned int *ebx, - unsigned int *ecx, unsigned int *edx) -{ - *eax = op; - *ecx = 0; - __cpuid(eax, ebx, ecx, edx); -} - -/* Some CPUID calls want 'count' to be placed in ecx */ -static inline void cpuid_count(unsigned int op, int count, - unsigned int *eax, unsigned int *ebx, - unsigned int *ecx, unsigned int *edx) -{ - *eax = op; - *ecx = count; - __cpuid(eax, ebx, ecx, edx); -} - -/* - * CPUID functions returning a single datum - */ -static inline unsigned int cpuid_eax(unsigned int op) -{ - unsigned int eax, ebx, ecx, edx; - - cpuid(op, &eax, &ebx, &ecx, &edx); - return eax; -} -static inline unsigned int cpuid_ebx(unsigned int op) -{ - unsigned int eax, ebx, ecx, edx; - - cpuid(op, &eax, &ebx, &ecx, &edx); - return ebx; -} -static inline unsigned int cpuid_ecx(unsigned int op) -{ - unsigned int eax, ebx, ecx, edx; - - cpuid(op, &eax, &ebx, &ecx, &edx); - return ecx; -} -static inline unsigned int cpuid_edx(unsigned int op) -{ - unsigned int eax, ebx, ecx, edx; - - cpuid(op, &eax, &ebx, &ecx, &edx); - return edx; -} - -/* generic versions from gas */ -#define GENERIC_NOP1 ".byte 0x90\n" -#define GENERIC_NOP2 ".byte 0x89,0xf6\n" -#define GENERIC_NOP3 ".byte 0x8d,0x76,0x00\n" -#define GENERIC_NOP4 ".byte 0x8d,0x74,0x26,0x00\n" -#define GENERIC_NOP5 GENERIC_NOP1 GENERIC_NOP4 -#define GENERIC_NOP6 ".byte 0x8d,0xb6,0x00,0x00,0x00,0x00\n" -#define GENERIC_NOP7 ".byte 0x8d,0xb4,0x26,0x00,0x00,0x00,0x00\n" -#define GENERIC_NOP8 GENERIC_NOP1 GENERIC_NOP7 - -/* Opteron nops */ -#define K8_NOP1 GENERIC_NOP1 -#define K8_NOP2 ".byte 0x66,0x90\n" -#define K8_NOP3 ".byte 0x66,0x66,0x90\n" -#define K8_NOP4 ".byte 0x66,0x66,0x66,0x90\n" -#define K8_NOP5 K8_NOP3 K8_NOP2 -#define K8_NOP6 K8_NOP3 K8_NOP3 -#define K8_NOP7 K8_NOP4 K8_NOP3 -#define K8_NOP8 K8_NOP4 K8_NOP4 - -/* K7 nops */ -/* uses eax dependencies (arbitary choice) */ -#define K7_NOP1 GENERIC_NOP1 -#define K7_NOP2 ".byte 0x8b,0xc0\n" -#define K7_NOP3 ".byte 0x8d,0x04,0x20\n" -#define K7_NOP4 ".byte 0x8d,0x44,0x20,0x00\n" -#define K7_NOP5 K7_NOP4 ASM_NOP1 -#define K7_NOP6 ".byte 0x8d,0x80,0,0,0,0\n" -#define K7_NOP7 ".byte 0x8D,0x04,0x05,0,0,0,0\n" -#define K7_NOP8 K7_NOP7 ASM_NOP1 - -/* P6 nops */ -/* uses eax dependencies (Intel-recommended choice) */ -#define P6_NOP1 GENERIC_NOP1 -#define P6_NOP2 ".byte 0x66,0x90\n" -#define P6_NOP3 ".byte 0x0f,0x1f,0x00\n" -#define P6_NOP4 ".byte 0x0f,0x1f,0x40,0\n" -#define P6_NOP5 ".byte 0x0f,0x1f,0x44,0x00,0\n" -#define P6_NOP6 ".byte 0x66,0x0f,0x1f,0x44,0x00,0\n" -#define P6_NOP7 ".byte 0x0f,0x1f,0x80,0,0,0,0\n" -#define P6_NOP8 ".byte 0x0f,0x1f,0x84,0x00,0,0,0,0\n" - -#ifdef CONFIG_MK8 -#define ASM_NOP1 K8_NOP1 -#define ASM_NOP2 K8_NOP2 -#define ASM_NOP3 K8_NOP3 -#define ASM_NOP4 K8_NOP4 -#define ASM_NOP5 K8_NOP5 -#define ASM_NOP6 K8_NOP6 -#define ASM_NOP7 K8_NOP7 -#define ASM_NOP8 K8_NOP8 -#elif defined(CONFIG_MK7) -#define ASM_NOP1 K7_NOP1 -#define ASM_NOP2 K7_NOP2 -#define ASM_NOP3 K7_NOP3 -#define ASM_NOP4 K7_NOP4 -#define ASM_NOP5 K7_NOP5 -#define ASM_NOP6 K7_NOP6 -#define ASM_NOP7 K7_NOP7 -#define ASM_NOP8 K7_NOP8 -#elif defined(CONFIG_M686) || defined(CONFIG_MPENTIUMII) || \ - defined(CONFIG_MPENTIUMIII) || defined(CONFIG_MPENTIUMM) || \ - defined(CONFIG_MCORE2) || defined(CONFIG_PENTIUM4) -#define ASM_NOP1 P6_NOP1 -#define ASM_NOP2 P6_NOP2 -#define ASM_NOP3 P6_NOP3 -#define ASM_NOP4 P6_NOP4 -#define ASM_NOP5 P6_NOP5 -#define ASM_NOP6 P6_NOP6 -#define ASM_NOP7 P6_NOP7 -#define ASM_NOP8 P6_NOP8 -#else -#define ASM_NOP1 GENERIC_NOP1 -#define ASM_NOP2 GENERIC_NOP2 -#define ASM_NOP3 GENERIC_NOP3 -#define ASM_NOP4 GENERIC_NOP4 -#define ASM_NOP5 GENERIC_NOP5 -#define ASM_NOP6 GENERIC_NOP6 -#define ASM_NOP7 GENERIC_NOP7 -#define ASM_NOP8 GENERIC_NOP8 -#endif - -#define ASM_NOP_MAX 8 - -/* Prefetch instructions for Pentium III and AMD Athlon */ -/* It's not worth to care about 3dnow! prefetches for the K6 - because they are microcoded there and very slow. - However we don't do prefetches for pre XP Athlons currently - That should be fixed. */ -#define ARCH_HAS_PREFETCH -static inline void prefetch(const void *x) -{ - alternative_input(ASM_NOP4, - "prefetchnta (%1)", - X86_FEATURE_XMM, - "r" (x)); -} - -#define ARCH_HAS_PREFETCH -#define ARCH_HAS_PREFETCHW -#define ARCH_HAS_SPINLOCK_PREFETCH - -/* 3dnow! prefetch to get an exclusive cache line. Useful for - spinlocks to avoid one state transition in the cache coherency protocol. */ -static inline void prefetchw(const void *x) -{ - alternative_input(ASM_NOP4, - "prefetchw (%1)", - X86_FEATURE_3DNOW, - "r" (x)); -} -#define spin_lock_prefetch(x) prefetchw(x) - -extern void select_idle_routine(const struct cpuinfo_x86 *c); - -#define cache_line_size() (boot_cpu_data.x86_cache_alignment) - -extern unsigned long boot_option_idle_override; -extern void enable_sep_cpu(void); -extern int sysenter_setup(void); - -/* Defined in head.S */ -extern struct Xgt_desc_struct early_gdt_descr; - -extern void cpu_set_gdt(int); -extern void switch_to_new_gdt(void); -extern void cpu_init(void); -extern void init_gdt(int cpu); - -extern int force_mwait; - -#endif /* __ASM_I386_PROCESSOR_H */ diff --git a/include/asm-x86/processor_64.h b/include/asm-x86/processor_64.h deleted file mode 100644 index e4f19970a82b..000000000000 --- a/include/asm-x86/processor_64.h +++ /dev/null @@ -1,452 +0,0 @@ -/* - * include/asm-x86_64/processor.h - * - * Copyright (C) 1994 Linus Torvalds - */ - -#ifndef __ASM_X86_64_PROCESSOR_H -#define __ASM_X86_64_PROCESSOR_H - -#include <asm/segment.h> -#include <asm/page.h> -#include <asm/types.h> -#include <asm/sigcontext.h> -#include <asm/cpufeature.h> -#include <linux/threads.h> -#include <asm/msr.h> -#include <asm/current.h> -#include <asm/system.h> -#include <asm/mmsegment.h> -#include <asm/percpu.h> -#include <linux/personality.h> -#include <linux/cpumask.h> -#include <asm/processor-flags.h> - -#define TF_MASK 0x00000100 -#define IF_MASK 0x00000200 -#define IOPL_MASK 0x00003000 -#define NT_MASK 0x00004000 -#define VM_MASK 0x00020000 -#define AC_MASK 0x00040000 -#define VIF_MASK 0x00080000 /* virtual interrupt flag */ -#define VIP_MASK 0x00100000 /* virtual interrupt pending */ -#define ID_MASK 0x00200000 - -#define desc_empty(desc) \ - (!((desc)->a | (desc)->b)) - -#define desc_equal(desc1, desc2) \ - (((desc1)->a == (desc2)->a) && ((desc1)->b == (desc2)->b)) - -/* - * Default implementation of macro that returns current - * instruction pointer ("program counter"). - */ -#define current_text_addr() ({ void *pc; asm volatile("leaq 1f(%%rip),%0\n1:":"=r"(pc)); pc; }) - -/* - * CPU type and hardware bug flags. Kept separately for each CPU. - */ - -struct cpuinfo_x86 { - __u8 x86; /* CPU family */ - __u8 x86_vendor; /* CPU vendor */ - __u8 x86_model; - __u8 x86_mask; - int cpuid_level; /* Maximum supported CPUID level, -1=no CPUID */ - __u32 x86_capability[NCAPINTS]; - char x86_vendor_id[16]; - char x86_model_id[64]; - int x86_cache_size; /* in KB */ - int x86_clflush_size; - int x86_cache_alignment; - int x86_tlbsize; /* number of 4K pages in DTLB/ITLB combined(in pages)*/ - __u8 x86_virt_bits, x86_phys_bits; - __u8 x86_max_cores; /* cpuid returned max cores value */ - __u32 x86_power; - __u32 extended_cpuid_level; /* Max extended CPUID function supported */ - unsigned long loops_per_jiffy; -#ifdef CONFIG_SMP - cpumask_t llc_shared_map; /* cpus sharing the last level cache */ -#endif - __u8 apicid; -#ifdef CONFIG_SMP - __u8 booted_cores; /* number of cores as seen by OS */ - __u8 phys_proc_id; /* Physical Processor id. */ - __u8 cpu_core_id; /* Core id. */ - __u8 cpu_index; /* index into per_cpu list */ -#endif -} ____cacheline_aligned; - -#define X86_VENDOR_INTEL 0 -#define X86_VENDOR_CYRIX 1 -#define X86_VENDOR_AMD 2 -#define X86_VENDOR_UMC 3 -#define X86_VENDOR_NEXGEN 4 -#define X86_VENDOR_CENTAUR 5 -#define X86_VENDOR_TRANSMETA 7 -#define X86_VENDOR_NUM 8 -#define X86_VENDOR_UNKNOWN 0xff - -#ifdef CONFIG_SMP -DECLARE_PER_CPU(struct cpuinfo_x86, cpu_info); -#define cpu_data(cpu) per_cpu(cpu_info, cpu) -#define current_cpu_data cpu_data(smp_processor_id()) -#else -#define cpu_data(cpu) boot_cpu_data -#define current_cpu_data boot_cpu_data -#endif - -extern char ignore_irq13; - -extern void identify_cpu(struct cpuinfo_x86 *); -extern void print_cpu_info(struct cpuinfo_x86 *); -extern void init_scattered_cpuid_features(struct cpuinfo_x86 *c); -extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c); -extern unsigned short num_cache_leaves; - -/* - * Save the cr4 feature set we're using (ie - * Pentium 4MB enable and PPro Global page - * enable), so that any CPU's that boot up - * after us can get the correct flags. - */ -extern unsigned long mmu_cr4_features; - -static inline void set_in_cr4 (unsigned long mask) -{ - mmu_cr4_features |= mask; - __asm__("movq %%cr4,%%rax\n\t" - "orq %0,%%rax\n\t" - "movq %%rax,%%cr4\n" - : : "irg" (mask) - :"ax"); -} - -static inline void clear_in_cr4 (unsigned long mask) -{ - mmu_cr4_features &= ~mask; - __asm__("movq %%cr4,%%rax\n\t" - "andq %0,%%rax\n\t" - "movq %%rax,%%cr4\n" - : : "irg" (~mask) - :"ax"); -} - - -/* - * User space process size. 47bits minus one guard page. - */ -#define TASK_SIZE64 (0x800000000000UL - 4096) - -/* This decides where the kernel will search for a free chunk of vm - * space during mmap's. - */ -#define IA32_PAGE_OFFSET ((current->personality & ADDR_LIMIT_3GB) ? 0xc0000000 : 0xFFFFe000) - -#define TASK_SIZE (test_thread_flag(TIF_IA32) ? IA32_PAGE_OFFSET : TASK_SIZE64) -#define TASK_SIZE_OF(child) ((test_tsk_thread_flag(child, TIF_IA32)) ? IA32_PAGE_OFFSET : TASK_SIZE64) - -#define TASK_UNMAPPED_BASE PAGE_ALIGN(TASK_SIZE/3) - -/* - * Size of io_bitmap. - */ -#define IO_BITMAP_BITS 65536 -#define IO_BITMAP_BYTES (IO_BITMAP_BITS/8) -#define IO_BITMAP_LONGS (IO_BITMAP_BYTES/sizeof(long)) -#define IO_BITMAP_OFFSET offsetof(struct tss_struct,io_bitmap) -#define INVALID_IO_BITMAP_OFFSET 0x8000 - -struct i387_fxsave_struct { - u16 cwd; - u16 swd; - u16 twd; - u16 fop; - u64 rip; - u64 rdp; - u32 mxcsr; - u32 mxcsr_mask; - u32 st_space[32]; /* 8*16 bytes for each FP-reg = 128 bytes */ - u32 xmm_space[64]; /* 16*16 bytes for each XMM-reg = 256 bytes */ - u32 padding[24]; -} __attribute__ ((aligned (16))); - -union i387_union { - struct i387_fxsave_struct fxsave; -}; - -struct tss_struct { - u32 reserved1; - u64 rsp0; - u64 rsp1; - u64 rsp2; - u64 reserved2; - u64 ist[7]; - u32 reserved3; - u32 reserved4; - u16 reserved5; - u16 io_bitmap_base; - /* - * The extra 1 is there because the CPU will access an - * additional byte beyond the end of the IO permission - * bitmap. The extra byte must be all 1 bits, and must - * be within the limit. Thus we have: - * - * 128 bytes, the bitmap itself, for ports 0..0x3ff - * 8 bytes, for an extra "long" of ~0UL - */ - unsigned long io_bitmap[IO_BITMAP_LONGS + 1]; -} __attribute__((packed)) ____cacheline_aligned; - - -extern struct cpuinfo_x86 boot_cpu_data; -DECLARE_PER_CPU(struct tss_struct,init_tss); -/* Save the original ist values for checking stack pointers during debugging */ -struct orig_ist { - unsigned long ist[7]; -}; -DECLARE_PER_CPU(struct orig_ist, orig_ist); - -#ifdef CONFIG_X86_VSMP -#define ARCH_MIN_TASKALIGN (1 << INTERNODE_CACHE_SHIFT) -#define ARCH_MIN_MMSTRUCT_ALIGN (1 << INTERNODE_CACHE_SHIFT) -#else -#define ARCH_MIN_TASKALIGN 16 -#define ARCH_MIN_MMSTRUCT_ALIGN 0 -#endif - -struct thread_struct { - unsigned long rsp0; - unsigned long rsp; - unsigned long userrsp; /* Copy from PDA */ - unsigned long fs; - unsigned long gs; - unsigned short es, ds, fsindex, gsindex; -/* Hardware debugging registers */ - unsigned long debugreg0; - unsigned long debugreg1; - unsigned long debugreg2; - unsigned long debugreg3; - unsigned long debugreg6; - unsigned long debugreg7; -/* fault info */ - unsigned long cr2, trap_no, error_code; -/* floating point info */ - union i387_union i387 __attribute__((aligned(16))); -/* IO permissions. the bitmap could be moved into the GDT, that would make - switch faster for a limited number of ioperm using tasks. -AK */ - int ioperm; - unsigned long *io_bitmap_ptr; - unsigned io_bitmap_max; -/* cached TLS descriptors. */ - u64 tls_array[GDT_ENTRY_TLS_ENTRIES]; -} __attribute__((aligned(16))); - -#define INIT_THREAD { \ - .rsp0 = (unsigned long)&init_stack + sizeof(init_stack) \ -} - -#define INIT_TSS { \ - .rsp0 = (unsigned long)&init_stack + sizeof(init_stack) \ -} - -#define INIT_MMAP \ -{ &init_mm, 0, 0, NULL, PAGE_SHARED, VM_READ | VM_WRITE | VM_EXEC, 1, NULL, NULL } - -#define start_thread(regs,new_rip,new_rsp) do { \ - asm volatile("movl %0,%%fs; movl %0,%%es; movl %0,%%ds": :"r" (0)); \ - load_gs_index(0); \ - (regs)->rip = (new_rip); \ - (regs)->rsp = (new_rsp); \ - write_pda(oldrsp, (new_rsp)); \ - (regs)->cs = __USER_CS; \ - (regs)->ss = __USER_DS; \ - (regs)->eflags = 0x200; \ - set_fs(USER_DS); \ -} while(0) - -#define get_debugreg(var, register) \ - __asm__("movq %%db" #register ", %0" \ - :"=r" (var)) -#define set_debugreg(value, register) \ - __asm__("movq %0,%%db" #register \ - : /* no output */ \ - :"r" (value)) - -struct task_struct; -struct mm_struct; - -/* Free all resources held by a thread. */ -extern void release_thread(struct task_struct *); - -/* Prepare to copy thread state - unlazy all lazy status */ -extern void prepare_to_copy(struct task_struct *tsk); - -/* - * create a kernel thread without removing it from tasklists - */ -extern long kernel_thread(int (*fn)(void *), void * arg, unsigned long flags); - -/* - * Return saved PC of a blocked thread. - * What is this good for? it will be always the scheduler or ret_from_fork. - */ -#define thread_saved_pc(t) (*(unsigned long *)((t)->thread.rsp - 8)) - -extern unsigned long get_wchan(struct task_struct *p); -#define task_pt_regs(tsk) ((struct pt_regs *)(tsk)->thread.rsp0 - 1) -#define KSTK_EIP(tsk) (task_pt_regs(tsk)->rip) -#define KSTK_ESP(tsk) -1 /* sorry. doesn't work for syscall. */ - - -struct microcode_header { - unsigned int hdrver; - unsigned int rev; - unsigned int date; - unsigned int sig; - unsigned int cksum; - unsigned int ldrver; - unsigned int pf; - unsigned int datasize; - unsigned int totalsize; - unsigned int reserved[3]; -}; - -struct microcode { - struct microcode_header hdr; - unsigned int bits[0]; -}; - -typedef struct microcode microcode_t; -typedef struct microcode_header microcode_header_t; - -/* microcode format is extended from prescott processors */ -struct extended_signature { - unsigned int sig; - unsigned int pf; - unsigned int cksum; -}; - -struct extended_sigtable { - unsigned int count; - unsigned int cksum; - unsigned int reserved[3]; - struct extended_signature sigs[0]; -}; - - -#if defined(CONFIG_MPSC) || defined(CONFIG_MCORE2) -#define ASM_NOP1 P6_NOP1 -#define ASM_NOP2 P6_NOP2 -#define ASM_NOP3 P6_NOP3 -#define ASM_NOP4 P6_NOP4 -#define ASM_NOP5 P6_NOP5 -#define ASM_NOP6 P6_NOP6 -#define ASM_NOP7 P6_NOP7 -#define ASM_NOP8 P6_NOP8 -#else -#define ASM_NOP1 K8_NOP1 -#define ASM_NOP2 K8_NOP2 -#define ASM_NOP3 K8_NOP3 -#define ASM_NOP4 K8_NOP4 -#define ASM_NOP5 K8_NOP5 -#define ASM_NOP6 K8_NOP6 -#define ASM_NOP7 K8_NOP7 -#define ASM_NOP8 K8_NOP8 -#endif - -/* Opteron nops */ -#define K8_NOP1 ".byte 0x90\n" -#define K8_NOP2 ".byte 0x66,0x90\n" -#define K8_NOP3 ".byte 0x66,0x66,0x90\n" -#define K8_NOP4 ".byte 0x66,0x66,0x66,0x90\n" -#define K8_NOP5 K8_NOP3 K8_NOP2 -#define K8_NOP6 K8_NOP3 K8_NOP3 -#define K8_NOP7 K8_NOP4 K8_NOP3 -#define K8_NOP8 K8_NOP4 K8_NOP4 - -/* P6 nops */ -/* uses eax dependencies (Intel-recommended choice) */ -#define P6_NOP1 ".byte 0x90\n" -#define P6_NOP2 ".byte 0x66,0x90\n" -#define P6_NOP3 ".byte 0x0f,0x1f,0x00\n" -#define P6_NOP4 ".byte 0x0f,0x1f,0x40,0\n" -#define P6_NOP5 ".byte 0x0f,0x1f,0x44,0x00,0\n" -#define P6_NOP6 ".byte 0x66,0x0f,0x1f,0x44,0x00,0\n" -#define P6_NOP7 ".byte 0x0f,0x1f,0x80,0,0,0,0\n" -#define P6_NOP8 ".byte 0x0f,0x1f,0x84,0x00,0,0,0,0\n" - -#define ASM_NOP_MAX 8 - -/* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */ -static inline void rep_nop(void) -{ - __asm__ __volatile__("rep;nop": : :"memory"); -} - -/* Stop speculative execution */ -static inline void sync_core(void) -{ - int tmp; - asm volatile("cpuid" : "=a" (tmp) : "0" (1) : "ebx","ecx","edx","memory"); -} - -#define ARCH_HAS_PREFETCHW 1 -static inline void prefetchw(void *x) -{ - alternative_input("prefetcht0 (%1)", - "prefetchw (%1)", - X86_FEATURE_3DNOW, - "r" (x)); -} - -#define ARCH_HAS_SPINLOCK_PREFETCH 1 - -#define spin_lock_prefetch(x) prefetchw(x) - -#define cpu_relax() rep_nop() - -static inline void __monitor(const void *eax, unsigned long ecx, - unsigned long edx) -{ - /* "monitor %eax,%ecx,%edx;" */ - asm volatile( - ".byte 0x0f,0x01,0xc8;" - : :"a" (eax), "c" (ecx), "d"(edx)); -} - -static inline void __mwait(unsigned long eax, unsigned long ecx) -{ - /* "mwait %eax,%ecx;" */ - asm volatile( - ".byte 0x0f,0x01,0xc9;" - : :"a" (eax), "c" (ecx)); -} - -static inline void __sti_mwait(unsigned long eax, unsigned long ecx) -{ - /* "mwait %eax,%ecx;" */ - asm volatile( - "sti; .byte 0x0f,0x01,0xc9;" - : :"a" (eax), "c" (ecx)); -} - -extern void mwait_idle_with_hints(unsigned long eax, unsigned long ecx); - -#define stack_current() \ -({ \ - struct thread_info *ti; \ - asm("andq %%rsp,%0; ":"=r" (ti) : "0" (CURRENT_MASK)); \ - ti->task; \ -}) - -#define cache_line_size() (boot_cpu_data.x86_cache_alignment) - -extern unsigned long boot_option_idle_override; -/* Boot loader type from the setup header */ -extern int bootloader_type; - -#define HAVE_ARCH_PICK_MMAP_LAYOUT 1 - -#endif /* __ASM_X86_64_PROCESSOR_H */ diff --git a/include/asm-x86/proto.h b/include/asm-x86/proto.h index dabba55f7ed8..68563c0709ac 100644 --- a/include/asm-x86/proto.h +++ b/include/asm-x86/proto.h @@ -5,87 +5,24 @@ /* misc architecture specific prototypes */ -struct cpuinfo_x86; -struct pt_regs; - -extern void start_kernel(void); -extern void pda_init(int); - extern void early_idt_handler(void); -extern void mcheck_init(struct cpuinfo_x86 *c); extern void init_memory_mapping(unsigned long start, unsigned long end); -extern void system_call(void); -extern int kernel_syscall(void); +extern void system_call(void); extern void syscall_init(void); extern void ia32_syscall(void); -extern void ia32_cstar_target(void); -extern void ia32_sysenter_target(void); - -extern void config_acpi_tables(void); -extern void ia32_syscall(void); - -extern int pmtimer_mark_offset(void); -extern void pmtimer_resume(void); -extern void pmtimer_wait(unsigned); -extern unsigned int do_gettimeoffset_pm(void); -#ifdef CONFIG_X86_PM_TIMER -extern u32 pmtmr_ioport; -#else -#define pmtmr_ioport 0 -#endif -extern int nohpet; - -extern void early_printk(const char *fmt, ...) __attribute__((format(printf,1,2))); - -extern void early_identify_cpu(struct cpuinfo_x86 *c); - -extern int k8_scan_nodes(unsigned long start, unsigned long end); - -extern void numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn); -extern unsigned long numa_free_all_bootmem(void); +extern void ia32_cstar_target(void); +extern void ia32_sysenter_target(void); extern void reserve_bootmem_generic(unsigned long phys, unsigned len); -extern void load_gs_index(unsigned gs); - -extern unsigned long end_pfn_map; - -extern void show_trace(struct task_struct *, struct pt_regs *, unsigned long * rsp); -extern void show_registers(struct pt_regs *regs); - -extern void exception_table_check(void); - -extern void acpi_reserve_bootmem(void); - -extern void swap_low_mappings(void); - -extern void __show_regs(struct pt_regs * regs); -extern void show_regs(struct pt_regs * regs); - extern void syscall32_cpu_init(void); -extern void setup_node_bootmem(int nodeid, unsigned long start, unsigned long end); - -extern void early_quirks(void); extern void check_efer(void); -extern void select_idle_routine(const struct cpuinfo_x86 *c); - -extern unsigned long table_start, table_end; - -extern int exception_trace; -extern unsigned cpu_khz; -extern unsigned tsc_khz; - extern int reboot_force; -extern int notsc_setup(char *); - -extern int gsi_irq_sharing(int gsi); - -extern int force_mwait; long do_arch_prctl(struct task_struct *task, int code, unsigned long addr); diff --git a/include/asm-x86/ptrace-abi.h b/include/asm-x86/ptrace-abi.h index 7524e1233833..81a8ee4c55fc 100644 --- a/include/asm-x86/ptrace-abi.h +++ b/include/asm-x86/ptrace-abi.h @@ -78,4 +78,66 @@ # define PTRACE_SYSEMU_SINGLESTEP 32 #endif +#define PTRACE_SINGLEBLOCK 33 /* resume execution until next branch */ + +#ifndef __ASSEMBLY__ + +#include <asm/types.h> + +/* configuration/status structure used in PTRACE_BTS_CONFIG and + PTRACE_BTS_STATUS commands. +*/ +struct ptrace_bts_config { + /* requested or actual size of BTS buffer in bytes */ + u32 size; + /* bitmask of below flags */ + u32 flags; + /* buffer overflow signal */ + u32 signal; + /* actual size of bts_struct in bytes */ + u32 bts_size; +}; +#endif + +#define PTRACE_BTS_O_TRACE 0x1 /* branch trace */ +#define PTRACE_BTS_O_SCHED 0x2 /* scheduling events w/ jiffies */ +#define PTRACE_BTS_O_SIGNAL 0x4 /* send SIG<signal> on buffer overflow + instead of wrapping around */ +#define PTRACE_BTS_O_CUT_SIZE 0x8 /* cut requested size to max available + instead of failing */ + +#define PTRACE_BTS_CONFIG 40 +/* Configure branch trace recording. + ADDR points to a struct ptrace_bts_config. + DATA gives the size of that buffer. + A new buffer is allocated, iff the size changes. + Returns the number of bytes read. +*/ +#define PTRACE_BTS_STATUS 41 +/* Return the current configuration in a struct ptrace_bts_config + pointed to by ADDR; DATA gives the size of that buffer. + Returns the number of bytes written. +*/ +#define PTRACE_BTS_SIZE 42 +/* Return the number of available BTS records. + DATA and ADDR are ignored. +*/ +#define PTRACE_BTS_GET 43 +/* Get a single BTS record. + DATA defines the index into the BTS array, where 0 is the newest + entry, and higher indices refer to older entries. + ADDR is pointing to struct bts_struct (see asm/ds.h). +*/ +#define PTRACE_BTS_CLEAR 44 +/* Clear the BTS buffer. + DATA and ADDR are ignored. +*/ +#define PTRACE_BTS_DRAIN 45 +/* Read all available BTS records and clear the buffer. + ADDR points to an array of struct bts_struct. + DATA gives the size of that buffer. + BTS records are read from oldest to newest. + Returns number of BTS records drained. +*/ + #endif diff --git a/include/asm-x86/ptrace.h b/include/asm-x86/ptrace.h index 51ddb2590870..d9e04b46a440 100644 --- a/include/asm-x86/ptrace.h +++ b/include/asm-x86/ptrace.h @@ -4,12 +4,15 @@ #include <linux/compiler.h> /* For __user */ #include <asm/ptrace-abi.h> + #ifndef __ASSEMBLY__ #ifdef __i386__ /* this struct defines the way the registers are stored on the stack during a system call. */ +#ifndef __KERNEL__ + struct pt_regs { long ebx; long ecx; @@ -21,7 +24,7 @@ struct pt_regs { int xds; int xes; int xfs; - /* int xgs; */ + /* int gs; */ long orig_eax; long eip; int xcs; @@ -30,44 +33,37 @@ struct pt_regs { int xss; }; -#ifdef __KERNEL__ +#else /* __KERNEL__ */ + +struct pt_regs { + long bx; + long cx; + long dx; + long si; + long di; + long bp; + long ax; + int ds; + int es; + int fs; + /* int gs; */ + long orig_ax; + long ip; + int cs; + long flags; + long sp; + int ss; +}; #include <asm/vm86.h> #include <asm/segment.h> -struct task_struct; -extern void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code); - -/* - * user_mode_vm(regs) determines whether a register set came from user mode. - * This is true if V8086 mode was enabled OR if the register set was from - * protected mode with RPL-3 CS value. This tricky test checks that with - * one comparison. Many places in the kernel can bypass this full check - * if they have already ruled out V8086 mode, so user_mode(regs) can be used. - */ -static inline int user_mode(struct pt_regs *regs) -{ - return (regs->xcs & SEGMENT_RPL_MASK) == USER_RPL; -} -static inline int user_mode_vm(struct pt_regs *regs) -{ - return ((regs->xcs & SEGMENT_RPL_MASK) | (regs->eflags & VM_MASK)) >= USER_RPL; -} -static inline int v8086_mode(struct pt_regs *regs) -{ - return (regs->eflags & VM_MASK); -} - -#define instruction_pointer(regs) ((regs)->eip) -#define frame_pointer(regs) ((regs)->ebp) -#define stack_pointer(regs) ((unsigned long)(regs)) -#define regs_return_value(regs) ((regs)->eax) - -extern unsigned long profile_pc(struct pt_regs *regs); #endif /* __KERNEL__ */ #else /* __i386__ */ +#ifndef __KERNEL__ + struct pt_regs { unsigned long r15; unsigned long r14; @@ -96,47 +92,143 @@ struct pt_regs { /* top of stack page */ }; +#else /* __KERNEL__ */ + +struct pt_regs { + unsigned long r15; + unsigned long r14; + unsigned long r13; + unsigned long r12; + unsigned long bp; + unsigned long bx; +/* arguments: non interrupts/non tracing syscalls only save upto here*/ + unsigned long r11; + unsigned long r10; + unsigned long r9; + unsigned long r8; + unsigned long ax; + unsigned long cx; + unsigned long dx; + unsigned long si; + unsigned long di; + unsigned long orig_ax; +/* end of arguments */ +/* cpu exception frame or undefined */ + unsigned long ip; + unsigned long cs; + unsigned long flags; + unsigned long sp; + unsigned long ss; +/* top of stack page */ +}; + +#endif /* __KERNEL__ */ +#endif /* !__i386__ */ + #ifdef __KERNEL__ -#define user_mode(regs) (!!((regs)->cs & 3)) -#define user_mode_vm(regs) user_mode(regs) -#define instruction_pointer(regs) ((regs)->rip) -#define frame_pointer(regs) ((regs)->rbp) -#define stack_pointer(regs) ((regs)->rsp) -#define regs_return_value(regs) ((regs)->rax) +/* the DS BTS struct is used for ptrace as well */ +#include <asm/ds.h> + +struct task_struct; + +extern void ptrace_bts_take_timestamp(struct task_struct *, enum bts_qualifier); extern unsigned long profile_pc(struct pt_regs *regs); + +extern unsigned long +convert_ip_to_linear(struct task_struct *child, struct pt_regs *regs); + +#ifdef CONFIG_X86_32 +extern void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code); +#else void signal_fault(struct pt_regs *regs, void __user *frame, char *where); +#endif -struct task_struct; +#define regs_return_value(regs) ((regs)->ax) + +/* + * user_mode_vm(regs) determines whether a register set came from user mode. + * This is true if V8086 mode was enabled OR if the register set was from + * protected mode with RPL-3 CS value. This tricky test checks that with + * one comparison. Many places in the kernel can bypass this full check + * if they have already ruled out V8086 mode, so user_mode(regs) can be used. + */ +static inline int user_mode(struct pt_regs *regs) +{ +#ifdef CONFIG_X86_32 + return (regs->cs & SEGMENT_RPL_MASK) == USER_RPL; +#else + return !!(regs->cs & 3); +#endif +} + +static inline int user_mode_vm(struct pt_regs *regs) +{ +#ifdef CONFIG_X86_32 + return ((regs->cs & SEGMENT_RPL_MASK) | + (regs->flags & VM_MASK)) >= USER_RPL; +#else + return user_mode(regs); +#endif +} + +static inline int v8086_mode(struct pt_regs *regs) +{ +#ifdef CONFIG_X86_32 + return (regs->flags & VM_MASK); +#else + return 0; /* No V86 mode support in long mode */ +#endif +} + +/* + * X86_32 CPUs don't save ss and esp if the CPU is already in kernel mode + * when it traps. So regs will be the current sp. + * + * This is valid only for kernel mode traps. + */ +static inline unsigned long kernel_trap_sp(struct pt_regs *regs) +{ +#ifdef CONFIG_X86_32 + return (unsigned long)regs; +#else + return regs->sp; +#endif +} + +static inline unsigned long instruction_pointer(struct pt_regs *regs) +{ + return regs->ip; +} + +static inline unsigned long frame_pointer(struct pt_regs *regs) +{ + return regs->bp; +} + +/* + * These are defined as per linux/ptrace.h, which see. + */ +#define arch_has_single_step() (1) +extern void user_enable_single_step(struct task_struct *); +extern void user_disable_single_step(struct task_struct *); + +extern void user_enable_block_step(struct task_struct *); +#ifdef CONFIG_X86_DEBUGCTLMSR +#define arch_has_block_step() (1) +#else +#define arch_has_block_step() (boot_cpu_data.x86 >= 6) +#endif + +struct user_desc; +extern int do_get_thread_area(struct task_struct *p, int idx, + struct user_desc __user *info); +extern int do_set_thread_area(struct task_struct *p, int idx, + struct user_desc __user *info, int can_allocate); -extern unsigned long -convert_rip_to_linear(struct task_struct *child, struct pt_regs *regs); - -enum { - EF_CF = 0x00000001, - EF_PF = 0x00000004, - EF_AF = 0x00000010, - EF_ZF = 0x00000040, - EF_SF = 0x00000080, - EF_TF = 0x00000100, - EF_IE = 0x00000200, - EF_DF = 0x00000400, - EF_OF = 0x00000800, - EF_IOPL = 0x00003000, - EF_IOPL_RING0 = 0x00000000, - EF_IOPL_RING1 = 0x00001000, - EF_IOPL_RING2 = 0x00002000, - EF_NT = 0x00004000, /* nested task */ - EF_RF = 0x00010000, /* resume */ - EF_VM = 0x00020000, /* virtual mode */ - EF_AC = 0x00040000, /* alignment */ - EF_VIF = 0x00080000, /* virtual interrupt */ - EF_VIP = 0x00100000, /* virtual interrupt pending */ - EF_ID = 0x00200000, /* id */ -}; #endif /* __KERNEL__ */ -#endif /* !__i386__ */ + #endif /* !__ASSEMBLY__ */ #endif diff --git a/include/asm-x86/resume-trace.h b/include/asm-x86/resume-trace.h index 9b6dd093a9f7..46f725b0bc82 100644 --- a/include/asm-x86/resume-trace.h +++ b/include/asm-x86/resume-trace.h @@ -1,5 +1,20 @@ -#ifdef CONFIG_X86_32 -# include "resume-trace_32.h" -#else -# include "resume-trace_64.h" +#ifndef _ASM_X86_RESUME_TRACE_H +#define _ASM_X86_RESUME_TRACE_H + +#include <asm/asm.h> + +#define TRACE_RESUME(user) do { \ + if (pm_trace_enabled) { \ + void *tracedata; \ + asm volatile(_ASM_MOV_UL " $1f,%0\n" \ + ".section .tracedata,\"a\"\n" \ + "1:\t.word %c1\n\t" \ + _ASM_PTR " %c2\n" \ + ".previous" \ + :"=r" (tracedata) \ + : "i" (__LINE__), "i" (__FILE__)); \ + generate_resume_trace(tracedata, user); \ + } \ +} while (0) + #endif diff --git a/include/asm-x86/resume-trace_32.h b/include/asm-x86/resume-trace_32.h deleted file mode 100644 index ec9cfd656230..000000000000 --- a/include/asm-x86/resume-trace_32.h +++ /dev/null @@ -1,13 +0,0 @@ -#define TRACE_RESUME(user) do { \ - if (pm_trace_enabled) { \ - void *tracedata; \ - asm volatile("movl $1f,%0\n" \ - ".section .tracedata,\"a\"\n" \ - "1:\t.word %c1\n" \ - "\t.long %c2\n" \ - ".previous" \ - :"=r" (tracedata) \ - : "i" (__LINE__), "i" (__FILE__)); \ - generate_resume_trace(tracedata, user); \ - } \ -} while (0) diff --git a/include/asm-x86/resume-trace_64.h b/include/asm-x86/resume-trace_64.h deleted file mode 100644 index 34bf998fdf62..000000000000 --- a/include/asm-x86/resume-trace_64.h +++ /dev/null @@ -1,13 +0,0 @@ -#define TRACE_RESUME(user) do { \ - if (pm_trace_enabled) { \ - void *tracedata; \ - asm volatile("movq $1f,%0\n" \ - ".section .tracedata,\"a\"\n" \ - "1:\t.word %c1\n" \ - "\t.quad %c2\n" \ - ".previous" \ - :"=r" (tracedata) \ - : "i" (__LINE__), "i" (__FILE__)); \ - generate_resume_trace(tracedata, user); \ - } \ -} while (0) diff --git a/include/asm-x86/rio.h b/include/asm-x86/rio.h index c7350f6d2015..97cdcc9887ba 100644 --- a/include/asm-x86/rio.h +++ b/include/asm-x86/rio.h @@ -1,6 +1,6 @@ /* - * Derived from include/asm-i386/mach-summit/mach_mpparse.h - * and include/asm-i386/mach-default/bios_ebda.h + * Derived from include/asm-x86/mach-summit/mach_mpparse.h + * and include/asm-x86/mach-default/bios_ebda.h * * Author: Laurent Vivier <Laurent.Vivier@bull.net> */ diff --git a/include/asm-x86/rwlock.h b/include/asm-x86/rwlock.h index f2b64a429e6b..6a8c0d645108 100644 --- a/include/asm-x86/rwlock.h +++ b/include/asm-x86/rwlock.h @@ -2,7 +2,6 @@ #define _ASM_X86_RWLOCK_H #define RW_LOCK_BIAS 0x01000000 -#define RW_LOCK_BIAS_STR "0x01000000" /* Actual code is in asm/spinlock.h or in arch/x86/lib/rwlock.S */ diff --git a/include/asm-x86/rwsem.h b/include/asm-x86/rwsem.h index 041906f3c6df..520a379f4b80 100644 --- a/include/asm-x86/rwsem.h +++ b/include/asm-x86/rwsem.h @@ -2,7 +2,7 @@ * * Written by David Howells (dhowells@redhat.com). * - * Derived from asm-i386/semaphore.h + * Derived from asm-x86/semaphore.h * * * The MSW of the count is the negated number of active writers and waiting @@ -44,10 +44,14 @@ struct rwsem_waiter; -extern struct rw_semaphore *FASTCALL(rwsem_down_read_failed(struct rw_semaphore *sem)); -extern struct rw_semaphore *FASTCALL(rwsem_down_write_failed(struct rw_semaphore *sem)); -extern struct rw_semaphore *FASTCALL(rwsem_wake(struct rw_semaphore *)); -extern struct rw_semaphore *FASTCALL(rwsem_downgrade_wake(struct rw_semaphore *sem)); +extern asmregparm struct rw_semaphore * + rwsem_down_read_failed(struct rw_semaphore *sem); +extern asmregparm struct rw_semaphore * + rwsem_down_write_failed(struct rw_semaphore *sem); +extern asmregparm struct rw_semaphore * + rwsem_wake(struct rw_semaphore *); +extern asmregparm struct rw_semaphore * + rwsem_downgrade_wake(struct rw_semaphore *sem); /* * the semaphore definition diff --git a/include/asm-x86/scatterlist.h b/include/asm-x86/scatterlist.h index 3a1e76257a27..d13c197866d6 100644 --- a/include/asm-x86/scatterlist.h +++ b/include/asm-x86/scatterlist.h @@ -1,5 +1,35 @@ +#ifndef _ASM_X86_SCATTERLIST_H +#define _ASM_X86_SCATTERLIST_H + +#include <asm/types.h> + +struct scatterlist { +#ifdef CONFIG_DEBUG_SG + unsigned long sg_magic; +#endif + unsigned long page_link; + unsigned int offset; + unsigned int length; + dma_addr_t dma_address; +#ifdef CONFIG_X86_64 + unsigned int dma_length; +#endif +}; + +#define ARCH_HAS_SG_CHAIN +#define ISA_DMA_THRESHOLD (0x00ffffff) + +/* + * These macros should be used after a pci_map_sg call has been done + * to get bus addresses of each of the SG entries and their lengths. + * You should only work with the number of sg entries pci_map_sg + * returns. + */ +#define sg_dma_address(sg) ((sg)->dma_address) #ifdef CONFIG_X86_32 -# include "scatterlist_32.h" +# define sg_dma_len(sg) ((sg)->length) #else -# include "scatterlist_64.h" +# define sg_dma_len(sg) ((sg)->dma_length) +#endif + #endif diff --git a/include/asm-x86/scatterlist_32.h b/include/asm-x86/scatterlist_32.h deleted file mode 100644 index 0e7d997a34be..000000000000 --- a/include/asm-x86/scatterlist_32.h +++ /dev/null @@ -1,28 +0,0 @@ -#ifndef _I386_SCATTERLIST_H -#define _I386_SCATTERLIST_H - -#include <asm/types.h> - -struct scatterlist { -#ifdef CONFIG_DEBUG_SG - unsigned long sg_magic; -#endif - unsigned long page_link; - unsigned int offset; - dma_addr_t dma_address; - unsigned int length; -}; - -#define ARCH_HAS_SG_CHAIN - -/* These macros should be used after a pci_map_sg call has been done - * to get bus addresses of each of the SG entries and their lengths. - * You should only work with the number of sg entries pci_map_sg - * returns. - */ -#define sg_dma_address(sg) ((sg)->dma_address) -#define sg_dma_len(sg) ((sg)->length) - -#define ISA_DMA_THRESHOLD (0x00ffffff) - -#endif /* !(_I386_SCATTERLIST_H) */ diff --git a/include/asm-x86/scatterlist_64.h b/include/asm-x86/scatterlist_64.h deleted file mode 100644 index 1847c72befeb..000000000000 --- a/include/asm-x86/scatterlist_64.h +++ /dev/null @@ -1,29 +0,0 @@ -#ifndef _X8664_SCATTERLIST_H -#define _X8664_SCATTERLIST_H - -#include <asm/types.h> - -struct scatterlist { -#ifdef CONFIG_DEBUG_SG - unsigned long sg_magic; -#endif - unsigned long page_link; - unsigned int offset; - unsigned int length; - dma_addr_t dma_address; - unsigned int dma_length; -}; - -#define ARCH_HAS_SG_CHAIN - -#define ISA_DMA_THRESHOLD (0x00ffffff) - -/* These macros should be used after a pci_map_sg call has been done - * to get bus addresses of each of the SG entries and their lengths. - * You should only work with the number of sg entries pci_map_sg - * returns. - */ -#define sg_dma_address(sg) ((sg)->dma_address) -#define sg_dma_len(sg) ((sg)->dma_length) - -#endif diff --git a/include/asm-x86/segment.h b/include/asm-x86/segment.h index 605068280e28..23f0535fec61 100644 --- a/include/asm-x86/segment.h +++ b/include/asm-x86/segment.h @@ -1,5 +1,204 @@ +#ifndef _ASM_X86_SEGMENT_H_ +#define _ASM_X86_SEGMENT_H_ + +/* Simple and small GDT entries for booting only */ + +#define GDT_ENTRY_BOOT_CS 2 +#define __BOOT_CS (GDT_ENTRY_BOOT_CS * 8) + +#define GDT_ENTRY_BOOT_DS (GDT_ENTRY_BOOT_CS + 1) +#define __BOOT_DS (GDT_ENTRY_BOOT_DS * 8) + +#define GDT_ENTRY_BOOT_TSS (GDT_ENTRY_BOOT_CS + 2) +#define __BOOT_TSS (GDT_ENTRY_BOOT_TSS * 8) + #ifdef CONFIG_X86_32 -# include "segment_32.h" +/* + * The layout of the per-CPU GDT under Linux: + * + * 0 - null + * 1 - reserved + * 2 - reserved + * 3 - reserved + * + * 4 - unused <==== new cacheline + * 5 - unused + * + * ------- start of TLS (Thread-Local Storage) segments: + * + * 6 - TLS segment #1 [ glibc's TLS segment ] + * 7 - TLS segment #2 [ Wine's %fs Win32 segment ] + * 8 - TLS segment #3 + * 9 - reserved + * 10 - reserved + * 11 - reserved + * + * ------- start of kernel segments: + * + * 12 - kernel code segment <==== new cacheline + * 13 - kernel data segment + * 14 - default user CS + * 15 - default user DS + * 16 - TSS + * 17 - LDT + * 18 - PNPBIOS support (16->32 gate) + * 19 - PNPBIOS support + * 20 - PNPBIOS support + * 21 - PNPBIOS support + * 22 - PNPBIOS support + * 23 - APM BIOS support + * 24 - APM BIOS support + * 25 - APM BIOS support + * + * 26 - ESPFIX small SS + * 27 - per-cpu [ offset to per-cpu data area ] + * 28 - unused + * 29 - unused + * 30 - unused + * 31 - TSS for double fault handler + */ +#define GDT_ENTRY_TLS_MIN 6 +#define GDT_ENTRY_TLS_MAX (GDT_ENTRY_TLS_MIN + GDT_ENTRY_TLS_ENTRIES - 1) + +#define GDT_ENTRY_DEFAULT_USER_CS 14 +#define __USER_CS (GDT_ENTRY_DEFAULT_USER_CS * 8 + 3) + +#define GDT_ENTRY_DEFAULT_USER_DS 15 +#define __USER_DS (GDT_ENTRY_DEFAULT_USER_DS * 8 + 3) + +#define GDT_ENTRY_KERNEL_BASE 12 + +#define GDT_ENTRY_KERNEL_CS (GDT_ENTRY_KERNEL_BASE + 0) +#define __KERNEL_CS (GDT_ENTRY_KERNEL_CS * 8) + +#define GDT_ENTRY_KERNEL_DS (GDT_ENTRY_KERNEL_BASE + 1) +#define __KERNEL_DS (GDT_ENTRY_KERNEL_DS * 8) + +#define GDT_ENTRY_TSS (GDT_ENTRY_KERNEL_BASE + 4) +#define GDT_ENTRY_LDT (GDT_ENTRY_KERNEL_BASE + 5) + +#define GDT_ENTRY_PNPBIOS_BASE (GDT_ENTRY_KERNEL_BASE + 6) +#define GDT_ENTRY_APMBIOS_BASE (GDT_ENTRY_KERNEL_BASE + 11) + +#define GDT_ENTRY_ESPFIX_SS (GDT_ENTRY_KERNEL_BASE + 14) +#define __ESPFIX_SS (GDT_ENTRY_ESPFIX_SS * 8) + +#define GDT_ENTRY_PERCPU (GDT_ENTRY_KERNEL_BASE + 15) +#ifdef CONFIG_SMP +#define __KERNEL_PERCPU (GDT_ENTRY_PERCPU * 8) #else -# include "segment_64.h" +#define __KERNEL_PERCPU 0 +#endif + +#define GDT_ENTRY_DOUBLEFAULT_TSS 31 + +/* + * The GDT has 32 entries + */ +#define GDT_ENTRIES 32 + +/* The PnP BIOS entries in the GDT */ +#define GDT_ENTRY_PNPBIOS_CS32 (GDT_ENTRY_PNPBIOS_BASE + 0) +#define GDT_ENTRY_PNPBIOS_CS16 (GDT_ENTRY_PNPBIOS_BASE + 1) +#define GDT_ENTRY_PNPBIOS_DS (GDT_ENTRY_PNPBIOS_BASE + 2) +#define GDT_ENTRY_PNPBIOS_TS1 (GDT_ENTRY_PNPBIOS_BASE + 3) +#define GDT_ENTRY_PNPBIOS_TS2 (GDT_ENTRY_PNPBIOS_BASE + 4) + +/* The PnP BIOS selectors */ +#define PNP_CS32 (GDT_ENTRY_PNPBIOS_CS32 * 8) /* segment for calling fn */ +#define PNP_CS16 (GDT_ENTRY_PNPBIOS_CS16 * 8) /* code segment for BIOS */ +#define PNP_DS (GDT_ENTRY_PNPBIOS_DS * 8) /* data segment for BIOS */ +#define PNP_TS1 (GDT_ENTRY_PNPBIOS_TS1 * 8) /* transfer data segment */ +#define PNP_TS2 (GDT_ENTRY_PNPBIOS_TS2 * 8) /* another data segment */ + +/* Bottom two bits of selector give the ring privilege level */ +#define SEGMENT_RPL_MASK 0x3 +/* Bit 2 is table indicator (LDT/GDT) */ +#define SEGMENT_TI_MASK 0x4 + +/* User mode is privilege level 3 */ +#define USER_RPL 0x3 +/* LDT segment has TI set, GDT has it cleared */ +#define SEGMENT_LDT 0x4 +#define SEGMENT_GDT 0x0 + +/* + * Matching rules for certain types of segments. + */ + +/* Matches only __KERNEL_CS, ignoring PnP / USER / APM segments */ +#define SEGMENT_IS_KERNEL_CODE(x) (((x) & 0xfc) == GDT_ENTRY_KERNEL_CS * 8) + +/* Matches __KERNEL_CS and __USER_CS (they must be 2 entries apart) */ +#define SEGMENT_IS_FLAT_CODE(x) (((x) & 0xec) == GDT_ENTRY_KERNEL_CS * 8) + +/* Matches PNP_CS32 and PNP_CS16 (they must be consecutive) */ +#define SEGMENT_IS_PNP_CODE(x) (((x) & 0xf4) == GDT_ENTRY_PNPBIOS_BASE * 8) + + +#else +#include <asm/cache.h> + +#define __KERNEL_CS 0x10 +#define __KERNEL_DS 0x18 + +#define __KERNEL32_CS 0x08 + +/* + * we cannot use the same code segment descriptor for user and kernel + * -- not even in the long flat mode, because of different DPL /kkeil + * The segment offset needs to contain a RPL. Grr. -AK + * GDT layout to get 64bit syscall right (sysret hardcodes gdt offsets) + */ + +#define __USER32_CS 0x23 /* 4*8+3 */ +#define __USER_DS 0x2b /* 5*8+3 */ +#define __USER_CS 0x33 /* 6*8+3 */ +#define __USER32_DS __USER_DS + +#define GDT_ENTRY_TSS 8 /* needs two entries */ +#define GDT_ENTRY_LDT 10 /* needs two entries */ +#define GDT_ENTRY_TLS_MIN 12 +#define GDT_ENTRY_TLS_MAX 14 + +#define GDT_ENTRY_PER_CPU 15 /* Abused to load per CPU data from limit */ +#define __PER_CPU_SEG (GDT_ENTRY_PER_CPU * 8 + 3) + +/* TLS indexes for 64bit - hardcoded in arch_prctl */ +#define FS_TLS 0 +#define GS_TLS 1 + +#define GS_TLS_SEL ((GDT_ENTRY_TLS_MIN+GS_TLS)*8 + 3) +#define FS_TLS_SEL ((GDT_ENTRY_TLS_MIN+FS_TLS)*8 + 3) + +#define GDT_ENTRIES 16 + +#endif + +#ifndef CONFIG_PARAVIRT +#define get_kernel_rpl() 0 +#endif + +/* User mode is privilege level 3 */ +#define USER_RPL 0x3 +/* LDT segment has TI set, GDT has it cleared */ +#define SEGMENT_LDT 0x4 +#define SEGMENT_GDT 0x0 + +/* Bottom two bits of selector give the ring privilege level */ +#define SEGMENT_RPL_MASK 0x3 +/* Bit 2 is table indicator (LDT/GDT) */ +#define SEGMENT_TI_MASK 0x4 + +#define IDT_ENTRIES 256 +#define GDT_SIZE (GDT_ENTRIES * 8) +#define GDT_ENTRY_TLS_ENTRIES 3 +#define TLS_SIZE (GDT_ENTRY_TLS_ENTRIES * 8) + +#ifdef __KERNEL__ +#ifndef __ASSEMBLY__ +extern const char early_idt_handlers[IDT_ENTRIES][10]; +#endif +#endif + #endif diff --git a/include/asm-x86/segment_32.h b/include/asm-x86/segment_32.h deleted file mode 100644 index 597a47c2515f..000000000000 --- a/include/asm-x86/segment_32.h +++ /dev/null @@ -1,148 +0,0 @@ -#ifndef _ASM_SEGMENT_H -#define _ASM_SEGMENT_H - -/* - * The layout of the per-CPU GDT under Linux: - * - * 0 - null - * 1 - reserved - * 2 - reserved - * 3 - reserved - * - * 4 - unused <==== new cacheline - * 5 - unused - * - * ------- start of TLS (Thread-Local Storage) segments: - * - * 6 - TLS segment #1 [ glibc's TLS segment ] - * 7 - TLS segment #2 [ Wine's %fs Win32 segment ] - * 8 - TLS segment #3 - * 9 - reserved - * 10 - reserved - * 11 - reserved - * - * ------- start of kernel segments: - * - * 12 - kernel code segment <==== new cacheline - * 13 - kernel data segment - * 14 - default user CS - * 15 - default user DS - * 16 - TSS - * 17 - LDT - * 18 - PNPBIOS support (16->32 gate) - * 19 - PNPBIOS support - * 20 - PNPBIOS support - * 21 - PNPBIOS support - * 22 - PNPBIOS support - * 23 - APM BIOS support - * 24 - APM BIOS support - * 25 - APM BIOS support - * - * 26 - ESPFIX small SS - * 27 - per-cpu [ offset to per-cpu data area ] - * 28 - unused - * 29 - unused - * 30 - unused - * 31 - TSS for double fault handler - */ -#define GDT_ENTRY_TLS_ENTRIES 3 -#define GDT_ENTRY_TLS_MIN 6 -#define GDT_ENTRY_TLS_MAX (GDT_ENTRY_TLS_MIN + GDT_ENTRY_TLS_ENTRIES - 1) - -#define TLS_SIZE (GDT_ENTRY_TLS_ENTRIES * 8) - -#define GDT_ENTRY_DEFAULT_USER_CS 14 -#define __USER_CS (GDT_ENTRY_DEFAULT_USER_CS * 8 + 3) - -#define GDT_ENTRY_DEFAULT_USER_DS 15 -#define __USER_DS (GDT_ENTRY_DEFAULT_USER_DS * 8 + 3) - -#define GDT_ENTRY_KERNEL_BASE 12 - -#define GDT_ENTRY_KERNEL_CS (GDT_ENTRY_KERNEL_BASE + 0) -#define __KERNEL_CS (GDT_ENTRY_KERNEL_CS * 8) - -#define GDT_ENTRY_KERNEL_DS (GDT_ENTRY_KERNEL_BASE + 1) -#define __KERNEL_DS (GDT_ENTRY_KERNEL_DS * 8) - -#define GDT_ENTRY_TSS (GDT_ENTRY_KERNEL_BASE + 4) -#define GDT_ENTRY_LDT (GDT_ENTRY_KERNEL_BASE + 5) - -#define GDT_ENTRY_PNPBIOS_BASE (GDT_ENTRY_KERNEL_BASE + 6) -#define GDT_ENTRY_APMBIOS_BASE (GDT_ENTRY_KERNEL_BASE + 11) - -#define GDT_ENTRY_ESPFIX_SS (GDT_ENTRY_KERNEL_BASE + 14) -#define __ESPFIX_SS (GDT_ENTRY_ESPFIX_SS * 8) - -#define GDT_ENTRY_PERCPU (GDT_ENTRY_KERNEL_BASE + 15) -#ifdef CONFIG_SMP -#define __KERNEL_PERCPU (GDT_ENTRY_PERCPU * 8) -#else -#define __KERNEL_PERCPU 0 -#endif - -#define GDT_ENTRY_DOUBLEFAULT_TSS 31 - -/* - * The GDT has 32 entries - */ -#define GDT_ENTRIES 32 -#define GDT_SIZE (GDT_ENTRIES * 8) - -/* Simple and small GDT entries for booting only */ - -#define GDT_ENTRY_BOOT_CS 2 -#define __BOOT_CS (GDT_ENTRY_BOOT_CS * 8) - -#define GDT_ENTRY_BOOT_DS (GDT_ENTRY_BOOT_CS + 1) -#define __BOOT_DS (GDT_ENTRY_BOOT_DS * 8) - -/* The PnP BIOS entries in the GDT */ -#define GDT_ENTRY_PNPBIOS_CS32 (GDT_ENTRY_PNPBIOS_BASE + 0) -#define GDT_ENTRY_PNPBIOS_CS16 (GDT_ENTRY_PNPBIOS_BASE + 1) -#define GDT_ENTRY_PNPBIOS_DS (GDT_ENTRY_PNPBIOS_BASE + 2) -#define GDT_ENTRY_PNPBIOS_TS1 (GDT_ENTRY_PNPBIOS_BASE + 3) -#define GDT_ENTRY_PNPBIOS_TS2 (GDT_ENTRY_PNPBIOS_BASE + 4) - -/* The PnP BIOS selectors */ -#define PNP_CS32 (GDT_ENTRY_PNPBIOS_CS32 * 8) /* segment for calling fn */ -#define PNP_CS16 (GDT_ENTRY_PNPBIOS_CS16 * 8) /* code segment for BIOS */ -#define PNP_DS (GDT_ENTRY_PNPBIOS_DS * 8) /* data segment for BIOS */ -#define PNP_TS1 (GDT_ENTRY_PNPBIOS_TS1 * 8) /* transfer data segment */ -#define PNP_TS2 (GDT_ENTRY_PNPBIOS_TS2 * 8) /* another data segment */ - -/* - * The interrupt descriptor table has room for 256 idt's, - * the global descriptor table is dependent on the number - * of tasks we can have.. - */ -#define IDT_ENTRIES 256 - -/* Bottom two bits of selector give the ring privilege level */ -#define SEGMENT_RPL_MASK 0x3 -/* Bit 2 is table indicator (LDT/GDT) */ -#define SEGMENT_TI_MASK 0x4 - -/* User mode is privilege level 3 */ -#define USER_RPL 0x3 -/* LDT segment has TI set, GDT has it cleared */ -#define SEGMENT_LDT 0x4 -#define SEGMENT_GDT 0x0 - -#ifndef CONFIG_PARAVIRT -#define get_kernel_rpl() 0 -#endif -/* - * Matching rules for certain types of segments. - */ - -/* Matches only __KERNEL_CS, ignoring PnP / USER / APM segments */ -#define SEGMENT_IS_KERNEL_CODE(x) (((x) & 0xfc) == GDT_ENTRY_KERNEL_CS * 8) - -/* Matches __KERNEL_CS and __USER_CS (they must be 2 entries apart) */ -#define SEGMENT_IS_FLAT_CODE(x) (((x) & 0xec) == GDT_ENTRY_KERNEL_CS * 8) - -/* Matches PNP_CS32 and PNP_CS16 (they must be consecutive) */ -#define SEGMENT_IS_PNP_CODE(x) (((x) & 0xf4) == GDT_ENTRY_PNPBIOS_BASE * 8) - -#endif diff --git a/include/asm-x86/segment_64.h b/include/asm-x86/segment_64.h deleted file mode 100644 index 04b8ab21328f..000000000000 --- a/include/asm-x86/segment_64.h +++ /dev/null @@ -1,53 +0,0 @@ -#ifndef _ASM_SEGMENT_H -#define _ASM_SEGMENT_H - -#include <asm/cache.h> - -/* Simple and small GDT entries for booting only */ - -#define GDT_ENTRY_BOOT_CS 2 -#define __BOOT_CS (GDT_ENTRY_BOOT_CS * 8) - -#define GDT_ENTRY_BOOT_DS (GDT_ENTRY_BOOT_CS + 1) -#define __BOOT_DS (GDT_ENTRY_BOOT_DS * 8) - -#define __KERNEL_CS 0x10 -#define __KERNEL_DS 0x18 - -#define __KERNEL32_CS 0x08 - -/* - * we cannot use the same code segment descriptor for user and kernel - * -- not even in the long flat mode, because of different DPL /kkeil - * The segment offset needs to contain a RPL. Grr. -AK - * GDT layout to get 64bit syscall right (sysret hardcodes gdt offsets) - */ - -#define __USER32_CS 0x23 /* 4*8+3 */ -#define __USER_DS 0x2b /* 5*8+3 */ -#define __USER_CS 0x33 /* 6*8+3 */ -#define __USER32_DS __USER_DS - -#define GDT_ENTRY_TSS 8 /* needs two entries */ -#define GDT_ENTRY_LDT 10 /* needs two entries */ -#define GDT_ENTRY_TLS_MIN 12 -#define GDT_ENTRY_TLS_MAX 14 - -#define GDT_ENTRY_TLS_ENTRIES 3 - -#define GDT_ENTRY_PER_CPU 15 /* Abused to load per CPU data from limit */ -#define __PER_CPU_SEG (GDT_ENTRY_PER_CPU * 8 + 3) - -/* TLS indexes for 64bit - hardcoded in arch_prctl */ -#define FS_TLS 0 -#define GS_TLS 1 - -#define GS_TLS_SEL ((GDT_ENTRY_TLS_MIN+GS_TLS)*8 + 3) -#define FS_TLS_SEL ((GDT_ENTRY_TLS_MIN+FS_TLS)*8 + 3) - -#define IDT_ENTRIES 256 -#define GDT_ENTRIES 16 -#define GDT_SIZE (GDT_ENTRIES * 8) -#define TLS_SIZE (GDT_ENTRY_TLS_ENTRIES * 8) - -#endif diff --git a/include/asm-x86/semaphore_32.h b/include/asm-x86/semaphore_32.h index 835c1d751a9f..ac96d3804d0c 100644 --- a/include/asm-x86/semaphore_32.h +++ b/include/asm-x86/semaphore_32.h @@ -83,10 +83,10 @@ static inline void init_MUTEX_LOCKED (struct semaphore *sem) sema_init(sem, 0); } -fastcall void __down_failed(void /* special register calling convention */); -fastcall int __down_failed_interruptible(void /* params in registers */); -fastcall int __down_failed_trylock(void /* params in registers */); -fastcall void __up_wakeup(void /* special register calling convention */); +extern asmregparm void __down_failed(atomic_t *count_ptr); +extern asmregparm int __down_failed_interruptible(atomic_t *count_ptr); +extern asmregparm int __down_failed_trylock(atomic_t *count_ptr); +extern asmregparm void __up_wakeup(atomic_t *count_ptr); /* * This is ugly, but we want the default case to fall through. diff --git a/include/asm-x86/setup.h b/include/asm-x86/setup.h index 24d786e07b49..071e054abd82 100644 --- a/include/asm-x86/setup.h +++ b/include/asm-x86/setup.h @@ -3,6 +3,13 @@ #define COMMAND_LINE_SIZE 2048 +#ifndef __ASSEMBLY__ +char *machine_specific_memory_setup(void); +#ifndef CONFIG_PARAVIRT +#define paravirt_post_allocator_init() do {} while (0) +#endif +#endif /* __ASSEMBLY__ */ + #ifdef __KERNEL__ #ifdef __i386__ @@ -51,9 +58,7 @@ void __init add_memory_region(unsigned long long start, extern unsigned long init_pg_tables_end; -#ifndef CONFIG_PARAVIRT -#define paravirt_post_allocator_init() do {} while (0) -#endif + #endif /* __i386__ */ #endif /* _SETUP */ diff --git a/include/asm-x86/sigcontext.h b/include/asm-x86/sigcontext.h index c047f9dc3423..681deade5f00 100644 --- a/include/asm-x86/sigcontext.h +++ b/include/asm-x86/sigcontext.h @@ -63,20 +63,20 @@ struct sigcontext { unsigned short fs, __fsh; unsigned short es, __esh; unsigned short ds, __dsh; - unsigned long edi; - unsigned long esi; - unsigned long ebp; - unsigned long esp; - unsigned long ebx; - unsigned long edx; - unsigned long ecx; - unsigned long eax; + unsigned long di; + unsigned long si; + unsigned long bp; + unsigned long sp; + unsigned long bx; + unsigned long dx; + unsigned long cx; + unsigned long ax; unsigned long trapno; unsigned long err; - unsigned long eip; + unsigned long ip; unsigned short cs, __csh; - unsigned long eflags; - unsigned long esp_at_signal; + unsigned long flags; + unsigned long sp_at_signal; unsigned short ss, __ssh; struct _fpstate __user * fpstate; unsigned long oldmask; @@ -111,16 +111,16 @@ struct sigcontext { unsigned long r13; unsigned long r14; unsigned long r15; - unsigned long rdi; - unsigned long rsi; - unsigned long rbp; - unsigned long rbx; - unsigned long rdx; - unsigned long rax; - unsigned long rcx; - unsigned long rsp; - unsigned long rip; - unsigned long eflags; /* RFLAGS */ + unsigned long di; + unsigned long si; + unsigned long bp; + unsigned long bx; + unsigned long dx; + unsigned long ax; + unsigned long cx; + unsigned long sp; + unsigned long ip; + unsigned long flags; unsigned short cs; unsigned short gs; unsigned short fs; diff --git a/include/asm-x86/sigcontext32.h b/include/asm-x86/sigcontext32.h index 3d657038ab7c..6ffab4fd593a 100644 --- a/include/asm-x86/sigcontext32.h +++ b/include/asm-x86/sigcontext32.h @@ -48,20 +48,20 @@ struct sigcontext_ia32 { unsigned short fs, __fsh; unsigned short es, __esh; unsigned short ds, __dsh; - unsigned int edi; - unsigned int esi; - unsigned int ebp; - unsigned int esp; - unsigned int ebx; - unsigned int edx; - unsigned int ecx; - unsigned int eax; + unsigned int di; + unsigned int si; + unsigned int bp; + unsigned int sp; + unsigned int bx; + unsigned int dx; + unsigned int cx; + unsigned int ax; unsigned int trapno; unsigned int err; - unsigned int eip; + unsigned int ip; unsigned short cs, __csh; - unsigned int eflags; - unsigned int esp_at_signal; + unsigned int flags; + unsigned int sp_at_signal; unsigned short ss, __ssh; unsigned int fpstate; /* really (struct _fpstate_ia32 *) */ unsigned int oldmask; diff --git a/include/asm-x86/signal.h b/include/asm-x86/signal.h index 987a422a2c78..aee7eca585ab 100644 --- a/include/asm-x86/signal.h +++ b/include/asm-x86/signal.h @@ -245,21 +245,14 @@ static __inline__ int sigfindinword(unsigned long word) struct pt_regs; -#define ptrace_signal_deliver(regs, cookie) \ - do { \ - if (current->ptrace & PT_DTRACE) { \ - current->ptrace &= ~PT_DTRACE; \ - (regs)->eflags &= ~TF_MASK; \ - } \ - } while (0) - #else /* __i386__ */ #undef __HAVE_ARCH_SIG_BITOPS +#endif /* !__i386__ */ + #define ptrace_signal_deliver(regs, cookie) do { } while (0) -#endif /* !__i386__ */ #endif /* __KERNEL__ */ #endif /* __ASSEMBLY__ */ diff --git a/include/asm-x86/smp_32.h b/include/asm-x86/smp_32.h index e10b7affdfe5..56152e312287 100644 --- a/include/asm-x86/smp_32.h +++ b/include/asm-x86/smp_32.h @@ -1,51 +1,41 @@ #ifndef __ASM_SMP_H #define __ASM_SMP_H +#ifndef __ASSEMBLY__ +#include <linux/cpumask.h> +#include <linux/init.h> + /* * We need the APIC definitions automatically as part of 'smp.h' */ -#ifndef __ASSEMBLY__ -#include <linux/kernel.h> -#include <linux/threads.h> -#include <linux/cpumask.h> +#ifdef CONFIG_X86_LOCAL_APIC +# include <asm/mpspec.h> +# include <asm/apic.h> +# ifdef CONFIG_X86_IO_APIC +# include <asm/io_apic.h> +# endif #endif -#if defined(CONFIG_X86_LOCAL_APIC) && !defined(__ASSEMBLY__) -#include <linux/bitops.h> -#include <asm/mpspec.h> -#include <asm/apic.h> -#ifdef CONFIG_X86_IO_APIC -#include <asm/io_apic.h> -#endif -#endif +extern cpumask_t cpu_callout_map; +extern cpumask_t cpu_callin_map; -#define BAD_APICID 0xFFu -#ifdef CONFIG_SMP -#ifndef __ASSEMBLY__ +extern int smp_num_siblings; +extern unsigned int num_processors; -/* - * Private routines/data - */ - extern void smp_alloc_memory(void); -extern int pic_mode; -extern int smp_num_siblings; -DECLARE_PER_CPU(cpumask_t, cpu_sibling_map); -DECLARE_PER_CPU(cpumask_t, cpu_core_map); +extern void lock_ipi_call_lock(void); +extern void unlock_ipi_call_lock(void); extern void (*mtrr_hook) (void); extern void zap_low_mappings (void); -extern void lock_ipi_call_lock(void); -extern void unlock_ipi_call_lock(void); -#define MAX_APICID 256 extern u8 __initdata x86_cpu_to_apicid_init[]; -extern void *x86_cpu_to_apicid_ptr; -DECLARE_PER_CPU(u8, x86_cpu_to_apicid); - -#define cpu_physical_id(cpu) per_cpu(x86_cpu_to_apicid, cpu) +extern void *x86_cpu_to_apicid_early_ptr; -extern void set_cpu_sibling_map(int cpu); +DECLARE_PER_CPU(cpumask_t, cpu_sibling_map); +DECLARE_PER_CPU(cpumask_t, cpu_core_map); +DECLARE_PER_CPU(u8, cpu_llc_id); +DECLARE_PER_CPU(u8, x86_cpu_to_apicid); #ifdef CONFIG_HOTPLUG_CPU extern void cpu_exit_clear(void); @@ -53,6 +43,9 @@ extern void cpu_uninit(void); extern void remove_siblinginfo(int cpu); #endif +/* Globals due to paravirt */ +extern void set_cpu_sibling_map(int cpu); + struct smp_ops { void (*smp_prepare_boot_cpu)(void); @@ -67,6 +60,7 @@ struct smp_ops int wait); }; +#ifdef CONFIG_SMP extern struct smp_ops smp_ops; static inline void smp_prepare_boot_cpu(void) @@ -107,10 +101,12 @@ int native_cpu_up(unsigned int cpunum); void native_smp_cpus_done(unsigned int max_cpus); #ifndef CONFIG_PARAVIRT -#define startup_ipi_hook(phys_apicid, start_eip, start_esp) \ -do { } while (0) +#define startup_ipi_hook(phys_apicid, start_eip, start_esp) do { } while (0) #endif +extern int __cpu_disable(void); +extern void __cpu_die(unsigned int cpu); + /* * This function is needed by all SMP systems. It must _always_ be valid * from the initial startup. We map APIC_BASE very early in page_setup(), @@ -119,9 +115,11 @@ do { } while (0) DECLARE_PER_CPU(int, cpu_number); #define raw_smp_processor_id() (x86_read_percpu(cpu_number)) -extern cpumask_t cpu_callout_map; -extern cpumask_t cpu_callin_map; -extern cpumask_t cpu_possible_map; +#define cpu_physical_id(cpu) per_cpu(x86_cpu_to_apicid, cpu) + +extern int safe_smp_processor_id(void); + +void __cpuinit smp_store_cpu_info(int id); /* We don't mark CPUs online until __cpu_up(), so we need another measure */ static inline int num_booting_cpus(void) @@ -129,56 +127,39 @@ static inline int num_booting_cpus(void) return cpus_weight(cpu_callout_map); } -extern int safe_smp_processor_id(void); -extern int __cpu_disable(void); -extern void __cpu_die(unsigned int cpu); -extern unsigned int num_processors; - -void __cpuinit smp_store_cpu_info(int id); - -#endif /* !__ASSEMBLY__ */ - #else /* CONFIG_SMP */ #define safe_smp_processor_id() 0 #define cpu_physical_id(cpu) boot_cpu_physical_apicid -#define NO_PROC_ID 0xFF /* No processor magic marker */ - -#endif /* CONFIG_SMP */ - -#ifndef __ASSEMBLY__ +#endif /* !CONFIG_SMP */ #ifdef CONFIG_X86_LOCAL_APIC -#ifdef APIC_DEFINITION +static __inline int logical_smp_processor_id(void) +{ + /* we don't want to mark this access volatile - bad code generation */ + return GET_APIC_LOGICAL_ID(*(u32 *)(APIC_BASE + APIC_LDR)); +} + +# ifdef APIC_DEFINITION extern int hard_smp_processor_id(void); -#else -#include <mach_apicdef.h> +# else +# include <mach_apicdef.h> static inline int hard_smp_processor_id(void) { /* we don't want to mark this access volatile - bad code generation */ - return GET_APIC_ID(*(unsigned long *)(APIC_BASE+APIC_ID)); + return GET_APIC_ID(*(u32 *)(APIC_BASE + APIC_ID)); } -#endif /* APIC_DEFINITION */ +# endif /* APIC_DEFINITION */ #else /* CONFIG_X86_LOCAL_APIC */ -#ifndef CONFIG_SMP -#define hard_smp_processor_id() 0 -#endif +# ifndef CONFIG_SMP +# define hard_smp_processor_id() 0 +# endif #endif /* CONFIG_X86_LOCAL_APIC */ -extern u8 apicid_2_node[]; - -#ifdef CONFIG_X86_LOCAL_APIC -static __inline int logical_smp_processor_id(void) -{ - /* we don't want to mark this access volatile - bad code generation */ - return GET_APIC_LOGICAL_ID(*(unsigned long *)(APIC_BASE+APIC_LDR)); -} -#endif -#endif - +#endif /* !ASSEMBLY */ #endif diff --git a/include/asm-x86/smp_64.h b/include/asm-x86/smp_64.h index ab612b0ff270..e0a75519ad21 100644 --- a/include/asm-x86/smp_64.h +++ b/include/asm-x86/smp_64.h @@ -1,130 +1,101 @@ #ifndef __ASM_SMP_H #define __ASM_SMP_H -/* - * We need the APIC definitions automatically as part of 'smp.h' - */ -#include <linux/threads.h> #include <linux/cpumask.h> -#include <linux/bitops.h> #include <linux/init.h> -extern int disable_apic; -#include <asm/mpspec.h> +/* + * We need the APIC definitions automatically as part of 'smp.h' + */ #include <asm/apic.h> #include <asm/io_apic.h> -#include <asm/thread_info.h> - -#ifdef CONFIG_SMP - +#include <asm/mpspec.h> #include <asm/pda.h> +#include <asm/thread_info.h> -struct pt_regs; - -extern cpumask_t cpu_present_mask; -extern cpumask_t cpu_possible_map; -extern cpumask_t cpu_online_map; extern cpumask_t cpu_callout_map; extern cpumask_t cpu_initialized; -/* - * Private routines/data - */ - +extern int smp_num_siblings; +extern unsigned int num_processors; + extern void smp_alloc_memory(void); -extern volatile unsigned long smp_invalidate_needed; extern void lock_ipi_call_lock(void); extern void unlock_ipi_call_lock(void); -extern int smp_num_siblings; -extern void smp_send_reschedule(int cpu); + extern int smp_call_function_mask(cpumask_t mask, void (*func)(void *), void *info, int wait); -/* - * cpu_sibling_map and cpu_core_map now live - * in the per cpu area - * - * extern cpumask_t cpu_sibling_map[NR_CPUS]; - * extern cpumask_t cpu_core_map[NR_CPUS]; - */ +extern u16 __initdata x86_cpu_to_apicid_init[]; +extern u16 __initdata x86_bios_cpu_apicid_init[]; +extern void *x86_cpu_to_apicid_early_ptr; +extern void *x86_bios_cpu_apicid_early_ptr; + DECLARE_PER_CPU(cpumask_t, cpu_sibling_map); DECLARE_PER_CPU(cpumask_t, cpu_core_map); -DECLARE_PER_CPU(u8, cpu_llc_id); - -#define SMP_TRAMPOLINE_BASE 0x6000 - -/* - * On x86 all CPUs are mapped 1:1 to the APIC space. - * This simplifies scheduling and IPI sending and - * compresses data structures. - */ +DECLARE_PER_CPU(u16, cpu_llc_id); +DECLARE_PER_CPU(u16, x86_cpu_to_apicid); +DECLARE_PER_CPU(u16, x86_bios_cpu_apicid); -static inline int num_booting_cpus(void) +static inline int cpu_present_to_apicid(int mps_cpu) { - return cpus_weight(cpu_callout_map); + if (cpu_present(mps_cpu)) + return (int)per_cpu(x86_bios_cpu_apicid, mps_cpu); + else + return BAD_APICID; } -#define raw_smp_processor_id() read_pda(cpunumber) +#ifdef CONFIG_SMP + +#define SMP_TRAMPOLINE_BASE 0x6000 extern int __cpu_disable(void); extern void __cpu_die(unsigned int cpu); extern void prefill_possible_map(void); -extern unsigned num_processors; extern unsigned __cpuinitdata disabled_cpus; -#define NO_PROC_ID 0xFF /* No processor magic marker */ - -#endif /* CONFIG_SMP */ +#define raw_smp_processor_id() read_pda(cpunumber) +#define cpu_physical_id(cpu) per_cpu(x86_cpu_to_apicid, cpu) -#define safe_smp_processor_id() smp_processor_id() - -static inline int hard_smp_processor_id(void) -{ - /* we don't want to mark this access volatile - bad code generation */ - return GET_APIC_ID(*(unsigned int *)(APIC_BASE+APIC_ID)); -} +#define stack_smp_processor_id() \ + ({ \ + struct thread_info *ti; \ + __asm__("andq %%rsp,%0; ":"=r" (ti) : "0" (CURRENT_MASK)); \ + ti->cpu; \ +}) /* - * Some lowlevel functions might want to know about - * the real APIC ID <-> CPU # mapping. + * On x86 all CPUs are mapped 1:1 to the APIC space. This simplifies + * scheduling and IPI sending and compresses data structures. */ -extern u8 __initdata x86_cpu_to_apicid_init[]; -extern void *x86_cpu_to_apicid_ptr; -DECLARE_PER_CPU(u8, x86_cpu_to_apicid); /* physical ID */ -extern u8 bios_cpu_apicid[]; - -static inline int cpu_present_to_apicid(int mps_cpu) +static inline int num_booting_cpus(void) { - if (mps_cpu < NR_CPUS) - return (int)bios_cpu_apicid[mps_cpu]; - else - return BAD_APICID; + return cpus_weight(cpu_callout_map); } -#ifndef CONFIG_SMP +extern void smp_send_reschedule(int cpu); + +#else /* CONFIG_SMP */ + +extern unsigned int boot_cpu_id; +#define cpu_physical_id(cpu) boot_cpu_id #define stack_smp_processor_id() 0 -#define cpu_logical_map(x) (x) -#else -#include <asm/thread_info.h> -#define stack_smp_processor_id() \ -({ \ - struct thread_info *ti; \ - __asm__("andq %%rsp,%0; ":"=r" (ti) : "0" (CURRENT_MASK)); \ - ti->cpu; \ -}) -#endif + +#endif /* !CONFIG_SMP */ + +#define safe_smp_processor_id() smp_processor_id() static __inline int logical_smp_processor_id(void) { /* we don't want to mark this access volatile - bad code generation */ - return GET_APIC_LOGICAL_ID(*(unsigned long *)(APIC_BASE+APIC_LDR)); + return GET_APIC_LOGICAL_ID(*(u32 *)(APIC_BASE + APIC_LDR)); +} + +static inline int hard_smp_processor_id(void) +{ + /* we don't want to mark this access volatile - bad code generation */ + return GET_APIC_ID(*(u32 *)(APIC_BASE + APIC_ID)); } -#ifdef CONFIG_SMP -#define cpu_physical_id(cpu) per_cpu(x86_cpu_to_apicid, cpu) -#else -extern unsigned int boot_cpu_id; -#define cpu_physical_id(cpu) boot_cpu_id -#endif /* !CONFIG_SMP */ #endif diff --git a/include/asm-x86/sparsemem.h b/include/asm-x86/sparsemem.h index 3f203b1d9ee8..fa58cd55411a 100644 --- a/include/asm-x86/sparsemem.h +++ b/include/asm-x86/sparsemem.h @@ -1,5 +1,34 @@ +#ifndef _ASM_X86_SPARSEMEM_H +#define _ASM_X86_SPARSEMEM_H + +#ifdef CONFIG_SPARSEMEM +/* + * generic non-linear memory support: + * + * 1) we will not split memory into more chunks than will fit into the flags + * field of the struct page + * + * SECTION_SIZE_BITS 2^n: size of each section + * MAX_PHYSADDR_BITS 2^n: max size of physical address space + * MAX_PHYSMEM_BITS 2^n: how much memory we can have in that space + * + */ + #ifdef CONFIG_X86_32 -# include "sparsemem_32.h" -#else -# include "sparsemem_64.h" +# ifdef CONFIG_X86_PAE +# define SECTION_SIZE_BITS 30 +# define MAX_PHYSADDR_BITS 36 +# define MAX_PHYSMEM_BITS 36 +# else +# define SECTION_SIZE_BITS 26 +# define MAX_PHYSADDR_BITS 32 +# define MAX_PHYSMEM_BITS 32 +# endif +#else /* CONFIG_X86_32 */ +# define SECTION_SIZE_BITS 27 /* matt - 128 is convenient right now */ +# define MAX_PHYSADDR_BITS 40 +# define MAX_PHYSMEM_BITS 40 +#endif + +#endif /* CONFIG_SPARSEMEM */ #endif diff --git a/include/asm-x86/sparsemem_32.h b/include/asm-x86/sparsemem_32.h deleted file mode 100644 index cfeed990585f..000000000000 --- a/include/asm-x86/sparsemem_32.h +++ /dev/null @@ -1,31 +0,0 @@ -#ifndef _I386_SPARSEMEM_H -#define _I386_SPARSEMEM_H -#ifdef CONFIG_SPARSEMEM - -/* - * generic non-linear memory support: - * - * 1) we will not split memory into more chunks than will fit into the - * flags field of the struct page - */ - -/* - * SECTION_SIZE_BITS 2^N: how big each section will be - * MAX_PHYSADDR_BITS 2^N: how much physical address space we have - * MAX_PHYSMEM_BITS 2^N: how much memory we can have in that space - */ -#ifdef CONFIG_X86_PAE -#define SECTION_SIZE_BITS 30 -#define MAX_PHYSADDR_BITS 36 -#define MAX_PHYSMEM_BITS 36 -#else -#define SECTION_SIZE_BITS 26 -#define MAX_PHYSADDR_BITS 32 -#define MAX_PHYSMEM_BITS 32 -#endif - -/* XXX: FIXME -- wli */ -#define kern_addr_valid(kaddr) (0) - -#endif /* CONFIG_SPARSEMEM */ -#endif /* _I386_SPARSEMEM_H */ diff --git a/include/asm-x86/sparsemem_64.h b/include/asm-x86/sparsemem_64.h deleted file mode 100644 index dabb16714a71..000000000000 --- a/include/asm-x86/sparsemem_64.h +++ /dev/null @@ -1,26 +0,0 @@ -#ifndef _ASM_X86_64_SPARSEMEM_H -#define _ASM_X86_64_SPARSEMEM_H 1 - -#ifdef CONFIG_SPARSEMEM - -/* - * generic non-linear memory support: - * - * 1) we will not split memory into more chunks than will fit into the flags - * field of the struct page - * - * SECTION_SIZE_BITS 2^n: size of each section - * MAX_PHYSADDR_BITS 2^n: max size of physical address space - * MAX_PHYSMEM_BITS 2^n: how much memory we can have in that space - * - */ - -#define SECTION_SIZE_BITS 27 /* matt - 128 is convenient right now */ -#define MAX_PHYSADDR_BITS 40 -#define MAX_PHYSMEM_BITS 40 - -extern int early_pfn_to_nid(unsigned long pfn); - -#endif /* CONFIG_SPARSEMEM */ - -#endif /* _ASM_X86_64_SPARSEMEM_H */ diff --git a/include/asm-x86/spinlock.h b/include/asm-x86/spinlock.h index d74d85e71dcb..23804c1890ff 100644 --- a/include/asm-x86/spinlock.h +++ b/include/asm-x86/spinlock.h @@ -1,5 +1,296 @@ +#ifndef _X86_SPINLOCK_H_ +#define _X86_SPINLOCK_H_ + +#include <asm/atomic.h> +#include <asm/rwlock.h> +#include <asm/page.h> +#include <asm/processor.h> +#include <linux/compiler.h> + +/* + * Your basic SMP spinlocks, allowing only a single CPU anywhere + * + * Simple spin lock operations. There are two variants, one clears IRQ's + * on the local processor, one does not. + * + * These are fair FIFO ticket locks, which are currently limited to 256 + * CPUs. + * + * (the type definitions are in asm/spinlock_types.h) + */ + #ifdef CONFIG_X86_32 -# include "spinlock_32.h" +typedef char _slock_t; +# define LOCK_INS_DEC "decb" +# define LOCK_INS_XCH "xchgb" +# define LOCK_INS_MOV "movb" +# define LOCK_INS_CMP "cmpb" +# define LOCK_PTR_REG "a" #else -# include "spinlock_64.h" +typedef int _slock_t; +# define LOCK_INS_DEC "decl" +# define LOCK_INS_XCH "xchgl" +# define LOCK_INS_MOV "movl" +# define LOCK_INS_CMP "cmpl" +# define LOCK_PTR_REG "D" +#endif + +#if defined(CONFIG_X86_32) && \ + (defined(CONFIG_X86_OOSTORE) || defined(CONFIG_X86_PPRO_FENCE)) +/* + * On PPro SMP or if we are using OOSTORE, we use a locked operation to unlock + * (PPro errata 66, 92) + */ +# define UNLOCK_LOCK_PREFIX LOCK_PREFIX +#else +# define UNLOCK_LOCK_PREFIX +#endif + +/* + * Ticket locks are conceptually two parts, one indicating the current head of + * the queue, and the other indicating the current tail. The lock is acquired + * by atomically noting the tail and incrementing it by one (thus adding + * ourself to the queue and noting our position), then waiting until the head + * becomes equal to the the initial value of the tail. + * + * We use an xadd covering *both* parts of the lock, to increment the tail and + * also load the position of the head, which takes care of memory ordering + * issues and should be optimal for the uncontended case. Note the tail must be + * in the high part, because a wide xadd increment of the low part would carry + * up and contaminate the high part. + * + * With fewer than 2^8 possible CPUs, we can use x86's partial registers to + * save some instructions and make the code more elegant. There really isn't + * much between them in performance though, especially as locks are out of line. + */ +#if (NR_CPUS < 256) +static inline int __raw_spin_is_locked(raw_spinlock_t *lock) +{ + int tmp = *(volatile signed int *)(&(lock)->slock); + + return (((tmp >> 8) & 0xff) != (tmp & 0xff)); +} + +static inline int __raw_spin_is_contended(raw_spinlock_t *lock) +{ + int tmp = *(volatile signed int *)(&(lock)->slock); + + return (((tmp >> 8) & 0xff) - (tmp & 0xff)) > 1; +} + +static inline void __raw_spin_lock(raw_spinlock_t *lock) +{ + short inc = 0x0100; + + __asm__ __volatile__ ( + LOCK_PREFIX "xaddw %w0, %1\n" + "1:\t" + "cmpb %h0, %b0\n\t" + "je 2f\n\t" + "rep ; nop\n\t" + "movb %1, %b0\n\t" + /* don't need lfence here, because loads are in-order */ + "jmp 1b\n" + "2:" + :"+Q" (inc), "+m" (lock->slock) + : + :"memory", "cc"); +} + +#define __raw_spin_lock_flags(lock, flags) __raw_spin_lock(lock) + +static inline int __raw_spin_trylock(raw_spinlock_t *lock) +{ + int tmp; + short new; + + asm volatile( + "movw %2,%w0\n\t" + "cmpb %h0,%b0\n\t" + "jne 1f\n\t" + "movw %w0,%w1\n\t" + "incb %h1\n\t" + "lock ; cmpxchgw %w1,%2\n\t" + "1:" + "sete %b1\n\t" + "movzbl %b1,%0\n\t" + :"=&a" (tmp), "=Q" (new), "+m" (lock->slock) + : + : "memory", "cc"); + + return tmp; +} + +static inline void __raw_spin_unlock(raw_spinlock_t *lock) +{ + __asm__ __volatile__( + UNLOCK_LOCK_PREFIX "incb %0" + :"+m" (lock->slock) + : + :"memory", "cc"); +} +#else +static inline int __raw_spin_is_locked(raw_spinlock_t *lock) +{ + int tmp = *(volatile signed int *)(&(lock)->slock); + + return (((tmp >> 16) & 0xffff) != (tmp & 0xffff)); +} + +static inline int __raw_spin_is_contended(raw_spinlock_t *lock) +{ + int tmp = *(volatile signed int *)(&(lock)->slock); + + return (((tmp >> 16) & 0xffff) - (tmp & 0xffff)) > 1; +} + +static inline void __raw_spin_lock(raw_spinlock_t *lock) +{ + int inc = 0x00010000; + int tmp; + + __asm__ __volatile__ ( + "lock ; xaddl %0, %1\n" + "movzwl %w0, %2\n\t" + "shrl $16, %0\n\t" + "1:\t" + "cmpl %0, %2\n\t" + "je 2f\n\t" + "rep ; nop\n\t" + "movzwl %1, %2\n\t" + /* don't need lfence here, because loads are in-order */ + "jmp 1b\n" + "2:" + :"+Q" (inc), "+m" (lock->slock), "=r" (tmp) + : + :"memory", "cc"); +} + +#define __raw_spin_lock_flags(lock, flags) __raw_spin_lock(lock) + +static inline int __raw_spin_trylock(raw_spinlock_t *lock) +{ + int tmp; + int new; + + asm volatile( + "movl %2,%0\n\t" + "movl %0,%1\n\t" + "roll $16, %0\n\t" + "cmpl %0,%1\n\t" + "jne 1f\n\t" + "addl $0x00010000, %1\n\t" + "lock ; cmpxchgl %1,%2\n\t" + "1:" + "sete %b1\n\t" + "movzbl %b1,%0\n\t" + :"=&a" (tmp), "=r" (new), "+m" (lock->slock) + : + : "memory", "cc"); + + return tmp; +} + +static inline void __raw_spin_unlock(raw_spinlock_t *lock) +{ + __asm__ __volatile__( + UNLOCK_LOCK_PREFIX "incw %0" + :"+m" (lock->slock) + : + :"memory", "cc"); +} +#endif + +static inline void __raw_spin_unlock_wait(raw_spinlock_t *lock) +{ + while (__raw_spin_is_locked(lock)) + cpu_relax(); +} + +/* + * Read-write spinlocks, allowing multiple readers + * but only one writer. + * + * NOTE! it is quite common to have readers in interrupts + * but no interrupt writers. For those circumstances we + * can "mix" irq-safe locks - any writer needs to get a + * irq-safe write-lock, but readers can get non-irqsafe + * read-locks. + * + * On x86, we implement read-write locks as a 32-bit counter + * with the high bit (sign) being the "contended" bit. + */ + +/** + * read_can_lock - would read_trylock() succeed? + * @lock: the rwlock in question. + */ +static inline int __raw_read_can_lock(raw_rwlock_t *lock) +{ + return (int)(lock)->lock > 0; +} + +/** + * write_can_lock - would write_trylock() succeed? + * @lock: the rwlock in question. + */ +static inline int __raw_write_can_lock(raw_rwlock_t *lock) +{ + return (lock)->lock == RW_LOCK_BIAS; +} + +static inline void __raw_read_lock(raw_rwlock_t *rw) +{ + asm volatile(LOCK_PREFIX " subl $1,(%0)\n\t" + "jns 1f\n" + "call __read_lock_failed\n\t" + "1:\n" + ::LOCK_PTR_REG (rw) : "memory"); +} + +static inline void __raw_write_lock(raw_rwlock_t *rw) +{ + asm volatile(LOCK_PREFIX " subl %1,(%0)\n\t" + "jz 1f\n" + "call __write_lock_failed\n\t" + "1:\n" + ::LOCK_PTR_REG (rw), "i" (RW_LOCK_BIAS) : "memory"); +} + +static inline int __raw_read_trylock(raw_rwlock_t *lock) +{ + atomic_t *count = (atomic_t *)lock; + + atomic_dec(count); + if (atomic_read(count) >= 0) + return 1; + atomic_inc(count); + return 0; +} + +static inline int __raw_write_trylock(raw_rwlock_t *lock) +{ + atomic_t *count = (atomic_t *)lock; + + if (atomic_sub_and_test(RW_LOCK_BIAS, count)) + return 1; + atomic_add(RW_LOCK_BIAS, count); + return 0; +} + +static inline void __raw_read_unlock(raw_rwlock_t *rw) +{ + asm volatile(LOCK_PREFIX "incl %0" :"+m" (rw->lock) : : "memory"); +} + +static inline void __raw_write_unlock(raw_rwlock_t *rw) +{ + asm volatile(LOCK_PREFIX "addl %1, %0" + : "+m" (rw->lock) : "i" (RW_LOCK_BIAS) : "memory"); +} + +#define _raw_spin_relax(lock) cpu_relax() +#define _raw_read_relax(lock) cpu_relax() +#define _raw_write_relax(lock) cpu_relax() + #endif diff --git a/include/asm-x86/spinlock_32.h b/include/asm-x86/spinlock_32.h deleted file mode 100644 index d3bcebed60ca..000000000000 --- a/include/asm-x86/spinlock_32.h +++ /dev/null @@ -1,221 +0,0 @@ -#ifndef __ASM_SPINLOCK_H -#define __ASM_SPINLOCK_H - -#include <asm/atomic.h> -#include <asm/rwlock.h> -#include <asm/page.h> -#include <asm/processor.h> -#include <linux/compiler.h> - -#ifdef CONFIG_PARAVIRT -#include <asm/paravirt.h> -#else -#define CLI_STRING "cli" -#define STI_STRING "sti" -#define CLI_STI_CLOBBERS -#define CLI_STI_INPUT_ARGS -#endif /* CONFIG_PARAVIRT */ - -/* - * Your basic SMP spinlocks, allowing only a single CPU anywhere - * - * Simple spin lock operations. There are two variants, one clears IRQ's - * on the local processor, one does not. - * - * We make no fairness assumptions. They have a cost. - * - * (the type definitions are in asm/spinlock_types.h) - */ - -static inline int __raw_spin_is_locked(raw_spinlock_t *x) -{ - return *(volatile signed char *)(&(x)->slock) <= 0; -} - -static inline void __raw_spin_lock(raw_spinlock_t *lock) -{ - asm volatile("\n1:\t" - LOCK_PREFIX " ; decb %0\n\t" - "jns 3f\n" - "2:\t" - "rep;nop\n\t" - "cmpb $0,%0\n\t" - "jle 2b\n\t" - "jmp 1b\n" - "3:\n\t" - : "+m" (lock->slock) : : "memory"); -} - -/* - * It is easier for the lock validator if interrupts are not re-enabled - * in the middle of a lock-acquire. This is a performance feature anyway - * so we turn it off: - * - * NOTE: there's an irqs-on section here, which normally would have to be - * irq-traced, but on CONFIG_TRACE_IRQFLAGS we never use this variant. - */ -#ifndef CONFIG_PROVE_LOCKING -static inline void __raw_spin_lock_flags(raw_spinlock_t *lock, unsigned long flags) -{ - asm volatile( - "\n1:\t" - LOCK_PREFIX " ; decb %[slock]\n\t" - "jns 5f\n" - "2:\t" - "testl $0x200, %[flags]\n\t" - "jz 4f\n\t" - STI_STRING "\n" - "3:\t" - "rep;nop\n\t" - "cmpb $0, %[slock]\n\t" - "jle 3b\n\t" - CLI_STRING "\n\t" - "jmp 1b\n" - "4:\t" - "rep;nop\n\t" - "cmpb $0, %[slock]\n\t" - "jg 1b\n\t" - "jmp 4b\n" - "5:\n\t" - : [slock] "+m" (lock->slock) - : [flags] "r" (flags) - CLI_STI_INPUT_ARGS - : "memory" CLI_STI_CLOBBERS); -} -#endif - -static inline int __raw_spin_trylock(raw_spinlock_t *lock) -{ - char oldval; - asm volatile( - "xchgb %b0,%1" - :"=q" (oldval), "+m" (lock->slock) - :"0" (0) : "memory"); - return oldval > 0; -} - -/* - * __raw_spin_unlock based on writing $1 to the low byte. - * This method works. Despite all the confusion. - * (except on PPro SMP or if we are using OOSTORE, so we use xchgb there) - * (PPro errata 66, 92) - */ - -#if !defined(CONFIG_X86_OOSTORE) && !defined(CONFIG_X86_PPRO_FENCE) - -static inline void __raw_spin_unlock(raw_spinlock_t *lock) -{ - asm volatile("movb $1,%0" : "+m" (lock->slock) :: "memory"); -} - -#else - -static inline void __raw_spin_unlock(raw_spinlock_t *lock) -{ - char oldval = 1; - - asm volatile("xchgb %b0, %1" - : "=q" (oldval), "+m" (lock->slock) - : "0" (oldval) : "memory"); -} - -#endif - -static inline void __raw_spin_unlock_wait(raw_spinlock_t *lock) -{ - while (__raw_spin_is_locked(lock)) - cpu_relax(); -} - -/* - * Read-write spinlocks, allowing multiple readers - * but only one writer. - * - * NOTE! it is quite common to have readers in interrupts - * but no interrupt writers. For those circumstances we - * can "mix" irq-safe locks - any writer needs to get a - * irq-safe write-lock, but readers can get non-irqsafe - * read-locks. - * - * On x86, we implement read-write locks as a 32-bit counter - * with the high bit (sign) being the "contended" bit. - * - * The inline assembly is non-obvious. Think about it. - * - * Changed to use the same technique as rw semaphores. See - * semaphore.h for details. -ben - * - * the helpers are in arch/i386/kernel/semaphore.c - */ - -/** - * read_can_lock - would read_trylock() succeed? - * @lock: the rwlock in question. - */ -static inline int __raw_read_can_lock(raw_rwlock_t *x) -{ - return (int)(x)->lock > 0; -} - -/** - * write_can_lock - would write_trylock() succeed? - * @lock: the rwlock in question. - */ -static inline int __raw_write_can_lock(raw_rwlock_t *x) -{ - return (x)->lock == RW_LOCK_BIAS; -} - -static inline void __raw_read_lock(raw_rwlock_t *rw) -{ - asm volatile(LOCK_PREFIX " subl $1,(%0)\n\t" - "jns 1f\n" - "call __read_lock_failed\n\t" - "1:\n" - ::"a" (rw) : "memory"); -} - -static inline void __raw_write_lock(raw_rwlock_t *rw) -{ - asm volatile(LOCK_PREFIX " subl $" RW_LOCK_BIAS_STR ",(%0)\n\t" - "jz 1f\n" - "call __write_lock_failed\n\t" - "1:\n" - ::"a" (rw) : "memory"); -} - -static inline int __raw_read_trylock(raw_rwlock_t *lock) -{ - atomic_t *count = (atomic_t *)lock; - atomic_dec(count); - if (atomic_read(count) >= 0) - return 1; - atomic_inc(count); - return 0; -} - -static inline int __raw_write_trylock(raw_rwlock_t *lock) -{ - atomic_t *count = (atomic_t *)lock; - if (atomic_sub_and_test(RW_LOCK_BIAS, count)) - return 1; - atomic_add(RW_LOCK_BIAS, count); - return 0; -} - -static inline void __raw_read_unlock(raw_rwlock_t *rw) -{ - asm volatile(LOCK_PREFIX "incl %0" :"+m" (rw->lock) : : "memory"); -} - -static inline void __raw_write_unlock(raw_rwlock_t *rw) -{ - asm volatile(LOCK_PREFIX "addl $" RW_LOCK_BIAS_STR ", %0" - : "+m" (rw->lock) : : "memory"); -} - -#define _raw_spin_relax(lock) cpu_relax() -#define _raw_read_relax(lock) cpu_relax() -#define _raw_write_relax(lock) cpu_relax() - -#endif /* __ASM_SPINLOCK_H */ diff --git a/include/asm-x86/spinlock_64.h b/include/asm-x86/spinlock_64.h deleted file mode 100644 index 88bf981e73cf..000000000000 --- a/include/asm-x86/spinlock_64.h +++ /dev/null @@ -1,167 +0,0 @@ -#ifndef __ASM_SPINLOCK_H -#define __ASM_SPINLOCK_H - -#include <asm/atomic.h> -#include <asm/rwlock.h> -#include <asm/page.h> -#include <asm/processor.h> - -/* - * Your basic SMP spinlocks, allowing only a single CPU anywhere - * - * Simple spin lock operations. There are two variants, one clears IRQ's - * on the local processor, one does not. - * - * We make no fairness assumptions. They have a cost. - * - * (the type definitions are in asm/spinlock_types.h) - */ - -static inline int __raw_spin_is_locked(raw_spinlock_t *lock) -{ - return *(volatile signed int *)(&(lock)->slock) <= 0; -} - -static inline void __raw_spin_lock(raw_spinlock_t *lock) -{ - asm volatile( - "\n1:\t" - LOCK_PREFIX " ; decl %0\n\t" - "jns 2f\n" - "3:\n" - "rep;nop\n\t" - "cmpl $0,%0\n\t" - "jle 3b\n\t" - "jmp 1b\n" - "2:\t" : "=m" (lock->slock) : : "memory"); -} - -/* - * Same as __raw_spin_lock, but reenable interrupts during spinning. - */ -#ifndef CONFIG_PROVE_LOCKING -static inline void __raw_spin_lock_flags(raw_spinlock_t *lock, unsigned long flags) -{ - asm volatile( - "\n1:\t" - LOCK_PREFIX " ; decl %0\n\t" - "jns 5f\n" - "testl $0x200, %1\n\t" /* interrupts were disabled? */ - "jz 4f\n\t" - "sti\n" - "3:\t" - "rep;nop\n\t" - "cmpl $0, %0\n\t" - "jle 3b\n\t" - "cli\n\t" - "jmp 1b\n" - "4:\t" - "rep;nop\n\t" - "cmpl $0, %0\n\t" - "jg 1b\n\t" - "jmp 4b\n" - "5:\n\t" - : "+m" (lock->slock) : "r" ((unsigned)flags) : "memory"); -} -#endif - -static inline int __raw_spin_trylock(raw_spinlock_t *lock) -{ - int oldval; - - asm volatile( - "xchgl %0,%1" - :"=q" (oldval), "=m" (lock->slock) - :"0" (0) : "memory"); - - return oldval > 0; -} - -static inline void __raw_spin_unlock(raw_spinlock_t *lock) -{ - asm volatile("movl $1,%0" :"=m" (lock->slock) :: "memory"); -} - -static inline void __raw_spin_unlock_wait(raw_spinlock_t *lock) -{ - while (__raw_spin_is_locked(lock)) - cpu_relax(); -} - -/* - * Read-write spinlocks, allowing multiple readers - * but only one writer. - * - * NOTE! it is quite common to have readers in interrupts - * but no interrupt writers. For those circumstances we - * can "mix" irq-safe locks - any writer needs to get a - * irq-safe write-lock, but readers can get non-irqsafe - * read-locks. - * - * On x86, we implement read-write locks as a 32-bit counter - * with the high bit (sign) being the "contended" bit. - */ - -static inline int __raw_read_can_lock(raw_rwlock_t *lock) -{ - return (int)(lock)->lock > 0; -} - -static inline int __raw_write_can_lock(raw_rwlock_t *lock) -{ - return (lock)->lock == RW_LOCK_BIAS; -} - -static inline void __raw_read_lock(raw_rwlock_t *rw) -{ - asm volatile(LOCK_PREFIX "subl $1,(%0)\n\t" - "jns 1f\n" - "call __read_lock_failed\n" - "1:\n" - ::"D" (rw), "i" (RW_LOCK_BIAS) : "memory"); -} - -static inline void __raw_write_lock(raw_rwlock_t *rw) -{ - asm volatile(LOCK_PREFIX "subl %1,(%0)\n\t" - "jz 1f\n" - "\tcall __write_lock_failed\n\t" - "1:\n" - ::"D" (rw), "i" (RW_LOCK_BIAS) : "memory"); -} - -static inline int __raw_read_trylock(raw_rwlock_t *lock) -{ - atomic_t *count = (atomic_t *)lock; - atomic_dec(count); - if (atomic_read(count) >= 0) - return 1; - atomic_inc(count); - return 0; -} - -static inline int __raw_write_trylock(raw_rwlock_t *lock) -{ - atomic_t *count = (atomic_t *)lock; - if (atomic_sub_and_test(RW_LOCK_BIAS, count)) - return 1; - atomic_add(RW_LOCK_BIAS, count); - return 0; -} - -static inline void __raw_read_unlock(raw_rwlock_t *rw) -{ - asm volatile(LOCK_PREFIX " ; incl %0" :"=m" (rw->lock) : : "memory"); -} - -static inline void __raw_write_unlock(raw_rwlock_t *rw) -{ - asm volatile(LOCK_PREFIX " ; addl $" RW_LOCK_BIAS_STR ",%0" - : "=m" (rw->lock) : : "memory"); -} - -#define _raw_spin_relax(lock) cpu_relax() -#define _raw_read_relax(lock) cpu_relax() -#define _raw_write_relax(lock) cpu_relax() - -#endif /* __ASM_SPINLOCK_H */ diff --git a/include/asm-x86/spinlock_types.h b/include/asm-x86/spinlock_types.h index 4da9345c1500..9029cf78cf5d 100644 --- a/include/asm-x86/spinlock_types.h +++ b/include/asm-x86/spinlock_types.h @@ -9,7 +9,7 @@ typedef struct { unsigned int slock; } raw_spinlock_t; -#define __RAW_SPIN_LOCK_UNLOCKED { 1 } +#define __RAW_SPIN_LOCK_UNLOCKED { 0 } typedef struct { unsigned int lock; diff --git a/include/asm-x86/stacktrace.h b/include/asm-x86/stacktrace.h index 70dd5bae3235..30f82526a8e2 100644 --- a/include/asm-x86/stacktrace.h +++ b/include/asm-x86/stacktrace.h @@ -9,12 +9,13 @@ struct stacktrace_ops { void (*warning)(void *data, char *msg); /* msg must contain %s for the symbol */ void (*warning_symbol)(void *data, char *msg, unsigned long symbol); - void (*address)(void *data, unsigned long address); + void (*address)(void *data, unsigned long address, int reliable); /* On negative return stop dumping */ int (*stack)(void *data, char *name); }; -void dump_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long *stack, +void dump_trace(struct task_struct *tsk, struct pt_regs *regs, + unsigned long *stack, unsigned long bp, const struct stacktrace_ops *ops, void *data); #endif diff --git a/include/asm-x86/suspend_32.h b/include/asm-x86/suspend_32.h index a2520732ffd6..1bbda3ad7796 100644 --- a/include/asm-x86/suspend_32.h +++ b/include/asm-x86/suspend_32.h @@ -12,8 +12,8 @@ static inline int arch_prepare_suspend(void) { return 0; } struct saved_context { u16 es, fs, gs, ss; unsigned long cr0, cr2, cr3, cr4; - struct Xgt_desc_struct gdt; - struct Xgt_desc_struct idt; + struct desc_ptr gdt; + struct desc_ptr idt; u16 ldt; u16 tss; unsigned long tr; diff --git a/include/asm-x86/suspend_64.h b/include/asm-x86/suspend_64.h index c505a76bcf6e..2eb92cb81a0d 100644 --- a/include/asm-x86/suspend_64.h +++ b/include/asm-x86/suspend_64.h @@ -15,7 +15,14 @@ arch_prepare_suspend(void) return 0; } -/* Image of the saved processor state. If you touch this, fix acpi/wakeup.S. */ +/* + * Image of the saved processor state, used by the low level ACPI suspend to + * RAM code and by the low level hibernation code. + * + * If you modify it, fix arch/x86/kernel/acpi/wakeup_64.S and make sure that + * __save/__restore_processor_state(), defined in arch/x86/kernel/suspend_64.c, + * still work as required. + */ struct saved_context { struct pt_regs regs; u16 ds, es, fs, gs, ss; @@ -38,8 +45,6 @@ struct saved_context { #define loaddebug(thread,register) \ set_debugreg((thread)->debugreg##register, register) -extern void fix_processor_context(void); - /* routines for saving/restoring kernel state */ extern int acpi_save_state_mem(void); extern char core_restore_code; diff --git a/include/asm-x86/system.h b/include/asm-x86/system.h index 692562b48f2a..ee32ef9367f4 100644 --- a/include/asm-x86/system.h +++ b/include/asm-x86/system.h @@ -1,5 +1,414 @@ +#ifndef _ASM_X86_SYSTEM_H_ +#define _ASM_X86_SYSTEM_H_ + +#include <asm/asm.h> +#include <asm/segment.h> +#include <asm/cpufeature.h> +#include <asm/cmpxchg.h> +#include <asm/nops.h> + +#include <linux/kernel.h> +#include <linux/irqflags.h> + +/* entries in ARCH_DLINFO: */ +#ifdef CONFIG_IA32_EMULATION +# define AT_VECTOR_SIZE_ARCH 2 +#else +# define AT_VECTOR_SIZE_ARCH 1 +#endif + +#ifdef CONFIG_X86_32 + +struct task_struct; /* one of the stranger aspects of C forward declarations */ +extern struct task_struct *FASTCALL(__switch_to(struct task_struct *prev, + struct task_struct *next)); + +/* + * Saving eflags is important. It switches not only IOPL between tasks, + * it also protects other tasks from NT leaking through sysenter etc. + */ +#define switch_to(prev, next, last) do { \ + unsigned long esi, edi; \ + asm volatile("pushfl\n\t" /* Save flags */ \ + "pushl %%ebp\n\t" \ + "movl %%esp,%0\n\t" /* save ESP */ \ + "movl %5,%%esp\n\t" /* restore ESP */ \ + "movl $1f,%1\n\t" /* save EIP */ \ + "pushl %6\n\t" /* restore EIP */ \ + "jmp __switch_to\n" \ + "1:\t" \ + "popl %%ebp\n\t" \ + "popfl" \ + :"=m" (prev->thread.sp), "=m" (prev->thread.ip), \ + "=a" (last), "=S" (esi), "=D" (edi) \ + :"m" (next->thread.sp), "m" (next->thread.ip), \ + "2" (prev), "d" (next)); \ +} while (0) + +/* + * disable hlt during certain critical i/o operations + */ +#define HAVE_DISABLE_HLT +#else +#define __SAVE(reg, offset) "movq %%" #reg ",(14-" #offset ")*8(%%rsp)\n\t" +#define __RESTORE(reg, offset) "movq (14-" #offset ")*8(%%rsp),%%" #reg "\n\t" + +/* frame pointer must be last for get_wchan */ +#define SAVE_CONTEXT "pushf ; pushq %%rbp ; movq %%rsi,%%rbp\n\t" +#define RESTORE_CONTEXT "movq %%rbp,%%rsi ; popq %%rbp ; popf\t" + +#define __EXTRA_CLOBBER \ + , "rcx", "rbx", "rdx", "r8", "r9", "r10", "r11", \ + "r12", "r13", "r14", "r15" + +/* Save restore flags to clear handle leaking NT */ +#define switch_to(prev, next, last) \ + asm volatile(SAVE_CONTEXT \ + "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \ + "movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */ \ + "call __switch_to\n\t" \ + ".globl thread_return\n" \ + "thread_return:\n\t" \ + "movq %%gs:%P[pda_pcurrent],%%rsi\n\t" \ + "movq %P[thread_info](%%rsi),%%r8\n\t" \ + LOCK_PREFIX "btr %[tif_fork],%P[ti_flags](%%r8)\n\t" \ + "movq %%rax,%%rdi\n\t" \ + "jc ret_from_fork\n\t" \ + RESTORE_CONTEXT \ + : "=a" (last) \ + : [next] "S" (next), [prev] "D" (prev), \ + [threadrsp] "i" (offsetof(struct task_struct, thread.sp)), \ + [ti_flags] "i" (offsetof(struct thread_info, flags)), \ + [tif_fork] "i" (TIF_FORK), \ + [thread_info] "i" (offsetof(struct task_struct, stack)), \ + [pda_pcurrent] "i" (offsetof(struct x8664_pda, pcurrent)) \ + : "memory", "cc" __EXTRA_CLOBBER) +#endif + +#ifdef __KERNEL__ +#define _set_base(addr, base) do { unsigned long __pr; \ +__asm__ __volatile__ ("movw %%dx,%1\n\t" \ + "rorl $16,%%edx\n\t" \ + "movb %%dl,%2\n\t" \ + "movb %%dh,%3" \ + :"=&d" (__pr) \ + :"m" (*((addr)+2)), \ + "m" (*((addr)+4)), \ + "m" (*((addr)+7)), \ + "0" (base) \ + ); } while (0) + +#define _set_limit(addr, limit) do { unsigned long __lr; \ +__asm__ __volatile__ ("movw %%dx,%1\n\t" \ + "rorl $16,%%edx\n\t" \ + "movb %2,%%dh\n\t" \ + "andb $0xf0,%%dh\n\t" \ + "orb %%dh,%%dl\n\t" \ + "movb %%dl,%2" \ + :"=&d" (__lr) \ + :"m" (*(addr)), \ + "m" (*((addr)+6)), \ + "0" (limit) \ + ); } while (0) + +#define set_base(ldt, base) _set_base(((char *)&(ldt)) , (base)) +#define set_limit(ldt, limit) _set_limit(((char *)&(ldt)) , ((limit)-1)) + +extern void load_gs_index(unsigned); + +/* + * Load a segment. Fall back on loading the zero + * segment if something goes wrong.. + */ +#define loadsegment(seg, value) \ + asm volatile("\n" \ + "1:\t" \ + "movl %k0,%%" #seg "\n" \ + "2:\n" \ + ".section .fixup,\"ax\"\n" \ + "3:\t" \ + "movl %k1, %%" #seg "\n\t" \ + "jmp 2b\n" \ + ".previous\n" \ + ".section __ex_table,\"a\"\n\t" \ + _ASM_ALIGN "\n\t" \ + _ASM_PTR " 1b,3b\n" \ + ".previous" \ + : :"r" (value), "r" (0)) + + +/* + * Save a segment register away + */ +#define savesegment(seg, value) \ + asm volatile("mov %%" #seg ",%0":"=rm" (value)) + +static inline unsigned long get_limit(unsigned long segment) +{ + unsigned long __limit; + __asm__("lsll %1,%0" + :"=r" (__limit):"r" (segment)); + return __limit+1; +} + +static inline void native_clts(void) +{ + asm volatile ("clts"); +} + +/* + * Volatile isn't enough to prevent the compiler from reordering the + * read/write functions for the control registers and messing everything up. + * A memory clobber would solve the problem, but would prevent reordering of + * all loads stores around it, which can hurt performance. Solution is to + * use a variable and mimic reads and writes to it to enforce serialization + */ +static unsigned long __force_order; + +static inline unsigned long native_read_cr0(void) +{ + unsigned long val; + asm volatile("mov %%cr0,%0\n\t" :"=r" (val), "=m" (__force_order)); + return val; +} + +static inline void native_write_cr0(unsigned long val) +{ + asm volatile("mov %0,%%cr0": :"r" (val), "m" (__force_order)); +} + +static inline unsigned long native_read_cr2(void) +{ + unsigned long val; + asm volatile("mov %%cr2,%0\n\t" :"=r" (val), "=m" (__force_order)); + return val; +} + +static inline void native_write_cr2(unsigned long val) +{ + asm volatile("mov %0,%%cr2": :"r" (val), "m" (__force_order)); +} + +static inline unsigned long native_read_cr3(void) +{ + unsigned long val; + asm volatile("mov %%cr3,%0\n\t" :"=r" (val), "=m" (__force_order)); + return val; +} + +static inline void native_write_cr3(unsigned long val) +{ + asm volatile("mov %0,%%cr3": :"r" (val), "m" (__force_order)); +} + +static inline unsigned long native_read_cr4(void) +{ + unsigned long val; + asm volatile("mov %%cr4,%0\n\t" :"=r" (val), "=m" (__force_order)); + return val; +} + +static inline unsigned long native_read_cr4_safe(void) +{ + unsigned long val; + /* This could fault if %cr4 does not exist. In x86_64, a cr4 always + * exists, so it will never fail. */ +#ifdef CONFIG_X86_32 + asm volatile("1: mov %%cr4, %0 \n" + "2: \n" + ".section __ex_table,\"a\" \n" + ".long 1b,2b \n" + ".previous \n" + : "=r" (val), "=m" (__force_order) : "0" (0)); +#else + val = native_read_cr4(); +#endif + return val; +} + +static inline void native_write_cr4(unsigned long val) +{ + asm volatile("mov %0,%%cr4": :"r" (val), "m" (__force_order)); +} + +#ifdef CONFIG_X86_64 +static inline unsigned long native_read_cr8(void) +{ + unsigned long cr8; + asm volatile("movq %%cr8,%0" : "=r" (cr8)); + return cr8; +} + +static inline void native_write_cr8(unsigned long val) +{ + asm volatile("movq %0,%%cr8" :: "r" (val) : "memory"); +} +#endif + +static inline void native_wbinvd(void) +{ + asm volatile("wbinvd": : :"memory"); +} +#ifdef CONFIG_PARAVIRT +#include <asm/paravirt.h> +#else +#define read_cr0() (native_read_cr0()) +#define write_cr0(x) (native_write_cr0(x)) +#define read_cr2() (native_read_cr2()) +#define write_cr2(x) (native_write_cr2(x)) +#define read_cr3() (native_read_cr3()) +#define write_cr3(x) (native_write_cr3(x)) +#define read_cr4() (native_read_cr4()) +#define read_cr4_safe() (native_read_cr4_safe()) +#define write_cr4(x) (native_write_cr4(x)) +#define wbinvd() (native_wbinvd()) +#ifdef CONFIG_X86_64 +#define read_cr8() (native_read_cr8()) +#define write_cr8(x) (native_write_cr8(x)) +#endif + +/* Clear the 'TS' bit */ +#define clts() (native_clts()) + +#endif/* CONFIG_PARAVIRT */ + +#define stts() write_cr0(8 | read_cr0()) + +#endif /* __KERNEL__ */ + +static inline void clflush(void *__p) +{ + asm volatile("clflush %0" : "+m" (*(char __force *)__p)); +} + +#define nop() __asm__ __volatile__ ("nop") + +void disable_hlt(void); +void enable_hlt(void); + +extern int es7000_plat; +void cpu_idle_wait(void); + +extern unsigned long arch_align_stack(unsigned long sp); +extern void free_init_pages(char *what, unsigned long begin, unsigned long end); + +void default_idle(void); + +/* + * Force strict CPU ordering. + * And yes, this is required on UP too when we're talking + * to devices. + */ #ifdef CONFIG_X86_32 -# include "system_32.h" +/* + * For now, "wmb()" doesn't actually do anything, as all + * Intel CPU's follow what Intel calls a *Processor Order*, + * in which all writes are seen in the program order even + * outside the CPU. + * + * I expect future Intel CPU's to have a weaker ordering, + * but I'd also expect them to finally get their act together + * and add some real memory barriers if so. + * + * Some non intel clones support out of order store. wmb() ceases to be a + * nop for these. + */ +#define mb() alternative("lock; addl $0,0(%%esp)", "mfence", X86_FEATURE_XMM2) +#define rmb() alternative("lock; addl $0,0(%%esp)", "lfence", X86_FEATURE_XMM2) +#define wmb() alternative("lock; addl $0,0(%%esp)", "sfence", X86_FEATURE_XMM) #else -# include "system_64.h" +#define mb() asm volatile("mfence":::"memory") +#define rmb() asm volatile("lfence":::"memory") +#define wmb() asm volatile("sfence" ::: "memory") +#endif + +/** + * read_barrier_depends - Flush all pending reads that subsequents reads + * depend on. + * + * No data-dependent reads from memory-like regions are ever reordered + * over this barrier. All reads preceding this primitive are guaranteed + * to access memory (but not necessarily other CPUs' caches) before any + * reads following this primitive that depend on the data return by + * any of the preceding reads. This primitive is much lighter weight than + * rmb() on most CPUs, and is never heavier weight than is + * rmb(). + * + * These ordering constraints are respected by both the local CPU + * and the compiler. + * + * Ordering is not guaranteed by anything other than these primitives, + * not even by data dependencies. See the documentation for + * memory_barrier() for examples and URLs to more information. + * + * For example, the following code would force ordering (the initial + * value of "a" is zero, "b" is one, and "p" is "&a"): + * + * <programlisting> + * CPU 0 CPU 1 + * + * b = 2; + * memory_barrier(); + * p = &b; q = p; + * read_barrier_depends(); + * d = *q; + * </programlisting> + * + * because the read of "*q" depends on the read of "p" and these + * two reads are separated by a read_barrier_depends(). However, + * the following code, with the same initial values for "a" and "b": + * + * <programlisting> + * CPU 0 CPU 1 + * + * a = 2; + * memory_barrier(); + * b = 3; y = b; + * read_barrier_depends(); + * x = a; + * </programlisting> + * + * does not enforce ordering, since there is no data dependency between + * the read of "a" and the read of "b". Therefore, on some CPUs, such + * as Alpha, "y" could be set to 3 and "x" to 0. Use rmb() + * in cases like this where there are no data dependencies. + **/ + +#define read_barrier_depends() do { } while (0) + +#ifdef CONFIG_SMP +#define smp_mb() mb() +#ifdef CONFIG_X86_PPRO_FENCE +# define smp_rmb() rmb() +#else +# define smp_rmb() barrier() +#endif +#ifdef CONFIG_X86_OOSTORE +# define smp_wmb() wmb() +#else +# define smp_wmb() barrier() +#endif +#define smp_read_barrier_depends() read_barrier_depends() +#define set_mb(var, value) do { (void) xchg(&var, value); } while (0) +#else +#define smp_mb() barrier() +#define smp_rmb() barrier() +#define smp_wmb() barrier() +#define smp_read_barrier_depends() do { } while (0) +#define set_mb(var, value) do { var = value; barrier(); } while (0) +#endif + +/* + * Stop RDTSC speculation. This is needed when you need to use RDTSC + * (or get_cycles or vread that possibly accesses the TSC) in a defined + * code region. + * + * (Could use an alternative three way for this if there was one.) + */ +static inline void rdtsc_barrier(void) +{ + alternative(ASM_NOP3, "mfence", X86_FEATURE_MFENCE_RDTSC); + alternative(ASM_NOP3, "lfence", X86_FEATURE_LFENCE_RDTSC); +} + #endif diff --git a/include/asm-x86/system_32.h b/include/asm-x86/system_32.h deleted file mode 100644 index ef8468883bac..000000000000 --- a/include/asm-x86/system_32.h +++ /dev/null @@ -1,320 +0,0 @@ -#ifndef __ASM_SYSTEM_H -#define __ASM_SYSTEM_H - -#include <linux/kernel.h> -#include <asm/segment.h> -#include <asm/cpufeature.h> -#include <asm/cmpxchg.h> - -#ifdef __KERNEL__ -#define AT_VECTOR_SIZE_ARCH 2 /* entries in ARCH_DLINFO */ - -struct task_struct; /* one of the stranger aspects of C forward declarations.. */ -extern struct task_struct * FASTCALL(__switch_to(struct task_struct *prev, struct task_struct *next)); - -/* - * Saving eflags is important. It switches not only IOPL between tasks, - * it also protects other tasks from NT leaking through sysenter etc. - */ -#define switch_to(prev,next,last) do { \ - unsigned long esi,edi; \ - asm volatile("pushfl\n\t" /* Save flags */ \ - "pushl %%ebp\n\t" \ - "movl %%esp,%0\n\t" /* save ESP */ \ - "movl %5,%%esp\n\t" /* restore ESP */ \ - "movl $1f,%1\n\t" /* save EIP */ \ - "pushl %6\n\t" /* restore EIP */ \ - "jmp __switch_to\n" \ - "1:\t" \ - "popl %%ebp\n\t" \ - "popfl" \ - :"=m" (prev->thread.esp),"=m" (prev->thread.eip), \ - "=a" (last),"=S" (esi),"=D" (edi) \ - :"m" (next->thread.esp),"m" (next->thread.eip), \ - "2" (prev), "d" (next)); \ -} while (0) - -#define _set_base(addr,base) do { unsigned long __pr; \ -__asm__ __volatile__ ("movw %%dx,%1\n\t" \ - "rorl $16,%%edx\n\t" \ - "movb %%dl,%2\n\t" \ - "movb %%dh,%3" \ - :"=&d" (__pr) \ - :"m" (*((addr)+2)), \ - "m" (*((addr)+4)), \ - "m" (*((addr)+7)), \ - "0" (base) \ - ); } while(0) - -#define _set_limit(addr,limit) do { unsigned long __lr; \ -__asm__ __volatile__ ("movw %%dx,%1\n\t" \ - "rorl $16,%%edx\n\t" \ - "movb %2,%%dh\n\t" \ - "andb $0xf0,%%dh\n\t" \ - "orb %%dh,%%dl\n\t" \ - "movb %%dl,%2" \ - :"=&d" (__lr) \ - :"m" (*(addr)), \ - "m" (*((addr)+6)), \ - "0" (limit) \ - ); } while(0) - -#define set_base(ldt,base) _set_base( ((char *)&(ldt)) , (base) ) -#define set_limit(ldt,limit) _set_limit( ((char *)&(ldt)) , ((limit)-1) ) - -/* - * Load a segment. Fall back on loading the zero - * segment if something goes wrong.. - */ -#define loadsegment(seg,value) \ - asm volatile("\n" \ - "1:\t" \ - "mov %0,%%" #seg "\n" \ - "2:\n" \ - ".section .fixup,\"ax\"\n" \ - "3:\t" \ - "pushl $0\n\t" \ - "popl %%" #seg "\n\t" \ - "jmp 2b\n" \ - ".previous\n" \ - ".section __ex_table,\"a\"\n\t" \ - ".align 4\n\t" \ - ".long 1b,3b\n" \ - ".previous" \ - : :"rm" (value)) - -/* - * Save a segment register away - */ -#define savesegment(seg, value) \ - asm volatile("mov %%" #seg ",%0":"=rm" (value)) - - -static inline void native_clts(void) -{ - asm volatile ("clts"); -} - -static inline unsigned long native_read_cr0(void) -{ - unsigned long val; - asm volatile("movl %%cr0,%0\n\t" :"=r" (val)); - return val; -} - -static inline void native_write_cr0(unsigned long val) -{ - asm volatile("movl %0,%%cr0": :"r" (val)); -} - -static inline unsigned long native_read_cr2(void) -{ - unsigned long val; - asm volatile("movl %%cr2,%0\n\t" :"=r" (val)); - return val; -} - -static inline void native_write_cr2(unsigned long val) -{ - asm volatile("movl %0,%%cr2": :"r" (val)); -} - -static inline unsigned long native_read_cr3(void) -{ - unsigned long val; - asm volatile("movl %%cr3,%0\n\t" :"=r" (val)); - return val; -} - -static inline void native_write_cr3(unsigned long val) -{ - asm volatile("movl %0,%%cr3": :"r" (val)); -} - -static inline unsigned long native_read_cr4(void) -{ - unsigned long val; - asm volatile("movl %%cr4,%0\n\t" :"=r" (val)); - return val; -} - -static inline unsigned long native_read_cr4_safe(void) -{ - unsigned long val; - /* This could fault if %cr4 does not exist */ - asm volatile("1: movl %%cr4, %0 \n" - "2: \n" - ".section __ex_table,\"a\" \n" - ".long 1b,2b \n" - ".previous \n" - : "=r" (val): "0" (0)); - return val; -} - -static inline void native_write_cr4(unsigned long val) -{ - asm volatile("movl %0,%%cr4": :"r" (val)); -} - -static inline void native_wbinvd(void) -{ - asm volatile("wbinvd": : :"memory"); -} - -static inline void clflush(volatile void *__p) -{ - asm volatile("clflush %0" : "+m" (*(char __force *)__p)); -} - -#ifdef CONFIG_PARAVIRT -#include <asm/paravirt.h> -#else -#define read_cr0() (native_read_cr0()) -#define write_cr0(x) (native_write_cr0(x)) -#define read_cr2() (native_read_cr2()) -#define write_cr2(x) (native_write_cr2(x)) -#define read_cr3() (native_read_cr3()) -#define write_cr3(x) (native_write_cr3(x)) -#define read_cr4() (native_read_cr4()) -#define read_cr4_safe() (native_read_cr4_safe()) -#define write_cr4(x) (native_write_cr4(x)) -#define wbinvd() (native_wbinvd()) - -/* Clear the 'TS' bit */ -#define clts() (native_clts()) - -#endif/* CONFIG_PARAVIRT */ - -/* Set the 'TS' bit */ -#define stts() write_cr0(8 | read_cr0()) - -#endif /* __KERNEL__ */ - -static inline unsigned long get_limit(unsigned long segment) -{ - unsigned long __limit; - __asm__("lsll %1,%0" - :"=r" (__limit):"r" (segment)); - return __limit+1; -} - -#define nop() __asm__ __volatile__ ("nop") - -/* - * Force strict CPU ordering. - * And yes, this is required on UP too when we're talking - * to devices. - * - * For now, "wmb()" doesn't actually do anything, as all - * Intel CPU's follow what Intel calls a *Processor Order*, - * in which all writes are seen in the program order even - * outside the CPU. - * - * I expect future Intel CPU's to have a weaker ordering, - * but I'd also expect them to finally get their act together - * and add some real memory barriers if so. - * - * Some non intel clones support out of order store. wmb() ceases to be a - * nop for these. - */ - - -#define mb() alternative("lock; addl $0,0(%%esp)", "mfence", X86_FEATURE_XMM2) -#define rmb() alternative("lock; addl $0,0(%%esp)", "lfence", X86_FEATURE_XMM2) -#define wmb() alternative("lock; addl $0,0(%%esp)", "sfence", X86_FEATURE_XMM) - -/** - * read_barrier_depends - Flush all pending reads that subsequents reads - * depend on. - * - * No data-dependent reads from memory-like regions are ever reordered - * over this barrier. All reads preceding this primitive are guaranteed - * to access memory (but not necessarily other CPUs' caches) before any - * reads following this primitive that depend on the data return by - * any of the preceding reads. This primitive is much lighter weight than - * rmb() on most CPUs, and is never heavier weight than is - * rmb(). - * - * These ordering constraints are respected by both the local CPU - * and the compiler. - * - * Ordering is not guaranteed by anything other than these primitives, - * not even by data dependencies. See the documentation for - * memory_barrier() for examples and URLs to more information. - * - * For example, the following code would force ordering (the initial - * value of "a" is zero, "b" is one, and "p" is "&a"): - * - * <programlisting> - * CPU 0 CPU 1 - * - * b = 2; - * memory_barrier(); - * p = &b; q = p; - * read_barrier_depends(); - * d = *q; - * </programlisting> - * - * because the read of "*q" depends on the read of "p" and these - * two reads are separated by a read_barrier_depends(). However, - * the following code, with the same initial values for "a" and "b": - * - * <programlisting> - * CPU 0 CPU 1 - * - * a = 2; - * memory_barrier(); - * b = 3; y = b; - * read_barrier_depends(); - * x = a; - * </programlisting> - * - * does not enforce ordering, since there is no data dependency between - * the read of "a" and the read of "b". Therefore, on some CPUs, such - * as Alpha, "y" could be set to 3 and "x" to 0. Use rmb() - * in cases like this where there are no data dependencies. - **/ - -#define read_barrier_depends() do { } while(0) - -#ifdef CONFIG_SMP -#define smp_mb() mb() -#ifdef CONFIG_X86_PPRO_FENCE -# define smp_rmb() rmb() -#else -# define smp_rmb() barrier() -#endif -#ifdef CONFIG_X86_OOSTORE -# define smp_wmb() wmb() -#else -# define smp_wmb() barrier() -#endif -#define smp_read_barrier_depends() read_barrier_depends() -#define set_mb(var, value) do { (void) xchg(&var, value); } while (0) -#else -#define smp_mb() barrier() -#define smp_rmb() barrier() -#define smp_wmb() barrier() -#define smp_read_barrier_depends() do { } while(0) -#define set_mb(var, value) do { var = value; barrier(); } while (0) -#endif - -#include <linux/irqflags.h> - -/* - * disable hlt during certain critical i/o operations - */ -#define HAVE_DISABLE_HLT -void disable_hlt(void); -void enable_hlt(void); - -extern int es7000_plat; -void cpu_idle_wait(void); - -extern unsigned long arch_align_stack(unsigned long sp); -extern void free_init_pages(char *what, unsigned long begin, unsigned long end); - -void default_idle(void); -void __show_registers(struct pt_regs *, int all); - -#endif diff --git a/include/asm-x86/system_64.h b/include/asm-x86/system_64.h index 6e9e4841a2da..97fa251ccb2b 100644 --- a/include/asm-x86/system_64.h +++ b/include/asm-x86/system_64.h @@ -1,126 +1,9 @@ #ifndef __ASM_SYSTEM_H #define __ASM_SYSTEM_H -#include <linux/kernel.h> #include <asm/segment.h> #include <asm/cmpxchg.h> -#ifdef __KERNEL__ - -/* entries in ARCH_DLINFO: */ -#ifdef CONFIG_IA32_EMULATION -# define AT_VECTOR_SIZE_ARCH 2 -#else -# define AT_VECTOR_SIZE_ARCH 1 -#endif - -#define __SAVE(reg,offset) "movq %%" #reg ",(14-" #offset ")*8(%%rsp)\n\t" -#define __RESTORE(reg,offset) "movq (14-" #offset ")*8(%%rsp),%%" #reg "\n\t" - -/* frame pointer must be last for get_wchan */ -#define SAVE_CONTEXT "pushf ; pushq %%rbp ; movq %%rsi,%%rbp\n\t" -#define RESTORE_CONTEXT "movq %%rbp,%%rsi ; popq %%rbp ; popf\t" - -#define __EXTRA_CLOBBER \ - ,"rcx","rbx","rdx","r8","r9","r10","r11","r12","r13","r14","r15" - -/* Save restore flags to clear handle leaking NT */ -#define switch_to(prev,next,last) \ - asm volatile(SAVE_CONTEXT \ - "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \ - "movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */ \ - "call __switch_to\n\t" \ - ".globl thread_return\n" \ - "thread_return:\n\t" \ - "movq %%gs:%P[pda_pcurrent],%%rsi\n\t" \ - "movq %P[thread_info](%%rsi),%%r8\n\t" \ - LOCK_PREFIX "btr %[tif_fork],%P[ti_flags](%%r8)\n\t" \ - "movq %%rax,%%rdi\n\t" \ - "jc ret_from_fork\n\t" \ - RESTORE_CONTEXT \ - : "=a" (last) \ - : [next] "S" (next), [prev] "D" (prev), \ - [threadrsp] "i" (offsetof(struct task_struct, thread.rsp)), \ - [ti_flags] "i" (offsetof(struct thread_info, flags)),\ - [tif_fork] "i" (TIF_FORK), \ - [thread_info] "i" (offsetof(struct task_struct, stack)), \ - [pda_pcurrent] "i" (offsetof(struct x8664_pda, pcurrent)) \ - : "memory", "cc" __EXTRA_CLOBBER) - -extern void load_gs_index(unsigned); - -/* - * Load a segment. Fall back on loading the zero - * segment if something goes wrong.. - */ -#define loadsegment(seg,value) \ - asm volatile("\n" \ - "1:\t" \ - "movl %k0,%%" #seg "\n" \ - "2:\n" \ - ".section .fixup,\"ax\"\n" \ - "3:\t" \ - "movl %1,%%" #seg "\n\t" \ - "jmp 2b\n" \ - ".previous\n" \ - ".section __ex_table,\"a\"\n\t" \ - ".align 8\n\t" \ - ".quad 1b,3b\n" \ - ".previous" \ - : :"r" (value), "r" (0)) - -/* - * Clear and set 'TS' bit respectively - */ -#define clts() __asm__ __volatile__ ("clts") - -static inline unsigned long read_cr0(void) -{ - unsigned long cr0; - asm volatile("movq %%cr0,%0" : "=r" (cr0)); - return cr0; -} - -static inline void write_cr0(unsigned long val) -{ - asm volatile("movq %0,%%cr0" :: "r" (val)); -} - -static inline unsigned long read_cr2(void) -{ - unsigned long cr2; - asm volatile("movq %%cr2,%0" : "=r" (cr2)); - return cr2; -} - -static inline void write_cr2(unsigned long val) -{ - asm volatile("movq %0,%%cr2" :: "r" (val)); -} - -static inline unsigned long read_cr3(void) -{ - unsigned long cr3; - asm volatile("movq %%cr3,%0" : "=r" (cr3)); - return cr3; -} - -static inline void write_cr3(unsigned long val) -{ - asm volatile("movq %0,%%cr3" :: "r" (val) : "memory"); -} - -static inline unsigned long read_cr4(void) -{ - unsigned long cr4; - asm volatile("movq %%cr4,%0" : "=r" (cr4)); - return cr4; -} - -static inline void write_cr4(unsigned long val) -{ - asm volatile("movq %0,%%cr4" :: "r" (val) : "memory"); -} static inline unsigned long read_cr8(void) { @@ -134,52 +17,6 @@ static inline void write_cr8(unsigned long val) asm volatile("movq %0,%%cr8" :: "r" (val) : "memory"); } -#define stts() write_cr0(8 | read_cr0()) - -#define wbinvd() \ - __asm__ __volatile__ ("wbinvd": : :"memory") - -#endif /* __KERNEL__ */ - -static inline void clflush(volatile void *__p) -{ - asm volatile("clflush %0" : "+m" (*(char __force *)__p)); -} - -#define nop() __asm__ __volatile__ ("nop") - -#ifdef CONFIG_SMP -#define smp_mb() mb() -#define smp_rmb() barrier() -#define smp_wmb() barrier() -#define smp_read_barrier_depends() do {} while(0) -#else -#define smp_mb() barrier() -#define smp_rmb() barrier() -#define smp_wmb() barrier() -#define smp_read_barrier_depends() do {} while(0) -#endif - - -/* - * Force strict CPU ordering. - * And yes, this is required on UP too when we're talking - * to devices. - */ -#define mb() asm volatile("mfence":::"memory") -#define rmb() asm volatile("lfence":::"memory") -#define wmb() asm volatile("sfence" ::: "memory") - -#define read_barrier_depends() do {} while(0) -#define set_mb(var, value) do { (void) xchg(&var, value); } while (0) - -#define warn_if_not_ulong(x) do { unsigned long foo; (void) (&(x) == &foo); } while (0) - #include <linux/irqflags.h> -void cpu_idle_wait(void); - -extern unsigned long arch_align_stack(unsigned long sp); -extern void free_init_pages(char *what, unsigned long begin, unsigned long end); - #endif diff --git a/include/asm-x86/thread_info_32.h b/include/asm-x86/thread_info_32.h index a516e9192f11..5bd508260ffb 100644 --- a/include/asm-x86/thread_info_32.h +++ b/include/asm-x86/thread_info_32.h @@ -138,6 +138,10 @@ static inline struct thread_info *current_thread_info(void) #define TIF_IO_BITMAP 18 /* uses I/O bitmap */ #define TIF_FREEZE 19 /* is freezing for suspend */ #define TIF_NOTSC 20 /* TSC is not accessible in userland */ +#define TIF_FORCED_TF 21 /* true if TF in eflags artificially */ +#define TIF_DEBUGCTLMSR 22 /* uses thread_struct.debugctlmsr */ +#define TIF_DS_AREA_MSR 23 /* uses thread_struct.ds_area_msr */ +#define TIF_BTS_TRACE_TS 24 /* record scheduling event timestamps */ #define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE) #define _TIF_SIGPENDING (1<<TIF_SIGPENDING) @@ -153,6 +157,10 @@ static inline struct thread_info *current_thread_info(void) #define _TIF_IO_BITMAP (1<<TIF_IO_BITMAP) #define _TIF_FREEZE (1<<TIF_FREEZE) #define _TIF_NOTSC (1<<TIF_NOTSC) +#define _TIF_FORCED_TF (1<<TIF_FORCED_TF) +#define _TIF_DEBUGCTLMSR (1<<TIF_DEBUGCTLMSR) +#define _TIF_DS_AREA_MSR (1<<TIF_DS_AREA_MSR) +#define _TIF_BTS_TRACE_TS (1<<TIF_BTS_TRACE_TS) /* work to do on interrupt/exception return */ #define _TIF_WORK_MASK \ @@ -162,8 +170,12 @@ static inline struct thread_info *current_thread_info(void) #define _TIF_ALLWORK_MASK (0x0000FFFF & ~_TIF_SECCOMP) /* flags to check in __switch_to() */ -#define _TIF_WORK_CTXSW_NEXT (_TIF_IO_BITMAP | _TIF_NOTSC | _TIF_DEBUG) -#define _TIF_WORK_CTXSW_PREV (_TIF_IO_BITMAP | _TIF_NOTSC) +#define _TIF_WORK_CTXSW \ + (_TIF_IO_BITMAP | _TIF_NOTSC | _TIF_DEBUGCTLMSR | \ + _TIF_DS_AREA_MSR | _TIF_BTS_TRACE_TS) +#define _TIF_WORK_CTXSW_PREV _TIF_WORK_CTXSW +#define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW | _TIF_DEBUG) + /* * Thread-synchronous status. diff --git a/include/asm-x86/thread_info_64.h b/include/asm-x86/thread_info_64.h index 7f6ee68f0002..9b531ea015a8 100644 --- a/include/asm-x86/thread_info_64.h +++ b/include/asm-x86/thread_info_64.h @@ -21,7 +21,7 @@ #ifndef __ASSEMBLY__ struct task_struct; struct exec_domain; -#include <asm/mmsegment.h> +#include <asm/processor.h> struct thread_info { struct task_struct *task; /* main task structure */ @@ -33,6 +33,9 @@ struct thread_info { mm_segment_t addr_limit; struct restart_block restart_block; +#ifdef CONFIG_IA32_EMULATION + void __user *sysenter_return; +#endif }; #endif @@ -74,20 +77,14 @@ static inline struct thread_info *stack_thread_info(void) /* thread information allocation */ #ifdef CONFIG_DEBUG_STACK_USAGE -#define alloc_thread_info(tsk) \ - ({ \ - struct thread_info *ret; \ - \ - ret = ((struct thread_info *) __get_free_pages(GFP_KERNEL,THREAD_ORDER)); \ - if (ret) \ - memset(ret, 0, THREAD_SIZE); \ - ret; \ - }) +#define THREAD_FLAGS (GFP_KERNEL | __GFP_ZERO) #else -#define alloc_thread_info(tsk) \ - ((struct thread_info *) __get_free_pages(GFP_KERNEL,THREAD_ORDER)) +#define THREAD_FLAGS GFP_KERNEL #endif +#define alloc_thread_info(tsk) \ + ((struct thread_info *) __get_free_pages(THREAD_FLAGS, THREAD_ORDER)) + #define free_thread_info(ti) free_pages((unsigned long) (ti), THREAD_ORDER) #else /* !__ASSEMBLY__ */ @@ -124,6 +121,10 @@ static inline struct thread_info *stack_thread_info(void) #define TIF_DEBUG 21 /* uses debug registers */ #define TIF_IO_BITMAP 22 /* uses I/O bitmap */ #define TIF_FREEZE 23 /* is freezing for suspend */ +#define TIF_FORCED_TF 24 /* true if TF in eflags artificially */ +#define TIF_DEBUGCTLMSR 25 /* uses thread_struct.debugctlmsr */ +#define TIF_DS_AREA_MSR 25 /* uses thread_struct.ds_area_msr */ +#define TIF_BTS_TRACE_TS 26 /* record scheduling event timestamps */ #define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE) #define _TIF_SIGPENDING (1<<TIF_SIGPENDING) @@ -141,6 +142,10 @@ static inline struct thread_info *stack_thread_info(void) #define _TIF_DEBUG (1<<TIF_DEBUG) #define _TIF_IO_BITMAP (1<<TIF_IO_BITMAP) #define _TIF_FREEZE (1<<TIF_FREEZE) +#define _TIF_FORCED_TF (1<<TIF_FORCED_TF) +#define _TIF_DEBUGCTLMSR (1<<TIF_DEBUGCTLMSR) +#define _TIF_DS_AREA_MSR (1<<TIF_DS_AREA_MSR) +#define _TIF_BTS_TRACE_TS (1<<TIF_BTS_TRACE_TS) /* work to do on interrupt/exception return */ #define _TIF_WORK_MASK \ @@ -152,7 +157,10 @@ static inline struct thread_info *stack_thread_info(void) (_TIF_SIGPENDING|_TIF_SINGLESTEP|_TIF_MCE_NOTIFY|_TIF_HRTICK_RESCHED) /* flags to check in __switch_to() */ -#define _TIF_WORK_CTXSW (_TIF_DEBUG|_TIF_IO_BITMAP) +#define _TIF_WORK_CTXSW \ + (_TIF_IO_BITMAP|_TIF_DEBUGCTLMSR|_TIF_DS_AREA_MSR|_TIF_BTS_TRACE_TS) +#define _TIF_WORK_CTXSW_PREV _TIF_WORK_CTXSW +#define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW|_TIF_DEBUG) #define PREEMPT_ACTIVE 0x10000000 diff --git a/include/asm-x86/time.h b/include/asm-x86/time.h index eac011366dc2..68779b048a3e 100644 --- a/include/asm-x86/time.h +++ b/include/asm-x86/time.h @@ -1,8 +1,12 @@ -#ifndef _ASMi386_TIME_H -#define _ASMi386_TIME_H +#ifndef _ASMX86_TIME_H +#define _ASMX86_TIME_H +extern void (*late_time_init)(void); +extern void hpet_time_init(void); + +#include <asm/mc146818rtc.h> +#ifdef CONFIG_X86_32 #include <linux/efi.h> -#include "mach_time.h" static inline unsigned long native_get_wallclock(void) { @@ -28,8 +32,20 @@ static inline int native_set_wallclock(unsigned long nowtime) return retval; } -extern void (*late_time_init)(void); -extern void hpet_time_init(void); +#else +extern void native_time_init_hook(void); + +static inline unsigned long native_get_wallclock(void) +{ + return mach_get_cmos_time(); +} + +static inline int native_set_wallclock(unsigned long nowtime) +{ + return mach_set_rtc_mmss(nowtime); +} + +#endif #ifdef CONFIG_PARAVIRT #include <asm/paravirt.h> diff --git a/include/asm-x86/timer.h b/include/asm-x86/timer.h index 0db7e994fb8b..4f6fcb050c11 100644 --- a/include/asm-x86/timer.h +++ b/include/asm-x86/timer.h @@ -2,6 +2,7 @@ #define _ASMi386_TIMER_H #include <linux/init.h> #include <linux/pm.h> +#include <linux/percpu.h> #define TICK_SIZE (tick_nsec / 1000) @@ -16,7 +17,7 @@ extern int recalibrate_cpu_khz(void); #define calculate_cpu_khz() native_calculate_cpu_khz() #endif -/* Accellerators for sched_clock() +/* Accelerators for sched_clock() * convert from cycles(64bits) => nanoseconds (64bits) * basic equation: * ns = cycles / (freq / ns_per_sec) @@ -31,20 +32,32 @@ extern int recalibrate_cpu_khz(void); * And since SC is a constant power of two, we can convert the div * into a shift. * - * We can use khz divisor instead of mhz to keep a better percision, since + * We can use khz divisor instead of mhz to keep a better precision, since * cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits. * (mathieu.desnoyers@polymtl.ca) * * -johnstul@us.ibm.com "math is hard, lets go shopping!" */ -extern unsigned long cyc2ns_scale __read_mostly; + +DECLARE_PER_CPU(unsigned long, cyc2ns); #define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */ -static inline unsigned long long cycles_2_ns(unsigned long long cyc) +static inline unsigned long long __cycles_2_ns(unsigned long long cyc) { - return (cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR; + return cyc * per_cpu(cyc2ns, smp_processor_id()) >> CYC2NS_SCALE_FACTOR; } +static inline unsigned long long cycles_2_ns(unsigned long long cyc) +{ + unsigned long long ns; + unsigned long flags; + + local_irq_save(flags); + ns = __cycles_2_ns(cyc); + local_irq_restore(flags); + + return ns; +} #endif diff --git a/include/asm-x86/timex.h b/include/asm-x86/timex.h index 39a21ab030f0..27cfd6c599ba 100644 --- a/include/asm-x86/timex.h +++ b/include/asm-x86/timex.h @@ -7,6 +7,8 @@ #ifdef CONFIG_X86_ELAN # define PIT_TICK_RATE 1189200 /* AMD Elan has different frequency! */ +#elif defined(CONFIG_X86_RDC321X) +# define PIT_TICK_RATE 1041667 /* Underlying HZ for R8610 */ #else # define PIT_TICK_RATE 1193182 /* Underlying HZ */ #endif diff --git a/include/asm-x86/tlbflush.h b/include/asm-x86/tlbflush.h index 9af4cc83a1af..3998709ed637 100644 --- a/include/asm-x86/tlbflush.h +++ b/include/asm-x86/tlbflush.h @@ -1,5 +1,158 @@ +#ifndef _ASM_X86_TLBFLUSH_H +#define _ASM_X86_TLBFLUSH_H + +#include <linux/mm.h> +#include <linux/sched.h> + +#include <asm/processor.h> +#include <asm/system.h> + +#ifdef CONFIG_PARAVIRT +#include <asm/paravirt.h> +#else +#define __flush_tlb() __native_flush_tlb() +#define __flush_tlb_global() __native_flush_tlb_global() +#define __flush_tlb_single(addr) __native_flush_tlb_single(addr) +#endif + +static inline void __native_flush_tlb(void) +{ + write_cr3(read_cr3()); +} + +static inline void __native_flush_tlb_global(void) +{ + unsigned long cr4 = read_cr4(); + + /* clear PGE */ + write_cr4(cr4 & ~X86_CR4_PGE); + /* write old PGE again and flush TLBs */ + write_cr4(cr4); +} + +static inline void __native_flush_tlb_single(unsigned long addr) +{ + __asm__ __volatile__("invlpg (%0)" ::"r" (addr) : "memory"); +} + +static inline void __flush_tlb_all(void) +{ + if (cpu_has_pge) + __flush_tlb_global(); + else + __flush_tlb(); +} + +static inline void __flush_tlb_one(unsigned long addr) +{ + if (cpu_has_invlpg) + __flush_tlb_single(addr); + else + __flush_tlb(); +} + #ifdef CONFIG_X86_32 -# include "tlbflush_32.h" +# define TLB_FLUSH_ALL 0xffffffff #else -# include "tlbflush_64.h" +# define TLB_FLUSH_ALL -1ULL +#endif + +/* + * TLB flushing: + * + * - flush_tlb() flushes the current mm struct TLBs + * - flush_tlb_all() flushes all processes TLBs + * - flush_tlb_mm(mm) flushes the specified mm context TLB's + * - flush_tlb_page(vma, vmaddr) flushes one page + * - flush_tlb_range(vma, start, end) flushes a range of pages + * - flush_tlb_kernel_range(start, end) flushes a range of kernel pages + * - flush_tlb_others(cpumask, mm, va) flushes TLBs on other cpus + * + * ..but the i386 has somewhat limited tlb flushing capabilities, + * and page-granular flushes are available only on i486 and up. + * + * x86-64 can only flush individual pages or full VMs. For a range flush + * we always do the full VM. Might be worth trying if for a small + * range a few INVLPGs in a row are a win. + */ + +#ifndef CONFIG_SMP + +#define flush_tlb() __flush_tlb() +#define flush_tlb_all() __flush_tlb_all() +#define local_flush_tlb() __flush_tlb() + +static inline void flush_tlb_mm(struct mm_struct *mm) +{ + if (mm == current->active_mm) + __flush_tlb(); +} + +static inline void flush_tlb_page(struct vm_area_struct *vma, + unsigned long addr) +{ + if (vma->vm_mm == current->active_mm) + __flush_tlb_one(addr); +} + +static inline void flush_tlb_range(struct vm_area_struct *vma, + unsigned long start, unsigned long end) +{ + if (vma->vm_mm == current->active_mm) + __flush_tlb(); +} + +static inline void native_flush_tlb_others(const cpumask_t *cpumask, + struct mm_struct *mm, + unsigned long va) +{ +} + +#else /* SMP */ + +#include <asm/smp.h> + +#define local_flush_tlb() __flush_tlb() + +extern void flush_tlb_all(void); +extern void flush_tlb_current_task(void); +extern void flush_tlb_mm(struct mm_struct *); +extern void flush_tlb_page(struct vm_area_struct *, unsigned long); + +#define flush_tlb() flush_tlb_current_task() + +static inline void flush_tlb_range(struct vm_area_struct *vma, + unsigned long start, unsigned long end) +{ + flush_tlb_mm(vma->vm_mm); +} + +void native_flush_tlb_others(const cpumask_t *cpumask, struct mm_struct *mm, + unsigned long va); + +#define TLBSTATE_OK 1 +#define TLBSTATE_LAZY 2 + +#ifdef CONFIG_X86_32 +struct tlb_state +{ + struct mm_struct *active_mm; + int state; + char __cacheline_padding[L1_CACHE_BYTES-8]; +}; +DECLARE_PER_CPU(struct tlb_state, cpu_tlbstate); +#endif + +#endif /* SMP */ + +#ifndef CONFIG_PARAVIRT +#define flush_tlb_others(mask, mm, va) native_flush_tlb_others(&mask, mm, va) #endif + +static inline void flush_tlb_kernel_range(unsigned long start, + unsigned long end) +{ + flush_tlb_all(); +} + +#endif /* _ASM_X86_TLBFLUSH_H */ diff --git a/include/asm-x86/tlbflush_32.h b/include/asm-x86/tlbflush_32.h deleted file mode 100644 index 2bd5b95e2048..000000000000 --- a/include/asm-x86/tlbflush_32.h +++ /dev/null @@ -1,168 +0,0 @@ -#ifndef _I386_TLBFLUSH_H -#define _I386_TLBFLUSH_H - -#include <linux/mm.h> -#include <asm/processor.h> - -#ifdef CONFIG_PARAVIRT -#include <asm/paravirt.h> -#else -#define __flush_tlb() __native_flush_tlb() -#define __flush_tlb_global() __native_flush_tlb_global() -#define __flush_tlb_single(addr) __native_flush_tlb_single(addr) -#endif - -#define __native_flush_tlb() \ - do { \ - unsigned int tmpreg; \ - \ - __asm__ __volatile__( \ - "movl %%cr3, %0; \n" \ - "movl %0, %%cr3; # flush TLB \n" \ - : "=r" (tmpreg) \ - :: "memory"); \ - } while (0) - -/* - * Global pages have to be flushed a bit differently. Not a real - * performance problem because this does not happen often. - */ -#define __native_flush_tlb_global() \ - do { \ - unsigned int tmpreg, cr4, cr4_orig; \ - \ - __asm__ __volatile__( \ - "movl %%cr4, %2; # turn off PGE \n" \ - "movl %2, %1; \n" \ - "andl %3, %1; \n" \ - "movl %1, %%cr4; \n" \ - "movl %%cr3, %0; \n" \ - "movl %0, %%cr3; # flush TLB \n" \ - "movl %2, %%cr4; # turn PGE back on \n" \ - : "=&r" (tmpreg), "=&r" (cr4), "=&r" (cr4_orig) \ - : "i" (~X86_CR4_PGE) \ - : "memory"); \ - } while (0) - -#define __native_flush_tlb_single(addr) \ - __asm__ __volatile__("invlpg (%0)" ::"r" (addr) : "memory") - -# define __flush_tlb_all() \ - do { \ - if (cpu_has_pge) \ - __flush_tlb_global(); \ - else \ - __flush_tlb(); \ - } while (0) - -#define cpu_has_invlpg (boot_cpu_data.x86 > 3) - -#ifdef CONFIG_X86_INVLPG -# define __flush_tlb_one(addr) __flush_tlb_single(addr) -#else -# define __flush_tlb_one(addr) \ - do { \ - if (cpu_has_invlpg) \ - __flush_tlb_single(addr); \ - else \ - __flush_tlb(); \ - } while (0) -#endif - -/* - * TLB flushing: - * - * - flush_tlb() flushes the current mm struct TLBs - * - flush_tlb_all() flushes all processes TLBs - * - flush_tlb_mm(mm) flushes the specified mm context TLB's - * - flush_tlb_page(vma, vmaddr) flushes one page - * - flush_tlb_range(vma, start, end) flushes a range of pages - * - flush_tlb_kernel_range(start, end) flushes a range of kernel pages - * - flush_tlb_others(cpumask, mm, va) flushes a TLBs on other cpus - * - * ..but the i386 has somewhat limited tlb flushing capabilities, - * and page-granular flushes are available only on i486 and up. - */ - -#define TLB_FLUSH_ALL 0xffffffff - - -#ifndef CONFIG_SMP - -#include <linux/sched.h> - -#define flush_tlb() __flush_tlb() -#define flush_tlb_all() __flush_tlb_all() -#define local_flush_tlb() __flush_tlb() - -static inline void flush_tlb_mm(struct mm_struct *mm) -{ - if (mm == current->active_mm) - __flush_tlb(); -} - -static inline void flush_tlb_page(struct vm_area_struct *vma, - unsigned long addr) -{ - if (vma->vm_mm == current->active_mm) - __flush_tlb_one(addr); -} - -static inline void flush_tlb_range(struct vm_area_struct *vma, - unsigned long start, unsigned long end) -{ - if (vma->vm_mm == current->active_mm) - __flush_tlb(); -} - -static inline void native_flush_tlb_others(const cpumask_t *cpumask, - struct mm_struct *mm, unsigned long va) -{ -} - -#else /* SMP */ - -#include <asm/smp.h> - -#define local_flush_tlb() \ - __flush_tlb() - -extern void flush_tlb_all(void); -extern void flush_tlb_current_task(void); -extern void flush_tlb_mm(struct mm_struct *); -extern void flush_tlb_page(struct vm_area_struct *, unsigned long); - -#define flush_tlb() flush_tlb_current_task() - -static inline void flush_tlb_range(struct vm_area_struct * vma, unsigned long start, unsigned long end) -{ - flush_tlb_mm(vma->vm_mm); -} - -void native_flush_tlb_others(const cpumask_t *cpumask, struct mm_struct *mm, - unsigned long va); - -#define TLBSTATE_OK 1 -#define TLBSTATE_LAZY 2 - -struct tlb_state -{ - struct mm_struct *active_mm; - int state; - char __cacheline_padding[L1_CACHE_BYTES-8]; -}; -DECLARE_PER_CPU(struct tlb_state, cpu_tlbstate); -#endif /* SMP */ - -#ifndef CONFIG_PARAVIRT -#define flush_tlb_others(mask, mm, va) \ - native_flush_tlb_others(&mask, mm, va) -#endif - -static inline void flush_tlb_kernel_range(unsigned long start, - unsigned long end) -{ - flush_tlb_all(); -} - -#endif /* _I386_TLBFLUSH_H */ diff --git a/include/asm-x86/tlbflush_64.h b/include/asm-x86/tlbflush_64.h deleted file mode 100644 index 7731fd23d572..000000000000 --- a/include/asm-x86/tlbflush_64.h +++ /dev/null @@ -1,100 +0,0 @@ -#ifndef _X8664_TLBFLUSH_H -#define _X8664_TLBFLUSH_H - -#include <linux/mm.h> -#include <linux/sched.h> -#include <asm/processor.h> -#include <asm/system.h> - -static inline void __flush_tlb(void) -{ - write_cr3(read_cr3()); -} - -static inline void __flush_tlb_all(void) -{ - unsigned long cr4 = read_cr4(); - write_cr4(cr4 & ~X86_CR4_PGE); /* clear PGE */ - write_cr4(cr4); /* write old PGE again and flush TLBs */ -} - -#define __flush_tlb_one(addr) \ - __asm__ __volatile__("invlpg (%0)" :: "r" (addr) : "memory") - - -/* - * TLB flushing: - * - * - flush_tlb() flushes the current mm struct TLBs - * - flush_tlb_all() flushes all processes TLBs - * - flush_tlb_mm(mm) flushes the specified mm context TLB's - * - flush_tlb_page(vma, vmaddr) flushes one page - * - flush_tlb_range(vma, start, end) flushes a range of pages - * - flush_tlb_kernel_range(start, end) flushes a range of kernel pages - * - * x86-64 can only flush individual pages or full VMs. For a range flush - * we always do the full VM. Might be worth trying if for a small - * range a few INVLPGs in a row are a win. - */ - -#ifndef CONFIG_SMP - -#define flush_tlb() __flush_tlb() -#define flush_tlb_all() __flush_tlb_all() -#define local_flush_tlb() __flush_tlb() - -static inline void flush_tlb_mm(struct mm_struct *mm) -{ - if (mm == current->active_mm) - __flush_tlb(); -} - -static inline void flush_tlb_page(struct vm_area_struct *vma, - unsigned long addr) -{ - if (vma->vm_mm == current->active_mm) - __flush_tlb_one(addr); -} - -static inline void flush_tlb_range(struct vm_area_struct *vma, - unsigned long start, unsigned long end) -{ - if (vma->vm_mm == current->active_mm) - __flush_tlb(); -} - -#else - -#include <asm/smp.h> - -#define local_flush_tlb() \ - __flush_tlb() - -extern void flush_tlb_all(void); -extern void flush_tlb_current_task(void); -extern void flush_tlb_mm(struct mm_struct *); -extern void flush_tlb_page(struct vm_area_struct *, unsigned long); - -#define flush_tlb() flush_tlb_current_task() - -static inline void flush_tlb_range(struct vm_area_struct * vma, unsigned long start, unsigned long end) -{ - flush_tlb_mm(vma->vm_mm); -} - -#define TLBSTATE_OK 1 -#define TLBSTATE_LAZY 2 - -/* Roughly an IPI every 20MB with 4k pages for freeing page table - ranges. Cost is about 42k of memory for each CPU. */ -#define ARCH_FREE_PTE_NR 5350 - -#endif - -static inline void flush_tlb_kernel_range(unsigned long start, - unsigned long end) -{ - flush_tlb_all(); -} - -#endif /* _X8664_TLBFLUSH_H */ diff --git a/include/asm-x86/topology.h b/include/asm-x86/topology.h index b10fde9798ea..8af05a93f097 100644 --- a/include/asm-x86/topology.h +++ b/include/asm-x86/topology.h @@ -1,5 +1,188 @@ +/* + * Written by: Matthew Dobson, IBM Corporation + * + * Copyright (C) 2002, IBM Corp. + * + * All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + * Send feedback to <colpatch@us.ibm.com> + */ +#ifndef _ASM_X86_TOPOLOGY_H +#define _ASM_X86_TOPOLOGY_H + +#ifdef CONFIG_NUMA +#include <linux/cpumask.h> +#include <asm/mpspec.h> + +/* Mappings between logical cpu number and node number */ #ifdef CONFIG_X86_32 -# include "topology_32.h" +extern int cpu_to_node_map[]; + #else -# include "topology_64.h" +DECLARE_PER_CPU(int, x86_cpu_to_node_map); +extern int x86_cpu_to_node_map_init[]; +extern void *x86_cpu_to_node_map_early_ptr; +/* Returns the number of the current Node. */ +#define numa_node_id() (early_cpu_to_node(raw_smp_processor_id())) +#endif + +extern cpumask_t node_to_cpumask_map[]; + +#define NUMA_NO_NODE (-1) + +/* Returns the number of the node containing CPU 'cpu' */ +#ifdef CONFIG_X86_32 +#define early_cpu_to_node(cpu) cpu_to_node(cpu) +static inline int cpu_to_node(int cpu) +{ + return cpu_to_node_map[cpu]; +} + +#else /* CONFIG_X86_64 */ +static inline int early_cpu_to_node(int cpu) +{ + int *cpu_to_node_map = x86_cpu_to_node_map_early_ptr; + + if (cpu_to_node_map) + return cpu_to_node_map[cpu]; + else if (per_cpu_offset(cpu)) + return per_cpu(x86_cpu_to_node_map, cpu); + else + return NUMA_NO_NODE; +} + +static inline int cpu_to_node(int cpu) +{ +#ifdef CONFIG_DEBUG_PER_CPU_MAPS + if (x86_cpu_to_node_map_early_ptr) { + printk("KERN_NOTICE cpu_to_node(%d): usage too early!\n", + (int)cpu); + dump_stack(); + return ((int *)x86_cpu_to_node_map_early_ptr)[cpu]; + } +#endif + if (per_cpu_offset(cpu)) + return per_cpu(x86_cpu_to_node_map, cpu); + else + return NUMA_NO_NODE; +} +#endif /* CONFIG_X86_64 */ + +/* + * Returns the number of the node containing Node 'node'. This + * architecture is flat, so it is a pretty simple function! + */ +#define parent_node(node) (node) + +/* Returns a bitmask of CPUs on Node 'node'. */ +static inline cpumask_t node_to_cpumask(int node) +{ + return node_to_cpumask_map[node]; +} + +/* Returns the number of the first CPU on Node 'node'. */ +static inline int node_to_first_cpu(int node) +{ + cpumask_t mask = node_to_cpumask(node); + + return first_cpu(mask); +} + +#define pcibus_to_node(bus) __pcibus_to_node(bus) +#define pcibus_to_cpumask(bus) __pcibus_to_cpumask(bus) + +#ifdef CONFIG_X86_32 +extern unsigned long node_start_pfn[]; +extern unsigned long node_end_pfn[]; +extern unsigned long node_remap_size[]; +#define node_has_online_mem(nid) (node_start_pfn[nid] != node_end_pfn[nid]) + +# ifdef CONFIG_X86_HT +# define ENABLE_TOPO_DEFINES +# endif + +# define SD_CACHE_NICE_TRIES 1 +# define SD_IDLE_IDX 1 +# define SD_NEWIDLE_IDX 2 +# define SD_FORKEXEC_IDX 0 + +#else + +# ifdef CONFIG_SMP +# define ENABLE_TOPO_DEFINES +# endif + +# define SD_CACHE_NICE_TRIES 2 +# define SD_IDLE_IDX 2 +# define SD_NEWIDLE_IDX 0 +# define SD_FORKEXEC_IDX 1 + +#endif + +/* sched_domains SD_NODE_INIT for NUMAQ machines */ +#define SD_NODE_INIT (struct sched_domain) { \ + .span = CPU_MASK_NONE, \ + .parent = NULL, \ + .child = NULL, \ + .groups = NULL, \ + .min_interval = 8, \ + .max_interval = 32, \ + .busy_factor = 32, \ + .imbalance_pct = 125, \ + .cache_nice_tries = SD_CACHE_NICE_TRIES, \ + .busy_idx = 3, \ + .idle_idx = SD_IDLE_IDX, \ + .newidle_idx = SD_NEWIDLE_IDX, \ + .wake_idx = 1, \ + .forkexec_idx = SD_FORKEXEC_IDX, \ + .flags = SD_LOAD_BALANCE \ + | SD_BALANCE_EXEC \ + | SD_BALANCE_FORK \ + | SD_SERIALIZE \ + | SD_WAKE_BALANCE, \ + .last_balance = jiffies, \ + .balance_interval = 1, \ + .nr_balance_failed = 0, \ +} + +#ifdef CONFIG_X86_64_ACPI_NUMA +extern int __node_distance(int, int); +#define node_distance(a, b) __node_distance(a, b) +#endif + +#else /* CONFIG_NUMA */ + +#include <asm-generic/topology.h> + +#endif + +extern cpumask_t cpu_coregroup_map(int cpu); + +#ifdef ENABLE_TOPO_DEFINES +#define topology_physical_package_id(cpu) (cpu_data(cpu).phys_proc_id) +#define topology_core_id(cpu) (cpu_data(cpu).cpu_core_id) +#define topology_core_siblings(cpu) (per_cpu(cpu_core_map, cpu)) +#define topology_thread_siblings(cpu) (per_cpu(cpu_sibling_map, cpu)) +#endif + +#ifdef CONFIG_SMP +#define mc_capable() (boot_cpu_data.x86_max_cores > 1) +#define smt_capable() (smp_num_siblings > 1) +#endif + #endif diff --git a/include/asm-x86/topology_32.h b/include/asm-x86/topology_32.h deleted file mode 100644 index 9040f5a61278..000000000000 --- a/include/asm-x86/topology_32.h +++ /dev/null @@ -1,121 +0,0 @@ -/* - * linux/include/asm-i386/topology.h - * - * Written by: Matthew Dobson, IBM Corporation - * - * Copyright (C) 2002, IBM Corp. - * - * All rights reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or - * NON INFRINGEMENT. See the GNU General Public License for more - * details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - * Send feedback to <colpatch@us.ibm.com> - */ -#ifndef _ASM_I386_TOPOLOGY_H -#define _ASM_I386_TOPOLOGY_H - -#ifdef CONFIG_X86_HT -#define topology_physical_package_id(cpu) (cpu_data(cpu).phys_proc_id) -#define topology_core_id(cpu) (cpu_data(cpu).cpu_core_id) -#define topology_core_siblings(cpu) (per_cpu(cpu_core_map, cpu)) -#define topology_thread_siblings(cpu) (per_cpu(cpu_sibling_map, cpu)) -#endif - -#ifdef CONFIG_NUMA - -#include <asm/mpspec.h> - -#include <linux/cpumask.h> - -/* Mappings between logical cpu number and node number */ -extern cpumask_t node_2_cpu_mask[]; -extern int cpu_2_node[]; - -/* Returns the number of the node containing CPU 'cpu' */ -static inline int cpu_to_node(int cpu) -{ - return cpu_2_node[cpu]; -} - -/* Returns the number of the node containing Node 'node'. This architecture is flat, - so it is a pretty simple function! */ -#define parent_node(node) (node) - -/* Returns a bitmask of CPUs on Node 'node'. */ -static inline cpumask_t node_to_cpumask(int node) -{ - return node_2_cpu_mask[node]; -} - -/* Returns the number of the first CPU on Node 'node'. */ -static inline int node_to_first_cpu(int node) -{ - cpumask_t mask = node_to_cpumask(node); - return first_cpu(mask); -} - -#define pcibus_to_node(bus) ((struct pci_sysdata *)((bus)->sysdata))->node -#define pcibus_to_cpumask(bus) node_to_cpumask(pcibus_to_node(bus)) - -/* sched_domains SD_NODE_INIT for NUMAQ machines */ -#define SD_NODE_INIT (struct sched_domain) { \ - .span = CPU_MASK_NONE, \ - .parent = NULL, \ - .child = NULL, \ - .groups = NULL, \ - .min_interval = 8, \ - .max_interval = 32, \ - .busy_factor = 32, \ - .imbalance_pct = 125, \ - .cache_nice_tries = 1, \ - .busy_idx = 3, \ - .idle_idx = 1, \ - .newidle_idx = 2, \ - .wake_idx = 1, \ - .flags = SD_LOAD_BALANCE \ - | SD_BALANCE_EXEC \ - | SD_BALANCE_FORK \ - | SD_SERIALIZE \ - | SD_WAKE_BALANCE, \ - .last_balance = jiffies, \ - .balance_interval = 1, \ - .nr_balance_failed = 0, \ -} - -extern unsigned long node_start_pfn[]; -extern unsigned long node_end_pfn[]; -extern unsigned long node_remap_size[]; - -#define node_has_online_mem(nid) (node_start_pfn[nid] != node_end_pfn[nid]) - -#else /* !CONFIG_NUMA */ -/* - * Other i386 platforms should define their own version of the - * above macros here. - */ - -#include <asm-generic/topology.h> - -#endif /* CONFIG_NUMA */ - -extern cpumask_t cpu_coregroup_map(int cpu); - -#ifdef CONFIG_SMP -#define mc_capable() (boot_cpu_data.x86_max_cores > 1) -#define smt_capable() (smp_num_siblings > 1) -#endif - -#endif /* _ASM_I386_TOPOLOGY_H */ diff --git a/include/asm-x86/topology_64.h b/include/asm-x86/topology_64.h deleted file mode 100644 index a718dda037e0..000000000000 --- a/include/asm-x86/topology_64.h +++ /dev/null @@ -1,71 +0,0 @@ -#ifndef _ASM_X86_64_TOPOLOGY_H -#define _ASM_X86_64_TOPOLOGY_H - - -#ifdef CONFIG_NUMA - -#include <asm/mpspec.h> -#include <linux/bitops.h> - -extern cpumask_t cpu_online_map; - -extern unsigned char cpu_to_node[]; -extern cpumask_t node_to_cpumask[]; - -#ifdef CONFIG_ACPI_NUMA -extern int __node_distance(int, int); -#define node_distance(a,b) __node_distance(a,b) -/* #else fallback version */ -#endif - -#define cpu_to_node(cpu) (cpu_to_node[cpu]) -#define parent_node(node) (node) -#define node_to_first_cpu(node) (first_cpu(node_to_cpumask[node])) -#define node_to_cpumask(node) (node_to_cpumask[node]) -#define pcibus_to_node(bus) ((struct pci_sysdata *)((bus)->sysdata))->node -#define pcibus_to_cpumask(bus) node_to_cpumask(pcibus_to_node(bus)); - -#define numa_node_id() read_pda(nodenumber) - -/* sched_domains SD_NODE_INIT for x86_64 machines */ -#define SD_NODE_INIT (struct sched_domain) { \ - .span = CPU_MASK_NONE, \ - .parent = NULL, \ - .child = NULL, \ - .groups = NULL, \ - .min_interval = 8, \ - .max_interval = 32, \ - .busy_factor = 32, \ - .imbalance_pct = 125, \ - .cache_nice_tries = 2, \ - .busy_idx = 3, \ - .idle_idx = 2, \ - .newidle_idx = 0, \ - .wake_idx = 1, \ - .forkexec_idx = 1, \ - .flags = SD_LOAD_BALANCE \ - | SD_BALANCE_FORK \ - | SD_BALANCE_EXEC \ - | SD_SERIALIZE \ - | SD_WAKE_BALANCE, \ - .last_balance = jiffies, \ - .balance_interval = 1, \ - .nr_balance_failed = 0, \ -} - -#endif - -#ifdef CONFIG_SMP -#define topology_physical_package_id(cpu) (cpu_data(cpu).phys_proc_id) -#define topology_core_id(cpu) (cpu_data(cpu).cpu_core_id) -#define topology_core_siblings(cpu) (per_cpu(cpu_core_map, cpu)) -#define topology_thread_siblings(cpu) (per_cpu(cpu_sibling_map, cpu)) -#define mc_capable() (boot_cpu_data.x86_max_cores > 1) -#define smt_capable() (smp_num_siblings > 1) -#endif - -#include <asm-generic/topology.h> - -extern cpumask_t cpu_coregroup_map(int cpu); - -#endif diff --git a/include/asm-x86/tsc.h b/include/asm-x86/tsc.h index 6baab30dc2c8..7d3e27f7d484 100644 --- a/include/asm-x86/tsc.h +++ b/include/asm-x86/tsc.h @@ -17,6 +17,8 @@ typedef unsigned long long cycles_t; extern unsigned int cpu_khz; extern unsigned int tsc_khz; +extern void disable_TSC(void); + static inline cycles_t get_cycles(void) { unsigned long long ret = 0; @@ -25,39 +27,22 @@ static inline cycles_t get_cycles(void) if (!cpu_has_tsc) return 0; #endif - -#if defined(CONFIG_X86_GENERIC) || defined(CONFIG_X86_TSC) rdtscll(ret); -#endif + return ret; } -/* Like get_cycles, but make sure the CPU is synchronized. */ -static __always_inline cycles_t get_cycles_sync(void) +static inline cycles_t vget_cycles(void) { - unsigned long long ret; - unsigned eax, edx; - - /* - * Use RDTSCP if possible; it is guaranteed to be synchronous - * and doesn't cause a VMEXIT on Hypervisors - */ - alternative_io(ASM_NOP3, ".byte 0x0f,0x01,0xf9", X86_FEATURE_RDTSCP, - ASM_OUTPUT2("=a" (eax), "=d" (edx)), - "a" (0U), "d" (0U) : "ecx", "memory"); - ret = (((unsigned long long)edx) << 32) | ((unsigned long long)eax); - if (ret) - return ret; - /* - * Don't do an additional sync on CPUs where we know - * RDTSC is already synchronous: + * We only do VDSOs on TSC capable CPUs, so this shouldnt + * access boot_cpu_data (which is not VDSO-safe): */ - alternative_io("cpuid", ASM_NOP2, X86_FEATURE_SYNC_RDTSC, - "=a" (eax), "0" (1) : "ebx","ecx","edx","memory"); - rdtscll(ret); - - return ret; +#ifndef CONFIG_X86_TSC + if (!cpu_has_tsc) + return 0; +#endif + return (cycles_t) __native_read_tsc(); } extern void tsc_init(void); @@ -73,8 +58,7 @@ int check_tsc_unstable(void); extern void check_tsc_sync_source(int cpu); extern void check_tsc_sync_target(void); -#ifdef CONFIG_X86_64 extern void tsc_calibrate(void); -#endif +extern int notsc_setup(char *); #endif diff --git a/include/asm-x86/uaccess_64.h b/include/asm-x86/uaccess_64.h index f4ce8768ad44..31d794702719 100644 --- a/include/asm-x86/uaccess_64.h +++ b/include/asm-x86/uaccess_64.h @@ -65,6 +65,8 @@ struct exception_table_entry unsigned long insn, fixup; }; +extern int fixup_exception(struct pt_regs *regs); + #define ARCH_HAS_SEARCH_EXTABLE /* diff --git a/include/asm-x86/unistd_32.h b/include/asm-x86/unistd_32.h index 9b15545eb9b5..8d8f9b5adbb9 100644 --- a/include/asm-x86/unistd_32.h +++ b/include/asm-x86/unistd_32.h @@ -333,8 +333,6 @@ #ifdef __KERNEL__ -#define NR_syscalls 325 - #define __ARCH_WANT_IPC_PARSE_VERSION #define __ARCH_WANT_OLD_READDIR #define __ARCH_WANT_OLD_STAT diff --git a/include/asm-x86/user_32.h b/include/asm-x86/user_32.h index 0e85d2a5e33a..ed8b8fc6906c 100644 --- a/include/asm-x86/user_32.h +++ b/include/asm-x86/user_32.h @@ -75,13 +75,23 @@ struct user_fxsr_struct { * doesn't use the extra segment registers) */ struct user_regs_struct { - long ebx, ecx, edx, esi, edi, ebp, eax; - unsigned short ds, __ds, es, __es; - unsigned short fs, __fs, gs, __gs; - long orig_eax, eip; - unsigned short cs, __cs; - long eflags, esp; - unsigned short ss, __ss; + unsigned long bx; + unsigned long cx; + unsigned long dx; + unsigned long si; + unsigned long di; + unsigned long bp; + unsigned long ax; + unsigned long ds; + unsigned long es; + unsigned long fs; + unsigned long gs; + unsigned long orig_ax; + unsigned long ip; + unsigned long cs; + unsigned long flags; + unsigned long sp; + unsigned long ss; }; /* When the kernel dumps core, it starts by dumping the user struct - diff --git a/include/asm-x86/user_64.h b/include/asm-x86/user_64.h index 12785c649ac5..a5449d456cc0 100644 --- a/include/asm-x86/user_64.h +++ b/include/asm-x86/user_64.h @@ -40,13 +40,13 @@ * and both the standard and SIMD floating point data can be accessed via * the new ptrace requests. In either case, changes to the FPU environment * will be reflected in the task's state as expected. - * + * * x86-64 support by Andi Kleen. */ /* This matches the 64bit FXSAVE format as defined by AMD. It is the same as the 32bit format defined by Intel, except that the selector:offset pairs for - data and eip are replaced with flat 64bit pointers. */ + data and eip are replaced with flat 64bit pointers. */ struct user_i387_struct { unsigned short cwd; unsigned short swd; @@ -65,13 +65,34 @@ struct user_i387_struct { * Segment register layout in coredumps. */ struct user_regs_struct { - unsigned long r15,r14,r13,r12,rbp,rbx,r11,r10; - unsigned long r9,r8,rax,rcx,rdx,rsi,rdi,orig_rax; - unsigned long rip,cs,eflags; - unsigned long rsp,ss; - unsigned long fs_base, gs_base; - unsigned long ds,es,fs,gs; -}; + unsigned long r15; + unsigned long r14; + unsigned long r13; + unsigned long r12; + unsigned long bp; + unsigned long bx; + unsigned long r11; + unsigned long r10; + unsigned long r9; + unsigned long r8; + unsigned long ax; + unsigned long cx; + unsigned long dx; + unsigned long si; + unsigned long di; + unsigned long orig_ax; + unsigned long ip; + unsigned long cs; + unsigned long flags; + unsigned long sp; + unsigned long ss; + unsigned long fs_base; + unsigned long gs_base; + unsigned long ds; + unsigned long es; + unsigned long fs; + unsigned long gs; +}; /* When the kernel dumps core, it starts by dumping the user struct - this will be used by gdb to figure out where the data and stack segments @@ -94,7 +115,7 @@ struct user{ This is actually the bottom of the stack, the top of the stack is always found in the esp register. */ - long int signal; /* Signal that caused the core dump. */ + long int signal; /* Signal that caused the core dump. */ int reserved; /* No longer used */ int pad1; struct user_pt_regs * u_ar0; /* Used by gdb to help find the values for */ diff --git a/include/asm-x86/vdso.h b/include/asm-x86/vdso.h new file mode 100644 index 000000000000..629bcb6e8e45 --- /dev/null +++ b/include/asm-x86/vdso.h @@ -0,0 +1,28 @@ +#ifndef _ASM_X86_VDSO_H +#define _ASM_X86_VDSO_H 1 + +#ifdef CONFIG_X86_64 +extern const char VDSO64_PRELINK[]; + +/* + * Given a pointer to the vDSO image, find the pointer to VDSO64_name + * as that symbol is defined in the vDSO sources or linker script. + */ +#define VDSO64_SYMBOL(base, name) ({ \ + extern const char VDSO64_##name[]; \ + (void *) (VDSO64_##name - VDSO64_PRELINK + (unsigned long) (base)); }) +#endif + +#if defined CONFIG_X86_32 || defined CONFIG_COMPAT +extern const char VDSO32_PRELINK[]; + +/* + * Given a pointer to the vDSO image, find the pointer to VDSO32_name + * as that symbol is defined in the vDSO sources or linker script. + */ +#define VDSO32_SYMBOL(base, name) ({ \ + extern const char VDSO32_##name[]; \ + (void *) (VDSO32_##name - VDSO32_PRELINK + (unsigned long) (base)); }) +#endif + +#endif /* asm-x86/vdso.h */ diff --git a/include/asm-x86/vsyscall.h b/include/asm-x86/vsyscall.h index f01c49f5d108..17b3700949bf 100644 --- a/include/asm-x86/vsyscall.h +++ b/include/asm-x86/vsyscall.h @@ -36,6 +36,8 @@ extern volatile unsigned long __jiffies; extern int vgetcpu_mode; extern struct timezone sys_tz; +extern void map_vsyscall(void); + #endif /* __KERNEL__ */ #endif /* _ASM_X86_64_VSYSCALL_H_ */ diff --git a/include/asm-x86/vsyscall32.h b/include/asm-x86/vsyscall32.h deleted file mode 100644 index c631c082f8f7..000000000000 --- a/include/asm-x86/vsyscall32.h +++ /dev/null @@ -1,20 +0,0 @@ -#ifndef _ASM_VSYSCALL32_H -#define _ASM_VSYSCALL32_H 1 - -/* Values need to match arch/x86_64/ia32/vsyscall.lds */ - -#ifdef __ASSEMBLY__ -#define VSYSCALL32_BASE 0xffffe000 -#define VSYSCALL32_SYSEXIT (VSYSCALL32_BASE + 0x410) -#else -#define VSYSCALL32_BASE 0xffffe000UL -#define VSYSCALL32_END (VSYSCALL32_BASE + PAGE_SIZE) -#define VSYSCALL32_EHDR ((const struct elf32_hdr *) VSYSCALL32_BASE) - -#define VSYSCALL32_VSYSCALL ((void *)VSYSCALL32_BASE + 0x400) -#define VSYSCALL32_SYSEXIT ((void *)VSYSCALL32_BASE + 0x410) -#define VSYSCALL32_SIGRETURN ((void __user *)VSYSCALL32_BASE + 0x500) -#define VSYSCALL32_RTSIGRETURN ((void __user *)VSYSCALL32_BASE + 0x600) -#endif - -#endif diff --git a/include/asm-x86/xor_32.h b/include/asm-x86/xor_32.h index 23c86cef3b25..a41ef1bdd424 100644 --- a/include/asm-x86/xor_32.h +++ b/include/asm-x86/xor_32.h @@ -1,6 +1,4 @@ /* - * include/asm-i386/xor.h - * * Optimized RAID-5 checksumming functions for MMX and SSE. * * This program is free software; you can redistribute it and/or modify diff --git a/include/asm-x86/xor_64.h b/include/asm-x86/xor_64.h index f942fcc21831..1eee7fcb2420 100644 --- a/include/asm-x86/xor_64.h +++ b/include/asm-x86/xor_64.h @@ -1,6 +1,4 @@ /* - * include/asm-x86_64/xor.h - * * Optimized RAID-5 checksumming functions for MMX and SSE. * * This program is free software; you can redistribute it and/or modify |