diff options
Diffstat (limited to 'arch/x86')
33 files changed, 280 insertions, 139 deletions
diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild index a9f8a814a1f7..4a8e80cdcfa5 100644 --- a/arch/x86/include/asm/Kbuild +++ b/arch/x86/include/asm/Kbuild @@ -22,4 +22,3 @@ unifdef-y += unistd_32.h unifdef-y += unistd_64.h unifdef-y += vm86.h unifdef-y += vsyscall.h -unifdef-y += swab.h diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h index e02a359d2aa5..02b47a603fc8 100644 --- a/arch/x86/include/asm/bitops.h +++ b/arch/x86/include/asm/bitops.h @@ -3,6 +3,9 @@ /* * Copyright 1992, Linus Torvalds. + * + * Note: inlines with more than a single statement should be marked + * __always_inline to avoid problems with older gcc's inlining heuristics. */ #ifndef _LINUX_BITOPS_H @@ -53,7 +56,8 @@ * Note that @nr may be almost arbitrarily large; this function is not * restricted to acting on a single-word quantity. */ -static inline void set_bit(unsigned int nr, volatile unsigned long *addr) +static __always_inline void +set_bit(unsigned int nr, volatile unsigned long *addr) { if (IS_IMMEDIATE(nr)) { asm volatile(LOCK_PREFIX "orb %1,%0" @@ -90,7 +94,8 @@ static inline void __set_bit(int nr, volatile unsigned long *addr) * you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit() * in order to ensure changes are visible on other processors. */ -static inline void clear_bit(int nr, volatile unsigned long *addr) +static __always_inline void +clear_bit(int nr, volatile unsigned long *addr) { if (IS_IMMEDIATE(nr)) { asm volatile(LOCK_PREFIX "andb %1,%0" @@ -204,7 +209,8 @@ static inline int test_and_set_bit(int nr, volatile unsigned long *addr) * * This is the same as test_and_set_bit on x86. */ -static inline int test_and_set_bit_lock(int nr, volatile unsigned long *addr) +static __always_inline int +test_and_set_bit_lock(int nr, volatile unsigned long *addr) { return test_and_set_bit(nr, addr); } @@ -300,7 +306,7 @@ static inline int test_and_change_bit(int nr, volatile unsigned long *addr) return oldbit; } -static inline int constant_test_bit(unsigned int nr, const volatile unsigned long *addr) +static __always_inline int constant_test_bit(unsigned int nr, const volatile unsigned long *addr) { return ((1UL << (nr % BITS_PER_LONG)) & (((unsigned long *)addr)[nr / BITS_PER_LONG])) != 0; diff --git a/arch/x86/include/asm/byteorder.h b/arch/x86/include/asm/byteorder.h index 7c49917e3d9d..b13a7a88f3eb 100644 --- a/arch/x86/include/asm/byteorder.h +++ b/arch/x86/include/asm/byteorder.h @@ -1,7 +1,6 @@ #ifndef _ASM_X86_BYTEORDER_H #define _ASM_X86_BYTEORDER_H -#include <asm/swab.h> #include <linux/byteorder/little_endian.h> #endif /* _ASM_X86_BYTEORDER_H */ diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h index 05cfed4485fa..1dbbdf4be9b4 100644 --- a/arch/x86/include/asm/io.h +++ b/arch/x86/include/asm/io.h @@ -99,7 +99,6 @@ extern void __iomem *ioremap_wc(unsigned long offset, unsigned long size); * A boot-time mapping is currently limited to at most 16 pages. */ extern void early_ioremap_init(void); -extern void early_ioremap_clear(void); extern void early_ioremap_reset(void); extern void __iomem *early_ioremap(unsigned long offset, unsigned long size); extern void __iomem *early_memremap(unsigned long offset, unsigned long size); diff --git a/arch/x86/include/asm/mach-default/mach_wakecpu.h b/arch/x86/include/asm/mach-default/mach_wakecpu.h index ceb013660146..89897a6a65b9 100644 --- a/arch/x86/include/asm/mach-default/mach_wakecpu.h +++ b/arch/x86/include/asm/mach-default/mach_wakecpu.h @@ -24,7 +24,13 @@ static inline void restore_NMI_vector(unsigned short *high, unsigned short *low) { } +#ifdef CONFIG_SMP extern void __inquire_remote_apic(int apicid); +#else /* CONFIG_SMP */ +static inline void __inquire_remote_apic(int apicid) +{ +} +#endif /* CONFIG_SMP */ static inline void inquire_remote_apic(int apicid) { diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index cb58643947b9..358acc59ae04 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -202,6 +202,35 @@ #define MSR_IA32_THERM_STATUS 0x0000019c #define MSR_IA32_MISC_ENABLE 0x000001a0 +/* MISC_ENABLE bits: architectural */ +#define MSR_IA32_MISC_ENABLE_FAST_STRING (1ULL << 0) +#define MSR_IA32_MISC_ENABLE_TCC (1ULL << 1) +#define MSR_IA32_MISC_ENABLE_EMON (1ULL << 7) +#define MSR_IA32_MISC_ENABLE_BTS_UNAVAIL (1ULL << 11) +#define MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL (1ULL << 12) +#define MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP (1ULL << 16) +#define MSR_IA32_MISC_ENABLE_MWAIT (1ULL << 18) +#define MSR_IA32_MISC_ENABLE_LIMIT_CPUID (1ULL << 22) +#define MSR_IA32_MISC_ENABLE_XTPR_DISABLE (1ULL << 23) +#define MSR_IA32_MISC_ENABLE_XD_DISABLE (1ULL << 34) + +/* MISC_ENABLE bits: model-specific, meaning may vary from core to core */ +#define MSR_IA32_MISC_ENABLE_X87_COMPAT (1ULL << 2) +#define MSR_IA32_MISC_ENABLE_TM1 (1ULL << 3) +#define MSR_IA32_MISC_ENABLE_SPLIT_LOCK_DISABLE (1ULL << 4) +#define MSR_IA32_MISC_ENABLE_L3CACHE_DISABLE (1ULL << 6) +#define MSR_IA32_MISC_ENABLE_SUPPRESS_LOCK (1ULL << 8) +#define MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE (1ULL << 9) +#define MSR_IA32_MISC_ENABLE_FERR (1ULL << 10) +#define MSR_IA32_MISC_ENABLE_FERR_MULTIPLEX (1ULL << 10) +#define MSR_IA32_MISC_ENABLE_TM2 (1ULL << 13) +#define MSR_IA32_MISC_ENABLE_ADJ_PREF_DISABLE (1ULL << 19) +#define MSR_IA32_MISC_ENABLE_SPEEDSTEP_LOCK (1ULL << 20) +#define MSR_IA32_MISC_ENABLE_L1D_CONTEXT (1ULL << 24) +#define MSR_IA32_MISC_ENABLE_DCU_PREF_DISABLE (1ULL << 37) +#define MSR_IA32_MISC_ENABLE_TURBO_DISABLE (1ULL << 38) +#define MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE (1ULL << 39) + /* Intel Model 6 */ #define MSR_P6_EVNTSEL0 0x00000186 #define MSR_P6_EVNTSEL1 0x00000187 diff --git a/arch/x86/include/asm/mtrr.h b/arch/x86/include/asm/mtrr.h index cb988aab716d..14080d22edb3 100644 --- a/arch/x86/include/asm/mtrr.h +++ b/arch/x86/include/asm/mtrr.h @@ -58,15 +58,15 @@ struct mtrr_gentry { #endif /* !__i386__ */ struct mtrr_var_range { - u32 base_lo; - u32 base_hi; - u32 mask_lo; - u32 mask_hi; + __u32 base_lo; + __u32 base_hi; + __u32 mask_lo; + __u32 mask_hi; }; /* In the Intel processor's MTRR interface, the MTRR type is always held in an 8 bit field: */ -typedef u8 mtrr_type; +typedef __u8 mtrr_type; #define MTRR_NUM_FIXED_RANGES 88 #define MTRR_MAX_VAR_RANGES 256 diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h index cb7c151a8bff..dd14c54ac718 100644 --- a/arch/x86/include/asm/pgalloc.h +++ b/arch/x86/include/asm/pgalloc.h @@ -42,6 +42,7 @@ static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) static inline void pte_free(struct mm_struct *mm, struct page *pte) { + pgtable_page_dtor(pte); __free_page(pte); } diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 83e69f4a37f0..06bbcbd66e9c 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -341,6 +341,25 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot) #define canon_pgprot(p) __pgprot(pgprot_val(p) & __supported_pte_mask) +static inline int is_new_memtype_allowed(unsigned long flags, + unsigned long new_flags) +{ + /* + * Certain new memtypes are not allowed with certain + * requested memtype: + * - request is uncached, return cannot be write-back + * - request is write-combine, return cannot be write-back + */ + if ((flags == _PAGE_CACHE_UC_MINUS && + new_flags == _PAGE_CACHE_WB) || + (flags == _PAGE_CACHE_WC && + new_flags == _PAGE_CACHE_WB)) { + return 0; + } + + return 1; +} + #ifndef __ASSEMBLY__ /* Indicate that x86 has its own track and untrack pfn vma functions */ #define __HAVE_PFNMAP_TRACKING diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h index 9c6797c3e56c..c0b0bda754ee 100644 --- a/arch/x86/include/asm/syscalls.h +++ b/arch/x86/include/asm/syscalls.h @@ -40,7 +40,7 @@ asmlinkage int sys_sigaction(int, const struct old_sigaction __user *, struct old_sigaction __user *); asmlinkage int sys_sigaltstack(unsigned long); asmlinkage unsigned long sys_sigreturn(unsigned long); -asmlinkage int sys_rt_sigreturn(struct pt_regs); +asmlinkage int sys_rt_sigreturn(unsigned long); /* kernel/ioport.c */ asmlinkage long sys_iopl(unsigned long); diff --git a/arch/x86/include/asm/timex.h b/arch/x86/include/asm/timex.h index 1287dc1347d6..b5c9d45c981f 100644 --- a/arch/x86/include/asm/timex.h +++ b/arch/x86/include/asm/timex.h @@ -1,18 +1,13 @@ -/* x86 architecture timex specifications */ #ifndef _ASM_X86_TIMEX_H #define _ASM_X86_TIMEX_H #include <asm/processor.h> #include <asm/tsc.h> -#ifdef CONFIG_X86_ELAN -# define PIT_TICK_RATE 1189200 /* AMD Elan has different frequency! */ -#elif defined(CONFIG_X86_RDC321X) -# define PIT_TICK_RATE 1041667 /* Underlying HZ for R8610 */ -#else -# define PIT_TICK_RATE 1193182 /* Underlying HZ */ -#endif -#define CLOCK_TICK_RATE PIT_TICK_RATE +/* The PIT ticks at this frequency (in HZ): */ +#define PIT_TICK_RATE 1193182 + +#define CLOCK_TICK_RATE PIT_TICK_RATE #define ARCH_HAS_READ_CURRENT_TIMER diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index 566a08466b19..4b6df2469fe3 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -47,6 +47,7 @@ #include <asm/proto.h> #include <asm/apic.h> #include <asm/i8259.h> +#include <asm/smp.h> #include <mach_apic.h> #include <mach_apicdef.h> @@ -894,6 +895,10 @@ void disable_local_APIC(void) { unsigned int value; + /* APIC hasn't been mapped yet */ + if (!apic_phys) + return; + clear_local_APIC(); /* @@ -1832,6 +1837,11 @@ void __cpuinit generic_processor_info(int apicid, int version) num_processors++; cpu = cpumask_next_zero(-1, cpu_present_mask); + if (version != apic_version[boot_cpu_physical_apicid]) + WARN_ONCE(1, + "ACPI: apic version mismatch, bootcpu: %x cpu %d: %x\n", + apic_version[boot_cpu_physical_apicid], cpu, version); + physid_set(apicid, phys_cpu_present_map); if (apicid == boot_cpu_physical_apicid) { /* diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c index 06fcd8f9323c..4b1c319d30c3 100644 --- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c @@ -145,7 +145,7 @@ typedef union { struct drv_cmd { unsigned int type; - cpumask_var_t mask; + const struct cpumask *mask; drv_addr_union addr; u32 val; }; @@ -231,15 +231,9 @@ static u32 get_cur_val(const struct cpumask *mask) return 0; } - if (unlikely(!alloc_cpumask_var(&cmd.mask, GFP_KERNEL))) - return 0; - - cpumask_copy(cmd.mask, mask); - + cmd.mask = mask; drv_read(&cmd); - free_cpumask_var(cmd.mask); - dprintk("get_cur_val = %u\n", cmd.val); return cmd.val; @@ -369,7 +363,7 @@ static unsigned int get_cur_freq_on_cpu(unsigned int cpu) return freq; } -static unsigned int check_freqs(const cpumask_t *mask, unsigned int freq, +static unsigned int check_freqs(const struct cpumask *mask, unsigned int freq, struct acpi_cpufreq_data *data) { unsigned int cur_freq; @@ -404,9 +398,6 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy, return -ENODEV; } - if (unlikely(!alloc_cpumask_var(&cmd.mask, GFP_KERNEL))) - return -ENOMEM; - perf = data->acpi_data; result = cpufreq_frequency_table_target(policy, data->freq_table, @@ -451,9 +442,9 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy, /* cpufreq holds the hotplug lock, so we are safe from here on */ if (policy->shared_type != CPUFREQ_SHARED_TYPE_ANY) - cpumask_and(cmd.mask, cpu_online_mask, policy->cpus); + cmd.mask = policy->cpus; else - cpumask_copy(cmd.mask, cpumask_of(policy->cpu)); + cmd.mask = cpumask_of(policy->cpu); freqs.old = perf->states[perf->state].core_frequency * 1000; freqs.new = data->freq_table[next_state].frequency; @@ -480,7 +471,6 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy, perf->state = next_perf_state; out: - free_cpumask_var(cmd.mask); return result; } diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 8ea6929e974c..549f2ada55f5 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -29,6 +29,19 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c) { + /* Unmask CPUID levels if masked: */ + if (c->x86 == 6 && c->x86_model >= 15) { + u64 misc_enable; + + rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable); + + if (misc_enable & MSR_IA32_MISC_ENABLE_LIMIT_CPUID) { + misc_enable &= ~MSR_IA32_MISC_ENABLE_LIMIT_CPUID; + wrmsrl(MSR_IA32_MISC_ENABLE, misc_enable); + c->cpuid_level = cpuid_eax(0); + } + } + if ((c->x86 == 0xf && c->x86_model >= 0x03) || (c->x86 == 0x6 && c->x86_model >= 0x0e)) set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index b59ddcc88cd8..0c0a455fe95c 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -33,11 +33,13 @@ u64 mtrr_tom2; struct mtrr_state_type mtrr_state = {}; EXPORT_SYMBOL_GPL(mtrr_state); -#undef MODULE_PARAM_PREFIX -#define MODULE_PARAM_PREFIX "mtrr." - -static int mtrr_show; -module_param_named(show, mtrr_show, bool, 0); +static int __initdata mtrr_show; +static int __init mtrr_debug(char *opt) +{ + mtrr_show = 1; + return 0; +} +early_param("mtrr.show", mtrr_debug); /* * Returns the effective MTRR type for the region diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index d6f0490a7391..46469029e9d3 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -1203,7 +1203,6 @@ nmi_stack_correct: pushl %eax CFI_ADJUST_CFA_OFFSET 4 SAVE_ALL - TRACE_IRQS_OFF xorl %edx,%edx # zero error code movl %esp,%eax # pt_regs pointer call do_nmi @@ -1244,7 +1243,6 @@ nmi_espfix_stack: pushl %eax CFI_ADJUST_CFA_OFFSET 4 SAVE_ALL - TRACE_IRQS_OFF FIXUP_ESPFIX_STACK # %eax == %esp xorl %edx,%edx # zero error code call do_nmi diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index 884d985b8b82..e948b28a5a9a 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c @@ -446,7 +446,7 @@ void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, static void __kprobes setup_singlestep(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb) { -#if !defined(CONFIG_PREEMPT) || defined(CONFIG_PM) +#if !defined(CONFIG_PREEMPT) || defined(CONFIG_FREEZER) if (p->ainsn.boostable == 1 && !p->post_handler) { /* Boost up -- we can execute copied instructions directly */ reset_current_kprobe(); diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index c0601c2848a1..a649a4ccad43 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -27,6 +27,7 @@ #include <asm/e820.h> #include <asm/trampoline.h> #include <asm/setup.h> +#include <asm/smp.h> #include <mach_apic.h> #ifdef CONFIG_X86_32 diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 55c46074eba0..01161077a49c 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -136,7 +136,7 @@ static void __init setup_cpu_pda_map(void) #ifdef CONFIG_X86_64 /* correctly size the local cpu masks */ -static void setup_cpu_local_masks(void) +static void __init setup_cpu_local_masks(void) { alloc_bootmem_cpumask_var(&cpu_initialized_mask); alloc_bootmem_cpumask_var(&cpu_callin_mask); diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 89bb7668041d..df0587f24c54 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -632,9 +632,16 @@ badframe: } #ifdef CONFIG_X86_32 -asmlinkage int sys_rt_sigreturn(struct pt_regs regs) +/* + * Note: do not pass in pt_regs directly as with tail-call optimization + * GCC will incorrectly stomp on the caller's frame and corrupt user-space + * register state: + */ +asmlinkage int sys_rt_sigreturn(unsigned long __unused) { - return do_rt_sigreturn(®s); + struct pt_regs *regs = (struct pt_regs *)&__unused; + + return do_rt_sigreturn(regs); } #else /* !CONFIG_X86_32 */ asmlinkage long sys_rt_sigreturn(struct pt_regs *regs) diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S index d44395ff34c3..e2e86a08f31d 100644 --- a/arch/x86/kernel/syscall_table_32.S +++ b/arch/x86/kernel/syscall_table_32.S @@ -88,7 +88,7 @@ ENTRY(sys_call_table) .long sys_uselib .long sys_swapon .long sys_reboot - .long old_readdir + .long sys_old_readdir .long old_mmap /* 90 */ .long sys_munmap .long sys_truncate diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c index f885023167e0..6812b829ed83 100644 --- a/arch/x86/kernel/tlb_uv.c +++ b/arch/x86/kernel/tlb_uv.c @@ -200,6 +200,7 @@ static int uv_wait_completion(struct bau_desc *bau_desc, destination_timeouts = 0; } } + cpu_relax(); } return FLUSH_COMPLETE; } diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c index 23206ba16874..1d3302cc2ddf 100644 --- a/arch/x86/kernel/vmi_32.c +++ b/arch/x86/kernel/vmi_32.c @@ -858,7 +858,7 @@ void __init vmi_init(void) #endif } -void vmi_activate(void) +void __init vmi_activate(void) { unsigned long flags; diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c index 4a20b2f9a381..7c8ca91bb9ec 100644 --- a/arch/x86/lib/usercopy_32.c +++ b/arch/x86/lib/usercopy_32.c @@ -56,7 +56,7 @@ do { \ " jmp 2b\n" \ ".previous\n" \ _ASM_EXTABLE(0b,3b) \ - : "=d"(res), "=c"(count), "=&a" (__d0), "=&S" (__d1), \ + : "=&d"(res), "=&c"(count), "=&a" (__d0), "=&S" (__d1), \ "=&D" (__d2) \ : "i"(-EFAULT), "0"(count), "1"(count), "3"(src), "4"(dst) \ : "memory"); \ @@ -218,7 +218,7 @@ long strnlen_user(const char __user *s, long n) " .align 4\n" " .long 0b,2b\n" ".previous" - :"=r" (n), "=D" (s), "=a" (res), "=c" (tmp) + :"=&r" (n), "=&D" (s), "=&a" (res), "=&c" (tmp) :"0" (n), "1" (s), "2" (0), "3" (mask) :"cc"); return res & mask; diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c index 64d6c84e6353..ec13cb5f17ed 100644 --- a/arch/x86/lib/usercopy_64.c +++ b/arch/x86/lib/usercopy_64.c @@ -32,7 +32,7 @@ do { \ " jmp 2b\n" \ ".previous\n" \ _ASM_EXTABLE(0b,3b) \ - : "=r"(res), "=c"(count), "=&a" (__d0), "=&S" (__d1), \ + : "=&r"(res), "=&c"(count), "=&a" (__d0), "=&S" (__d1), \ "=&D" (__d2) \ : "i"(-EFAULT), "0"(count), "1"(count), "3"(src), "4"(dst) \ : "memory"); \ @@ -86,7 +86,7 @@ unsigned long __clear_user(void __user *addr, unsigned long size) ".previous\n" _ASM_EXTABLE(0b,3b) _ASM_EXTABLE(1b,2b) - : [size8] "=c"(size), [dst] "=&D" (__d0) + : [size8] "=&c"(size), [dst] "=&D" (__d0) : [size1] "r"(size & 7), "[size8]" (size / 8), "[dst]"(addr), [zero] "r" (0UL), [eight] "r" (8UL)); return size; diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 9e268b6b204e..90dfae511a41 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -534,7 +534,7 @@ static int vmalloc_fault(unsigned long address) happen within a race in page table update. In the later case just flush. */ - pgd = pgd_offset(current->mm ?: &init_mm, address); + pgd = pgd_offset(current->active_mm, address); pgd_ref = pgd_offset_k(address); if (pgd_none(*pgd_ref)) return -1; diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 88f1b10de3be..2cef05074413 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -138,6 +138,47 @@ static pte_t * __init one_page_table_init(pmd_t *pmd) return pte_offset_kernel(pmd, 0); } +static pte_t *__init page_table_kmap_check(pte_t *pte, pmd_t *pmd, + unsigned long vaddr, pte_t *lastpte) +{ +#ifdef CONFIG_HIGHMEM + /* + * Something (early fixmap) may already have put a pte + * page here, which causes the page table allocation + * to become nonlinear. Attempt to fix it, and if it + * is still nonlinear then we have to bug. + */ + int pmd_idx_kmap_begin = fix_to_virt(FIX_KMAP_END) >> PMD_SHIFT; + int pmd_idx_kmap_end = fix_to_virt(FIX_KMAP_BEGIN) >> PMD_SHIFT; + + if (pmd_idx_kmap_begin != pmd_idx_kmap_end + && (vaddr >> PMD_SHIFT) >= pmd_idx_kmap_begin + && (vaddr >> PMD_SHIFT) <= pmd_idx_kmap_end + && ((__pa(pte) >> PAGE_SHIFT) < table_start + || (__pa(pte) >> PAGE_SHIFT) >= table_end)) { + pte_t *newpte; + int i; + + BUG_ON(after_init_bootmem); + newpte = alloc_low_page(); + for (i = 0; i < PTRS_PER_PTE; i++) + set_pte(newpte + i, pte[i]); + + paravirt_alloc_pte(&init_mm, __pa(newpte) >> PAGE_SHIFT); + set_pmd(pmd, __pmd(__pa(newpte)|_PAGE_TABLE)); + BUG_ON(newpte != pte_offset_kernel(pmd, 0)); + __flush_tlb_all(); + + paravirt_release_pte(__pa(pte) >> PAGE_SHIFT); + pte = newpte; + } + BUG_ON(vaddr < fix_to_virt(FIX_KMAP_BEGIN - 1) + && vaddr > fix_to_virt(FIX_KMAP_END) + && lastpte && lastpte + PTRS_PER_PTE != pte); +#endif + return pte; +} + /* * This function initializes a certain range of kernel virtual memory * with new bootmem page tables, everywhere page tables are missing in @@ -154,6 +195,7 @@ page_table_range_init(unsigned long start, unsigned long end, pgd_t *pgd_base) unsigned long vaddr; pgd_t *pgd; pmd_t *pmd; + pte_t *pte = NULL; vaddr = start; pgd_idx = pgd_index(vaddr); @@ -165,7 +207,8 @@ page_table_range_init(unsigned long start, unsigned long end, pgd_t *pgd_base) pmd = pmd + pmd_index(vaddr); for (; (pmd_idx < PTRS_PER_PMD) && (vaddr != end); pmd++, pmd_idx++) { - one_page_table_init(pmd); + pte = page_table_kmap_check(one_page_table_init(pmd), + pmd, vaddr, pte); vaddr += PMD_SIZE; } @@ -508,7 +551,6 @@ static void __init early_ioremap_page_table_range_init(pgd_t *pgd_base) * Fixed mappings, only the page table structure has to be * created - mappings will be set by set_fixmap(): */ - early_ioremap_clear(); vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK; end = (FIXADDR_TOP + PMD_SIZE - 1) & PMD_MASK; page_table_range_init(vaddr, end, pgd_base); @@ -801,7 +843,7 @@ static void __init find_early_table_space(unsigned long end, int use_pse) tables += PAGE_ALIGN(ptes * sizeof(pte_t)); /* for fixmap */ - tables += PAGE_SIZE * 2; + tables += PAGE_ALIGN(__end_of_fixed_addresses * sizeof(pte_t)); /* * RED-PEN putting page tables only on node 0 could diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 23f68e77ad1f..e6d36b490250 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -596,7 +596,7 @@ static void __init init_gbpages(void) direct_gbpages = 0; } -static unsigned long __init kernel_physical_mapping_init(unsigned long start, +static unsigned long __meminit kernel_physical_mapping_init(unsigned long start, unsigned long end, unsigned long page_size_mask) { diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c index d0151d8ce452..ca53224fc56c 100644 --- a/arch/x86/mm/iomap_32.c +++ b/arch/x86/mm/iomap_32.c @@ -17,6 +17,7 @@ */ #include <asm/iomap.h> +#include <asm/pat.h> #include <linux/module.h> /* Map 'pfn' using fixed map 'type' and protections 'prot' @@ -29,6 +30,15 @@ iomap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot) pagefault_disable(); + /* + * For non-PAT systems, promote PAGE_KERNEL_WC to PAGE_KERNEL_UC_MINUS. + * PAGE_KERNEL_WC maps to PWT, which translates to uncached if the + * MTRR is UC or WC. UC_MINUS gets the real intention, of the + * user, which is "WC if the MTRR is WC, UC if you can't do that." + */ + if (!pat_enabled && pgprot_val(prot) == pgprot_val(PAGE_KERNEL_WC)) + prot = PAGE_KERNEL_UC_MINUS; + idx = type + KM_TYPE_NR*smp_processor_id(); vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); set_pte(kmap_pte-idx, pfn_pte(pfn, prot)); diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index bd85d42819e1..af750ab973b6 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -557,34 +557,9 @@ void __init early_ioremap_init(void) } } -void __init early_ioremap_clear(void) -{ - pmd_t *pmd; - - if (early_ioremap_debug) - printk(KERN_INFO "early_ioremap_clear()\n"); - - pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)); - pmd_clear(pmd); - paravirt_release_pte(__pa(bm_pte) >> PAGE_SHIFT); - __flush_tlb_all(); -} - void __init early_ioremap_reset(void) { - enum fixed_addresses idx; - unsigned long addr, phys; - pte_t *pte; - after_paging_init = 1; - for (idx = FIX_BTMAP_BEGIN; idx >= FIX_BTMAP_END; idx--) { - addr = fix_to_virt(idx); - pte = early_ioremap_pte(addr); - if (pte_present(*pte)) { - phys = pte_val(*pte) & PAGE_MASK; - set_fixmap(idx, phys); - } - } } static void __init __early_set_fixmap(enum fixed_addresses idx, diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index e89d24815f26..84ba74820ad6 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -534,6 +534,36 @@ out_unlock: return 0; } +static int __cpa_process_fault(struct cpa_data *cpa, unsigned long vaddr, + int primary) +{ + /* + * Ignore all non primary paths. + */ + if (!primary) + return 0; + + /* + * Ignore the NULL PTE for kernel identity mapping, as it is expected + * to have holes. + * Also set numpages to '1' indicating that we processed cpa req for + * one virtual address page and its pfn. TBD: numpages can be set based + * on the initial value and the level returned by lookup_address(). + */ + if (within(vaddr, PAGE_OFFSET, + PAGE_OFFSET + (max_pfn_mapped << PAGE_SHIFT))) { + cpa->numpages = 1; + cpa->pfn = __pa(vaddr) >> PAGE_SHIFT; + return 0; + } else { + WARN(1, KERN_WARNING "CPA: called for zero pte. " + "vaddr = %lx cpa->vaddr = %lx\n", vaddr, + *cpa->vaddr); + + return -EFAULT; + } +} + static int __change_page_attr(struct cpa_data *cpa, int primary) { unsigned long address; @@ -549,17 +579,11 @@ static int __change_page_attr(struct cpa_data *cpa, int primary) repeat: kpte = lookup_address(address, &level); if (!kpte) - return 0; + return __cpa_process_fault(cpa, address, primary); old_pte = *kpte; - if (!pte_val(old_pte)) { - if (!primary) - return 0; - WARN(1, KERN_WARNING "CPA: called for zero pte. " - "vaddr = %lx cpa->vaddr = %lx\n", address, - *cpa->vaddr); - return -EINVAL; - } + if (!pte_val(old_pte)) + return __cpa_process_fault(cpa, address, primary); if (level == PG_LEVEL_4K) { pte_t new_pte; @@ -657,12 +681,7 @@ static int cpa_process_alias(struct cpa_data *cpa) vaddr = *cpa->vaddr; if (!(within(vaddr, PAGE_OFFSET, - PAGE_OFFSET + (max_low_pfn_mapped << PAGE_SHIFT)) -#ifdef CONFIG_X86_64 - || within(vaddr, PAGE_OFFSET + (1UL<<32), - PAGE_OFFSET + (max_pfn_mapped << PAGE_SHIFT)) -#endif - )) { + PAGE_OFFSET + (max_pfn_mapped << PAGE_SHIFT)))) { alias_cpa = *cpa; temp_cpa_vaddr = (unsigned long) __va(cpa->pfn << PAGE_SHIFT); diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index 85cbd3cd3723..7b61036427df 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -333,11 +333,23 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, req_type & _PAGE_CACHE_MASK); } - is_range_ram = pagerange_is_ram(start, end); - if (is_range_ram == 1) - return reserve_ram_pages_type(start, end, req_type, new_type); - else if (is_range_ram < 0) - return -EINVAL; + if (new_type) + *new_type = actual_type; + + /* + * For legacy reasons, some parts of the physical address range in the + * legacy 1MB region is treated as non-RAM (even when listed as RAM in + * the e820 tables). So we will track the memory attributes of this + * legacy 1MB region using the linear memtype_list always. + */ + if (end >= ISA_END_ADDRESS) { + is_range_ram = pagerange_is_ram(start, end); + if (is_range_ram == 1) + return reserve_ram_pages_type(start, end, req_type, + new_type); + else if (is_range_ram < 0) + return -EINVAL; + } new = kmalloc(sizeof(struct memtype), GFP_KERNEL); if (!new) @@ -347,9 +359,6 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, new->end = end; new->type = actual_type; - if (new_type) - *new_type = actual_type; - spin_lock(&memtype_lock); if (cached_entry && start >= cached_start) @@ -437,11 +446,19 @@ int free_memtype(u64 start, u64 end) if (is_ISA_range(start, end - 1)) return 0; - is_range_ram = pagerange_is_ram(start, end); - if (is_range_ram == 1) - return free_ram_pages_type(start, end); - else if (is_range_ram < 0) - return -EINVAL; + /* + * For legacy reasons, some parts of the physical address range in the + * legacy 1MB region is treated as non-RAM (even when listed as RAM in + * the e820 tables). So we will track the memory attributes of this + * legacy 1MB region using the linear memtype_list always. + */ + if (end >= ISA_END_ADDRESS) { + is_range_ram = pagerange_is_ram(start, end); + if (is_range_ram == 1) + return free_ram_pages_type(start, end); + else if (is_range_ram < 0) + return -EINVAL; + } spin_lock(&memtype_lock); list_for_each_entry(entry, &memtype_list, nd) { @@ -601,12 +618,13 @@ void unmap_devmem(unsigned long pfn, unsigned long size, pgprot_t vma_prot) * Reserved non RAM regions only and after successful reserve_memtype, * this func also keeps identity mapping (if any) in sync with this new prot. */ -static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t vma_prot) +static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot, + int strict_prot) { int is_ram = 0; int id_sz, ret; unsigned long flags; - unsigned long want_flags = (pgprot_val(vma_prot) & _PAGE_CACHE_MASK); + unsigned long want_flags = (pgprot_val(*vma_prot) & _PAGE_CACHE_MASK); is_ram = pagerange_is_ram(paddr, paddr + size); @@ -625,15 +643,24 @@ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t vma_prot) return ret; if (flags != want_flags) { - free_memtype(paddr, paddr + size); - printk(KERN_ERR - "%s:%d map pfn expected mapping type %s for %Lx-%Lx, got %s\n", - current->comm, current->pid, - cattr_name(want_flags), - (unsigned long long)paddr, - (unsigned long long)(paddr + size), - cattr_name(flags)); - return -EINVAL; + if (strict_prot || !is_new_memtype_allowed(want_flags, flags)) { + free_memtype(paddr, paddr + size); + printk(KERN_ERR "%s:%d map pfn expected mapping type %s" + " for %Lx-%Lx, got %s\n", + current->comm, current->pid, + cattr_name(want_flags), + (unsigned long long)paddr, + (unsigned long long)(paddr + size), + cattr_name(flags)); + return -EINVAL; + } + /* + * We allow returning different type than the one requested in + * non strict case. + */ + *vma_prot = __pgprot((pgprot_val(*vma_prot) & + (~_PAGE_CACHE_MASK)) | + flags); } /* Need to keep identity mapping in sync */ @@ -689,6 +716,7 @@ int track_pfn_vma_copy(struct vm_area_struct *vma) unsigned long vma_start = vma->vm_start; unsigned long vma_end = vma->vm_end; unsigned long vma_size = vma_end - vma_start; + pgprot_t pgprot; if (!pat_enabled) return 0; @@ -702,7 +730,8 @@ int track_pfn_vma_copy(struct vm_area_struct *vma) WARN_ON_ONCE(1); return -EINVAL; } - return reserve_pfn_range(paddr, vma_size, __pgprot(prot)); + pgprot = __pgprot(prot); + return reserve_pfn_range(paddr, vma_size, &pgprot, 1); } /* reserve entire vma page by page, using pfn and prot from pte */ @@ -710,7 +739,8 @@ int track_pfn_vma_copy(struct vm_area_struct *vma) if (follow_phys(vma, vma_start + i, 0, &prot, &paddr)) continue; - retval = reserve_pfn_range(paddr, PAGE_SIZE, __pgprot(prot)); + pgprot = __pgprot(prot); + retval = reserve_pfn_range(paddr, PAGE_SIZE, &pgprot, 1); if (retval) goto cleanup_ret; } @@ -741,7 +771,7 @@ cleanup_ret: * Note that this function can be called with caller trying to map only a * subrange/page inside the vma. */ -int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t prot, +int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t *prot, unsigned long pfn, unsigned long size) { int retval = 0; @@ -758,14 +788,14 @@ int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t prot, if (is_linear_pfn_mapping(vma)) { /* reserve the whole chunk starting from vm_pgoff */ paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT; - return reserve_pfn_range(paddr, vma_size, prot); + return reserve_pfn_range(paddr, vma_size, prot, 0); } /* reserve page by page using pfn and size */ base_paddr = (resource_size_t)pfn << PAGE_SHIFT; for (i = 0; i < size; i += PAGE_SIZE) { paddr = base_paddr + i; - retval = reserve_pfn_range(paddr, PAGE_SIZE, prot); + retval = reserve_pfn_range(paddr, PAGE_SIZE, prot, 0); if (retval) goto cleanup_ret; } diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c index f884740da318..5ead808dd70c 100644 --- a/arch/x86/pci/i386.c +++ b/arch/x86/pci/i386.c @@ -314,17 +314,7 @@ int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma, return retval; if (flags != new_flags) { - /* - * Do not fallback to certain memory types with certain - * requested type: - * - request is uncached, return cannot be write-back - * - request is uncached, return cannot be write-combine - * - request is write-combine, return cannot be write-back - */ - if ((flags == _PAGE_CACHE_UC_MINUS && - (new_flags == _PAGE_CACHE_WB)) || - (flags == _PAGE_CACHE_WC && - new_flags == _PAGE_CACHE_WB)) { + if (!is_new_memtype_allowed(flags, new_flags)) { free_memtype(addr, addr+len); return -EINVAL; } |