diff options
Diffstat (limited to 'arch/x86')
41 files changed, 457 insertions, 241 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 4a88cf7695b4..6c70fed0f9a0 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -21,7 +21,8 @@ config X86 select HAVE_IDE select HAVE_OPROFILE select HAVE_KPROBES - select HAVE_KVM + select HAVE_KRETPROBES + select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64) config GENERIC_LOCKBREAK @@ -65,9 +66,6 @@ config MMU config ZONE_DMA def_bool y -config QUICKLIST - def_bool X86_32 - config SBUS bool @@ -1261,7 +1259,7 @@ menuconfig APM machines with more than one CPU. In order to use APM, you will need supporting software. For location - and more information, read <file:Documentation/pm.txt> and the + and more information, read <file:Documentation/power/pm.txt> and the Battery Powered Linux mini-HOWTO, available from <http://www.tldp.org/docs.html#howto>. diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu index 6d50064db182..9304bfba7d45 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu @@ -388,7 +388,7 @@ config X86_OOSTORE # config X86_P6_NOP def_bool y - depends on (X86_64 || !X86_GENERIC) && (M686 || MPENTIUMII || MPENTIUMIII || MPENTIUMM || MCORE2 || PENTIUM4) + depends on (X86_64 || !X86_GENERIC) && (M686 || MPENTIUMII || MPENTIUMIII || MPENTIUMM || MCORE2 || MPENTIUM4) config X86_TSC def_bool y diff --git a/arch/x86/boot/vesa.h b/arch/x86/boot/vesa.h index ff5b73cd406f..468e444622c5 100644 --- a/arch/x86/boot/vesa.h +++ b/arch/x86/boot/vesa.h @@ -26,17 +26,10 @@ struct vesa_general_info { far_ptr video_mode_ptr; /* 14 */ u16 total_memory; /* 18 */ - u16 oem_software_rev; /* 20 */ - far_ptr oem_vendor_name_ptr; /* 22 */ - far_ptr oem_product_name_ptr; /* 26 */ - far_ptr oem_product_rev_ptr; /* 30 */ - - u8 reserved[222]; /* 34 */ - u8 oem_data[256]; /* 256 */ + u8 reserved[236]; /* 20 */ } __attribute__ ((packed)); #define VESA_MAGIC ('V' + ('E' << 8) + ('S' << 16) + ('A' << 24)) -#define VBE2_MAGIC ('V' + ('B' << 8) + ('E' << 16) + ('2' << 24)) struct vesa_mode_info { u16 mode_attr; /* 0 */ diff --git a/arch/x86/boot/video-vesa.c b/arch/x86/boot/video-vesa.c index 662dd2f13068..419b5c273374 100644 --- a/arch/x86/boot/video-vesa.c +++ b/arch/x86/boot/video-vesa.c @@ -37,8 +37,6 @@ static int vesa_probe(void) video_vesa.modes = GET_HEAP(struct mode_info, 0); - vginfo.signature = VBE2_MAGIC; - ax = 0x4f00; di = (size_t)&vginfo; asm(INT10 diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c index 1c0503bdfb1a..5e7771a3ba2f 100644 --- a/arch/x86/ia32/ia32_signal.c +++ b/arch/x86/ia32/ia32_signal.c @@ -500,7 +500,7 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka, regs->ss = __USER32_DS; set_fs(USER_DS); - regs->flags &= ~X86_EFLAGS_TF; + regs->flags &= ~(X86_EFLAGS_TF | X86_EFLAGS_DF); if (test_thread_flag(TIF_SINGLESTEP)) ptrace_notify(SIGTRAP); @@ -600,7 +600,7 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, regs->ss = __USER32_DS; set_fs(USER_DS); - regs->flags &= ~X86_EFLAGS_TF; + regs->flags &= ~(X86_EFLAGS_TF | X86_EFLAGS_DF); if (test_thread_flag(TIF_SINGLESTEP)) ptrace_notify(SIGTRAP); diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c index 608152a2a05e..00df126169b4 100644 --- a/arch/x86/kernel/aperture_64.c +++ b/arch/x86/kernel/aperture_64.c @@ -18,6 +18,7 @@ #include <linux/pci.h> #include <linux/bitops.h> #include <linux/ioport.h> +#include <linux/suspend.h> #include <asm/e820.h> #include <asm/io.h> #include <asm/gart.h> @@ -76,6 +77,8 @@ static u32 __init allocate_aperture(void) printk(KERN_INFO "Mapping aperture over %d KB of RAM @ %lx\n", aper_size >> 10, __pa(p)); insert_aperture_resource((u32)__pa(p), aper_size); + register_nosave_region((u32)__pa(p) >> PAGE_SHIFT, + (u32)__pa(p+aper_size) >> PAGE_SHIFT); return (u32)__pa(p); } diff --git a/arch/x86/kernel/cpu/cpufreq/e_powersaver.c b/arch/x86/kernel/cpu/cpufreq/e_powersaver.c index 39f8cb18296c..c2f930d86640 100644 --- a/arch/x86/kernel/cpu/cpufreq/e_powersaver.c +++ b/arch/x86/kernel/cpu/cpufreq/e_powersaver.c @@ -55,7 +55,6 @@ static int eps_set_state(struct eps_cpu_data *centaur, { struct cpufreq_freqs freqs; u32 lo, hi; - u8 current_multiplier, current_voltage; int err = 0; int i; @@ -95,6 +94,10 @@ postchange: rdmsr(MSR_IA32_PERF_STATUS, lo, hi); freqs.new = centaur->fsb * ((lo >> 8) & 0xff); +#ifdef DEBUG + { + u8 current_multiplier, current_voltage; + /* Print voltage and multiplier */ rdmsr(MSR_IA32_PERF_STATUS, lo, hi); current_voltage = lo & 0xff; @@ -103,7 +106,8 @@ postchange: current_multiplier = (lo >> 8) & 0xff; printk(KERN_INFO "eps: Current multiplier = %d\n", current_multiplier); - + } +#endif cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); return err; } diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c b/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c index f2b5a621d27b..8a85c93bd62a 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c @@ -63,7 +63,7 @@ static struct cpufreq_frequency_table speedstep_freqs[] = { */ static int speedstep_smi_ownership (void) { - u32 command, result, magic; + u32 command, result, magic, dummy; u32 function = GET_SPEEDSTEP_OWNER; unsigned char magic_data[] = "Copyright (c) 1999 Intel Corporation"; @@ -73,8 +73,11 @@ static int speedstep_smi_ownership (void) dprintk("trying to obtain ownership with command %x at port %x\n", command, smi_port); __asm__ __volatile__( + "push %%ebp\n" "out %%al, (%%dx)\n" - : "=D" (result) + "pop %%ebp\n" + : "=D" (result), "=a" (dummy), "=b" (dummy), "=c" (dummy), "=d" (dummy), + "=S" (dummy) : "a" (command), "b" (function), "c" (0), "d" (smi_port), "D" (0), "S" (magic) : "memory" @@ -96,7 +99,7 @@ static int speedstep_smi_ownership (void) */ static int speedstep_smi_get_freqs (unsigned int *low, unsigned int *high) { - u32 command, result = 0, edi, high_mhz, low_mhz; + u32 command, result = 0, edi, high_mhz, low_mhz, dummy; u32 state=0; u32 function = GET_SPEEDSTEP_FREQS; @@ -109,10 +112,12 @@ static int speedstep_smi_get_freqs (unsigned int *low, unsigned int *high) dprintk("trying to determine frequencies with command %x at port %x\n", command, smi_port); - __asm__ __volatile__("movl $0, %%edi\n" + __asm__ __volatile__( + "push %%ebp\n" "out %%al, (%%dx)\n" - : "=a" (result), "=b" (high_mhz), "=c" (low_mhz), "=d" (state), "=D" (edi) - : "a" (command), "b" (function), "c" (state), "d" (smi_port), "S" (0) + "pop %%ebp" + : "=a" (result), "=b" (high_mhz), "=c" (low_mhz), "=d" (state), "=D" (edi), "=S" (dummy) + : "a" (command), "b" (function), "c" (state), "d" (smi_port), "S" (0), "D" (0) ); dprintk("result %x, low_freq %u, high_freq %u\n", result, low_mhz, high_mhz); @@ -135,16 +140,18 @@ static int speedstep_smi_get_freqs (unsigned int *low, unsigned int *high) static int speedstep_get_state (void) { u32 function=GET_SPEEDSTEP_STATE; - u32 result, state, edi, command; + u32 result, state, edi, command, dummy; command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff); dprintk("trying to determine current setting with command %x at port %x\n", command, smi_port); - __asm__ __volatile__("movl $0, %%edi\n" + __asm__ __volatile__( + "push %%ebp\n" "out %%al, (%%dx)\n" - : "=a" (result), "=b" (state), "=D" (edi) - : "a" (command), "b" (function), "c" (0), "d" (smi_port), "S" (0) + "pop %%ebp\n" + : "=a" (result), "=b" (state), "=D" (edi), "=c" (dummy), "=d" (dummy), "=S" (dummy) + : "a" (command), "b" (function), "c" (0), "d" (smi_port), "S" (0), "D" (0) ); dprintk("state is %x, result is %x\n", state, result); @@ -160,7 +167,7 @@ static int speedstep_get_state (void) */ static void speedstep_set_state (unsigned int state) { - unsigned int result = 0, command, new_state; + unsigned int result = 0, command, new_state, dummy; unsigned long flags; unsigned int function=SET_SPEEDSTEP_STATE; unsigned int retry = 0; @@ -182,10 +189,12 @@ static void speedstep_set_state (unsigned int state) } retry++; __asm__ __volatile__( - "movl $0, %%edi\n" + "push %%ebp\n" "out %%al, (%%dx)\n" - : "=b" (new_state), "=D" (result) - : "a" (command), "b" (function), "c" (state), "d" (smi_port), "S" (0) + "pop %%ebp" + : "=b" (new_state), "=D" (result), "=c" (dummy), "=a" (dummy), + "=d" (dummy), "=S" (dummy) + : "a" (command), "b" (function), "c" (state), "d" (smi_port), "S" (0), "D" (0) ); } while ((new_state != state) && (retry <= SMI_TRIES)); @@ -195,7 +204,7 @@ static void speedstep_set_state (unsigned int state) if (new_state == state) { dprintk("change to %u MHz succeeded after %u tries with result %u\n", (speedstep_freqs[new_state].frequency / 1000), retry, result); } else { - printk(KERN_ERR "cpufreq: change failed with new_state %u and result %u\n", new_state, result); + printk(KERN_ERR "cpufreq: change to state %u failed with new_state %u and result %u\n", state, new_state, result); } return; diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index 103d61a59b19..3e18db4cefee 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -176,12 +176,13 @@ static inline void k8_enable_fixed_iorrs(void) } /** - * Checks and updates an fixed-range MTRR if it differs from the value it - * should have. If K8 extentions are wanted, update the K8 SYSCFG MSR also. - * see AMD publication no. 24593, chapter 7.8.1, page 233 for more information - * \param msr MSR address of the MTTR which should be checked and updated - * \param changed pointer which indicates whether the MTRR needed to be changed - * \param msrwords pointer to the MSR values which the MSR should have + * set_fixed_range - checks & updates a fixed-range MTRR if it differs from the value it should have + * @msr: MSR address of the MTTR which should be checked and updated + * @changed: pointer which indicates whether the MTRR needed to be changed + * @msrwords: pointer to the MSR values which the MSR should have + * + * If K8 extentions are wanted, update the K8 SYSCFG MSR also. + * See AMD publication no. 24593, chapter 7.8.1, page 233 for more information. */ static void set_fixed_range(int msr, bool *changed, unsigned int *msrwords) { @@ -199,12 +200,15 @@ static void set_fixed_range(int msr, bool *changed, unsigned int *msrwords) } } +/** + * generic_get_free_region - Get a free MTRR. + * @base: The starting (base) address of the region. + * @size: The size (in bytes) of the region. + * @replace_reg: mtrr index to be replaced; set to invalid value if none. + * + * Returns: The index of the region on success, else negative on error. + */ int generic_get_free_region(unsigned long base, unsigned long size, int replace_reg) -/* [SUMMARY] Get a free MTRR. - <base> The starting (base) address of the region. - <size> The size (in bytes) of the region. - [RETURNS] The index of the region on success, else -1 on error. -*/ { int i, max; mtrr_type ltype; @@ -249,8 +253,8 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base, } /** - * Checks and updates the fixed-range MTRRs if they differ from the saved set - * \param frs pointer to fixed-range MTRR values, saved by get_fixed_ranges() + * set_fixed_ranges - checks & updates the fixed-range MTRRs if they differ from the saved set + * @frs: pointer to fixed-range MTRR values, saved by get_fixed_ranges() */ static int set_fixed_ranges(mtrr_type * frs) { @@ -294,13 +298,13 @@ static bool set_mtrr_var_ranges(unsigned int index, struct mtrr_var_range *vr) static u32 deftype_lo, deftype_hi; +/** + * set_mtrr_state - Set the MTRR state for this CPU. + * + * NOTE: The CPU must already be in a safe state for MTRR changes. + * RETURNS: 0 if no changes made, else a mask indicating what was changed. + */ static unsigned long set_mtrr_state(void) -/* [SUMMARY] Set the MTRR state for this CPU. - <state> The MTRR state information to read. - <ctxt> Some relevant CPU context. - [NOTE] The CPU must already be in a safe state for MTRR changes. - [RETURNS] 0 if no changes made, else a mask indication what was changed. -*/ { unsigned int i; unsigned long change_mask = 0; diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index be83336fddba..a6450b3ae759 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -711,7 +711,8 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn) trim_size = end_pfn; trim_size <<= PAGE_SHIFT; trim_size -= trim_start; - add_memory_region(trim_start, trim_size, E820_RESERVED); + update_memory_range(trim_start, trim_size, E820_RAM, + E820_RESERVED); update_e820(); return 1; } diff --git a/arch/x86/kernel/e820_32.c b/arch/x86/kernel/e820_32.c index 4e16ef4a2659..80444c5c9b14 100644 --- a/arch/x86/kernel/e820_32.c +++ b/arch/x86/kernel/e820_32.c @@ -749,6 +749,32 @@ static int __init parse_memmap(char *arg) return 0; } early_param("memmap", parse_memmap); +void __init update_memory_range(u64 start, u64 size, unsigned old_type, + unsigned new_type) +{ + int i; + + BUG_ON(old_type == new_type); + + for (i = 0; i < e820.nr_map; i++) { + struct e820entry *ei = &e820.map[i]; + u64 final_start, final_end; + if (ei->type != old_type) + continue; + /* totally covered? */ + if (ei->addr >= start && ei->size <= size) { + ei->type = new_type; + continue; + } + /* partially covered */ + final_start = max(start, ei->addr); + final_end = min(start + size, ei->addr + ei->size); + if (final_start >= final_end) + continue; + add_memory_region(final_start, final_end - final_start, + new_type); + } +} void __init update_e820(void) { u8 nr_map; diff --git a/arch/x86/kernel/e820_64.c b/arch/x86/kernel/e820_64.c index 9f65b4cc323c..9be697126013 100644 --- a/arch/x86/kernel/e820_64.c +++ b/arch/x86/kernel/e820_64.c @@ -744,6 +744,33 @@ void __init finish_e820_parsing(void) } } +void __init update_memory_range(u64 start, u64 size, unsigned old_type, + unsigned new_type) +{ + int i; + + BUG_ON(old_type == new_type); + + for (i = 0; i < e820.nr_map; i++) { + struct e820entry *ei = &e820.map[i]; + u64 final_start, final_end; + if (ei->type != old_type) + continue; + /* totally covered? */ + if (ei->addr >= start && ei->size <= size) { + ei->type = new_type; + continue; + } + /* partially covered */ + final_start = max(start, ei->addr); + final_end = min(start + size, ei->addr + ei->size); + if (final_start >= final_end) + continue; + add_memory_region(final_start, final_end - final_start, + new_type); + } +} + void __init update_e820(void) { u8 nr_map; diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index fd8ca53943a8..74d87ea85b5c 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -657,7 +657,7 @@ int_msg: .asciz "Unknown interrupt or fault at EIP %p %p %p\n" fault_msg: - .ascii \ + .asciz \ /* fault info: */ "BUG: Int %d: CR2 %p\n" \ /* pusha regs: */ " EDI %p ESI %p EBP %p ESP %p\n" \ " EBX %p EDX %p ECX %p EAX %p\n" \ diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index 763dfc407232..d2e39e69aaf8 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c @@ -132,7 +132,7 @@ int xfpregs_get(struct task_struct *target, const struct user_regset *regset, if (!cpu_has_fxsr) return -ENODEV; - unlazy_fpu(target); + init_fpu(target); return user_regset_copyout(&pos, &count, &kbuf, &ubuf, &target->thread.i387.fxsave, 0, -1); @@ -147,7 +147,7 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset, if (!cpu_has_fxsr) return -ENODEV; - unlazy_fpu(target); + init_fpu(target); set_stopped_child_used_math(target); ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, @@ -261,7 +261,7 @@ static void convert_from_fxsr(struct user_i387_ia32_struct *env, } #else env->fip = fxsave->fip; - env->fcs = fxsave->fcs; + env->fcs = (u16) fxsave->fcs | ((u32) fxsave->fop << 16); env->foo = fxsave->foo; env->fos = fxsave->fos; #endif @@ -307,7 +307,7 @@ int fpregs_get(struct task_struct *target, const struct user_regset *regset, if (!HAVE_HWFP) return fpregs_soft_get(target, regset, pos, count, kbuf, ubuf); - unlazy_fpu(target); + init_fpu(target); if (!cpu_has_fxsr) return user_regset_copyout(&pos, &count, &kbuf, &ubuf, @@ -332,7 +332,7 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset, if (!HAVE_HWFP) return fpregs_soft_set(target, regset, pos, count, kbuf, ubuf); - unlazy_fpu(target); + init_fpu(target); set_stopped_child_used_math(target); if (!cpu_has_fxsr) diff --git a/arch/x86/kernel/io_delay.c b/arch/x86/kernel/io_delay.c index c706a3061553..5921e5f0a640 100644 --- a/arch/x86/kernel/io_delay.c +++ b/arch/x86/kernel/io_delay.c @@ -78,6 +78,14 @@ static struct dmi_system_id __initdata io_delay_0xed_port_dmi_table[] = { }, { .callback = dmi_io_delay_0xed_port, + .ident = "HP Pavilion dv6000", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "Quanta"), + DMI_MATCH(DMI_BOARD_NAME, "30B8") + } + }, + { + .callback = dmi_io_delay_0xed_port, .ident = "HP Pavilion tx1000", .matches = { DMI_MATCH(DMI_BOARD_VENDOR, "Quanta"), diff --git a/arch/x86/kernel/mfgpt_32.c b/arch/x86/kernel/mfgpt_32.c index 027fc067b399..b402c0f3f192 100644 --- a/arch/x86/kernel/mfgpt_32.c +++ b/arch/x86/kernel/mfgpt_32.c @@ -30,6 +30,7 @@ #include <linux/kernel.h> #include <linux/interrupt.h> +#include <linux/module.h> #include <asm/geode.h> static struct mfgpt_timer_t { diff --git a/arch/x86/kernel/pci-dma_64.c b/arch/x86/kernel/pci-dma_64.c index a82473d192a3..375cb2bc45be 100644 --- a/arch/x86/kernel/pci-dma_64.c +++ b/arch/x86/kernel/pci-dma_64.c @@ -53,11 +53,6 @@ dma_alloc_pages(struct device *dev, gfp_t gfp, unsigned order) int node; node = dev_to_node(dev); - if (node == -1) - node = numa_node_id(); - - if (node < first_node(node_online_map)) - node = first_node(node_online_map); page = alloc_pages_node(node, gfp, order); return page ? page_address(page) : NULL; diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index f41fdc98efb1..d5904eef1d31 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -323,6 +323,16 @@ static int putreg(struct task_struct *child, return set_flags(child, value); #ifdef CONFIG_X86_64 + /* + * Orig_ax is really just a flag with small positive and + * negative values, so make sure to always sign-extend it + * from 32 bits so that it works correctly regardless of + * whether we come from a 32-bit environment or not. + */ + case offsetof(struct user_regs_struct, orig_ax): + value = (long) (s32) value; + break; + case offsetof(struct user_regs_struct,fs_base): if (value >= TASK_SIZE_OF(child)) return -EIO; @@ -1045,10 +1055,17 @@ static int putreg32(struct task_struct *child, unsigned regno, u32 value) R32(esi, si); R32(ebp, bp); R32(eax, ax); - R32(orig_eax, orig_ax); R32(eip, ip); R32(esp, sp); + case offsetof(struct user32, regs.orig_eax): + /* + * Sign-extend the value so that orig_eax = -1 + * causes (long)orig_ax < 0 tests to fire correctly. + */ + regs->orig_ax = (long) (s32) value; + break; + case offsetof(struct user32, regs.eflags): return set_flags(child, value); diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c index c47208fc5932..d89a648fe710 100644 --- a/arch/x86/kernel/quirks.c +++ b/arch/x86/kernel/quirks.c @@ -363,6 +363,8 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NVIDIA, 0x0051, nvidia_force_enable_hpet); /* LPC bridges */ +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NVIDIA, 0x0260, + nvidia_force_enable_hpet); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NVIDIA, 0x0360, nvidia_force_enable_hpet); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NVIDIA, 0x0361, diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 7fd6ac43e4a1..484c4a80d38a 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -152,6 +152,24 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = { DMI_MATCH(DMI_BOARD_NAME, "0WF810"), }, }, + { /* Handle problems with rebooting on Dell Optiplex 745's DFF*/ + .callback = set_bios_reboot, + .ident = "Dell OptiPlex 745", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 745"), + DMI_MATCH(DMI_BOARD_NAME, "0MM599"), + }, + }, + { /* Handle problems with rebooting on Dell Optiplex 745 with 0KW626 */ + .callback = set_bios_reboot, + .ident = "Dell OptiPlex 745", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 745"), + DMI_MATCH(DMI_BOARD_NAME, "0KW626"), + }, + }, { /* Handle problems with rebooting on Dell 2400's */ .callback = set_bios_reboot, .ident = "Dell PowerEdge 2400", @@ -326,6 +344,10 @@ static inline void kb_wait(void) } } +void __attribute__((weak)) mach_reboot_fixups(void) +{ +} + static void native_machine_emergency_restart(void) { int i; @@ -337,6 +359,8 @@ static void native_machine_emergency_restart(void) /* Could also try the reset bit in the Hammer NB */ switch (reboot_type) { case BOOT_KBD: + mach_reboot_fixups(); /* for board specific fixups */ + for (i = 0; i < 10; i++) { kb_wait(); udelay(50); diff --git a/arch/x86/kernel/setup64.c b/arch/x86/kernel/setup64.c index 309366f8f603..e24c45677094 100644 --- a/arch/x86/kernel/setup64.c +++ b/arch/x86/kernel/setup64.c @@ -142,14 +142,16 @@ void __init setup_per_cpu_areas(void) printk(KERN_INFO "PERCPU: Allocating %lu bytes of per cpu data\n", size); for_each_cpu_mask (i, cpu_possible_map) { char *ptr; +#ifndef CONFIG_NEED_MULTIPLE_NODES + ptr = alloc_bootmem_pages(size); +#else + int node = early_cpu_to_node(i); - if (!NODE_DATA(early_cpu_to_node(i))) { - printk("cpu with no node %d, num_online_nodes %d\n", - i, num_online_nodes()); + if (!node_online(node) || !NODE_DATA(node)) ptr = alloc_bootmem_pages(size); - } else { - ptr = alloc_bootmem_pages_node(NODE_DATA(early_cpu_to_node(i)), size); - } + else + ptr = alloc_bootmem_pages_node(NODE_DATA(node), size); +#endif if (!ptr) panic("Cannot allocate cpu data for CPU %d\n", i); cpu_pda(i)->data_offset = ptr - __per_cpu_start; diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c index a1d7071a51c9..2b3e5d45176b 100644 --- a/arch/x86/kernel/setup_32.c +++ b/arch/x86/kernel/setup_32.c @@ -406,8 +406,6 @@ static unsigned long __init setup_memory(void) */ min_low_pfn = PFN_UP(init_pg_tables_end); - find_max_pfn(); - max_low_pfn = find_max_low_pfn(); #ifdef CONFIG_HIGHMEM @@ -764,12 +762,13 @@ void __init setup_arch(char **cmdline_p) if (efi_enabled) efi_init(); - max_low_pfn = setup_memory(); - /* update e820 for memory not covered by WB MTRRs */ + find_max_pfn(); mtrr_bp_init(); if (mtrr_trim_uncached_memory(max_pfn)) - max_low_pfn = setup_memory(); + find_max_pfn(); + + max_low_pfn = setup_memory(); #ifdef CONFIG_VMI /* diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c index 7637dc91c79b..f4f7ecfb898c 100644 --- a/arch/x86/kernel/setup_64.c +++ b/arch/x86/kernel/setup_64.c @@ -801,7 +801,7 @@ static void __cpuinit srat_detect_node(void) /* Don't do the funky fallback heuristics the AMD version employs for now. */ node = apicid_to_node[apicid]; - if (node == NUMA_NO_NODE) + if (node == NUMA_NO_NODE || !node_online(node)) node = first_node(node_online_map); numa_set_node(cpu, node); diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c index caee1f002fed..0157a6f0f41f 100644 --- a/arch/x86/kernel/signal_32.c +++ b/arch/x86/kernel/signal_32.c @@ -407,7 +407,7 @@ static int setup_frame(int sig, struct k_sigaction *ka, * The tracer may want to single-step inside the * handler too. */ - regs->flags &= ~TF_MASK; + regs->flags &= ~(TF_MASK | X86_EFLAGS_DF); if (test_thread_flag(TIF_SINGLESTEP)) ptrace_notify(SIGTRAP); @@ -500,7 +500,7 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, * The tracer may want to single-step inside the * handler too. */ - regs->flags &= ~TF_MASK; + regs->flags &= ~(TF_MASK | X86_EFLAGS_DF); if (test_thread_flag(TIF_SINGLESTEP)) ptrace_notify(SIGTRAP); diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c index 7347bb14e306..1c83e5124c65 100644 --- a/arch/x86/kernel/signal_64.c +++ b/arch/x86/kernel/signal_64.c @@ -295,7 +295,7 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, see include/asm-x86_64/uaccess.h for details. */ set_fs(USER_DS); - regs->flags &= ~X86_EFLAGS_TF; + regs->flags &= ~(X86_EFLAGS_TF | X86_EFLAGS_DF); if (test_thread_flag(TIF_SINGLESTEP)) ptrace_notify(SIGTRAP); #ifdef DEBUG_SIG @@ -311,6 +311,35 @@ give_sigsegv: } /* + * Return -1L or the syscall number that @regs is executing. + */ +static long current_syscall(struct pt_regs *regs) +{ + /* + * We always sign-extend a -1 value being set here, + * so this is always either -1L or a syscall number. + */ + return regs->orig_ax; +} + +/* + * Return a value that is -EFOO if the system call in @regs->orig_ax + * returned an error. This only works for @regs from @current. + */ +static long current_syscall_ret(struct pt_regs *regs) +{ +#ifdef CONFIG_IA32_EMULATION + if (test_thread_flag(TIF_IA32)) + /* + * Sign-extend the value so (int)-EFOO becomes (long)-EFOO + * and will match correctly in comparisons. + */ + return (int) regs->ax; +#endif + return regs->ax; +} + +/* * OK, we're invoking a handler */ @@ -327,9 +356,9 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, #endif /* Are we from a system call? */ - if ((long)regs->orig_ax >= 0) { + if (current_syscall(regs) >= 0) { /* If so, check system call restarting.. */ - switch (regs->ax) { + switch (current_syscall_ret(regs)) { case -ERESTART_RESTARTBLOCK: case -ERESTARTNOHAND: regs->ax = -EINTR; @@ -426,10 +455,9 @@ static void do_signal(struct pt_regs *regs) } /* Did we come from a system call? */ - if ((long)regs->orig_ax >= 0) { + if (current_syscall(regs) >= 0) { /* Restart the system call - no handlers present */ - long res = regs->ax; - switch (res) { + switch (current_syscall_ret(regs)) { case -ERESTARTNOHAND: case -ERESTARTSYS: case -ERESTARTNOINTR: diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c index 2ef1a5f8d675..9d406cdc847f 100644 --- a/arch/x86/kernel/step.c +++ b/arch/x86/kernel/step.c @@ -166,7 +166,7 @@ static void enable_step(struct task_struct *child, bool block) child->thread.debugctlmsr | DEBUGCTLMSR_BTF); } else { write_debugctlmsr(child, - child->thread.debugctlmsr & ~TIF_DEBUGCTLMSR); + child->thread.debugctlmsr & ~DEBUGCTLMSR_BTF); if (!child->thread.debugctlmsr) clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR); @@ -189,7 +189,7 @@ void user_disable_single_step(struct task_struct *child) * Make sure block stepping (BTF) is disabled. */ write_debugctlmsr(child, - child->thread.debugctlmsr & ~TIF_DEBUGCTLMSR); + child->thread.debugctlmsr & ~DEBUGCTLMSR_BTF); if (!child->thread.debugctlmsr) clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR); diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 2cbee9479ce4..68a6b1511934 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -647,6 +647,10 @@ static void start_apic_timer(struct kvm_lapic *apic) apic->timer.period = apic_get_reg(apic, APIC_TMICT) * APIC_BUS_CYCLE_NS * apic->timer.divide_count; atomic_set(&apic->timer.pending, 0); + + if (!apic->timer.period) + return; + hrtimer_start(&apic->timer.dev, ktime_add_ns(now, apic->timer.period), HRTIMER_MODE_ABS); diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 8efdcdbebb03..e55af12e11b7 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -222,8 +222,7 @@ static int is_io_pte(unsigned long pte) static int is_rmap_pte(u64 pte) { - return pte != shadow_trap_nonpresent_pte - && pte != shadow_notrap_nonpresent_pte; + return is_shadow_present_pte(pte); } static gfn_t pse36_gfn_delta(u32 gpte) @@ -681,8 +680,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, unsigned level, int metaphysical, unsigned access, - u64 *parent_pte, - bool *new_page) + u64 *parent_pte) { union kvm_mmu_page_role role; unsigned index; @@ -722,8 +720,6 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, vcpu->arch.mmu.prefetch_page(vcpu, sp); if (!metaphysical) rmap_write_protect(vcpu->kvm, gfn); - if (new_page) - *new_page = 1; return sp; } @@ -876,11 +872,18 @@ static void page_header_update_slot(struct kvm *kvm, void *pte, gfn_t gfn) struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva) { + struct page *page; + gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, gva); if (gpa == UNMAPPED_GVA) return NULL; - return gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT); + + down_read(¤t->mm->mmap_sem); + page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT); + up_read(¤t->mm->mmap_sem); + + return page; } static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, @@ -889,14 +892,25 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, int *ptwrite, gfn_t gfn, struct page *page) { u64 spte; - int was_rmapped = is_rmap_pte(*shadow_pte); + int was_rmapped = 0; int was_writeble = is_writeble_pte(*shadow_pte); + hfn_t host_pfn = (*shadow_pte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT; pgprintk("%s: spte %llx access %x write_fault %d" " user_fault %d gfn %lx\n", __FUNCTION__, *shadow_pte, pt_access, write_fault, user_fault, gfn); + if (is_rmap_pte(*shadow_pte)) { + if (host_pfn != page_to_pfn(page)) { + pgprintk("hfn old %lx new %lx\n", + host_pfn, page_to_pfn(page)); + rmap_remove(vcpu->kvm, shadow_pte); + } + else + was_rmapped = 1; + } + /* * We don't set the accessed bit, since we sometimes want to see * whether the guest actually used the pte (in order to detect @@ -999,8 +1013,7 @@ static int __nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, >> PAGE_SHIFT; new_table = kvm_mmu_get_page(vcpu, pseudo_gfn, v, level - 1, - 1, ACC_ALL, &table[index], - NULL); + 1, ACC_ALL, &table[index]); if (!new_table) { pgprintk("nonpaging_map: ENOMEM\n"); kvm_release_page_clean(page); @@ -1020,15 +1033,18 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn) struct page *page; + down_read(&vcpu->kvm->slots_lock); + down_read(¤t->mm->mmap_sem); page = gfn_to_page(vcpu->kvm, gfn); + up_read(¤t->mm->mmap_sem); spin_lock(&vcpu->kvm->mmu_lock); kvm_mmu_free_some_pages(vcpu); r = __nonpaging_map(vcpu, v, write, gfn, page); spin_unlock(&vcpu->kvm->mmu_lock); - up_read(¤t->mm->mmap_sem); + up_read(&vcpu->kvm->slots_lock); return r; } @@ -1090,7 +1106,7 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu) ASSERT(!VALID_PAGE(root)); sp = kvm_mmu_get_page(vcpu, root_gfn, 0, - PT64_ROOT_LEVEL, 0, ACC_ALL, NULL, NULL); + PT64_ROOT_LEVEL, 0, ACC_ALL, NULL); root = __pa(sp->spt); ++sp->root_count; vcpu->arch.mmu.root_hpa = root; @@ -1111,7 +1127,7 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu) root_gfn = 0; sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30, PT32_ROOT_LEVEL, !is_paging(vcpu), - ACC_ALL, NULL, NULL); + ACC_ALL, NULL); root = __pa(sp->spt); ++sp->root_count; vcpu->arch.mmu.pae_root[i] = root | PT_PRESENT_MASK; @@ -1172,7 +1188,7 @@ void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu) static void paging_new_cr3(struct kvm_vcpu *vcpu) { - pgprintk("%s: cr3 %lx\n", __FUNCTION__, vcpu->cr3); + pgprintk("%s: cr3 %lx\n", __FUNCTION__, vcpu->arch.cr3); mmu_free_roots(vcpu); } @@ -1362,6 +1378,7 @@ static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, gfn_t gfn; int r; u64 gpte = 0; + struct page *page; if (bytes != 4 && bytes != 8) return; @@ -1389,8 +1406,13 @@ static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, if (!is_present_pte(gpte)) return; gfn = (gpte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT; + + down_read(¤t->mm->mmap_sem); + page = gfn_to_page(vcpu->kvm, gfn); + up_read(¤t->mm->mmap_sem); + vcpu->arch.update_pte.gfn = gfn; - vcpu->arch.update_pte.page = gfn_to_page(vcpu->kvm, gfn); + vcpu->arch.update_pte.page = page; } void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, @@ -1496,9 +1518,9 @@ int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva) gpa_t gpa; int r; - down_read(¤t->mm->mmap_sem); + down_read(&vcpu->kvm->slots_lock); gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, gva); - up_read(¤t->mm->mmap_sem); + up_read(&vcpu->kvm->slots_lock); spin_lock(&vcpu->kvm->mmu_lock); r = kvm_mmu_unprotect_page(vcpu->kvm, gpa >> PAGE_SHIFT); diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 03ba8608fe0f..ecc0856268c4 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -91,7 +91,10 @@ static bool FNAME(cmpxchg_gpte)(struct kvm *kvm, pt_element_t *table; struct page *page; + down_read(¤t->mm->mmap_sem); page = gfn_to_page(kvm, table_gfn); + up_read(¤t->mm->mmap_sem); + table = kmap_atomic(page, KM_USER0); ret = CMPXCHG(&table[index], orig_pte, new_pte); @@ -140,7 +143,7 @@ walk: } #endif ASSERT((!is_long_mode(vcpu) && is_pae(vcpu)) || - (vcpu->cr3 & CR3_NONPAE_RESERVED_BITS) == 0); + (vcpu->arch.cr3 & CR3_NONPAE_RESERVED_BITS) == 0); pt_access = ACC_ALL; @@ -297,7 +300,6 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, u64 shadow_pte; int metaphysical; gfn_t table_gfn; - bool new_page = 0; shadow_ent = ((u64 *)__va(shadow_addr)) + index; if (level == PT_PAGE_TABLE_LEVEL) @@ -319,8 +321,8 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, } shadow_page = kvm_mmu_get_page(vcpu, table_gfn, addr, level-1, metaphysical, access, - shadow_ent, &new_page); - if (new_page && !metaphysical) { + shadow_ent); + if (!metaphysical) { int r; pt_element_t curr_pte; r = kvm_read_guest_atomic(vcpu->kvm, @@ -378,7 +380,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, if (r) return r; - down_read(¤t->mm->mmap_sem); + down_read(&vcpu->kvm->slots_lock); /* * Look up the shadow pte for the faulting address. */ @@ -392,11 +394,13 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, pgprintk("%s: guest page fault\n", __FUNCTION__); inject_page_fault(vcpu, addr, walker.error_code); vcpu->arch.last_pt_write_count = 0; /* reset fork detector */ - up_read(¤t->mm->mmap_sem); + up_read(&vcpu->kvm->slots_lock); return 0; } + down_read(¤t->mm->mmap_sem); page = gfn_to_page(vcpu->kvm, walker.gfn); + up_read(¤t->mm->mmap_sem); spin_lock(&vcpu->kvm->mmu_lock); kvm_mmu_free_some_pages(vcpu); @@ -413,14 +417,14 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, */ if (shadow_pte && is_io_pte(*shadow_pte)) { spin_unlock(&vcpu->kvm->mmu_lock); - up_read(¤t->mm->mmap_sem); + up_read(&vcpu->kvm->slots_lock); return 1; } ++vcpu->stat.pf_fixed; kvm_mmu_audit(vcpu, "post page fault (fixed)"); spin_unlock(&vcpu->kvm->mmu_lock); - up_read(¤t->mm->mmap_sem); + up_read(&vcpu->kvm->slots_lock); return write_pt; } diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index de755cb1431d..1a582f1090e8 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -792,6 +792,10 @@ static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) vcpu->arch.cr0 = cr0; cr0 |= X86_CR0_PG | X86_CR0_WP; cr0 &= ~(X86_CR0_CD | X86_CR0_NW); + if (!vcpu->fpu_active) { + svm->vmcb->control.intercept_exceptions |= (1 << NM_VECTOR); + cr0 |= X86_CR0_TS; + } svm->vmcb->save.cr0 = cr0; } @@ -1096,6 +1100,24 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data) case MSR_IA32_SYSENTER_ESP: *data = svm->vmcb->save.sysenter_esp; break; + /* Nobody will change the following 5 values in the VMCB so + we can safely return them on rdmsr. They will always be 0 + until LBRV is implemented. */ + case MSR_IA32_DEBUGCTLMSR: + *data = svm->vmcb->save.dbgctl; + break; + case MSR_IA32_LASTBRANCHFROMIP: + *data = svm->vmcb->save.br_from; + break; + case MSR_IA32_LASTBRANCHTOIP: + *data = svm->vmcb->save.br_to; + break; + case MSR_IA32_LASTINTFROMIP: + *data = svm->vmcb->save.last_excp_from; + break; + case MSR_IA32_LASTINTTOIP: + *data = svm->vmcb->save.last_excp_to; + break; default: return kvm_get_msr_common(vcpu, ecx, data); } @@ -1156,6 +1178,10 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data) case MSR_IA32_SYSENTER_ESP: svm->vmcb->save.sysenter_esp = data; break; + case MSR_IA32_DEBUGCTLMSR: + pr_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTLMSR 0x%llx, nop\n", + __FUNCTION__, data); + break; case MSR_K7_EVNTSEL0: case MSR_K7_EVNTSEL1: case MSR_K7_EVNTSEL2: diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index ad36447e696e..8e1462880d1f 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -349,8 +349,6 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu) static void reload_tss(void) { -#ifndef CONFIG_X86_64 - /* * VT restores TR but not its size. Useless. */ @@ -361,7 +359,6 @@ static void reload_tss(void) descs = (void *)gdt.base; descs[GDT_ENTRY_TSS].type = 9; /* available TSS */ load_TR_desc(); -#endif } static void load_transition_efer(struct vcpu_vmx *vmx) @@ -638,6 +635,7 @@ static void setup_msrs(struct vcpu_vmx *vmx) { int save_nmsrs; + vmx_load_host_state(vmx); save_nmsrs = 0; #ifdef CONFIG_X86_64 if (is_long_mode(&vmx->vcpu)) { @@ -1435,7 +1433,7 @@ static int init_rmode_tss(struct kvm *kvm) int ret = 0; int r; - down_read(¤t->mm->mmap_sem); + down_read(&kvm->slots_lock); r = kvm_clear_guest_page(kvm, fn, 0, PAGE_SIZE); if (r < 0) goto out; @@ -1458,7 +1456,7 @@ static int init_rmode_tss(struct kvm *kvm) ret = 1; out: - up_read(¤t->mm->mmap_sem); + up_read(&kvm->slots_lock); return ret; } @@ -1477,7 +1475,7 @@ static int alloc_apic_access_page(struct kvm *kvm) struct kvm_userspace_memory_region kvm_userspace_mem; int r = 0; - down_write(¤t->mm->mmap_sem); + down_write(&kvm->slots_lock); if (kvm->arch.apic_access_page) goto out; kvm_userspace_mem.slot = APIC_ACCESS_PAGE_PRIVATE_MEMSLOT; @@ -1487,9 +1485,12 @@ static int alloc_apic_access_page(struct kvm *kvm) r = __kvm_set_memory_region(kvm, &kvm_userspace_mem, 0); if (r) goto out; + + down_read(¤t->mm->mmap_sem); kvm->arch.apic_access_page = gfn_to_page(kvm, 0xfee00); + up_read(¤t->mm->mmap_sem); out: - up_write(¤t->mm->mmap_sem); + up_write(&kvm->slots_lock); return r; } @@ -1602,9 +1603,6 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) vmcs_writel(CR0_GUEST_HOST_MASK, ~0UL); vmcs_writel(CR4_GUEST_HOST_MASK, KVM_GUEST_CR4_MASK); - if (vm_need_virtualize_apic_accesses(vmx->vcpu.kvm)) - if (alloc_apic_access_page(vmx->vcpu.kvm) != 0) - return -ENOMEM; return 0; } @@ -2534,6 +2532,9 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) put_cpu(); if (err) goto free_vmcs; + if (vm_need_virtualize_apic_accesses(kvm)) + if (alloc_apic_access_page(kvm) != 0) + goto free_vmcs; return &vmx->vcpu; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index cf5308148689..6b01552bd1f1 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -46,6 +46,9 @@ #define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU +static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid, + struct kvm_cpuid_entry2 __user *entries); + struct kvm_x86_ops *kvm_x86_ops; struct kvm_stats_debugfs_item debugfs_entries[] = { @@ -181,7 +184,7 @@ int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3) int ret; u64 pdpte[ARRAY_SIZE(vcpu->arch.pdptrs)]; - down_read(¤t->mm->mmap_sem); + down_read(&vcpu->kvm->slots_lock); ret = kvm_read_guest_page(vcpu->kvm, pdpt_gfn, pdpte, offset * sizeof(u64), sizeof(pdpte)); if (ret < 0) { @@ -198,7 +201,7 @@ int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3) memcpy(vcpu->arch.pdptrs, pdpte, sizeof(vcpu->arch.pdptrs)); out: - up_read(¤t->mm->mmap_sem); + up_read(&vcpu->kvm->slots_lock); return ret; } @@ -212,13 +215,13 @@ static bool pdptrs_changed(struct kvm_vcpu *vcpu) if (is_long_mode(vcpu) || !is_pae(vcpu)) return false; - down_read(¤t->mm->mmap_sem); + down_read(&vcpu->kvm->slots_lock); r = kvm_read_guest(vcpu->kvm, vcpu->arch.cr3 & ~31u, pdpte, sizeof(pdpte)); if (r < 0) goto out; changed = memcmp(pdpte, vcpu->arch.pdptrs, sizeof(pdpte)) != 0; out: - up_read(¤t->mm->mmap_sem); + up_read(&vcpu->kvm->slots_lock); return changed; } @@ -356,7 +359,7 @@ void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) */ } - down_read(¤t->mm->mmap_sem); + down_read(&vcpu->kvm->slots_lock); /* * Does the new cr3 value map to physical memory? (Note, we * catch an invalid cr3 even in real-mode, because it would @@ -372,7 +375,7 @@ void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) vcpu->arch.cr3 = cr3; vcpu->arch.mmu.new_cr3(vcpu); } - up_read(¤t->mm->mmap_sem); + up_read(&vcpu->kvm->slots_lock); } EXPORT_SYMBOL_GPL(set_cr3); @@ -484,6 +487,10 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) pr_unimpl(vcpu, "%s: MSR_IA32_MCG_STATUS 0x%llx, nop\n", __FUNCTION__, data); break; + case MSR_IA32_MCG_CTL: + pr_unimpl(vcpu, "%s: MSR_IA32_MCG_CTL 0x%llx, nop\n", + __FUNCTION__, data); + break; case MSR_IA32_UCODE_REV: case MSR_IA32_UCODE_WRITE: case 0x200 ... 0x2ff: /* MTRRs */ @@ -526,6 +533,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) case MSR_IA32_MC0_CTL: case MSR_IA32_MCG_STATUS: case MSR_IA32_MCG_CAP: + case MSR_IA32_MCG_CTL: case MSR_IA32_MC0_MISC: case MSR_IA32_MC0_MISC+4: case MSR_IA32_MC0_MISC+8: @@ -727,6 +735,24 @@ long kvm_arch_dev_ioctl(struct file *filp, r = 0; break; } + case KVM_GET_SUPPORTED_CPUID: { + struct kvm_cpuid2 __user *cpuid_arg = argp; + struct kvm_cpuid2 cpuid; + + r = -EFAULT; + if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid)) + goto out; + r = kvm_dev_ioctl_get_supported_cpuid(&cpuid, + cpuid_arg->entries); + if (r) + goto out; + + r = -EFAULT; + if (copy_to_user(cpuid_arg, &cpuid, sizeof cpuid)) + goto out; + r = 0; + break; + } default: r = -EINVAL; } @@ -974,8 +1000,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, put_cpu(); } -static int kvm_vm_ioctl_get_supported_cpuid(struct kvm *kvm, - struct kvm_cpuid2 *cpuid, +static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid, struct kvm_cpuid_entry2 __user *entries) { struct kvm_cpuid_entry2 *cpuid_entries; @@ -1207,12 +1232,12 @@ static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm, if (kvm_nr_mmu_pages < KVM_MIN_ALLOC_MMU_PAGES) return -EINVAL; - down_write(¤t->mm->mmap_sem); + down_write(&kvm->slots_lock); kvm_mmu_change_mmu_pages(kvm, kvm_nr_mmu_pages); kvm->arch.n_requested_mmu_pages = kvm_nr_mmu_pages; - up_write(¤t->mm->mmap_sem); + up_write(&kvm->slots_lock); return 0; } @@ -1261,7 +1286,7 @@ static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm, < alias->target_phys_addr) goto out; - down_write(¤t->mm->mmap_sem); + down_write(&kvm->slots_lock); p = &kvm->arch.aliases[alias->slot]; p->base_gfn = alias->guest_phys_addr >> PAGE_SHIFT; @@ -1275,7 +1300,7 @@ static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm, kvm_mmu_zap_all(kvm); - up_write(¤t->mm->mmap_sem); + up_write(&kvm->slots_lock); return 0; @@ -1351,7 +1376,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot; int is_dirty = 0; - down_write(¤t->mm->mmap_sem); + down_write(&kvm->slots_lock); r = kvm_get_dirty_log(kvm, log, &is_dirty); if (r) @@ -1367,7 +1392,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, } r = 0; out: - up_write(¤t->mm->mmap_sem); + up_write(&kvm->slots_lock); return r; } @@ -1487,24 +1512,6 @@ long kvm_arch_vm_ioctl(struct file *filp, r = 0; break; } - case KVM_GET_SUPPORTED_CPUID: { - struct kvm_cpuid2 __user *cpuid_arg = argp; - struct kvm_cpuid2 cpuid; - - r = -EFAULT; - if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid)) - goto out; - r = kvm_vm_ioctl_get_supported_cpuid(kvm, &cpuid, - cpuid_arg->entries); - if (r) - goto out; - - r = -EFAULT; - if (copy_to_user(cpuid_arg, &cpuid, sizeof cpuid)) - goto out; - r = 0; - break; - } default: ; } @@ -1563,7 +1570,7 @@ int emulator_read_std(unsigned long addr, void *data = val; int r = X86EMUL_CONTINUE; - down_read(¤t->mm->mmap_sem); + down_read(&vcpu->kvm->slots_lock); while (bytes) { gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr); unsigned offset = addr & (PAGE_SIZE-1); @@ -1585,7 +1592,7 @@ int emulator_read_std(unsigned long addr, addr += tocopy; } out: - up_read(¤t->mm->mmap_sem); + up_read(&vcpu->kvm->slots_lock); return r; } EXPORT_SYMBOL_GPL(emulator_read_std); @@ -1604,9 +1611,9 @@ static int emulator_read_emulated(unsigned long addr, return X86EMUL_CONTINUE; } - down_read(¤t->mm->mmap_sem); + down_read(&vcpu->kvm->slots_lock); gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr); - up_read(¤t->mm->mmap_sem); + up_read(&vcpu->kvm->slots_lock); /* For APIC access vmexit */ if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE) @@ -1644,14 +1651,14 @@ static int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, { int ret; - down_read(¤t->mm->mmap_sem); + down_read(&vcpu->kvm->slots_lock); ret = kvm_write_guest(vcpu->kvm, gpa, val, bytes); if (ret < 0) { - up_read(¤t->mm->mmap_sem); + up_read(&vcpu->kvm->slots_lock); return 0; } kvm_mmu_pte_write(vcpu, gpa, val, bytes); - up_read(¤t->mm->mmap_sem); + up_read(&vcpu->kvm->slots_lock); return 1; } @@ -1663,9 +1670,9 @@ static int emulator_write_emulated_onepage(unsigned long addr, struct kvm_io_device *mmio_dev; gpa_t gpa; - down_read(¤t->mm->mmap_sem); + down_read(&vcpu->kvm->slots_lock); gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr); - up_read(¤t->mm->mmap_sem); + up_read(&vcpu->kvm->slots_lock); if (gpa == UNMAPPED_GVA) { kvm_inject_page_fault(vcpu, addr, 2); @@ -1742,7 +1749,7 @@ static int emulator_cmpxchg_emulated(unsigned long addr, char *kaddr; u64 val; - down_read(¤t->mm->mmap_sem); + down_read(&vcpu->kvm->slots_lock); gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr); if (gpa == UNMAPPED_GVA || @@ -1753,13 +1760,17 @@ static int emulator_cmpxchg_emulated(unsigned long addr, goto emul_write; val = *(u64 *)new; + + down_read(¤t->mm->mmap_sem); page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT); + up_read(¤t->mm->mmap_sem); + kaddr = kmap_atomic(page, KM_USER0); set_64bit((u64 *)(kaddr + offset_in_page(gpa)), val); kunmap_atomic(kaddr, KM_USER0); kvm_release_page_dirty(page); emul_write: - up_read(¤t->mm->mmap_sem); + up_read(&vcpu->kvm->slots_lock); } #endif @@ -2152,10 +2163,10 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, kvm_x86_ops->skip_emulated_instruction(vcpu); for (i = 0; i < nr_pages; ++i) { - down_read(¤t->mm->mmap_sem); + down_read(&vcpu->kvm->slots_lock); page = gva_to_page(vcpu, address + i * PAGE_SIZE); vcpu->arch.pio.guest_pages[i] = page; - up_read(¤t->mm->mmap_sem); + up_read(&vcpu->kvm->slots_lock); if (!page) { kvm_inject_gp(vcpu, 0); free_pio_guest_pages(vcpu); @@ -2478,8 +2489,9 @@ static void vapic_enter(struct kvm_vcpu *vcpu) down_read(¤t->mm->mmap_sem); page = gfn_to_page(vcpu->kvm, apic->vapic_addr >> PAGE_SHIFT); - vcpu->arch.apic->vapic_page = page; up_read(¤t->mm->mmap_sem); + + vcpu->arch.apic->vapic_page = page; } static void vapic_exit(struct kvm_vcpu *vcpu) @@ -2861,8 +2873,8 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, kvm_x86_ops->decache_cr4_guest_bits(vcpu); mmu_reset_needed |= vcpu->arch.cr0 != sregs->cr0; - vcpu->arch.cr0 = sregs->cr0; kvm_x86_ops->set_cr0(vcpu, sregs->cr0); + vcpu->arch.cr0 = sregs->cr0; mmu_reset_needed |= vcpu->arch.cr4 != sregs->cr4; kvm_x86_ops->set_cr4(vcpu, sregs->cr4); @@ -2952,9 +2964,9 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, gpa_t gpa; vcpu_load(vcpu); - down_read(¤t->mm->mmap_sem); + down_read(&vcpu->kvm->slots_lock); gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, vaddr); - up_read(¤t->mm->mmap_sem); + up_read(&vcpu->kvm->slots_lock); tr->physical_address = gpa; tr->valid = gpa != UNMAPPED_GVA; tr->writeable = 1; @@ -3227,11 +3239,13 @@ int kvm_arch_set_memory_region(struct kvm *kvm, */ if (!user_alloc) { if (npages && !old.rmap) { + down_write(¤t->mm->mmap_sem); memslot->userspace_addr = do_mmap(NULL, 0, npages * PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, 0); + up_write(¤t->mm->mmap_sem); if (IS_ERR((void *)memslot->userspace_addr)) return PTR_ERR((void *)memslot->userspace_addr); @@ -3239,8 +3253,10 @@ int kvm_arch_set_memory_region(struct kvm *kvm, if (!old.user_alloc && old.rmap) { int ret; + down_write(¤t->mm->mmap_sem); ret = do_munmap(current->mm, old.userspace_addr, old.npages * PAGE_SIZE); + up_write(¤t->mm->mmap_sem); if (ret < 0) printk(KERN_WARNING "kvm_vm_ioctl_set_memory_region: " diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index cccb38a59653..a104c532ff70 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -84,7 +84,6 @@ struct lguest_data lguest_data = { .blocked_interrupts = { 1 }, /* Block timer interrupts */ .syscall_vec = SYSCALL_VECTOR, }; -static cycle_t clock_base; /*G:037 async_hcall() is pretty simple: I'm quite proud of it really. We have a * ring buffer of stored hypercalls which the Host will run though next time we @@ -327,8 +326,8 @@ static void lguest_cpuid(unsigned int *ax, unsigned int *bx, case 1: /* Basic feature request. */ /* We only allow kernel to see SSE3, CMPXCHG16B and SSSE3 */ *cx &= 0x00002201; - /* SSE, SSE2, FXSR, MMX, CMOV, CMPXCHG8B, FPU. */ - *dx &= 0x07808101; + /* SSE, SSE2, FXSR, MMX, CMOV, CMPXCHG8B, TSC, FPU. */ + *dx &= 0x07808111; /* The Host can do a nice optimization if it knows that the * kernel mappings (addresses above 0xC0000000 or whatever * PAGE_OFFSET is set to) haven't changed. But Linux calls @@ -481,7 +480,7 @@ static void lguest_set_pmd(pmd_t *pmdp, pmd_t pmdval) { *pmdp = pmdval; lazy_hcall(LHCALL_SET_PMD, __pa(pmdp)&PAGE_MASK, - (__pa(pmdp)&(PAGE_SIZE-1)), 0); + (__pa(pmdp)&(PAGE_SIZE-1))/4, 0); } /* There are a couple of legacy places where the kernel sets a PTE, but we @@ -595,19 +594,25 @@ static unsigned long lguest_get_wallclock(void) return lguest_data.time.tv_sec; } +/* The TSC is a Time Stamp Counter. The Host tells us what speed it runs at, + * or 0 if it's unusable as a reliable clock source. This matches what we want + * here: if we return 0 from this function, the x86 TSC clock will not register + * itself. */ +static unsigned long lguest_cpu_khz(void) +{ + return lguest_data.tsc_khz; +} + +/* If we can't use the TSC, the kernel falls back to our "lguest_clock", where + * we read the time value given to us by the Host. */ static cycle_t lguest_clock_read(void) { unsigned long sec, nsec; - /* If the Host tells the TSC speed, we can trust that. */ - if (lguest_data.tsc_khz) - return native_read_tsc(); - - /* If we can't use the TSC, we read the time value written by the Host. - * Since it's in two parts (seconds and nanoseconds), we risk reading - * it just as it's changing from 99 & 0.999999999 to 100 and 0, and - * getting 99 and 0. As Linux tends to come apart under the stress of - * time travel, we must be careful: */ + /* Since the time is in two parts (seconds and nanoseconds), we risk + * reading it just as it's changing from 99 & 0.999999999 to 100 and 0, + * and getting 99 and 0. As Linux tends to come apart under the stress + * of time travel, we must be careful: */ do { /* First we read the seconds part. */ sec = lguest_data.time.tv_sec; @@ -622,14 +627,14 @@ static cycle_t lguest_clock_read(void) /* Now if the seconds part has changed, try again. */ } while (unlikely(lguest_data.time.tv_sec != sec)); - /* Our non-TSC clock is in real nanoseconds. */ + /* Our lguest clock is in real nanoseconds. */ return sec*1000000000ULL + nsec; } -/* This is what we tell the kernel is our clocksource. */ +/* This is the fallback clocksource: lower priority than the TSC clocksource. */ static struct clocksource lguest_clock = { .name = "lguest", - .rating = 400, + .rating = 200, .read = lguest_clock_read, .mask = CLOCKSOURCE_MASK(64), .mult = 1 << 22, @@ -637,12 +642,6 @@ static struct clocksource lguest_clock = { .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; -/* The "scheduler clock" is just our real clock, adjusted to start at zero */ -static unsigned long long lguest_sched_clock(void) -{ - return cyc2ns(&lguest_clock, lguest_clock_read() - clock_base); -} - /* We also need a "struct clock_event_device": Linux asks us to set it to go * off some time in the future. Actually, James Morris figured all this out, I * just applied the patch. */ @@ -712,19 +711,8 @@ static void lguest_time_init(void) /* Set up the timer interrupt (0) to go to our simple timer routine */ set_irq_handler(0, lguest_time_irq); - /* Our clock structure looks like arch/x86/kernel/tsc_32.c if we can - * use the TSC, otherwise it's a dumb nanosecond-resolution clock. - * Either way, the "rating" is set so high that it's always chosen over - * any other clocksource. */ - if (lguest_data.tsc_khz) - lguest_clock.mult = clocksource_khz2mult(lguest_data.tsc_khz, - lguest_clock.shift); - clock_base = lguest_clock_read(); clocksource_register(&lguest_clock); - /* Now we've set up our clock, we can use it as the scheduler clock */ - pv_time_ops.sched_clock = lguest_sched_clock; - /* We can't set cpumask in the initializer: damn C limitations! Set it * here and register our timer device. */ lguest_clockevent.cpumask = cpumask_of_cpu(0); @@ -995,6 +983,7 @@ __init void lguest_init(void) /* time operations */ pv_time_ops.get_wallclock = lguest_get_wallclock; pv_time_ops.time_init = lguest_time_init; + pv_time_ops.get_cpu_khz = lguest_cpu_khz; /* Now is a good time to look at the implementations of these functions * before returning to the rest of lguest_init(). */ diff --git a/arch/x86/mach-visws/traps.c b/arch/x86/mach-visws/traps.c index 843b67acf43b..bfac6ba10f8a 100644 --- a/arch/x86/mach-visws/traps.c +++ b/arch/x86/mach-visws/traps.c @@ -46,8 +46,9 @@ static __init void cobalt_init(void) */ set_fixmap(FIX_APIC_BASE, APIC_DEFAULT_PHYS_BASE); setup_local_APIC(); - printk(KERN_INFO "Local APIC Version %#lx, ID %#lx\n", - apic_read(APIC_LVR), apic_read(APIC_ID)); + printk(KERN_INFO "Local APIC Version %#x, ID %#x\n", + (unsigned int)apic_read(APIC_LVR), + (unsigned int)apic_read(APIC_ID)); set_fixmap(FIX_CO_CPU, CO_CPU_PHYS); set_fixmap(FIX_CO_APIC, CO_APIC_PHYS); diff --git a/arch/x86/mm/discontig_32.c b/arch/x86/mm/discontig_32.c index c394ca0720b8..8e25e06ff730 100644 --- a/arch/x86/mm/discontig_32.c +++ b/arch/x86/mm/discontig_32.c @@ -324,7 +324,6 @@ unsigned long __init setup_memory(void) * this space and use it to adjust the boundary between ZONE_NORMAL * and ZONE_HIGHMEM. */ - find_max_pfn(); get_memcfg_numa(); kva_pages = calculate_numa_remap_pages(); diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index ac3c959e271d..794895c6dcc9 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -106,7 +106,7 @@ static int ioremap_change_attr(unsigned long vaddr, unsigned long size, * have to convert them into an offset in a page-aligned mapping, but the * caller shouldn't need to know that small detail. */ -static void __iomem *__ioremap(unsigned long phys_addr, unsigned long size, +static void __iomem *__ioremap(resource_size_t phys_addr, unsigned long size, enum ioremap_mode mode) { unsigned long pfn, offset, last_addr, vaddr; @@ -134,12 +134,14 @@ static void __iomem *__ioremap(unsigned long phys_addr, unsigned long size, return NULL; } - WARN_ON_ONCE(page_is_ram(pfn)); - switch (mode) { case IOR_MODE_UNCACHED: default: - prot = PAGE_KERNEL_NOCACHE; + /* + * FIXME: we will use UC MINUS for now, as video fb drivers + * depend on it. Upcoming ioremap_wc() will fix this behavior. + */ + prot = PAGE_KERNEL_UC_MINUS; break; case IOR_MODE_CACHED: prot = PAGE_KERNEL; @@ -195,13 +197,13 @@ static void __iomem *__ioremap(unsigned long phys_addr, unsigned long size, * * Must be freed with iounmap. */ -void __iomem *ioremap_nocache(unsigned long phys_addr, unsigned long size) +void __iomem *ioremap_nocache(resource_size_t phys_addr, unsigned long size) { return __ioremap(phys_addr, size, IOR_MODE_UNCACHED); } EXPORT_SYMBOL(ioremap_nocache); -void __iomem *ioremap_cache(unsigned long phys_addr, unsigned long size) +void __iomem *ioremap_cache(resource_size_t phys_addr, unsigned long size) { return __ioremap(phys_addr, size, IOR_MODE_CACHED); } diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c index 59898fb0a4aa..16b82ad34b96 100644 --- a/arch/x86/mm/numa_64.c +++ b/arch/x86/mm/numa_64.c @@ -221,8 +221,7 @@ void __init setup_node_bootmem(int nodeid, unsigned long start, bootmap_pages<<PAGE_SHIFT, PAGE_SIZE); if (bootmap == NULL) { if (nodedata_phys < start || nodedata_phys >= end) - free_bootmem((unsigned long)node_data[nodeid], - pgdat_size); + free_bootmem(nodedata_phys, pgdat_size); node_data[nodeid] = NULL; return; } @@ -622,13 +621,17 @@ void __init init_cpu_to_node(void) int i; for (i = 0; i < NR_CPUS; i++) { + int node; u16 apicid = x86_cpu_to_apicid_init[i]; if (apicid == BAD_APICID) continue; - if (apicid_to_node[apicid] == NUMA_NO_NODE) + node = apicid_to_node[apicid]; + if (node == NUMA_NO_NODE) continue; - numa_set_node(i, apicid_to_node[apicid]); + if (!node_online(node)) + continue; + numa_set_node(i, node); } } diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 7049294fb469..7b79f6be4e7d 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -26,7 +26,6 @@ struct cpa_data { pgprot_t mask_set; pgprot_t mask_clr; int numpages; - int processed; int flushtlb; unsigned long pfn; }; @@ -291,8 +290,8 @@ try_preserve_large_page(pte_t *kpte, unsigned long address, */ nextpage_addr = (address + psize) & pmask; numpages = (nextpage_addr - address) >> PAGE_SHIFT; - if (numpages < cpa->processed) - cpa->processed = numpages; + if (numpages < cpa->numpages) + cpa->numpages = numpages; /* * We are safe now. Check whether the new pgprot is the same: @@ -319,7 +318,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address, */ addr = address + PAGE_SIZE; pfn++; - for (i = 1; i < cpa->processed; i++, addr += PAGE_SIZE, pfn++) { + for (i = 1; i < cpa->numpages; i++, addr += PAGE_SIZE, pfn++) { pgprot_t chk_prot = static_protections(new_prot, addr, pfn); if (pgprot_val(chk_prot) != pgprot_val(new_prot)) @@ -343,7 +342,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address, * that we limited the number of possible pages already to * the number of pages in the large page. */ - if (address == (nextpage_addr - psize) && cpa->processed == numpages) { + if (address == (nextpage_addr - psize) && cpa->numpages == numpages) { /* * The address is aligned and the number of pages * covers the full page. @@ -573,7 +572,7 @@ repeat: set_pte_atomic(kpte, new_pte); cpa->flushtlb = 1; } - cpa->processed = 1; + cpa->numpages = 1; return 0; } @@ -584,7 +583,7 @@ repeat: do_split = try_preserve_large_page(kpte, address, cpa); /* * When the range fits into the existing large page, - * return. cp->processed and cpa->tlbflush have been updated in + * return. cp->numpages and cpa->tlbflush have been updated in * try_large_page: */ if (do_split <= 0) @@ -663,7 +662,7 @@ static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias) * Store the remaining nr of pages for the large page * preservation check. */ - cpa->numpages = cpa->processed = numpages; + cpa->numpages = numpages; ret = __change_page_attr(cpa, checkalias); if (ret) @@ -680,9 +679,9 @@ static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias) * CPA operation. Either a large page has been * preserved or a single page update happened. */ - BUG_ON(cpa->processed > numpages); - numpages -= cpa->processed; - cpa->vaddr += cpa->processed * PAGE_SIZE; + BUG_ON(cpa->numpages > numpages); + numpages -= cpa->numpages; + cpa->vaddr += cpa->numpages * PAGE_SIZE; } return 0; } @@ -772,7 +771,7 @@ static inline int change_page_attr_clear(unsigned long addr, int numpages, int set_memory_uc(unsigned long addr, int numpages) { return change_page_attr_set(addr, numpages, - __pgprot(_PAGE_PCD | _PAGE_PWT)); + __pgprot(_PAGE_PCD)); } EXPORT_SYMBOL(set_memory_uc); diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c index 73aba7125203..2f9e9afcb9f4 100644 --- a/arch/x86/mm/pgtable_32.c +++ b/arch/x86/mm/pgtable_32.c @@ -342,12 +342,16 @@ static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgdp) pgd_t *pgd_alloc(struct mm_struct *mm) { - pgd_t *pgd = quicklist_alloc(0, GFP_KERNEL, pgd_ctor); + pgd_t *pgd = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO); - mm->pgd = pgd; /* so that alloc_pd can use it */ + /* so that alloc_pd can use it */ + mm->pgd = pgd; + if (pgd) + pgd_ctor(pgd); if (pgd && !pgd_prepopulate_pmd(mm, pgd)) { - quicklist_free(0, pgd_dtor, pgd); + pgd_dtor(pgd); + free_page((unsigned long)pgd); pgd = NULL; } @@ -357,12 +361,8 @@ pgd_t *pgd_alloc(struct mm_struct *mm) void pgd_free(struct mm_struct *mm, pgd_t *pgd) { pgd_mop_up_pmds(mm, pgd); - quicklist_free(0, pgd_dtor, pgd); -} - -void check_pgt_cache(void) -{ - quicklist_trim(0, pgd_dtor, 25, 16); + pgd_dtor(pgd); + free_page((unsigned long)pgd); } void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte) diff --git a/arch/x86/pci/pcbios.c b/arch/x86/pci/pcbios.c index 10ac8c316c46..2f7109ac4c15 100644 --- a/arch/x86/pci/pcbios.c +++ b/arch/x86/pci/pcbios.c @@ -198,6 +198,11 @@ static int pci_bios_read(unsigned int seg, unsigned int bus, "b" (bx), "D" ((long)reg), "S" (&pci_indirect)); + /* + * Zero-extend the result beyond 8 bits, do not trust the + * BIOS having done it: + */ + *value &= 0xff; break; case 2: __asm__("lcall *(%%esi); cld\n\t" @@ -210,6 +215,11 @@ static int pci_bios_read(unsigned int seg, unsigned int bus, "b" (bx), "D" ((long)reg), "S" (&pci_indirect)); + /* + * Zero-extend the result beyond 16 bits, do not trust the + * BIOS having done it: + */ + *value &= 0xffff; break; case 4: __asm__("lcall *(%%esi); cld\n\t" diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 3bad4773a2f3..2341492bf7a0 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -38,7 +38,8 @@ char * __init xen_memory_setup(void) unsigned long max_pfn = xen_start_info->nr_pages; e820.nr_map = 0; - add_memory_region(0, PFN_PHYS(max_pfn), E820_RAM); + add_memory_region(0, LOWMEMSIZE(), E820_RAM); + add_memory_region(HIGH_MEMORY, PFN_PHYS(max_pfn)-HIGH_MEMORY, E820_RAM); return "Xen"; } |