diff options
Diffstat (limited to 'arch/x86')
30 files changed, 271 insertions, 142 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index e8327686d3c5..e330da21b84f 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -21,7 +21,7 @@ config X86 select HAVE_UNSTABLE_SCHED_CLOCK select HAVE_IDE select HAVE_OPROFILE - select HAVE_PERF_EVENTS if (!M386 && !M486) + select HAVE_PERF_EVENTS select HAVE_IRQ_WORK select HAVE_IOREMAP_PROT select HAVE_KPROBES diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h index 4d293dced62f..9479a037419f 100644 --- a/arch/x86/include/asm/fixmap.h +++ b/arch/x86/include/asm/fixmap.h @@ -216,8 +216,8 @@ static inline unsigned long virt_to_fix(const unsigned long vaddr) } /* Return an pointer with offset calculated */ -static inline unsigned long __set_fixmap_offset(enum fixed_addresses idx, - phys_addr_t phys, pgprot_t flags) +static __always_inline unsigned long +__set_fixmap_offset(enum fixed_addresses idx, phys_addr_t phys, pgprot_t flags) { __set_fixmap(idx, phys, flags); return fix_to_virt(idx) + (phys & (PAGE_SIZE - 1)); diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 3ea3dc487047..6b89f5e86021 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -128,7 +128,7 @@ #define FAM10H_MMIO_CONF_ENABLE (1<<0) #define FAM10H_MMIO_CONF_BUSRANGE_MASK 0xf #define FAM10H_MMIO_CONF_BUSRANGE_SHIFT 2 -#define FAM10H_MMIO_CONF_BASE_MASK 0xfffffff +#define FAM10H_MMIO_CONF_BASE_MASK 0xfffffffULL #define FAM10H_MMIO_CONF_BASE_SHIFT 20 #define MSR_FAM10H_NODE_ID 0xc001100c diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 18e3b8a8709f..ef9975812c77 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -824,27 +824,27 @@ static __always_inline void arch_spin_unlock(struct arch_spinlock *lock) #define __PV_IS_CALLEE_SAVE(func) \ ((struct paravirt_callee_save) { func }) -static inline unsigned long arch_local_save_flags(void) +static inline notrace unsigned long arch_local_save_flags(void) { return PVOP_CALLEE0(unsigned long, pv_irq_ops.save_fl); } -static inline void arch_local_irq_restore(unsigned long f) +static inline notrace void arch_local_irq_restore(unsigned long f) { PVOP_VCALLEE1(pv_irq_ops.restore_fl, f); } -static inline void arch_local_irq_disable(void) +static inline notrace void arch_local_irq_disable(void) { PVOP_VCALLEE0(pv_irq_ops.irq_disable); } -static inline void arch_local_irq_enable(void) +static inline notrace void arch_local_irq_enable(void) { PVOP_VCALLEE0(pv_irq_ops.irq_enable); } -static inline unsigned long arch_local_irq_save(void) +static inline notrace unsigned long arch_local_irq_save(void) { unsigned long f; diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h index 7f7e577a0e39..31d84acc1512 100644 --- a/arch/x86/include/asm/pvclock.h +++ b/arch/x86/include/asm/pvclock.h @@ -11,6 +11,7 @@ unsigned long pvclock_tsc_khz(struct pvclock_vcpu_time_info *src); void pvclock_read_wallclock(struct pvclock_wall_clock *wall, struct pvclock_vcpu_time_info *vcpu, struct timespec *ts); +void pvclock_resume(void); /* * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction, diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h index e969f691cbfd..a501741c2335 100644 --- a/arch/x86/include/asm/uv/uv_hub.h +++ b/arch/x86/include/asm/uv/uv_hub.h @@ -199,6 +199,8 @@ union uvh_apicid { #define UVH_APICID 0x002D0E00L #define UV_APIC_PNODE_SHIFT 6 +#define UV_APICID_HIBIT_MASK 0xffff0000 + /* Local Bus from cpu's perspective */ #define LOCAL_BUS_BASE 0x1c00000 #define LOCAL_BUS_SIZE (4 * 1024 * 1024) @@ -491,8 +493,10 @@ static inline void uv_set_cpu_scir_bits(int cpu, unsigned char value) } } +extern unsigned int uv_apicid_hibits; static unsigned long uv_hub_ipi_value(int apicid, int vector, int mode) { + apicid |= uv_apicid_hibits; return (1UL << UVH_IPI_INT_SEND_SHFT) | ((apicid) << UVH_IPI_INT_APIC_ID_SHFT) | (mode << UVH_IPI_INT_DELIVERY_MODE_SHFT) | diff --git a/arch/x86/include/asm/uv/uv_mmrs.h b/arch/x86/include/asm/uv/uv_mmrs.h index 6d90adf4428a..20cafeac7455 100644 --- a/arch/x86/include/asm/uv/uv_mmrs.h +++ b/arch/x86/include/asm/uv/uv_mmrs.h @@ -5,7 +5,7 @@ * * SGI UV MMR definitions * - * Copyright (C) 2007-2008 Silicon Graphics, Inc. All rights reserved. + * Copyright (C) 2007-2010 Silicon Graphics, Inc. All rights reserved. */ #ifndef _ASM_X86_UV_UV_MMRS_H @@ -754,6 +754,23 @@ union uvh_lb_bau_sb_descriptor_base_u { }; /* ========================================================================= */ +/* UVH_LB_TARGET_PHYSICAL_APIC_ID_MASK */ +/* ========================================================================= */ +#define UVH_LB_TARGET_PHYSICAL_APIC_ID_MASK 0x320130UL +#define UVH_LB_TARGET_PHYSICAL_APIC_ID_MASK_32 0x009f0 + +#define UVH_LB_TARGET_PHYSICAL_APIC_ID_MASK_BIT_ENABLES_SHFT 0 +#define UVH_LB_TARGET_PHYSICAL_APIC_ID_MASK_BIT_ENABLES_MASK 0x00000000ffffffffUL + +union uvh_lb_target_physical_apic_id_mask_u { + unsigned long v; + struct uvh_lb_target_physical_apic_id_mask_s { + unsigned long bit_enables : 32; /* RW */ + unsigned long rsvd_32_63 : 32; /* */ + } s; +}; + +/* ========================================================================= */ /* UVH_NODE_ID */ /* ========================================================================= */ #define UVH_NODE_ID 0x0UL diff --git a/arch/x86/include/asm/xen/interface.h b/arch/x86/include/asm/xen/interface.h index e8506c1f0c55..1c10c88ee4e1 100644 --- a/arch/x86/include/asm/xen/interface.h +++ b/arch/x86/include/asm/xen/interface.h @@ -61,9 +61,9 @@ DEFINE_GUEST_HANDLE(void); #define HYPERVISOR_VIRT_START mk_unsigned_long(__HYPERVISOR_VIRT_START) #endif -#ifndef machine_to_phys_mapping -#define machine_to_phys_mapping ((unsigned long *)HYPERVISOR_VIRT_START) -#endif +#define MACH2PHYS_VIRT_START mk_unsigned_long(__MACH2PHYS_VIRT_START) +#define MACH2PHYS_VIRT_END mk_unsigned_long(__MACH2PHYS_VIRT_END) +#define MACH2PHYS_NR_ENTRIES ((MACH2PHYS_VIRT_END-MACH2PHYS_VIRT_START)>>__MACH2PHYS_SHIFT) /* Maximum number of virtual CPUs in multi-processor guests. */ #define MAX_VIRT_CPUS 32 diff --git a/arch/x86/include/asm/xen/interface_32.h b/arch/x86/include/asm/xen/interface_32.h index 42a7e004ae5c..8413688b2571 100644 --- a/arch/x86/include/asm/xen/interface_32.h +++ b/arch/x86/include/asm/xen/interface_32.h @@ -32,6 +32,11 @@ /* And the trap vector is... */ #define TRAP_INSTR "int $0x82" +#define __MACH2PHYS_VIRT_START 0xF5800000 +#define __MACH2PHYS_VIRT_END 0xF6800000 + +#define __MACH2PHYS_SHIFT 2 + /* * Virtual addresses beyond this are not modifiable by guest OSes. The * machine->physical mapping table starts at this address, read-only. diff --git a/arch/x86/include/asm/xen/interface_64.h b/arch/x86/include/asm/xen/interface_64.h index 100d2662b97c..839a4811cf98 100644 --- a/arch/x86/include/asm/xen/interface_64.h +++ b/arch/x86/include/asm/xen/interface_64.h @@ -39,18 +39,7 @@ #define __HYPERVISOR_VIRT_END 0xFFFF880000000000 #define __MACH2PHYS_VIRT_START 0xFFFF800000000000 #define __MACH2PHYS_VIRT_END 0xFFFF804000000000 - -#ifndef HYPERVISOR_VIRT_START -#define HYPERVISOR_VIRT_START mk_unsigned_long(__HYPERVISOR_VIRT_START) -#define HYPERVISOR_VIRT_END mk_unsigned_long(__HYPERVISOR_VIRT_END) -#endif - -#define MACH2PHYS_VIRT_START mk_unsigned_long(__MACH2PHYS_VIRT_START) -#define MACH2PHYS_VIRT_END mk_unsigned_long(__MACH2PHYS_VIRT_END) -#define MACH2PHYS_NR_ENTRIES ((MACH2PHYS_VIRT_END-MACH2PHYS_VIRT_START)>>3) -#ifndef machine_to_phys_mapping -#define machine_to_phys_mapping ((unsigned long *)HYPERVISOR_VIRT_START) -#endif +#define __MACH2PHYS_SHIFT 3 /* * int HYPERVISOR_set_segment_base(unsigned int which, unsigned long base) diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h index dd8c1414b3d5..8760cc60a21c 100644 --- a/arch/x86/include/asm/xen/page.h +++ b/arch/x86/include/asm/xen/page.h @@ -5,6 +5,7 @@ #include <linux/types.h> #include <linux/spinlock.h> #include <linux/pfn.h> +#include <linux/mm.h> #include <asm/uaccess.h> #include <asm/page.h> @@ -35,6 +36,8 @@ typedef struct xpaddr { #define MAX_DOMAIN_PAGES \ ((unsigned long)((u64)CONFIG_XEN_MAX_DOMAIN_MEMORY * 1024 * 1024 * 1024 / PAGE_SIZE)) +extern unsigned long *machine_to_phys_mapping; +extern unsigned int machine_to_phys_order; extern unsigned long get_phys_to_machine(unsigned long pfn); extern bool set_phys_to_machine(unsigned long pfn, unsigned long mfn); @@ -69,10 +72,8 @@ static inline unsigned long mfn_to_pfn(unsigned long mfn) if (xen_feature(XENFEAT_auto_translated_physmap)) return mfn; -#if 0 if (unlikely((mfn >> machine_to_phys_order) != 0)) - return max_mapnr; -#endif + return ~0; pfn = 0; /* diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c index cefd6942f0e9..62f6e1e55b90 100644 --- a/arch/x86/kernel/apic/hw_nmi.c +++ b/arch/x86/kernel/apic/hw_nmi.c @@ -17,15 +17,16 @@ #include <linux/nmi.h> #include <linux/module.h> -/* For reliability, we're prepared to waste bits here. */ -static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly; - u64 hw_nmi_get_sample_period(void) { return (u64)(cpu_khz) * 1000 * 60; } #ifdef ARCH_HAS_NMI_WATCHDOG + +/* For reliability, we're prepared to waste bits here. */ +static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly; + void arch_trigger_all_cpu_backtrace(void) { int i; diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 194539aea175..c1c52c341f40 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -44,6 +44,8 @@ static u64 gru_start_paddr, gru_end_paddr; static union uvh_apicid uvh_apicid; int uv_min_hub_revision_id; EXPORT_SYMBOL_GPL(uv_min_hub_revision_id); +unsigned int uv_apicid_hibits; +EXPORT_SYMBOL_GPL(uv_apicid_hibits); static DEFINE_SPINLOCK(uv_nmi_lock); static inline bool is_GRU_range(u64 start, u64 end) @@ -85,6 +87,23 @@ static void __init early_get_apic_pnode_shift(void) uvh_apicid.s.pnode_shift = UV_APIC_PNODE_SHIFT; } +/* + * Add an extra bit as dictated by bios to the destination apicid of + * interrupts potentially passing through the UV HUB. This prevents + * a deadlock between interrupts and IO port operations. + */ +static void __init uv_set_apicid_hibit(void) +{ + union uvh_lb_target_physical_apic_id_mask_u apicid_mask; + unsigned long *mmr; + + mmr = early_ioremap(UV_LOCAL_MMR_BASE | + UVH_LB_TARGET_PHYSICAL_APIC_ID_MASK, sizeof(*mmr)); + apicid_mask.v = *mmr; + early_iounmap(mmr, sizeof(*mmr)); + uv_apicid_hibits = apicid_mask.s.bit_enables & UV_APICID_HIBIT_MASK; +} + static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id) { int nodeid; @@ -102,6 +121,7 @@ static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id) __get_cpu_var(x2apic_extra_bits) = nodeid << (uvh_apicid.s.pnode_shift - 1); uv_system_type = UV_NON_UNIQUE_APIC; + uv_set_apicid_hibit(); return 1; } } @@ -155,6 +175,7 @@ static int __cpuinit uv_wakeup_secondary(int phys_apicid, unsigned long start_ri int pnode; pnode = uv_apicid_to_pnode(phys_apicid); + phys_apicid |= uv_apicid_hibits; val = (1UL << UVH_IPI_INT_SEND_SHFT) | (phys_apicid << UVH_IPI_INT_APIC_ID_SHFT) | ((start_rip << UVH_IPI_INT_VECTOR_SHFT) >> 12) | @@ -236,7 +257,7 @@ static unsigned int uv_cpu_mask_to_apicid(const struct cpumask *cpumask) int cpu = cpumask_first(cpumask); if ((unsigned)cpu < nr_cpu_ids) - return per_cpu(x86_cpu_to_apicid, cpu); + return per_cpu(x86_cpu_to_apicid, cpu) | uv_apicid_hibits; else return BAD_APICID; } @@ -255,7 +276,7 @@ uv_cpu_mask_to_apicid_and(const struct cpumask *cpumask, if (cpumask_test_cpu(cpu, cpu_online_mask)) break; } - return per_cpu(x86_cpu_to_apicid, cpu); + return per_cpu(x86_cpu_to_apicid, cpu) | uv_apicid_hibits; } static unsigned int x2apic_get_apic_id(unsigned long x) diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index ed6310183efb..6d75b9145b13 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -381,6 +381,20 @@ static void release_pmc_hardware(void) {} #endif +static bool check_hw_exists(void) +{ + u64 val, val_new = 0; + int ret = 0; + + val = 0xabcdUL; + ret |= checking_wrmsrl(x86_pmu.perfctr, val); + ret |= rdmsrl_safe(x86_pmu.perfctr, &val_new); + if (ret || val != val_new) + return false; + + return true; +} + static void reserve_ds_buffers(void); static void release_ds_buffers(void); @@ -1372,6 +1386,12 @@ void __init init_hw_perf_events(void) pmu_check_apic(); + /* sanity check that the hardware exists or is emulated */ + if (!check_hw_exists()) { + pr_cont("Broken PMU hardware detected, software events only.\n"); + return; + } + pr_cont("%s PMU driver.\n", x86_pmu.name); if (x86_pmu.quirks) diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 59e175e89599..591e60104278 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -395,7 +395,7 @@ sysenter_past_esp: * A tiny bit of offset fixup is necessary - 4*4 means the 4 words * pushed above; +8 corresponds to copy_thread's esp0 setting. */ - pushl_cfi (TI_sysenter_return-THREAD_SIZE_asm+8+4*4)(%esp) + pushl_cfi ((TI_sysenter_return)-THREAD_SIZE_asm+8+4*4)(%esp) CFI_REL_OFFSET eip, 0 pushl_cfi %eax diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index fe2690d71c0c..e3ba417e8697 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -295,6 +295,7 @@ ENDPROC(native_usergs_sysret64) .endm /* save partial stack frame */ + .pushsection .kprobes.text, "ax" ENTRY(save_args) XCPT_FRAME cld @@ -334,6 +335,7 @@ ENTRY(save_args) ret CFI_ENDPROC END(save_args) + .popsection ENTRY(save_rest) PARTIAL_FRAME 1 REST_SKIP+8 diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c index ff15c9dcc25d..42c594254507 100644 --- a/arch/x86/kernel/hw_breakpoint.c +++ b/arch/x86/kernel/hw_breakpoint.c @@ -433,6 +433,10 @@ static int __kprobes hw_breakpoint_handler(struct die_args *args) dr6_p = (unsigned long *)ERR_PTR(args->err); dr6 = *dr6_p; + /* If it's a single step, TRAP bits are random */ + if (dr6 & DR_STEP) + return NOTIFY_DONE; + /* Do an early return if no trap bits are set in DR6 */ if ((dr6 & DR_TRAP_BITS) == 0) return NOTIFY_DONE; diff --git a/arch/x86/kernel/mmconf-fam10h_64.c b/arch/x86/kernel/mmconf-fam10h_64.c index 6da143c2a6b8..ac861b8348e2 100644 --- a/arch/x86/kernel/mmconf-fam10h_64.c +++ b/arch/x86/kernel/mmconf-fam10h_64.c @@ -25,7 +25,6 @@ struct pci_hostbridge_probe { }; static u64 __cpuinitdata fam10h_pci_mmconf_base; -static int __cpuinitdata fam10h_pci_mmconf_base_status; static struct pci_hostbridge_probe pci_probes[] __cpuinitdata = { { 0, 0x18, PCI_VENDOR_ID_AMD, 0x1200 }, @@ -44,10 +43,12 @@ static int __cpuinit cmp_range(const void *x1, const void *x2) return start1 - start2; } -/*[47:0] */ -/* need to avoid (0xfd<<32) and (0xfe<<32), ht used space */ +#define MMCONF_UNIT (1ULL << FAM10H_MMIO_CONF_BASE_SHIFT) +#define MMCONF_MASK (~(MMCONF_UNIT - 1)) +#define MMCONF_SIZE (MMCONF_UNIT << 8) +/* need to avoid (0xfd<<32), (0xfe<<32), and (0xff<<32), ht used space */ #define FAM10H_PCI_MMCONF_BASE (0xfcULL<<32) -#define BASE_VALID(b) ((b != (0xfdULL << 32)) && (b != (0xfeULL << 32))) +#define BASE_VALID(b) ((b) + MMCONF_SIZE <= (0xfdULL<<32) || (b) >= (1ULL<<40)) static void __cpuinit get_fam10h_pci_mmconf_base(void) { int i; @@ -64,12 +65,11 @@ static void __cpuinit get_fam10h_pci_mmconf_base(void) struct range range[8]; /* only try to get setting from BSP */ - /* -1 or 1 */ - if (fam10h_pci_mmconf_base_status) + if (fam10h_pci_mmconf_base) return; if (!early_pci_allowed()) - goto fail; + return; found = 0; for (i = 0; i < ARRAY_SIZE(pci_probes); i++) { @@ -91,7 +91,7 @@ static void __cpuinit get_fam10h_pci_mmconf_base(void) } if (!found) - goto fail; + return; /* SYS_CFG */ address = MSR_K8_SYSCFG; @@ -99,16 +99,16 @@ static void __cpuinit get_fam10h_pci_mmconf_base(void) /* TOP_MEM2 is not enabled? */ if (!(val & (1<<21))) { - tom2 = 0; + tom2 = 1ULL << 32; } else { /* TOP_MEM2 */ address = MSR_K8_TOP_MEM2; rdmsrl(address, val); - tom2 = val & (0xffffULL<<32); + tom2 = max(val & 0xffffff800000ULL, 1ULL << 32); } if (base <= tom2) - base = tom2 + (1ULL<<32); + base = (tom2 + 2 * MMCONF_UNIT - 1) & MMCONF_MASK; /* * need to check if the range is in the high mmio range that is @@ -123,11 +123,11 @@ static void __cpuinit get_fam10h_pci_mmconf_base(void) if (!(reg & 3)) continue; - start = (((u64)reg) << 8) & (0xffULL << 32); /* 39:16 on 31:8*/ + start = (u64)(reg & 0xffffff00) << 8; /* 39:16 on 31:8*/ reg = read_pci_config(bus, slot, 1, 0x84 + (i << 3)); - end = (((u64)reg) << 8) & (0xffULL << 32); /* 39:16 on 31:8*/ + end = ((u64)(reg & 0xffffff00) << 8) | 0xffff; /* 39:16 on 31:8*/ - if (!end) + if (end < tom2) continue; range[hi_mmio_num].start = start; @@ -143,32 +143,27 @@ static void __cpuinit get_fam10h_pci_mmconf_base(void) if (range[hi_mmio_num - 1].end < base) goto out; - if (range[0].start > base) + if (range[0].start > base + MMCONF_SIZE) goto out; /* need to find one window */ - base = range[0].start - (1ULL << 32); + base = (range[0].start & MMCONF_MASK) - MMCONF_UNIT; if ((base > tom2) && BASE_VALID(base)) goto out; - base = range[hi_mmio_num - 1].end + (1ULL << 32); - if ((base > tom2) && BASE_VALID(base)) + base = (range[hi_mmio_num - 1].end + MMCONF_UNIT) & MMCONF_MASK; + if (BASE_VALID(base)) goto out; /* need to find window between ranges */ - if (hi_mmio_num > 1) - for (i = 0; i < hi_mmio_num - 1; i++) { - if (range[i + 1].start > (range[i].end + (1ULL << 32))) { - base = range[i].end + (1ULL << 32); - if ((base > tom2) && BASE_VALID(base)) - goto out; - } + for (i = 1; i < hi_mmio_num; i++) { + base = (range[i - 1].end + MMCONF_UNIT) & MMCONF_MASK; + val = range[i].start & MMCONF_MASK; + if (val >= base + MMCONF_SIZE && BASE_VALID(base)) + goto out; } - -fail: - fam10h_pci_mmconf_base_status = -1; return; + out: fam10h_pci_mmconf_base = base; - fam10h_pci_mmconf_base_status = 1; } void __cpuinit fam10h_check_enable_mmcfg(void) @@ -190,11 +185,10 @@ void __cpuinit fam10h_check_enable_mmcfg(void) /* only trust the one handle 256 buses, if acpi=off */ if (!acpi_pci_disabled || busnbits >= 8) { - u64 base; - base = val & (0xffffULL << 32); - if (fam10h_pci_mmconf_base_status <= 0) { + u64 base = val & MMCONF_MASK; + + if (!fam10h_pci_mmconf_base) { fam10h_pci_mmconf_base = base; - fam10h_pci_mmconf_base_status = 1; return; } else if (fam10h_pci_mmconf_base == base) return; @@ -206,8 +200,10 @@ void __cpuinit fam10h_check_enable_mmcfg(void) * with 256 buses */ get_fam10h_pci_mmconf_base(); - if (fam10h_pci_mmconf_base_status <= 0) + if (!fam10h_pci_mmconf_base) { + pci_probe &= ~PCI_CHECK_ENABLE_AMD_MMCONF; return; + } printk(KERN_INFO "Enable MMCONFIG on AMD Family 10h\n"); val &= ~((FAM10H_MMIO_CONF_BASE_MASK<<FAM10H_MMIO_CONF_BASE_SHIFT) | diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c index 008b91eefa18..42eb3300dfc6 100644 --- a/arch/x86/kernel/pvclock.c +++ b/arch/x86/kernel/pvclock.c @@ -83,6 +83,11 @@ unsigned long pvclock_tsc_khz(struct pvclock_vcpu_time_info *src) static atomic64_t last_value = ATOMIC64_INIT(0); +void pvclock_resume(void) +{ + atomic64_set(&last_value, 0); +} + cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src) { struct pvclock_shadow_time shadow; diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 12cdbb17ad18..6acc724d5d8f 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -223,7 +223,7 @@ void native_flush_tlb_others(const struct cpumask *cpumask, static void __cpuinit calculate_tlb_offset(void) { - int cpu, node, nr_node_vecs; + int cpu, node, nr_node_vecs, idx = 0; /* * we are changing tlb_vector_offset for each CPU in runtime, but this * will not cause inconsistency, as the write is atomic under X86. we @@ -239,7 +239,7 @@ static void __cpuinit calculate_tlb_offset(void) nr_node_vecs = NUM_INVALIDATE_TLB_VECTORS/nr_online_nodes; for_each_online_node(node) { - int node_offset = (node % NUM_INVALIDATE_TLB_VECTORS) * + int node_offset = (idx % NUM_INVALIDATE_TLB_VECTORS) * nr_node_vecs; int cpu_offset = 0; for_each_cpu(cpu, cpumask_of_node(node)) { @@ -248,6 +248,7 @@ static void __cpuinit calculate_tlb_offset(void) cpu_offset++; cpu_offset = cpu_offset % nr_node_vecs; } + idx++; } } diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c index d7b5109f7a9c..25cd4a07d09f 100644 --- a/arch/x86/pci/xen.c +++ b/arch/x86/pci/xen.c @@ -70,6 +70,9 @@ static int acpi_register_gsi_xen_hvm(struct device *dev, u32 gsi, struct xen_pci_frontend_ops *xen_pci_frontend; EXPORT_SYMBOL_GPL(xen_pci_frontend); +#define XEN_PIRQ_MSI_DATA (MSI_DATA_TRIGGER_EDGE | \ + MSI_DATA_LEVEL_ASSERT | (3 << 8) | MSI_DATA_VECTOR(0)) + static void xen_msi_compose_msg(struct pci_dev *pdev, unsigned int pirq, struct msi_msg *msg) { @@ -83,12 +86,7 @@ static void xen_msi_compose_msg(struct pci_dev *pdev, unsigned int pirq, MSI_ADDR_REDIRECTION_CPU | MSI_ADDR_DEST_ID(pirq); - msg->data = - MSI_DATA_TRIGGER_EDGE | - MSI_DATA_LEVEL_ASSERT | - /* delivery mode reserved */ - (3 << 8) | - MSI_DATA_VECTOR(0); + msg->data = XEN_PIRQ_MSI_DATA; } static int xen_hvm_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) @@ -98,8 +96,23 @@ static int xen_hvm_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) struct msi_msg msg; list_for_each_entry(msidesc, &dev->msi_list, list) { + __read_msi_msg(msidesc, &msg); + pirq = MSI_ADDR_EXT_DEST_ID(msg.address_hi) | + ((msg.address_lo >> MSI_ADDR_DEST_ID_SHIFT) & 0xff); + if (xen_irq_from_pirq(pirq) >= 0 && msg.data == XEN_PIRQ_MSI_DATA) { + xen_allocate_pirq_msi((type == PCI_CAP_ID_MSIX) ? + "msi-x" : "msi", &irq, &pirq, XEN_ALLOC_IRQ); + if (irq < 0) + goto error; + ret = set_irq_msi(irq, msidesc); + if (ret < 0) + goto error_while; + printk(KERN_DEBUG "xen: msi already setup: msi --> irq=%d" + " pirq=%d\n", irq, pirq); + return 0; + } xen_allocate_pirq_msi((type == PCI_CAP_ID_MSIX) ? - "msi-x" : "msi", &irq, &pirq); + "msi-x" : "msi", &irq, &pirq, (XEN_ALLOC_IRQ | XEN_ALLOC_PIRQ)); if (irq < 0 || pirq < 0) goto error; printk(KERN_DEBUG "xen: msi --> irq=%d, pirq=%d\n", irq, pirq); diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c index a318194002b5..ba9caa808a9c 100644 --- a/arch/x86/platform/uv/tlb_uv.c +++ b/arch/x86/platform/uv/tlb_uv.c @@ -1455,7 +1455,7 @@ static void __init uv_init_uvhub(int uvhub, int vector) * the below initialization can't be in firmware because the * messaging IRQ will be determined by the OS */ - apicid = uvhub_to_first_apicid(uvhub); + apicid = uvhub_to_first_apicid(uvhub) | uv_apicid_hibits; uv_write_global_mmr64(pnode, UVH_BAU_DATA_CONFIG, ((apicid << 32) | vector)); } diff --git a/arch/x86/platform/uv/uv_time.c b/arch/x86/platform/uv/uv_time.c index 56e421bc379b..9daf5d1af9f1 100644 --- a/arch/x86/platform/uv/uv_time.c +++ b/arch/x86/platform/uv/uv_time.c @@ -89,6 +89,7 @@ static void uv_rtc_send_IPI(int cpu) apicid = cpu_physical_id(cpu); pnode = uv_apicid_to_pnode(apicid); + apicid |= uv_apicid_hibits; val = (1UL << UVH_IPI_INT_SEND_SHFT) | (apicid << UVH_IPI_INT_APIC_ID_SHFT) | (X86_PLATFORM_IPI_VECTOR << UVH_IPI_INT_VECTOR_SHFT); @@ -107,6 +108,7 @@ static int uv_intr_pending(int pnode) static int uv_setup_intr(int cpu, u64 expires) { u64 val; + unsigned long apicid = cpu_physical_id(cpu) | uv_apicid_hibits; int pnode = uv_cpu_to_pnode(cpu); uv_write_global_mmr64(pnode, UVH_RTC1_INT_CONFIG, @@ -117,7 +119,7 @@ static int uv_setup_intr(int cpu, u64 expires) UVH_EVENT_OCCURRED0_RTC1_MASK); val = (X86_PLATFORM_IPI_VECTOR << UVH_RTC1_INT_CONFIG_VECTOR_SHFT) | - ((u64)cpu_physical_id(cpu) << UVH_RTC1_INT_CONFIG_APIC_ID_SHFT); + ((u64)apicid << UVH_RTC1_INT_CONFIG_APIC_ID_SHFT); /* Set configuration */ uv_write_global_mmr64(pnode, UVH_RTC1_INT_CONFIG, val); diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 235c0f4d3861..44dcad43989d 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -75,6 +75,11 @@ DEFINE_PER_CPU(struct vcpu_info, xen_vcpu_info); enum xen_domain_type xen_domain_type = XEN_NATIVE; EXPORT_SYMBOL_GPL(xen_domain_type); +unsigned long *machine_to_phys_mapping = (void *)MACH2PHYS_VIRT_START; +EXPORT_SYMBOL(machine_to_phys_mapping); +unsigned int machine_to_phys_order; +EXPORT_SYMBOL(machine_to_phys_order); + struct start_info *xen_start_info; EXPORT_SYMBOL_GPL(xen_start_info); @@ -1016,10 +1021,6 @@ static void xen_reboot(int reason) { struct sched_shutdown r = { .reason = reason }; -#ifdef CONFIG_SMP - stop_other_cpus(); -#endif - if (HYPERVISOR_sched_op(SCHEDOP_shutdown, &r)) BUG(); } @@ -1090,6 +1091,8 @@ static void __init xen_setup_stackprotector(void) /* First C function to be called on Xen boot */ asmlinkage void __init xen_start_kernel(void) { + struct physdev_set_iopl set_iopl; + int rc; pgd_t *pgd; if (!xen_start_info) @@ -1097,6 +1100,8 @@ asmlinkage void __init xen_start_kernel(void) xen_domain_type = XEN_PV_DOMAIN; + xen_setup_machphys_mapping(); + /* Install Xen paravirt ops */ pv_info = xen_info; pv_init_ops = xen_init_ops; @@ -1191,8 +1196,6 @@ asmlinkage void __init xen_start_kernel(void) /* Allocate and initialize top and mid mfn levels for p2m structure */ xen_build_mfn_list_list(); - init_mm.pgd = pgd; - /* keep using Xen gdt for now; no urgent need to change it */ #ifdef CONFIG_X86_32 @@ -1202,10 +1205,18 @@ asmlinkage void __init xen_start_kernel(void) #else pv_info.kernel_rpl = 0; #endif - /* set the limit of our address space */ xen_reserve_top(); + /* We used to do this in xen_arch_setup, but that is too late on AMD + * were early_cpu_init (run before ->arch_setup()) calls early_amd_init + * which pokes 0xcf8 port. + */ + set_iopl.iopl = 1; + rc = HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl); + if (rc != 0) + xen_raw_printk("physdev_op failed %d\n", rc); + #ifdef CONFIG_X86_32 /* set up basic CPUID stuff */ cpu_detect(&new_cpu_data); diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 21ed8d7f75a5..44924e551fde 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -2034,6 +2034,20 @@ static __init void xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn) set_page_prot(pmd, PAGE_KERNEL_RO); } +void __init xen_setup_machphys_mapping(void) +{ + struct xen_machphys_mapping mapping; + unsigned long machine_to_phys_nr_ents; + + if (HYPERVISOR_memory_op(XENMEM_machphys_mapping, &mapping) == 0) { + machine_to_phys_mapping = (unsigned long *)mapping.v_start; + machine_to_phys_nr_ents = mapping.max_mfn + 1; + } else { + machine_to_phys_nr_ents = MACH2PHYS_NR_ENTRIES; + } + machine_to_phys_order = fls(machine_to_phys_nr_ents - 1); +} + #ifdef CONFIG_X86_64 static void convert_pfn_mfn(void *v) { @@ -2119,44 +2133,83 @@ __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, return pgd; } #else /* !CONFIG_X86_64 */ -static RESERVE_BRK_ARRAY(pmd_t, level2_kernel_pgt, PTRS_PER_PMD); +static RESERVE_BRK_ARRAY(pmd_t, initial_kernel_pmd, PTRS_PER_PMD); +static RESERVE_BRK_ARRAY(pmd_t, swapper_kernel_pmd, PTRS_PER_PMD); + +static __init void xen_write_cr3_init(unsigned long cr3) +{ + unsigned long pfn = PFN_DOWN(__pa(swapper_pg_dir)); + + BUG_ON(read_cr3() != __pa(initial_page_table)); + BUG_ON(cr3 != __pa(swapper_pg_dir)); + + /* + * We are switching to swapper_pg_dir for the first time (from + * initial_page_table) and therefore need to mark that page + * read-only and then pin it. + * + * Xen disallows sharing of kernel PMDs for PAE + * guests. Therefore we must copy the kernel PMD from + * initial_page_table into a new kernel PMD to be used in + * swapper_pg_dir. + */ + swapper_kernel_pmd = + extend_brk(sizeof(pmd_t) * PTRS_PER_PMD, PAGE_SIZE); + memcpy(swapper_kernel_pmd, initial_kernel_pmd, + sizeof(pmd_t) * PTRS_PER_PMD); + swapper_pg_dir[KERNEL_PGD_BOUNDARY] = + __pgd(__pa(swapper_kernel_pmd) | _PAGE_PRESENT); + set_page_prot(swapper_kernel_pmd, PAGE_KERNEL_RO); + + set_page_prot(swapper_pg_dir, PAGE_KERNEL_RO); + xen_write_cr3(cr3); + pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, pfn); + + pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, + PFN_DOWN(__pa(initial_page_table))); + set_page_prot(initial_page_table, PAGE_KERNEL); + set_page_prot(initial_kernel_pmd, PAGE_KERNEL); + + pv_mmu_ops.write_cr3 = &xen_write_cr3; +} __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) { pmd_t *kernel_pmd; - level2_kernel_pgt = extend_brk(sizeof(pmd_t) * PTRS_PER_PMD, PAGE_SIZE); + initial_kernel_pmd = + extend_brk(sizeof(pmd_t) * PTRS_PER_PMD, PAGE_SIZE); max_pfn_mapped = PFN_DOWN(__pa(xen_start_info->pt_base) + xen_start_info->nr_pt_frames * PAGE_SIZE + 512*1024); kernel_pmd = m2v(pgd[KERNEL_PGD_BOUNDARY].pgd); - memcpy(level2_kernel_pgt, kernel_pmd, sizeof(pmd_t) * PTRS_PER_PMD); + memcpy(initial_kernel_pmd, kernel_pmd, sizeof(pmd_t) * PTRS_PER_PMD); - xen_map_identity_early(level2_kernel_pgt, max_pfn); + xen_map_identity_early(initial_kernel_pmd, max_pfn); - memcpy(swapper_pg_dir, pgd, sizeof(pgd_t) * PTRS_PER_PGD); - set_pgd(&swapper_pg_dir[KERNEL_PGD_BOUNDARY], - __pgd(__pa(level2_kernel_pgt) | _PAGE_PRESENT)); + memcpy(initial_page_table, pgd, sizeof(pgd_t) * PTRS_PER_PGD); + initial_page_table[KERNEL_PGD_BOUNDARY] = + __pgd(__pa(initial_kernel_pmd) | _PAGE_PRESENT); - set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO); - set_page_prot(swapper_pg_dir, PAGE_KERNEL_RO); + set_page_prot(initial_kernel_pmd, PAGE_KERNEL_RO); + set_page_prot(initial_page_table, PAGE_KERNEL_RO); set_page_prot(empty_zero_page, PAGE_KERNEL_RO); pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd))); - xen_write_cr3(__pa(swapper_pg_dir)); - - pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(swapper_pg_dir))); + pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, + PFN_DOWN(__pa(initial_page_table))); + xen_write_cr3(__pa(initial_page_table)); memblock_x86_reserve_range(__pa(xen_start_info->pt_base), __pa(xen_start_info->pt_base + xen_start_info->nr_pt_frames * PAGE_SIZE), "XEN PAGETABLES"); - return swapper_pg_dir; + return initial_page_table; } #endif /* CONFIG_X86_64 */ @@ -2290,7 +2343,11 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = { .write_cr2 = xen_write_cr2, .read_cr3 = xen_read_cr3, +#ifdef CONFIG_X86_32 + .write_cr3 = xen_write_cr3_init, +#else .write_cr3 = xen_write_cr3, +#endif .flush_tlb_user = xen_flush_tlb, .flush_tlb_kernel = xen_flush_tlb, @@ -2358,8 +2415,6 @@ void __init xen_init_mmu_ops(void) x86_init.paging.pagetable_setup_done = xen_pagetable_setup_done; pv_mmu_ops = xen_mmu_ops; - vmap_lazy_unmap = false; - memset(dummy_mapping, 0xff, PAGE_SIZE); } @@ -2627,7 +2682,8 @@ int xen_remap_domain_mfn_range(struct vm_area_struct *vma, prot = __pgprot(pgprot_val(prot) | _PAGE_IOMAP); - vma->vm_flags |= VM_IO | VM_RESERVED | VM_PFNMAP; + BUG_ON(!((vma->vm_flags & (VM_PFNMAP | VM_RESERVED | VM_IO)) == + (VM_PFNMAP | VM_RESERVED | VM_IO))); rmd.mfn = mfn; rmd.prot = prot; diff --git a/arch/x86/xen/platform-pci-unplug.c b/arch/x86/xen/platform-pci-unplug.c index 0f456386cce5..25c52f94a27c 100644 --- a/arch/x86/xen/platform-pci-unplug.c +++ b/arch/x86/xen/platform-pci-unplug.c @@ -68,7 +68,7 @@ static int __init check_platform_magic(void) return 0; } -void __init xen_unplug_emulated_devices(void) +void xen_unplug_emulated_devices(void) { int r; diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 769c4b01fa32..b5a7f928234b 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -23,7 +23,6 @@ #include <xen/interface/callback.h> #include <xen/interface/memory.h> #include <xen/interface/physdev.h> -#include <xen/interface/memory.h> #include <xen/features.h> #include "xen-ops.h" @@ -182,24 +181,21 @@ char * __init xen_memory_setup(void) for (i = 0; i < memmap.nr_entries; i++) { unsigned long long end = map[i].addr + map[i].size; - if (map[i].type == E820_RAM) { - if (map[i].addr < mem_end && end > mem_end) { - /* Truncate region to max_mem. */ - u64 delta = end - mem_end; + if (map[i].type == E820_RAM && end > mem_end) { + /* RAM off the end - may be partially included */ + u64 delta = min(map[i].size, end - mem_end); - map[i].size -= delta; - extra_pages += PFN_DOWN(delta); + map[i].size -= delta; + end -= delta; - end = mem_end; - } + extra_pages += PFN_DOWN(delta); } - if (end > xen_extra_mem_start) + if (map[i].size > 0 && end > xen_extra_mem_start) xen_extra_mem_start = end; - /* If region is non-RAM or below mem_end, add what remains */ - if ((map[i].type != E820_RAM || map[i].addr < mem_end) && - map[i].size > 0) + /* Add region if any remains */ + if (map[i].size > 0) e820_add_region(map[i].addr, map[i].size, map[i].type); } @@ -248,26 +244,11 @@ char * __init xen_memory_setup(void) else extra_pages = 0; - if (!xen_initial_domain()) - xen_add_extra_mem(extra_pages); + xen_add_extra_mem(extra_pages); return "Xen"; } -static void xen_idle(void) -{ - local_irq_disable(); - - if (need_resched()) - local_irq_enable(); - else { - current_thread_info()->status &= ~TS_POLLING; - smp_mb__after_clear_bit(); - safe_halt(); - current_thread_info()->status |= TS_POLLING; - } -} - /* * Set the bit indicating "nosegneg" library variants should be used. * We only need to bother in pure 32-bit mode; compat 32-bit processes @@ -337,9 +318,6 @@ void __cpuinit xen_enable_syscall(void) void __init xen_arch_setup(void) { - struct physdev_set_iopl set_iopl; - int rc; - xen_panic_handler_init(); HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_4gb_segments); @@ -356,11 +334,6 @@ void __init xen_arch_setup(void) xen_enable_sysenter(); xen_enable_syscall(); - set_iopl.iopl = 1; - rc = HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl); - if (rc != 0) - printk(KERN_INFO "physdev_op failed %d\n", rc); - #ifdef CONFIG_ACPI if (!(xen_start_info->flags & SIF_INITDOMAIN)) { printk(KERN_INFO "ACPI in unprivileged domain disabled\n"); @@ -372,7 +345,11 @@ void __init xen_arch_setup(void) MAX_GUEST_CMDLINE > COMMAND_LINE_SIZE ? COMMAND_LINE_SIZE : MAX_GUEST_CMDLINE); - pm_idle = xen_idle; + /* Set up idle, making sure it calls safe_halt() pvop */ +#ifdef CONFIG_X86_32 + boot_cpu_data.hlt_works_ok = 1; +#endif + pm_idle = default_idle; fiddle_vdso(); } diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c index 1d789d56877c..9bbd63a129b5 100644 --- a/arch/x86/xen/suspend.c +++ b/arch/x86/xen/suspend.c @@ -31,6 +31,7 @@ void xen_hvm_post_suspend(int suspend_cancelled) int cpu; xen_hvm_init_shared_info(); xen_callback_vector(); + xen_unplug_emulated_devices(); if (xen_feature(XENFEAT_hvm_safe_pvclock)) { for_each_online_cpu(cpu) { xen_setup_runstate_info(cpu); diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index b2bb5aa3b054..5da5e53fb94c 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c @@ -426,6 +426,8 @@ void xen_timer_resume(void) { int cpu; + pvclock_resume(); + if (xen_clockevent != &xen_vcpuop_clockevent) return; diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 64044747348e..9d41bf985757 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -43,7 +43,7 @@ void xen_vcpu_restore(void); void xen_callback_vector(void); void xen_hvm_init_shared_info(void); -void __init xen_unplug_emulated_devices(void); +void xen_unplug_emulated_devices(void); void __init xen_build_dynamic_phys_to_machine(void); |