summaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Makefile5
-rw-r--r--arch/x86/boot/Makefile4
-rw-r--r--arch/x86/boot/compressed/misc.c2
-rw-r--r--arch/x86/include/asm/hpet.h1
-rw-r--r--arch/x86/include/asm/hugetlb.h1
-rw-r--r--arch/x86/include/asm/page_64_types.h2
-rw-r--r--arch/x86/include/asm/pci.h1
-rw-r--r--arch/x86/include/asm/xen/hypercall.h2
-rw-r--r--arch/x86/include/asm/xen/interface.h3
-rw-r--r--arch/x86/include/uapi/asm/msr-index.h2
-rw-r--r--arch/x86/kernel/acpi/sleep.c2
-rw-r--r--arch/x86/kernel/aperture_64.c59
-rw-r--r--arch/x86/kernel/apic/io_apic.c7
-rw-r--r--arch/x86/kernel/cpu/mcheck/therm_throt.c4
-rw-r--r--arch/x86/kernel/cpu/mcheck/threshold.c4
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel.c1
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_rapl.c3
-rw-r--r--arch/x86/kernel/cpu/rdrand.c1
-rw-r--r--arch/x86/kernel/early-quirks.c16
-rw-r--r--arch/x86/kernel/head32.c2
-rw-r--r--arch/x86/kernel/head64.c2
-rw-r--r--arch/x86/kernel/hpet.c2
-rw-r--r--arch/x86/kernel/ldt.c4
-rw-r--r--arch/x86/kernel/process_64.c2
-rw-r--r--arch/x86/kernel/reboot.c10
-rw-r--r--arch/x86/kernel/smp.c2
-rw-r--r--arch/x86/kernel/traps.c6
-rw-r--r--arch/x86/kernel/vsmp_64.c17
-rw-r--r--arch/x86/kernel/vsyscall_gtod.c2
-rw-r--r--arch/x86/kvm/vmx.c60
-rw-r--r--arch/x86/kvm/x86.c8
-rw-r--r--arch/x86/lguest/boot.c4
-rw-r--r--arch/x86/lib/msr.c2
-rw-r--r--arch/x86/math-emu/errors.c16
-rw-r--r--arch/x86/net/bpf_jit_comp.c2
-rw-r--r--arch/x86/pci/acpi.c6
-rw-r--r--arch/x86/pci/amd_bus.c83
-rw-r--r--arch/x86/pci/broadcom_bus.c4
-rw-r--r--arch/x86/pci/fixup.c18
-rw-r--r--arch/x86/pci/i386.c27
-rw-r--r--arch/x86/platform/efi/early_printk.c83
-rw-r--r--arch/x86/platform/olpc/olpc-xo1-pm.c2
-rw-r--r--arch/x86/power/hibernate_64.c2
-rw-r--r--arch/x86/vdso/vdso32-setup.c8
-rw-r--r--arch/x86/xen/enlighten.c3
-rw-r--r--arch/x86/xen/irq.c6
-rw-r--r--arch/x86/xen/mmu.c125
-rw-r--r--arch/x86/xen/p2m.c174
-rw-r--r--arch/x86/xen/setup.c15
-rw-r--r--arch/x86/xen/suspend.c23
-rw-r--r--arch/x86/xen/xen-ops.h2
51 files changed, 615 insertions, 227 deletions
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index d1b7c377a234..33f71b01fd22 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -79,11 +79,14 @@ else
UTS_MACHINE := x86_64
CHECKFLAGS += -D__x86_64__ -m64
+ biarch := -m64
KBUILD_AFLAGS += -m64
KBUILD_CFLAGS += -m64
# Don't autogenerate traditional x87, MMX or SSE instructions
- KBUILD_CFLAGS += -mno-mmx -mno-sse -mno-80387 -mno-fp-ret-in-387
+ KBUILD_CFLAGS += -mno-mmx -mno-sse
+ KBUILD_CFLAGS += $(call cc-option,-mno-80387)
+ KBUILD_CFLAGS += $(call cc-option,-mno-fp-ret-in-387)
# Use -mpreferred-stack-boundary=3 if supported.
KBUILD_CFLAGS += $(call cc-option,-mpreferred-stack-boundary=3)
diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile
index abb9eba61b50..dbe8dd2fe247 100644
--- a/arch/x86/boot/Makefile
+++ b/arch/x86/boot/Makefile
@@ -71,7 +71,7 @@ $(obj)/vmlinux.bin: $(obj)/compressed/vmlinux FORCE
SETUP_OBJS = $(addprefix $(obj)/,$(setup-y))
-sed-voffset := -e 's/^\([0-9a-fA-F]*\) . \(_text\|_end\)$$/\#define VO_\2 0x\1/p'
+sed-voffset := -e 's/^\([0-9a-fA-F]*\) [ABCDGRSTVW] \(_text\|_end\)$$/\#define VO_\2 0x\1/p'
quiet_cmd_voffset = VOFFSET $@
cmd_voffset = $(NM) $< | sed -n $(sed-voffset) > $@
@@ -80,7 +80,7 @@ targets += voffset.h
$(obj)/voffset.h: vmlinux FORCE
$(call if_changed,voffset)
-sed-zoffset := -e 's/^\([0-9a-fA-F]*\) . \(startup_32\|startup_64\|efi32_stub_entry\|efi64_stub_entry\|efi_pe_entry\|input_data\|_end\|z_.*\)$$/\#define ZO_\2 0x\1/p'
+sed-zoffset := -e 's/^\([0-9a-fA-F]*\) [ABCDGRSTVW] \(startup_32\|startup_64\|efi32_stub_entry\|efi64_stub_entry\|efi_pe_entry\|input_data\|_end\|z_.*\)$$/\#define ZO_\2 0x\1/p'
quiet_cmd_zoffset = ZOFFSET $@
cmd_zoffset = $(NM) $< | sed -n $(sed-zoffset) > $@
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index 17684615374b..57ab74df7eea 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -354,7 +354,7 @@ static void parse_elf(void *output)
free(phdrs);
}
-asmlinkage void *decompress_kernel(void *rmode, memptr heap,
+asmlinkage __visible void *decompress_kernel(void *rmode, memptr heap,
unsigned char *input_data,
unsigned long input_len,
unsigned char *output,
diff --git a/arch/x86/include/asm/hpet.h b/arch/x86/include/asm/hpet.h
index b18df579c0e9..36f7125945e3 100644
--- a/arch/x86/include/asm/hpet.h
+++ b/arch/x86/include/asm/hpet.h
@@ -63,6 +63,7 @@
/* hpet memory map physical address */
extern unsigned long hpet_address;
extern unsigned long force_hpet_address;
+extern int boot_hpet_disable;
extern u8 hpet_blockid;
extern int hpet_force_user;
extern u8 hpet_msi_disable;
diff --git a/arch/x86/include/asm/hugetlb.h b/arch/x86/include/asm/hugetlb.h
index a8091216963b..68c05398bba9 100644
--- a/arch/x86/include/asm/hugetlb.h
+++ b/arch/x86/include/asm/hugetlb.h
@@ -52,6 +52,7 @@ static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
static inline void huge_ptep_clear_flush(struct vm_area_struct *vma,
unsigned long addr, pte_t *ptep)
{
+ ptep_clear_flush(vma, addr, ptep);
}
static inline int huge_pte_none(pte_t pte)
diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h
index 8de6d9cf3b95..678205195ae1 100644
--- a/arch/x86/include/asm/page_64_types.h
+++ b/arch/x86/include/asm/page_64_types.h
@@ -1,7 +1,7 @@
#ifndef _ASM_X86_PAGE_64_DEFS_H
#define _ASM_X86_PAGE_64_DEFS_H
-#define THREAD_SIZE_ORDER 1
+#define THREAD_SIZE_ORDER 2
#define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER)
#define CURRENT_MASK (~(THREAD_SIZE - 1))
diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h
index 96ae4f4040bb..0892ea0e683f 100644
--- a/arch/x86/include/asm/pci.h
+++ b/arch/x86/include/asm/pci.h
@@ -68,7 +68,6 @@ void pcibios_config_init(void);
void pcibios_scan_root(int bus);
void pcibios_set_master(struct pci_dev *dev);
-void pcibios_penalize_isa_irq(int irq, int active);
struct irq_routing_table *pcibios_get_irq_routing_table(void);
int pcibios_set_irq_routing(struct pci_dev *dev, int pin, int irq);
diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h
index e709884d0ef9..ca08a27b90b3 100644
--- a/arch/x86/include/asm/xen/hypercall.h
+++ b/arch/x86/include/asm/xen/hypercall.h
@@ -343,7 +343,7 @@ HYPERVISOR_memory_op(unsigned int cmd, void *arg)
}
static inline int
-HYPERVISOR_multicall(void *call_list, int nr_calls)
+HYPERVISOR_multicall(void *call_list, uint32_t nr_calls)
{
return _hypercall2(int, multicall, call_list, nr_calls);
}
diff --git a/arch/x86/include/asm/xen/interface.h b/arch/x86/include/asm/xen/interface.h
index fd9cb7695b5f..3400dbaec3c3 100644
--- a/arch/x86/include/asm/xen/interface.h
+++ b/arch/x86/include/asm/xen/interface.h
@@ -54,6 +54,9 @@ typedef unsigned long xen_pfn_t;
#define PRI_xen_pfn "lx"
typedef unsigned long xen_ulong_t;
#define PRI_xen_ulong "lx"
+typedef long xen_long_t;
+#define PRI_xen_long "lx"
+
/* Guest handles for primitive C types. */
__DEFINE_GUEST_HANDLE(uchar, unsigned char);
__DEFINE_GUEST_HANDLE(uint, unsigned int);
diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h
index c827ace3121b..fcf2b3ae1bf0 100644
--- a/arch/x86/include/uapi/asm/msr-index.h
+++ b/arch/x86/include/uapi/asm/msr-index.h
@@ -384,7 +384,7 @@
#define MSR_IA32_MISC_ENABLE_MWAIT_BIT 18
#define MSR_IA32_MISC_ENABLE_MWAIT (1ULL << MSR_IA32_MISC_ENABLE_MWAIT_BIT)
#define MSR_IA32_MISC_ENABLE_LIMIT_CPUID_BIT 22
-#define MSR_IA32_MISC_ENABLE_LIMIT_CPUID (1ULL << MSR_IA32_MISC_ENABLE_LIMIT_CPUID_BIT);
+#define MSR_IA32_MISC_ENABLE_LIMIT_CPUID (1ULL << MSR_IA32_MISC_ENABLE_LIMIT_CPUID_BIT)
#define MSR_IA32_MISC_ENABLE_XTPR_DISABLE_BIT 23
#define MSR_IA32_MISC_ENABLE_XTPR_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_XTPR_DISABLE_BIT)
#define MSR_IA32_MISC_ENABLE_XD_DISABLE_BIT 34
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index 3a2ae4c88948..31368207837c 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -31,7 +31,7 @@ static char temp_stack[4096];
*
* Wrapper around acpi_enter_sleep_state() to be called by assmebly.
*/
-acpi_status asmlinkage x86_acpi_enter_sleep_state(u8 state)
+acpi_status asmlinkage __visible x86_acpi_enter_sleep_state(u8 state)
{
return acpi_enter_sleep_state(state);
}
diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c
index 9fa8aa051f54..76164e173a24 100644
--- a/arch/x86/kernel/aperture_64.c
+++ b/arch/x86/kernel/aperture_64.c
@@ -10,6 +10,8 @@
*
* Copyright 2002 Andi Kleen, SuSE Labs.
*/
+#define pr_fmt(fmt) "AGP: " fmt
+
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/init.h>
@@ -75,14 +77,13 @@ static u32 __init allocate_aperture(void)
addr = memblock_find_in_range(GART_MIN_ADDR, GART_MAX_ADDR,
aper_size, aper_size);
if (!addr) {
- printk(KERN_ERR
- "Cannot allocate aperture memory hole (%lx,%uK)\n",
- addr, aper_size>>10);
+ pr_err("Cannot allocate aperture memory hole [mem %#010lx-%#010lx] (%uKB)\n",
+ addr, addr + aper_size - 1, aper_size >> 10);
return 0;
}
memblock_reserve(addr, aper_size);
- printk(KERN_INFO "Mapping aperture over %d KB of RAM @ %lx\n",
- aper_size >> 10, addr);
+ pr_info("Mapping aperture over RAM [mem %#010lx-%#010lx] (%uKB)\n",
+ addr, addr + aper_size - 1, aper_size >> 10);
register_nosave_region(addr >> PAGE_SHIFT,
(addr+aper_size) >> PAGE_SHIFT);
@@ -126,10 +127,11 @@ static u32 __init read_agp(int bus, int slot, int func, int cap, u32 *order)
u64 aper;
u32 old_order;
- printk(KERN_INFO "AGP bridge at %02x:%02x:%02x\n", bus, slot, func);
+ pr_info("pci 0000:%02x:%02x:%02x: AGP bridge\n", bus, slot, func);
apsizereg = read_pci_config_16(bus, slot, func, cap + 0x14);
if (apsizereg == 0xffffffff) {
- printk(KERN_ERR "APSIZE in AGP bridge unreadable\n");
+ pr_err("pci 0000:%02x:%02x.%d: APSIZE unreadable\n",
+ bus, slot, func);
return 0;
}
@@ -153,16 +155,18 @@ static u32 __init read_agp(int bus, int slot, int func, int cap, u32 *order)
* On some sick chips, APSIZE is 0. It means it wants 4G
* so let double check that order, and lets trust AMD NB settings:
*/
- printk(KERN_INFO "Aperture from AGP @ %Lx old size %u MB\n",
- aper, 32 << old_order);
+ pr_info("pci 0000:%02x:%02x.%d: AGP aperture [bus addr %#010Lx-%#010Lx] (old size %uMB)\n",
+ bus, slot, func, aper, aper + (32ULL << (old_order + 20)) - 1,
+ 32 << old_order);
if (aper + (32ULL<<(20 + *order)) > 0x100000000ULL) {
- printk(KERN_INFO "Aperture size %u MB (APSIZE %x) is not right, using settings from NB\n",
- 32 << *order, apsizereg);
+ pr_info("pci 0000:%02x:%02x.%d: AGP aperture size %uMB (APSIZE %#x) is not right, using settings from NB\n",
+ bus, slot, func, 32 << *order, apsizereg);
*order = old_order;
}
- printk(KERN_INFO "Aperture from AGP @ %Lx size %u MB (APSIZE %x)\n",
- aper, 32 << *order, apsizereg);
+ pr_info("pci 0000:%02x:%02x.%d: AGP aperture [bus addr %#010Lx-%#010Lx] (%uMB, APSIZE %#x)\n",
+ bus, slot, func, aper, aper + (32ULL << (*order + 20)) - 1,
+ 32 << *order, apsizereg);
if (!aperture_valid(aper, (32*1024*1024) << *order, 32<<20))
return 0;
@@ -218,7 +222,7 @@ static u32 __init search_agp_bridge(u32 *order, int *valid_agp)
}
}
}
- printk(KERN_INFO "No AGP bridge found\n");
+ pr_info("No AGP bridge found\n");
return 0;
}
@@ -310,7 +314,8 @@ void __init early_gart_iommu_check(void)
if (e820_any_mapped(aper_base, aper_base + aper_size,
E820_RAM)) {
/* reserve it, so we can reuse it in second kernel */
- printk(KERN_INFO "update e820 for GART\n");
+ pr_info("e820: reserve [mem %#010Lx-%#010Lx] for GART\n",
+ aper_base, aper_base + aper_size - 1);
e820_add_region(aper_base, aper_size, E820_RESERVED);
update_e820();
}
@@ -354,7 +359,7 @@ int __init gart_iommu_hole_init(void)
!early_pci_allowed())
return -ENODEV;
- printk(KERN_INFO "Checking aperture...\n");
+ pr_info("Checking aperture...\n");
if (!fallback_aper_force)
agp_aper_base = search_agp_bridge(&agp_aper_order, &valid_agp);
@@ -395,8 +400,9 @@ int __init gart_iommu_hole_init(void)
aper_base = read_pci_config(bus, slot, 3, AMD64_GARTAPERTUREBASE) & 0x7fff;
aper_base <<= 25;
- printk(KERN_INFO "Node %d: aperture @ %Lx size %u MB\n",
- node, aper_base, aper_size >> 20);
+ pr_info("Node %d: aperture [bus addr %#010Lx-%#010Lx] (%uMB)\n",
+ node, aper_base, aper_base + aper_size - 1,
+ aper_size >> 20);
node++;
if (!aperture_valid(aper_base, aper_size, 64<<20)) {
@@ -407,9 +413,9 @@ int __init gart_iommu_hole_init(void)
if (!no_iommu &&
max_pfn > MAX_DMA32_PFN &&
!printed_gart_size_msg) {
- printk(KERN_ERR "you are using iommu with agp, but GART size is less than 64M\n");
- printk(KERN_ERR "please increase GART size in your BIOS setup\n");
- printk(KERN_ERR "if BIOS doesn't have that option, contact your HW vendor!\n");
+ pr_err("you are using iommu with agp, but GART size is less than 64MB\n");
+ pr_err("please increase GART size in your BIOS setup\n");
+ pr_err("if BIOS doesn't have that option, contact your HW vendor!\n");
printed_gart_size_msg = 1;
}
} else {
@@ -446,13 +452,10 @@ out:
force_iommu ||
valid_agp ||
fallback_aper_force) {
- printk(KERN_INFO
- "Your BIOS doesn't leave a aperture memory hole\n");
- printk(KERN_INFO
- "Please enable the IOMMU option in the BIOS setup\n");
- printk(KERN_INFO
- "This costs you %d MB of RAM\n",
- 32 << fallback_aper_order);
+ pr_info("Your BIOS doesn't leave a aperture memory hole\n");
+ pr_info("Please enable the IOMMU option in the BIOS setup\n");
+ pr_info("This costs you %dMB of RAM\n",
+ 32 << fallback_aper_order);
aper_order = fallback_aper_order;
aper_alloc = allocate_aperture();
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 6ad4658de705..992060e09897 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -2189,7 +2189,7 @@ void send_cleanup_vector(struct irq_cfg *cfg)
cfg->move_in_progress = 0;
}
-asmlinkage void smp_irq_move_cleanup_interrupt(void)
+asmlinkage __visible void smp_irq_move_cleanup_interrupt(void)
{
unsigned vector, me;
@@ -3425,6 +3425,11 @@ int get_nr_irqs_gsi(void)
return nr_irqs_gsi;
}
+unsigned int arch_dynirq_lower_bound(unsigned int from)
+{
+ return from < nr_irqs_gsi ? nr_irqs_gsi : from;
+}
+
int __init arch_probe_nr_irqs(void)
{
int nr;
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index d921b7ee6595..36a1bb6d1ee0 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -429,14 +429,14 @@ static inline void __smp_thermal_interrupt(void)
smp_thermal_vector();
}
-asmlinkage void smp_thermal_interrupt(struct pt_regs *regs)
+asmlinkage __visible void smp_thermal_interrupt(struct pt_regs *regs)
{
entering_irq();
__smp_thermal_interrupt();
exiting_ack_irq();
}
-asmlinkage void smp_trace_thermal_interrupt(struct pt_regs *regs)
+asmlinkage __visible void smp_trace_thermal_interrupt(struct pt_regs *regs)
{
entering_irq();
trace_thermal_apic_entry(THERMAL_APIC_VECTOR);
diff --git a/arch/x86/kernel/cpu/mcheck/threshold.c b/arch/x86/kernel/cpu/mcheck/threshold.c
index fe6b1c86645b..7245980186ee 100644
--- a/arch/x86/kernel/cpu/mcheck/threshold.c
+++ b/arch/x86/kernel/cpu/mcheck/threshold.c
@@ -24,14 +24,14 @@ static inline void __smp_threshold_interrupt(void)
mce_threshold_vector();
}
-asmlinkage void smp_threshold_interrupt(void)
+asmlinkage __visible void smp_threshold_interrupt(void)
{
entering_irq();
__smp_threshold_interrupt();
exiting_ack_irq();
}
-asmlinkage void smp_trace_threshold_interrupt(void)
+asmlinkage __visible void smp_trace_threshold_interrupt(void)
{
entering_irq();
trace_threshold_apic_entry(THRESHOLD_APIC_VECTOR);
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index aa333d966886..adb02aa62af5 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -169,7 +169,6 @@ static struct event_constraint intel_slm_event_constraints[] __read_mostly =
{
FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
- FIXED_EVENT_CONSTRAINT(0x013c, 2), /* CPU_CLK_UNHALTED.REF */
FIXED_EVENT_CONSTRAINT(0x0300, 2), /* pseudo CPU_CLK_UNHALTED.REF */
EVENT_CONSTRAINT_END
};
diff --git a/arch/x86/kernel/cpu/perf_event_intel_rapl.c b/arch/x86/kernel/cpu/perf_event_intel_rapl.c
index 7c87424d4140..619f7699487a 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_rapl.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_rapl.c
@@ -543,7 +543,8 @@ static int rapl_cpu_prepare(int cpu)
if (phys_id < 0)
return -1;
- if (!rdmsrl_safe(MSR_RAPL_POWER_UNIT, &msr_rapl_power_unit_bits))
+ /* protect rdmsrl() to handle virtualization */
+ if (rdmsrl_safe(MSR_RAPL_POWER_UNIT, &msr_rapl_power_unit_bits))
return -1;
pmu = kzalloc_node(sizeof(*pmu), GFP_KERNEL, cpu_to_node(cpu));
diff --git a/arch/x86/kernel/cpu/rdrand.c b/arch/x86/kernel/cpu/rdrand.c
index 384df5105fbc..136ac74dee82 100644
--- a/arch/x86/kernel/cpu/rdrand.c
+++ b/arch/x86/kernel/cpu/rdrand.c
@@ -27,6 +27,7 @@
static int __init x86_rdrand_setup(char *s)
{
setup_clear_cpu_cap(X86_FEATURE_RDRAND);
+ setup_clear_cpu_cap(X86_FEATURE_RDSEED);
return 1;
}
__setup("nordrand", x86_rdrand_setup);
diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c
index 6e2537c32190..6cda0baeac9d 100644
--- a/arch/x86/kernel/early-quirks.c
+++ b/arch/x86/kernel/early-quirks.c
@@ -17,6 +17,7 @@
#include <asm/dma.h>
#include <asm/io_apic.h>
#include <asm/apic.h>
+#include <asm/hpet.h>
#include <asm/iommu.h>
#include <asm/gart.h>
#include <asm/irq_remapping.h>
@@ -530,6 +531,15 @@ static void __init intel_graphics_stolen(int num, int slot, int func)
}
}
+static void __init force_disable_hpet(int num, int slot, int func)
+{
+#ifdef CONFIG_HPET_TIMER
+ boot_hpet_disable = 1;
+ pr_info("x86/hpet: Will disable the HPET for this platform because it's not reliable\n");
+#endif
+}
+
+
#define QFLAG_APPLY_ONCE 0x1
#define QFLAG_APPLIED 0x2
#define QFLAG_DONE (QFLAG_APPLY_ONCE|QFLAG_APPLIED)
@@ -567,6 +577,12 @@ static struct chipset early_qrk[] __initdata = {
PCI_BASE_CLASS_BRIDGE, 0, intel_remapping_check },
{ PCI_VENDOR_ID_INTEL, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA, PCI_ANY_ID,
QFLAG_APPLY_ONCE, intel_graphics_stolen },
+ /*
+ * HPET on current version of Baytrail platform has accuracy
+ * problems, disable it for now:
+ */
+ { PCI_VENDOR_ID_INTEL, 0x0f00,
+ PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, force_disable_hpet},
{}
};
diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c
index c61a14a4a310..d6c1b9836995 100644
--- a/arch/x86/kernel/head32.c
+++ b/arch/x86/kernel/head32.c
@@ -29,7 +29,7 @@ static void __init i386_default_early_setup(void)
reserve_ebda_region();
}
-asmlinkage void __init i386_start_kernel(void)
+asmlinkage __visible void __init i386_start_kernel(void)
{
sanitize_boot_params(&boot_params);
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 85126ccbdf6b..068054f4bf20 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -137,7 +137,7 @@ static void __init copy_bootdata(char *real_mode_data)
}
}
-asmlinkage void __init x86_64_start_kernel(char * real_mode_data)
+asmlinkage __visible void __init x86_64_start_kernel(char * real_mode_data)
{
int i;
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 8d80ae011603..4177bfbc80b0 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -88,7 +88,7 @@ static inline void hpet_clear_mapping(void)
/*
* HPET command line enable / disable
*/
-static int boot_hpet_disable;
+int boot_hpet_disable;
int hpet_force_user;
static int hpet_verbose;
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index af1d14a9ebda..dcbbaa165bde 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -20,6 +20,8 @@
#include <asm/mmu_context.h>
#include <asm/syscalls.h>
+int sysctl_ldt16 = 0;
+
#ifdef CONFIG_SMP
static void flush_ldt(void *current_mm)
{
@@ -234,7 +236,7 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
* IRET leaking the high bits of the kernel stack address.
*/
#ifdef CONFIG_X86_64
- if (!ldt_info.seg_32bit) {
+ if (!ldt_info.seg_32bit && !sysctl_ldt16) {
error = -EINVAL;
goto out_unlock;
}
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 9c0280f93d05..898d077617a9 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -52,7 +52,7 @@
asmlinkage extern void ret_from_fork(void);
-asmlinkage DEFINE_PER_CPU(unsigned long, old_rsp);
+__visible DEFINE_PER_CPU(unsigned long, old_rsp);
/* Prints also some state that isn't saved in the pt_regs */
void __show_regs(struct pt_regs *regs, int all)
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 3399d3a99730..52b1157c53eb 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -191,6 +191,16 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {
},
},
+ /* Certec */
+ { /* Handle problems with rebooting on Certec BPC600 */
+ .callback = set_pci_reboot,
+ .ident = "Certec BPC600",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Certec"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "BPC600"),
+ },
+ },
+
/* Dell */
{ /* Handle problems with rebooting on Dell DXP061 */
.callback = set_bios_reboot,
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index 7c3a5a61f2e4..be8e1bde07aa 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -168,7 +168,7 @@ static int smp_stop_nmi_callback(unsigned int val, struct pt_regs *regs)
* this function calls the 'stop' function on all other CPUs in the system.
*/
-asmlinkage void smp_reboot_interrupt(void)
+asmlinkage __visible void smp_reboot_interrupt(void)
{
ack_APIC_irq();
irq_enter();
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 57409f6b8c62..f73b5d435bdc 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -357,7 +357,7 @@ exit:
* for scheduling or signal handling. The actual stack switch is done in
* entry.S
*/
-asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs)
+asmlinkage __visible __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs)
{
struct pt_regs *regs = eregs;
/* Did already sync */
@@ -601,11 +601,11 @@ do_spurious_interrupt_bug(struct pt_regs *regs, long error_code)
#endif
}
-asmlinkage void __attribute__((weak)) smp_thermal_interrupt(void)
+asmlinkage __visible void __attribute__((weak)) smp_thermal_interrupt(void)
{
}
-asmlinkage void __attribute__((weak)) smp_threshold_interrupt(void)
+asmlinkage __visible void __attribute__((weak)) smp_threshold_interrupt(void)
{
}
diff --git a/arch/x86/kernel/vsmp_64.c b/arch/x86/kernel/vsmp_64.c
index f6584a90aba3..b99b9ad8540c 100644
--- a/arch/x86/kernel/vsmp_64.c
+++ b/arch/x86/kernel/vsmp_64.c
@@ -26,6 +26,9 @@
#define TOPOLOGY_REGISTER_OFFSET 0x10
+/* Flag below is initialized once during vSMP PCI initialization. */
+static int irq_routing_comply = 1;
+
#if defined CONFIG_PCI && defined CONFIG_PARAVIRT
/*
* Interrupt control on vSMPowered systems:
@@ -33,7 +36,7 @@
* and vice versa.
*/
-asmlinkage unsigned long vsmp_save_fl(void)
+asmlinkage __visible unsigned long vsmp_save_fl(void)
{
unsigned long flags = native_save_fl();
@@ -53,7 +56,7 @@ __visible void vsmp_restore_fl(unsigned long flags)
}
PV_CALLEE_SAVE_REGS_THUNK(vsmp_restore_fl);
-asmlinkage void vsmp_irq_disable(void)
+asmlinkage __visible void vsmp_irq_disable(void)
{
unsigned long flags = native_save_fl();
@@ -61,7 +64,7 @@ asmlinkage void vsmp_irq_disable(void)
}
PV_CALLEE_SAVE_REGS_THUNK(vsmp_irq_disable);
-asmlinkage void vsmp_irq_enable(void)
+asmlinkage __visible void vsmp_irq_enable(void)
{
unsigned long flags = native_save_fl();
@@ -101,6 +104,10 @@ static void __init set_vsmp_pv_ops(void)
#ifdef CONFIG_SMP
if (cap & ctl & BIT(8)) {
ctl &= ~BIT(8);
+
+ /* Interrupt routing set to ignore */
+ irq_routing_comply = 0;
+
#ifdef CONFIG_PROC_FS
/* Don't let users change irq affinity via procfs */
no_irq_affinity = 1;
@@ -218,7 +225,9 @@ static void vsmp_apic_post_init(void)
{
/* need to update phys_pkg_id */
apic->phys_pkg_id = apicid_phys_pkg_id;
- apic->vector_allocation_domain = fill_vector_allocation_domain;
+
+ if (!irq_routing_comply)
+ apic->vector_allocation_domain = fill_vector_allocation_domain;
}
void __init vsmp_init(void)
diff --git a/arch/x86/kernel/vsyscall_gtod.c b/arch/x86/kernel/vsyscall_gtod.c
index f9c6e56e14b5..9531fbb123ba 100644
--- a/arch/x86/kernel/vsyscall_gtod.c
+++ b/arch/x86/kernel/vsyscall_gtod.c
@@ -43,7 +43,7 @@ void update_vsyscall(struct timekeeper *tk)
vdata->monotonic_time_sec = tk->xtime_sec
+ tk->wall_to_monotonic.tv_sec;
vdata->monotonic_time_snsec = tk->xtime_nsec
- + (tk->wall_to_monotonic.tv_nsec
+ + ((u64)tk->wall_to_monotonic.tv_nsec
<< tk->shift);
while (vdata->monotonic_time_snsec >=
(((u64)NSEC_PER_SEC) << tk->shift)) {
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 1f68c5831924..138ceffc6377 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -503,7 +503,7 @@ static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu)
[number##_HIGH] = VMCS12_OFFSET(name)+4
-static const unsigned long shadow_read_only_fields[] = {
+static unsigned long shadow_read_only_fields[] = {
/*
* We do NOT shadow fields that are modified when L0
* traps and emulates any vmx instruction (e.g. VMPTRLD,
@@ -526,10 +526,10 @@ static const unsigned long shadow_read_only_fields[] = {
GUEST_LINEAR_ADDRESS,
GUEST_PHYSICAL_ADDRESS
};
-static const int max_shadow_read_only_fields =
+static int max_shadow_read_only_fields =
ARRAY_SIZE(shadow_read_only_fields);
-static const unsigned long shadow_read_write_fields[] = {
+static unsigned long shadow_read_write_fields[] = {
GUEST_RIP,
GUEST_RSP,
GUEST_CR0,
@@ -558,7 +558,7 @@ static const unsigned long shadow_read_write_fields[] = {
HOST_FS_SELECTOR,
HOST_GS_SELECTOR
};
-static const int max_shadow_read_write_fields =
+static int max_shadow_read_write_fields =
ARRAY_SIZE(shadow_read_write_fields);
static const unsigned short vmcs_field_to_offset_table[] = {
@@ -3009,6 +3009,41 @@ static void free_kvm_area(void)
}
}
+static void init_vmcs_shadow_fields(void)
+{
+ int i, j;
+
+ /* No checks for read only fields yet */
+
+ for (i = j = 0; i < max_shadow_read_write_fields; i++) {
+ switch (shadow_read_write_fields[i]) {
+ case GUEST_BNDCFGS:
+ if (!vmx_mpx_supported())
+ continue;
+ break;
+ default:
+ break;
+ }
+
+ if (j < i)
+ shadow_read_write_fields[j] =
+ shadow_read_write_fields[i];
+ j++;
+ }
+ max_shadow_read_write_fields = j;
+
+ /* shadowed fields guest access without vmexit */
+ for (i = 0; i < max_shadow_read_write_fields; i++) {
+ clear_bit(shadow_read_write_fields[i],
+ vmx_vmwrite_bitmap);
+ clear_bit(shadow_read_write_fields[i],
+ vmx_vmread_bitmap);
+ }
+ for (i = 0; i < max_shadow_read_only_fields; i++)
+ clear_bit(shadow_read_only_fields[i],
+ vmx_vmread_bitmap);
+}
+
static __init int alloc_kvm_area(void)
{
int cpu;
@@ -3039,6 +3074,8 @@ static __init int hardware_setup(void)
enable_vpid = 0;
if (!cpu_has_vmx_shadow_vmcs())
enable_shadow_vmcs = 0;
+ if (enable_shadow_vmcs)
+ init_vmcs_shadow_fields();
if (!cpu_has_vmx_ept() ||
!cpu_has_vmx_ept_4levels()) {
@@ -7741,7 +7778,8 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
exec_control = vmcs12->pin_based_vm_exec_control;
exec_control |= vmcs_config.pin_based_exec_ctrl;
- exec_control &= ~PIN_BASED_VMX_PREEMPTION_TIMER;
+ exec_control &= ~(PIN_BASED_VMX_PREEMPTION_TIMER |
+ PIN_BASED_POSTED_INTR);
vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, exec_control);
vmx->nested.preemption_timer_expired = false;
@@ -7778,7 +7816,9 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
if (!vmx->rdtscp_enabled)
exec_control &= ~SECONDARY_EXEC_RDTSCP;
/* Take the following fields only from vmcs12 */
- exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
+ exec_control &= ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
+ SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
+ SECONDARY_EXEC_APIC_REGISTER_VIRT);
if (nested_cpu_has(vmcs12,
CPU_BASED_ACTIVATE_SECONDARY_CONTROLS))
exec_control |= vmcs12->secondary_vm_exec_control;
@@ -8803,14 +8843,6 @@ static int __init vmx_init(void)
memset(vmx_vmread_bitmap, 0xff, PAGE_SIZE);
memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE);
- /* shadowed read/write fields */
- for (i = 0; i < max_shadow_read_write_fields; i++) {
- clear_bit(shadow_read_write_fields[i], vmx_vmwrite_bitmap);
- clear_bit(shadow_read_write_fields[i], vmx_vmread_bitmap);
- }
- /* shadowed read only fields */
- for (i = 0; i < max_shadow_read_only_fields; i++)
- clear_bit(shadow_read_only_fields[i], vmx_vmread_bitmap);
/*
* Allow direct access to the PC debug port (it is often used for I/O
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 8b8fc0b792ba..20316c67b824 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -106,6 +106,8 @@ EXPORT_SYMBOL_GPL(kvm_max_guest_tsc_khz);
static u32 tsc_tolerance_ppm = 250;
module_param(tsc_tolerance_ppm, uint, S_IRUGO | S_IWUSR);
+static bool backwards_tsc_observed = false;
+
#define KVM_NR_SHARED_MSRS 16
struct kvm_shared_msrs_global {
@@ -280,7 +282,7 @@ int kvm_set_apic_base(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
}
EXPORT_SYMBOL_GPL(kvm_set_apic_base);
-asmlinkage void kvm_spurious_fault(void)
+asmlinkage __visible void kvm_spurious_fault(void)
{
/* Fault while not rebooting. We want the trace. */
BUG();
@@ -1486,7 +1488,8 @@ static void pvclock_update_vm_gtod_copy(struct kvm *kvm)
&ka->master_kernel_ns,
&ka->master_cycle_now);
- ka->use_master_clock = host_tsc_clocksource & vcpus_matched;
+ ka->use_master_clock = host_tsc_clocksource && vcpus_matched
+ && !backwards_tsc_observed;
if (ka->use_master_clock)
atomic_set(&kvm_guest_has_master_clock, 1);
@@ -6945,6 +6948,7 @@ int kvm_arch_hardware_enable(void *garbage)
*/
if (backwards_tsc) {
u64 delta_cyc = max_tsc - local_tsc;
+ backwards_tsc_observed = true;
list_for_each_entry(kvm, &vm_list, vm_list) {
kvm_for_each_vcpu(i, vcpu, kvm) {
vcpu->arch.tsc_offset_adjustment += delta_cyc;
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index ad1fb5f53925..aae94132bc24 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -233,13 +233,13 @@ static void lguest_end_context_switch(struct task_struct *next)
* flags word contains all kind of stuff, but in practice Linux only cares
* about the interrupt flag. Our "save_flags()" just returns that.
*/
-asmlinkage unsigned long lguest_save_fl(void)
+asmlinkage __visible unsigned long lguest_save_fl(void)
{
return lguest_data.irq_enabled;
}
/* Interrupts go off... */
-asmlinkage void lguest_irq_disable(void)
+asmlinkage __visible void lguest_irq_disable(void)
{
lguest_data.irq_enabled = 0;
}
diff --git a/arch/x86/lib/msr.c b/arch/x86/lib/msr.c
index db9db446b71a..43623739c7cf 100644
--- a/arch/x86/lib/msr.c
+++ b/arch/x86/lib/msr.c
@@ -76,7 +76,7 @@ static inline int __flip_bit(u32 msr, u8 bit, bool set)
if (m1.q == m.q)
return 0;
- err = msr_write(msr, &m);
+ err = msr_write(msr, &m1);
if (err)
return err;
diff --git a/arch/x86/math-emu/errors.c b/arch/x86/math-emu/errors.c
index a5449089cd9f..9e6545f269e5 100644
--- a/arch/x86/math-emu/errors.c
+++ b/arch/x86/math-emu/errors.c
@@ -302,7 +302,7 @@ static struct {
0x242 in div_Xsig.S
*/
-asmlinkage void FPU_exception(int n)
+asmlinkage __visible void FPU_exception(int n)
{
int i, int_type;
@@ -492,7 +492,7 @@ int real_2op_NaN(FPU_REG const *b, u_char tagb,
/* Invalid arith operation on Valid registers */
/* Returns < 0 if the exception is unmasked */
-asmlinkage int arith_invalid(int deststnr)
+asmlinkage __visible int arith_invalid(int deststnr)
{
EXCEPTION(EX_Invalid);
@@ -507,7 +507,7 @@ asmlinkage int arith_invalid(int deststnr)
}
/* Divide a finite number by zero */
-asmlinkage int FPU_divide_by_zero(int deststnr, u_char sign)
+asmlinkage __visible int FPU_divide_by_zero(int deststnr, u_char sign)
{
FPU_REG *dest = &st(deststnr);
int tag = TAG_Valid;
@@ -539,7 +539,7 @@ int set_precision_flag(int flags)
}
/* This may be called often, so keep it lean */
-asmlinkage void set_precision_flag_up(void)
+asmlinkage __visible void set_precision_flag_up(void)
{
if (control_word & CW_Precision)
partial_status |= (SW_Precision | SW_C1); /* The masked response */
@@ -548,7 +548,7 @@ asmlinkage void set_precision_flag_up(void)
}
/* This may be called often, so keep it lean */
-asmlinkage void set_precision_flag_down(void)
+asmlinkage __visible void set_precision_flag_down(void)
{
if (control_word & CW_Precision) { /* The masked response */
partial_status &= ~SW_C1;
@@ -557,7 +557,7 @@ asmlinkage void set_precision_flag_down(void)
EXCEPTION(EX_Precision);
}
-asmlinkage int denormal_operand(void)
+asmlinkage __visible int denormal_operand(void)
{
if (control_word & CW_Denormal) { /* The masked response */
partial_status |= SW_Denorm_Op;
@@ -568,7 +568,7 @@ asmlinkage int denormal_operand(void)
}
}
-asmlinkage int arith_overflow(FPU_REG *dest)
+asmlinkage __visible int arith_overflow(FPU_REG *dest)
{
int tag = TAG_Valid;
@@ -596,7 +596,7 @@ asmlinkage int arith_overflow(FPU_REG *dest)
}
-asmlinkage int arith_underflow(FPU_REG *dest)
+asmlinkage __visible int arith_underflow(FPU_REG *dest)
{
int tag = TAG_Valid;
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index dc017735bb91..6d5663a599a7 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -171,7 +171,7 @@ static struct bpf_binary_header *bpf_alloc_binary(unsigned int proglen,
memset(header, 0xcc, sz); /* fill whole space with int3 instructions */
header->pages = sz / PAGE_SIZE;
- hole = sz - (proglen + sizeof(*header));
+ hole = min(sz - (proglen + sizeof(*header)), PAGE_SIZE - sizeof(*header));
/* insert a random number of int3 instructions before BPF code */
*image_ptr = &header->image[prandom_u32() % hole];
diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c
index 01edac6c5e18..5075371ab593 100644
--- a/arch/x86/pci/acpi.c
+++ b/arch/x86/pci/acpi.c
@@ -489,8 +489,12 @@ struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root)
}
node = acpi_get_node(device->handle);
- if (node == NUMA_NO_NODE)
+ if (node == NUMA_NO_NODE) {
node = x86_pci_root_bus_node(busnum);
+ if (node != 0 && node != NUMA_NO_NODE)
+ dev_info(&device->dev, FW_BUG "no _PXM; falling back to node %d from hardware (may be inconsistent with ACPI node numbers)\n",
+ node);
+ }
if (node != NUMA_NO_NODE && !node_online(node))
node = NUMA_NO_NODE;
diff --git a/arch/x86/pci/amd_bus.c b/arch/x86/pci/amd_bus.c
index e88f4c53d7f6..c20d2cc7ef64 100644
--- a/arch/x86/pci/amd_bus.c
+++ b/arch/x86/pci/amd_bus.c
@@ -11,27 +11,33 @@
#include "bus_numa.h"
-/*
- * This discovers the pcibus <-> node mapping on AMD K8.
- * also get peer root bus resource for io,mmio
- */
+#define AMD_NB_F0_NODE_ID 0x60
+#define AMD_NB_F0_UNIT_ID 0x64
+#define AMD_NB_F1_CONFIG_MAP_REG 0xe0
+
+#define RANGE_NUM 16
+#define AMD_NB_F1_CONFIG_MAP_RANGES 4
-struct pci_hostbridge_probe {
+struct amd_hostbridge {
u32 bus;
u32 slot;
- u32 vendor;
u32 device;
};
-static struct pci_hostbridge_probe pci_probes[] __initdata = {
- { 0, 0x18, PCI_VENDOR_ID_AMD, 0x1100 },
- { 0, 0x18, PCI_VENDOR_ID_AMD, 0x1200 },
- { 0xff, 0, PCI_VENDOR_ID_AMD, 0x1200 },
- { 0, 0x18, PCI_VENDOR_ID_AMD, 0x1300 },
+/*
+ * IMPORTANT NOTE:
+ * hb_probes[] and early_root_info_init() is in maintenance mode.
+ * It only supports K8, Fam10h, Fam11h, and Fam15h_00h-0fh .
+ * Future processor will rely on information in ACPI.
+ */
+static struct amd_hostbridge hb_probes[] __initdata = {
+ { 0, 0x18, 0x1100 }, /* K8 */
+ { 0, 0x18, 0x1200 }, /* Family10h */
+ { 0xff, 0, 0x1200 }, /* Family10h */
+ { 0, 0x18, 0x1300 }, /* Family11h */
+ { 0, 0x18, 0x1600 }, /* Family15h */
};
-#define RANGE_NUM 16
-
static struct pci_root_info __init *find_pci_root_info(int node, int link)
{
struct pci_root_info *info;
@@ -45,12 +51,12 @@ static struct pci_root_info __init *find_pci_root_info(int node, int link)
}
/**
- * early_fill_mp_bus_to_node()
+ * early_root_info_init()
* called before pcibios_scan_root and pci_scan_bus
- * fills the mp_bus_to_cpumask array based according to the LDT Bus Number
- * Registers found in the K8 northbridge
+ * fills the mp_bus_to_cpumask array based according
+ * to the LDT Bus Number Registers found in the northbridge.
*/
-static int __init early_fill_mp_bus_info(void)
+static int __init early_root_info_init(void)
{
int i;
unsigned bus;
@@ -75,19 +81,21 @@ static int __init early_fill_mp_bus_info(void)
return -1;
found = false;
- for (i = 0; i < ARRAY_SIZE(pci_probes); i++) {
+ for (i = 0; i < ARRAY_SIZE(hb_probes); i++) {
u32 id;
u16 device;
u16 vendor;
- bus = pci_probes[i].bus;
- slot = pci_probes[i].slot;
+ bus = hb_probes[i].bus;
+ slot = hb_probes[i].slot;
id = read_pci_config(bus, slot, 0, PCI_VENDOR_ID);
-
vendor = id & 0xffff;
device = (id>>16) & 0xffff;
- if (pci_probes[i].vendor == vendor &&
- pci_probes[i].device == device) {
+
+ if (vendor != PCI_VENDOR_ID_AMD)
+ continue;
+
+ if (hb_probes[i].device == device) {
found = true;
break;
}
@@ -96,10 +104,16 @@ static int __init early_fill_mp_bus_info(void)
if (!found)
return 0;
- for (i = 0; i < 4; i++) {
+ /*
+ * We should learn topology and routing information from _PXM and
+ * _CRS methods in the ACPI namespace. We extract node numbers
+ * here to work around BIOSes that don't supply _PXM.
+ */
+ for (i = 0; i < AMD_NB_F1_CONFIG_MAP_RANGES; i++) {
int min_bus;
int max_bus;
- reg = read_pci_config(bus, slot, 1, 0xe0 + (i << 2));
+ reg = read_pci_config(bus, slot, 1,
+ AMD_NB_F1_CONFIG_MAP_REG + (i << 2));
/* Check if that register is enabled for bus range */
if ((reg & 7) != 3)
@@ -113,10 +127,21 @@ static int __init early_fill_mp_bus_info(void)
info = alloc_pci_root_info(min_bus, max_bus, node, link);
}
+ /*
+ * The following code extracts routing information for use on old
+ * systems where Linux doesn't automatically use host bridge _CRS
+ * methods (or when the user specifies "pci=nocrs").
+ *
+ * We only do this through Fam11h, because _CRS should be enough on
+ * newer systems.
+ */
+ if (boot_cpu_data.x86 > 0x11)
+ return 0;
+
/* get the default node and link for left over res */
- reg = read_pci_config(bus, slot, 0, 0x60);
+ reg = read_pci_config(bus, slot, 0, AMD_NB_F0_NODE_ID);
def_node = (reg >> 8) & 0x07;
- reg = read_pci_config(bus, slot, 0, 0x64);
+ reg = read_pci_config(bus, slot, 0, AMD_NB_F0_UNIT_ID);
def_link = (reg >> 8) & 0x03;
memset(range, 0, sizeof(range));
@@ -363,7 +388,7 @@ static int __init pci_io_ecs_init(void)
int cpu;
/* assume all cpus from fam10h have IO ECS */
- if (boot_cpu_data.x86 < 0x10)
+ if (boot_cpu_data.x86 < 0x10)
return 0;
/* Try the PCI method first. */
@@ -387,7 +412,7 @@ static int __init amd_postcore_init(void)
if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
return 0;
- early_fill_mp_bus_info();
+ early_root_info_init();
pci_io_ecs_init();
return 0;
diff --git a/arch/x86/pci/broadcom_bus.c b/arch/x86/pci/broadcom_bus.c
index 614392ced7d6..bb461cfd01ab 100644
--- a/arch/x86/pci/broadcom_bus.c
+++ b/arch/x86/pci/broadcom_bus.c
@@ -60,8 +60,8 @@ static void __init cnb20le_res(u8 bus, u8 slot, u8 func)
word1 = read_pci_config_16(bus, slot, func, 0xc4);
word2 = read_pci_config_16(bus, slot, func, 0xc6);
if (word1 != word2) {
- res.start = (word1 << 16) | 0x0000;
- res.end = (word2 << 16) | 0xffff;
+ res.start = ((resource_size_t) word1 << 16) | 0x0000;
+ res.end = ((resource_size_t) word2 << 16) | 0xffff;
res.flags = IORESOURCE_MEM | IORESOURCE_PREFETCH;
update_res(info, res.start, res.end, res.flags, 0);
}
diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c
index 94ae9ae9574f..b5e60268d93f 100644
--- a/arch/x86/pci/fixup.c
+++ b/arch/x86/pci/fixup.c
@@ -6,6 +6,7 @@
#include <linux/dmi.h>
#include <linux/pci.h>
#include <linux/vgaarb.h>
+#include <asm/hpet.h>
#include <asm/pci_x86.h>
static void pci_fixup_i450nx(struct pci_dev *d)
@@ -337,9 +338,7 @@ static void pci_fixup_video(struct pci_dev *pdev)
* type BRIDGE, or CARDBUS. Host to PCI controllers use
* PCI header type NORMAL.
*/
- if (bridge
- && ((bridge->hdr_type == PCI_HEADER_TYPE_BRIDGE)
- || (bridge->hdr_type == PCI_HEADER_TYPE_CARDBUS))) {
+ if (bridge && (pci_is_bridge(bridge))) {
pci_read_config_word(bridge, PCI_BRIDGE_CONTROL,
&config);
if (!(config & PCI_BRIDGE_CTL_VGA))
@@ -526,6 +525,19 @@ static void sb600_disable_hpet_bar(struct pci_dev *dev)
}
DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_ATI, 0x4385, sb600_disable_hpet_bar);
+#ifdef CONFIG_HPET_TIMER
+static void sb600_hpet_quirk(struct pci_dev *dev)
+{
+ struct resource *r = &dev->resource[1];
+
+ if (r->flags & IORESOURCE_MEM && r->start == hpet_address) {
+ r->flags |= IORESOURCE_PCI_FIXED;
+ dev_info(&dev->dev, "reg 0x14 contains HPET; making it immovable\n");
+ }
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATI, 0x4385, sb600_hpet_quirk);
+#endif
+
/*
* Twinhead H12Y needs us to block out a region otherwise we map devices
* there and any access kills the box.
diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c
index db6b1ab43255..a19ed92e74e4 100644
--- a/arch/x86/pci/i386.c
+++ b/arch/x86/pci/i386.c
@@ -271,11 +271,16 @@ static void pcibios_allocate_dev_resources(struct pci_dev *dev, int pass)
"BAR %d: reserving %pr (d=%d, p=%d)\n",
idx, r, disabled, pass);
if (pci_claim_resource(dev, idx) < 0) {
- /* We'll assign a new address later */
- pcibios_save_fw_addr(dev,
- idx, r->start);
- r->end -= r->start;
- r->start = 0;
+ if (r->flags & IORESOURCE_PCI_FIXED) {
+ dev_info(&dev->dev, "BAR %d %pR is immovable\n",
+ idx, r);
+ } else {
+ /* We'll assign a new address later */
+ pcibios_save_fw_addr(dev,
+ idx, r->start);
+ r->end -= r->start;
+ r->start = 0;
+ }
}
}
}
@@ -356,6 +361,12 @@ static int __init pcibios_assign_resources(void)
return 0;
}
+/**
+ * called in fs_initcall (one below subsys_initcall),
+ * give a chance for motherboard reserve resources
+ */
+fs_initcall(pcibios_assign_resources);
+
void pcibios_resource_survey_bus(struct pci_bus *bus)
{
dev_printk(KERN_DEBUG, &bus->dev, "Allocating resources\n");
@@ -392,12 +403,6 @@ void __init pcibios_resource_survey(void)
ioapic_insert_resources();
}
-/**
- * called in fs_initcall (one below subsys_initcall),
- * give a chance for motherboard reserve resources
- */
-fs_initcall(pcibios_assign_resources);
-
static const struct vm_operations_struct pci_mmap_ops = {
.access = generic_access_phys,
};
diff --git a/arch/x86/platform/efi/early_printk.c b/arch/x86/platform/efi/early_printk.c
index 81b506d5befd..524142117296 100644
--- a/arch/x86/platform/efi/early_printk.c
+++ b/arch/x86/platform/efi/early_printk.c
@@ -14,48 +14,92 @@
static const struct font_desc *font;
static u32 efi_x, efi_y;
+static void *efi_fb;
+static bool early_efi_keep;
-static __init void early_efi_clear_scanline(unsigned int y)
+/*
+ * efi earlyprintk need use early_ioremap to map the framebuffer.
+ * But early_ioremap is not usable for earlyprintk=efi,keep, ioremap should
+ * be used instead. ioremap will be available after paging_init() which is
+ * earlier than initcall callbacks. Thus adding this early initcall function
+ * early_efi_map_fb to map the whole efi framebuffer.
+ */
+static __init int early_efi_map_fb(void)
{
- unsigned long base, *dst;
- u16 len;
+ unsigned long base, size;
+
+ if (!early_efi_keep)
+ return 0;
base = boot_params.screen_info.lfb_base;
- len = boot_params.screen_info.lfb_linelength;
+ size = boot_params.screen_info.lfb_size;
+ efi_fb = ioremap(base, size);
+
+ return efi_fb ? 0 : -ENOMEM;
+}
+early_initcall(early_efi_map_fb);
+
+/*
+ * early_efi_map maps efi framebuffer region [start, start + len -1]
+ * In case earlyprintk=efi,keep we have the whole framebuffer mapped already
+ * so just return the offset efi_fb + start.
+ */
+static __init_refok void *early_efi_map(unsigned long start, unsigned long len)
+{
+ unsigned long base;
+
+ base = boot_params.screen_info.lfb_base;
+
+ if (efi_fb)
+ return (efi_fb + start);
+ else
+ return early_ioremap(base + start, len);
+}
- dst = early_ioremap(base + y*len, len);
+static __init_refok void early_efi_unmap(void *addr, unsigned long len)
+{
+ if (!efi_fb)
+ early_iounmap(addr, len);
+}
+
+static void early_efi_clear_scanline(unsigned int y)
+{
+ unsigned long *dst;
+ u16 len;
+
+ len = boot_params.screen_info.lfb_linelength;
+ dst = early_efi_map(y*len, len);
if (!dst)
return;
memset(dst, 0, len);
- early_iounmap(dst, len);
+ early_efi_unmap(dst, len);
}
-static __init void early_efi_scroll_up(void)
+static void early_efi_scroll_up(void)
{
- unsigned long base, *dst, *src;
+ unsigned long *dst, *src;
u16 len;
u32 i, height;
- base = boot_params.screen_info.lfb_base;
len = boot_params.screen_info.lfb_linelength;
height = boot_params.screen_info.lfb_height;
for (i = 0; i < height - font->height; i++) {
- dst = early_ioremap(base + i*len, len);
+ dst = early_efi_map(i*len, len);
if (!dst)
return;
- src = early_ioremap(base + (i + font->height) * len, len);
+ src = early_efi_map((i + font->height) * len, len);
if (!src) {
- early_iounmap(dst, len);
+ early_efi_unmap(dst, len);
return;
}
memmove(dst, src, len);
- early_iounmap(src, len);
- early_iounmap(dst, len);
+ early_efi_unmap(src, len);
+ early_efi_unmap(dst, len);
}
}
@@ -79,16 +123,14 @@ static void early_efi_write_char(u32 *dst, unsigned char c, unsigned int h)
}
}
-static __init void
+static void
early_efi_write(struct console *con, const char *str, unsigned int num)
{
struct screen_info *si;
- unsigned long base;
unsigned int len;
const char *s;
void *dst;
- base = boot_params.screen_info.lfb_base;
si = &boot_params.screen_info;
len = si->lfb_linelength;
@@ -109,7 +151,7 @@ early_efi_write(struct console *con, const char *str, unsigned int num)
for (h = 0; h < font->height; h++) {
unsigned int n, x;
- dst = early_ioremap(base + (efi_y + h) * len, len);
+ dst = early_efi_map((efi_y + h) * len, len);
if (!dst)
return;
@@ -123,7 +165,7 @@ early_efi_write(struct console *con, const char *str, unsigned int num)
s++;
}
- early_iounmap(dst, len);
+ early_efi_unmap(dst, len);
}
num -= count;
@@ -179,6 +221,9 @@ static __init int early_efi_setup(struct console *con, char *options)
for (i = 0; i < (yres - efi_y) / font->height; i++)
early_efi_scroll_up();
+ /* early_console_register will unset CON_BOOT in case ,keep */
+ if (!(con->flags & CON_BOOT))
+ early_efi_keep = true;
return 0;
}
diff --git a/arch/x86/platform/olpc/olpc-xo1-pm.c b/arch/x86/platform/olpc/olpc-xo1-pm.c
index ff0174dda810..a9acde72d4ed 100644
--- a/arch/x86/platform/olpc/olpc-xo1-pm.c
+++ b/arch/x86/platform/olpc/olpc-xo1-pm.c
@@ -75,7 +75,7 @@ static int xo1_power_state_enter(suspend_state_t pm_state)
return 0;
}
-asmlinkage int xo1_do_sleep(u8 sleep_state)
+asmlinkage __visible int xo1_do_sleep(u8 sleep_state)
{
void *pgd_addr = __va(read_cr3());
diff --git a/arch/x86/power/hibernate_64.c b/arch/x86/power/hibernate_64.c
index 304fca20d96e..35e2bb6c0f37 100644
--- a/arch/x86/power/hibernate_64.c
+++ b/arch/x86/power/hibernate_64.c
@@ -23,7 +23,7 @@
extern __visible const void __nosave_begin, __nosave_end;
/* Defined in hibernate_asm_64.S */
-extern asmlinkage int restore_image(void);
+extern asmlinkage __visible int restore_image(void);
/*
* Address to jump to in the last phase of restore in order to get to the image
diff --git a/arch/x86/vdso/vdso32-setup.c b/arch/x86/vdso/vdso32-setup.c
index 00348980a3a6..e1f220e3ca68 100644
--- a/arch/x86/vdso/vdso32-setup.c
+++ b/arch/x86/vdso/vdso32-setup.c
@@ -39,6 +39,7 @@
#ifdef CONFIG_X86_64
#define vdso_enabled sysctl_vsyscall32
#define arch_setup_additional_pages syscall32_setup_pages
+extern int sysctl_ldt16;
#endif
/*
@@ -249,6 +250,13 @@ static struct ctl_table abi_table2[] = {
.mode = 0644,
.proc_handler = proc_dointvec
},
+ {
+ .procname = "ldt16",
+ .data = &sysctl_ldt16,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
{}
};
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 201d09a7c46b..f17b29210ac4 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1339,6 +1339,7 @@ xen_panic_event(struct notifier_block *this, unsigned long event, void *ptr)
static struct notifier_block xen_panic_block = {
.notifier_call= xen_panic_event,
+ .priority = INT_MIN
};
int xen_panic_handler_init(void)
@@ -1515,7 +1516,7 @@ static void __init xen_pvh_early_guest_init(void)
}
/* First C function to be called on Xen boot */
-asmlinkage void __init xen_start_kernel(void)
+asmlinkage __visible void __init xen_start_kernel(void)
{
struct physdev_set_iopl set_iopl;
int rc;
diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c
index 08f763de26fe..a1207cb6472a 100644
--- a/arch/x86/xen/irq.c
+++ b/arch/x86/xen/irq.c
@@ -23,7 +23,7 @@ void xen_force_evtchn_callback(void)
(void)HYPERVISOR_xen_version(0, NULL);
}
-asmlinkage unsigned long xen_save_fl(void)
+asmlinkage __visible unsigned long xen_save_fl(void)
{
struct vcpu_info *vcpu;
unsigned long flags;
@@ -63,7 +63,7 @@ __visible void xen_restore_fl(unsigned long flags)
}
PV_CALLEE_SAVE_REGS_THUNK(xen_restore_fl);
-asmlinkage void xen_irq_disable(void)
+asmlinkage __visible void xen_irq_disable(void)
{
/* There's a one instruction preempt window here. We need to
make sure we're don't switch CPUs between getting the vcpu
@@ -74,7 +74,7 @@ asmlinkage void xen_irq_disable(void)
}
PV_CALLEE_SAVE_REGS_THUNK(xen_irq_disable);
-asmlinkage void xen_irq_enable(void)
+asmlinkage __visible void xen_irq_enable(void)
{
struct vcpu_info *vcpu;
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 86e02eabb640..6f6e15d28466 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -2510,6 +2510,95 @@ void __init xen_hvm_init_mmu_ops(void)
}
#endif
+#ifdef CONFIG_XEN_PVH
+/*
+ * Map foreign gfn (fgfn), to local pfn (lpfn). This for the user
+ * space creating new guest on pvh dom0 and needing to map domU pages.
+ */
+static int xlate_add_to_p2m(unsigned long lpfn, unsigned long fgfn,
+ unsigned int domid)
+{
+ int rc, err = 0;
+ xen_pfn_t gpfn = lpfn;
+ xen_ulong_t idx = fgfn;
+
+ struct xen_add_to_physmap_range xatp = {
+ .domid = DOMID_SELF,
+ .foreign_domid = domid,
+ .size = 1,
+ .space = XENMAPSPACE_gmfn_foreign,
+ };
+ set_xen_guest_handle(xatp.idxs, &idx);
+ set_xen_guest_handle(xatp.gpfns, &gpfn);
+ set_xen_guest_handle(xatp.errs, &err);
+
+ rc = HYPERVISOR_memory_op(XENMEM_add_to_physmap_range, &xatp);
+ if (rc < 0)
+ return rc;
+ return err;
+}
+
+static int xlate_remove_from_p2m(unsigned long spfn, int count)
+{
+ struct xen_remove_from_physmap xrp;
+ int i, rc;
+
+ for (i = 0; i < count; i++) {
+ xrp.domid = DOMID_SELF;
+ xrp.gpfn = spfn+i;
+ rc = HYPERVISOR_memory_op(XENMEM_remove_from_physmap, &xrp);
+ if (rc)
+ break;
+ }
+ return rc;
+}
+
+struct xlate_remap_data {
+ unsigned long fgfn; /* foreign domain's gfn */
+ pgprot_t prot;
+ domid_t domid;
+ int index;
+ struct page **pages;
+};
+
+static int xlate_map_pte_fn(pte_t *ptep, pgtable_t token, unsigned long addr,
+ void *data)
+{
+ int rc;
+ struct xlate_remap_data *remap = data;
+ unsigned long pfn = page_to_pfn(remap->pages[remap->index++]);
+ pte_t pteval = pte_mkspecial(pfn_pte(pfn, remap->prot));
+
+ rc = xlate_add_to_p2m(pfn, remap->fgfn, remap->domid);
+ if (rc)
+ return rc;
+ native_set_pte(ptep, pteval);
+
+ return 0;
+}
+
+static int xlate_remap_gfn_range(struct vm_area_struct *vma,
+ unsigned long addr, unsigned long mfn,
+ int nr, pgprot_t prot, unsigned domid,
+ struct page **pages)
+{
+ int err;
+ struct xlate_remap_data pvhdata;
+
+ BUG_ON(!pages);
+
+ pvhdata.fgfn = mfn;
+ pvhdata.prot = prot;
+ pvhdata.domid = domid;
+ pvhdata.index = 0;
+ pvhdata.pages = pages;
+ err = apply_to_page_range(vma->vm_mm, addr, nr << PAGE_SHIFT,
+ xlate_map_pte_fn, &pvhdata);
+ flush_tlb_all();
+ return err;
+}
+#endif
+
#define REMAP_BATCH_SIZE 16
struct remap_data {
@@ -2522,7 +2611,7 @@ static int remap_area_mfn_pte_fn(pte_t *ptep, pgtable_t token,
unsigned long addr, void *data)
{
struct remap_data *rmd = data;
- pte_t pte = pte_mkspecial(pfn_pte(rmd->mfn++, rmd->prot));
+ pte_t pte = pte_mkspecial(mfn_pte(rmd->mfn++, rmd->prot));
rmd->mmu_update->ptr = virt_to_machine(ptep).maddr;
rmd->mmu_update->val = pte_val_ma(pte);
@@ -2544,13 +2633,18 @@ int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
unsigned long range;
int err = 0;
- if (xen_feature(XENFEAT_auto_translated_physmap))
- return -EINVAL;
-
- prot = __pgprot(pgprot_val(prot) | _PAGE_IOMAP);
-
BUG_ON(!((vma->vm_flags & (VM_PFNMAP | VM_IO)) == (VM_PFNMAP | VM_IO)));
+ if (xen_feature(XENFEAT_auto_translated_physmap)) {
+#ifdef CONFIG_XEN_PVH
+ /* We need to update the local page tables and the xen HAP */
+ return xlate_remap_gfn_range(vma, addr, mfn, nr, prot,
+ domid, pages);
+#else
+ return -EINVAL;
+#endif
+ }
+
rmd.mfn = mfn;
rmd.prot = prot;
@@ -2588,6 +2682,25 @@ int xen_unmap_domain_mfn_range(struct vm_area_struct *vma,
if (!pages || !xen_feature(XENFEAT_auto_translated_physmap))
return 0;
+#ifdef CONFIG_XEN_PVH
+ while (numpgs--) {
+ /*
+ * The mmu has already cleaned up the process mmu
+ * resources at this point (lookup_address will return
+ * NULL).
+ */
+ unsigned long pfn = page_to_pfn(pages[numpgs]);
+
+ xlate_remove_from_p2m(pfn, 1);
+ }
+ /*
+ * We don't need to flush tlbs because as part of
+ * xlate_remove_from_p2m, the hypervisor will do tlb flushes
+ * after removing the p2m entries from the EPT/NPT
+ */
+ return 0;
+#else
return -EINVAL;
+#endif
}
EXPORT_SYMBOL_GPL(xen_unmap_domain_mfn_range);
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index 85e5d78c9874..9bb3d82ffec8 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -36,7 +36,7 @@
* pfn_to_mfn(0xc0000)=0xc0000
*
* The benefit of this is, that we can assume for non-RAM regions (think
- * PCI BARs, or ACPI spaces), we can create mappings easily b/c we
+ * PCI BARs, or ACPI spaces), we can create mappings easily because we
* get the PFN value to match the MFN.
*
* For this to work efficiently we have one new page p2m_identity and
@@ -60,7 +60,7 @@
* There is also a digram of the P2M at the end that can help.
* Imagine your E820 looking as so:
*
- * 1GB 2GB
+ * 1GB 2GB 4GB
* /-------------------+---------\/----\ /----------\ /---+-----\
* | System RAM | Sys RAM ||ACPI| | reserved | | Sys RAM |
* \-------------------+---------/\----/ \----------/ \---+-----/
@@ -77,9 +77,8 @@
* of the PFN and the end PFN (263424 and 512256 respectively). The first step
* is to reserve_brk a top leaf page if the p2m[1] is missing. The top leaf page
* covers 512^2 of page estate (1GB) and in case the start or end PFN is not
- * aligned on 512^2*PAGE_SIZE (1GB) we loop on aligned 1GB PFNs from start pfn
- * to end pfn. We reserve_brk top leaf pages if they are missing (means they
- * point to p2m_mid_missing).
+ * aligned on 512^2*PAGE_SIZE (1GB) we reserve_brk new middle and leaf pages as
+ * required to split any existing p2m_mid_missing middle pages.
*
* With the E820 example above, 263424 is not 1GB aligned so we allocate a
* reserve_brk page which will cover the PFNs estate from 0x40000 to 0x80000.
@@ -88,7 +87,7 @@
* Next stage is to determine if we need to do a more granular boundary check
* on the 4MB (or 2MB depending on architecture) off the start and end pfn's.
* We check if the start pfn and end pfn violate that boundary check, and if
- * so reserve_brk a middle (p2m[x][y]) leaf page. This way we have a much finer
+ * so reserve_brk a (p2m[x][y]) leaf page. This way we have a much finer
* granularity of setting which PFNs are missing and which ones are identity.
* In our example 263424 and 512256 both fail the check so we reserve_brk two
* pages. Populate them with INVALID_P2M_ENTRY (so they both have "missing"
@@ -102,9 +101,10 @@
*
* The next step is to walk from the start pfn to the end pfn setting
* the IDENTITY_FRAME_BIT on each PFN. This is done in set_phys_range_identity.
- * If we find that the middle leaf is pointing to p2m_missing we can swap it
- * over to p2m_identity - this way covering 4MB (or 2MB) PFN space. At this
- * point we do not need to worry about boundary aligment (so no need to
+ * If we find that the middle entry is pointing to p2m_missing we can swap it
+ * over to p2m_identity - this way covering 4MB (or 2MB) PFN space (and
+ * similarly swapping p2m_mid_missing for p2m_mid_identity for larger regions).
+ * At this point we do not need to worry about boundary aligment (so no need to
* reserve_brk a middle page, figure out which PFNs are "missing" and which
* ones are identity), as that has been done earlier. If we find that the
* middle leaf is not occupied by p2m_identity or p2m_missing, we dereference
@@ -118,6 +118,9 @@
* considered missing). In our case, p2m[1][2][0->255] and p2m[1][488][257->511]
* contain the INVALID_P2M_ENTRY value and are considered "missing."
*
+ * Finally, the region beyond the end of of the E820 (4 GB in this example)
+ * is set to be identity (in case there are MMIO regions placed here).
+ *
* This is what the p2m ends up looking (for the E820 above) with this
* fabulous drawing:
*
@@ -129,21 +132,27 @@
* |-----| \ | [p2m_identity]+\\ | .... |
* | 2 |--\ \-------------------->| ... | \\ \----------------/
* |-----| \ \---------------/ \\
- * | 3 |\ \ \\ p2m_identity
- * |-----| \ \-------------------->/---------------\ /-----------------\
- * | .. +->+ | [p2m_identity]+-->| ~0, ~0, ~0, ... |
- * \-----/ / | [p2m_identity]+-->| ..., ~0 |
- * / /---------------\ | .... | \-----------------/
- * / | IDENTITY[@0] | /-+-[x], ~0, ~0.. |
- * / | IDENTITY[@256]|<----/ \---------------/
- * / | ~0, ~0, .... |
- * | \---------------/
- * |
- * p2m_mid_missing p2m_missing
- * /-----------------\ /------------\
- * | [p2m_missing] +---->| ~0, ~0, ~0 |
- * | [p2m_missing] +---->| ..., ~0 |
- * \-----------------/ \------------/
+ * | 3 |-\ \ \\ p2m_identity [1]
+ * |-----| \ \-------------------->/---------------\ /-----------------\
+ * | .. |\ | | [p2m_identity]+-->| ~0, ~0, ~0, ... |
+ * \-----/ | | | [p2m_identity]+-->| ..., ~0 |
+ * | | | .... | \-----------------/
+ * | | +-[x], ~0, ~0.. +\
+ * | | \---------------/ \
+ * | | \-> /---------------\
+ * | V p2m_mid_missing p2m_missing | IDENTITY[@0] |
+ * | /-----------------\ /------------\ | IDENTITY[@256]|
+ * | | [p2m_missing] +---->| ~0, ~0, ...| | ~0, ~0, .... |
+ * | | [p2m_missing] +---->| ..., ~0 | \---------------/
+ * | | ... | \------------/
+ * | \-----------------/
+ * |
+ * | p2m_mid_identity
+ * | /-----------------\
+ * \-->| [p2m_identity] +---->[1]
+ * | [p2m_identity] +---->[1]
+ * | ... |
+ * \-----------------/
*
* where ~0 is INVALID_P2M_ENTRY. IDENTITY is (PFN | IDENTITY_BIT)
*/
@@ -187,13 +196,15 @@ static RESERVE_BRK_ARRAY(unsigned long, p2m_top_mfn, P2M_TOP_PER_PAGE);
static RESERVE_BRK_ARRAY(unsigned long *, p2m_top_mfn_p, P2M_TOP_PER_PAGE);
static RESERVE_BRK_ARRAY(unsigned long, p2m_identity, P2M_PER_PAGE);
+static RESERVE_BRK_ARRAY(unsigned long *, p2m_mid_identity, P2M_MID_PER_PAGE);
+static RESERVE_BRK_ARRAY(unsigned long, p2m_mid_identity_mfn, P2M_MID_PER_PAGE);
RESERVE_BRK(p2m_mid, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE)));
RESERVE_BRK(p2m_mid_mfn, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE)));
/* We might hit two boundary violations at the start and end, at max each
* boundary violation will require three middle nodes. */
-RESERVE_BRK(p2m_mid_identity, PAGE_SIZE * 2 * 3);
+RESERVE_BRK(p2m_mid_extra, PAGE_SIZE * 2 * 3);
/* When we populate back during bootup, the amount of pages can vary. The
* max we have is seen is 395979, but that does not mean it can't be more.
@@ -242,20 +253,20 @@ static void p2m_top_mfn_p_init(unsigned long **top)
top[i] = p2m_mid_missing_mfn;
}
-static void p2m_mid_init(unsigned long **mid)
+static void p2m_mid_init(unsigned long **mid, unsigned long *leaf)
{
unsigned i;
for (i = 0; i < P2M_MID_PER_PAGE; i++)
- mid[i] = p2m_missing;
+ mid[i] = leaf;
}
-static void p2m_mid_mfn_init(unsigned long *mid)
+static void p2m_mid_mfn_init(unsigned long *mid, unsigned long *leaf)
{
unsigned i;
for (i = 0; i < P2M_MID_PER_PAGE; i++)
- mid[i] = virt_to_mfn(p2m_missing);
+ mid[i] = virt_to_mfn(leaf);
}
static void p2m_init(unsigned long *p2m)
@@ -286,7 +297,9 @@ void __ref xen_build_mfn_list_list(void)
/* Pre-initialize p2m_top_mfn to be completely missing */
if (p2m_top_mfn == NULL) {
p2m_mid_missing_mfn = extend_brk(PAGE_SIZE, PAGE_SIZE);
- p2m_mid_mfn_init(p2m_mid_missing_mfn);
+ p2m_mid_mfn_init(p2m_mid_missing_mfn, p2m_missing);
+ p2m_mid_identity_mfn = extend_brk(PAGE_SIZE, PAGE_SIZE);
+ p2m_mid_mfn_init(p2m_mid_identity_mfn, p2m_identity);
p2m_top_mfn_p = extend_brk(PAGE_SIZE, PAGE_SIZE);
p2m_top_mfn_p_init(p2m_top_mfn_p);
@@ -295,7 +308,8 @@ void __ref xen_build_mfn_list_list(void)
p2m_top_mfn_init(p2m_top_mfn);
} else {
/* Reinitialise, mfn's all change after migration */
- p2m_mid_mfn_init(p2m_mid_missing_mfn);
+ p2m_mid_mfn_init(p2m_mid_missing_mfn, p2m_missing);
+ p2m_mid_mfn_init(p2m_mid_identity_mfn, p2m_identity);
}
for (pfn = 0; pfn < xen_max_p2m_pfn; pfn += P2M_PER_PAGE) {
@@ -327,7 +341,7 @@ void __ref xen_build_mfn_list_list(void)
* it too late.
*/
mid_mfn_p = extend_brk(PAGE_SIZE, PAGE_SIZE);
- p2m_mid_mfn_init(mid_mfn_p);
+ p2m_mid_mfn_init(mid_mfn_p, p2m_missing);
p2m_top_mfn_p[topidx] = mid_mfn_p;
}
@@ -365,16 +379,17 @@ void __init xen_build_dynamic_phys_to_machine(void)
p2m_missing = extend_brk(PAGE_SIZE, PAGE_SIZE);
p2m_init(p2m_missing);
+ p2m_identity = extend_brk(PAGE_SIZE, PAGE_SIZE);
+ p2m_init(p2m_identity);
p2m_mid_missing = extend_brk(PAGE_SIZE, PAGE_SIZE);
- p2m_mid_init(p2m_mid_missing);
+ p2m_mid_init(p2m_mid_missing, p2m_missing);
+ p2m_mid_identity = extend_brk(PAGE_SIZE, PAGE_SIZE);
+ p2m_mid_init(p2m_mid_identity, p2m_identity);
p2m_top = extend_brk(PAGE_SIZE, PAGE_SIZE);
p2m_top_init(p2m_top);
- p2m_identity = extend_brk(PAGE_SIZE, PAGE_SIZE);
- p2m_init(p2m_identity);
-
/*
* The domain builder gives us a pre-constructed p2m array in
* mfn_list for all the pages initially given to us, so we just
@@ -386,7 +401,7 @@ void __init xen_build_dynamic_phys_to_machine(void)
if (p2m_top[topidx] == p2m_mid_missing) {
unsigned long **mid = extend_brk(PAGE_SIZE, PAGE_SIZE);
- p2m_mid_init(mid);
+ p2m_mid_init(mid, p2m_missing);
p2m_top[topidx] = mid;
}
@@ -492,7 +507,7 @@ unsigned long get_phys_to_machine(unsigned long pfn)
unsigned topidx, mididx, idx;
if (unlikely(pfn >= MAX_P2M_PFN))
- return INVALID_P2M_ENTRY;
+ return IDENTITY_FRAME(pfn);
topidx = p2m_top_index(pfn);
mididx = p2m_mid_index(pfn);
@@ -545,7 +560,7 @@ static bool alloc_p2m(unsigned long pfn)
if (!mid)
return false;
- p2m_mid_init(mid);
+ p2m_mid_init(mid, p2m_missing);
if (cmpxchg(top_p, p2m_mid_missing, mid) != p2m_mid_missing)
free_p2m_page(mid);
@@ -565,7 +580,7 @@ static bool alloc_p2m(unsigned long pfn)
if (!mid_mfn)
return false;
- p2m_mid_mfn_init(mid_mfn);
+ p2m_mid_mfn_init(mid_mfn, p2m_missing);
missing_mfn = virt_to_mfn(p2m_mid_missing_mfn);
mid_mfn_mfn = virt_to_mfn(mid_mfn);
@@ -596,7 +611,7 @@ static bool alloc_p2m(unsigned long pfn)
return true;
}
-static bool __init early_alloc_p2m_middle(unsigned long pfn, bool check_boundary)
+static bool __init early_alloc_p2m(unsigned long pfn, bool check_boundary)
{
unsigned topidx, mididx, idx;
unsigned long *p2m;
@@ -638,7 +653,7 @@ static bool __init early_alloc_p2m_middle(unsigned long pfn, bool check_boundary
return true;
}
-static bool __init early_alloc_p2m(unsigned long pfn)
+static bool __init early_alloc_p2m_middle(unsigned long pfn)
{
unsigned topidx = p2m_top_index(pfn);
unsigned long *mid_mfn_p;
@@ -649,7 +664,7 @@ static bool __init early_alloc_p2m(unsigned long pfn)
if (mid == p2m_mid_missing) {
mid = extend_brk(PAGE_SIZE, PAGE_SIZE);
- p2m_mid_init(mid);
+ p2m_mid_init(mid, p2m_missing);
p2m_top[topidx] = mid;
@@ -658,12 +673,12 @@ static bool __init early_alloc_p2m(unsigned long pfn)
/* And the save/restore P2M tables.. */
if (mid_mfn_p == p2m_mid_missing_mfn) {
mid_mfn_p = extend_brk(PAGE_SIZE, PAGE_SIZE);
- p2m_mid_mfn_init(mid_mfn_p);
+ p2m_mid_mfn_init(mid_mfn_p, p2m_missing);
p2m_top_mfn_p[topidx] = mid_mfn_p;
p2m_top_mfn[topidx] = virt_to_mfn(mid_mfn_p);
/* Note: we don't set mid_mfn_p[midix] here,
- * look in early_alloc_p2m_middle */
+ * look in early_alloc_p2m() */
}
return true;
}
@@ -739,7 +754,7 @@ found:
/* This shouldn't happen */
if (WARN_ON(p2m_top[topidx] == p2m_mid_missing))
- early_alloc_p2m(set_pfn);
+ early_alloc_p2m_middle(set_pfn);
if (WARN_ON(p2m_top[topidx][mididx] != p2m_missing))
return false;
@@ -754,13 +769,13 @@ found:
bool __init early_set_phys_to_machine(unsigned long pfn, unsigned long mfn)
{
if (unlikely(!__set_phys_to_machine(pfn, mfn))) {
- if (!early_alloc_p2m(pfn))
+ if (!early_alloc_p2m_middle(pfn))
return false;
if (early_can_reuse_p2m_middle(pfn, mfn))
return __set_phys_to_machine(pfn, mfn);
- if (!early_alloc_p2m_middle(pfn, false /* boundary crossover OK!*/))
+ if (!early_alloc_p2m(pfn, false /* boundary crossover OK!*/))
return false;
if (!__set_phys_to_machine(pfn, mfn))
@@ -769,12 +784,30 @@ bool __init early_set_phys_to_machine(unsigned long pfn, unsigned long mfn)
return true;
}
+
+static void __init early_split_p2m(unsigned long pfn)
+{
+ unsigned long mididx, idx;
+
+ mididx = p2m_mid_index(pfn);
+ idx = p2m_index(pfn);
+
+ /*
+ * Allocate new middle and leaf pages if this pfn lies in the
+ * middle of one.
+ */
+ if (mididx || idx)
+ early_alloc_p2m_middle(pfn);
+ if (idx)
+ early_alloc_p2m(pfn, false);
+}
+
unsigned long __init set_phys_range_identity(unsigned long pfn_s,
unsigned long pfn_e)
{
unsigned long pfn;
- if (unlikely(pfn_s >= MAX_P2M_PFN || pfn_e >= MAX_P2M_PFN))
+ if (unlikely(pfn_s >= MAX_P2M_PFN))
return 0;
if (unlikely(xen_feature(XENFEAT_auto_translated_physmap)))
@@ -783,19 +816,30 @@ unsigned long __init set_phys_range_identity(unsigned long pfn_s,
if (pfn_s > pfn_e)
return 0;
- for (pfn = (pfn_s & ~(P2M_MID_PER_PAGE * P2M_PER_PAGE - 1));
- pfn < ALIGN(pfn_e, (P2M_MID_PER_PAGE * P2M_PER_PAGE));
- pfn += P2M_MID_PER_PAGE * P2M_PER_PAGE)
- {
- WARN_ON(!early_alloc_p2m(pfn));
- }
+ if (pfn_e > MAX_P2M_PFN)
+ pfn_e = MAX_P2M_PFN;
- early_alloc_p2m_middle(pfn_s, true);
- early_alloc_p2m_middle(pfn_e, true);
+ early_split_p2m(pfn_s);
+ early_split_p2m(pfn_e);
+
+ for (pfn = pfn_s; pfn < pfn_e;) {
+ unsigned topidx = p2m_top_index(pfn);
+ unsigned mididx = p2m_mid_index(pfn);
- for (pfn = pfn_s; pfn < pfn_e; pfn++)
if (!__set_phys_to_machine(pfn, IDENTITY_FRAME(pfn)))
break;
+ pfn++;
+
+ /*
+ * If the PFN was set to a middle or leaf identity
+ * page the remainder must also be identity, so skip
+ * ahead to the next middle or leaf entry.
+ */
+ if (p2m_top[topidx] == p2m_mid_identity)
+ pfn = ALIGN(pfn, P2M_MID_PER_PAGE * P2M_PER_PAGE);
+ else if (p2m_top[topidx][mididx] == p2m_identity)
+ pfn = ALIGN(pfn, P2M_PER_PAGE);
+ }
if (!WARN((pfn - pfn_s) != (pfn_e - pfn_s),
"Identity mapping failed. We are %ld short of 1-1 mappings!\n",
@@ -825,8 +869,22 @@ bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn)
/* For sparse holes were the p2m leaf has real PFN along with
* PCI holes, stick in the PFN as the MFN value.
+ *
+ * set_phys_range_identity() will have allocated new middle
+ * and leaf pages as required so an existing p2m_mid_missing
+ * or p2m_missing mean that whole range will be identity so
+ * these can be switched to p2m_mid_identity or p2m_identity.
*/
if (mfn != INVALID_P2M_ENTRY && (mfn & IDENTITY_FRAME_BIT)) {
+ if (p2m_top[topidx] == p2m_mid_identity)
+ return true;
+
+ if (p2m_top[topidx] == p2m_mid_missing) {
+ WARN_ON(cmpxchg(&p2m_top[topidx], p2m_mid_missing,
+ p2m_mid_identity) != p2m_mid_missing);
+ return true;
+ }
+
if (p2m_top[topidx][mididx] == p2m_identity)
return true;
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index 0982233b9b84..210426a26cc0 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -89,10 +89,10 @@ static void __init xen_add_extra_mem(u64 start, u64 size)
for (pfn = PFN_DOWN(start); pfn < xen_max_p2m_pfn; pfn++) {
unsigned long mfn = pfn_to_mfn(pfn);
- if (WARN(mfn == pfn, "Trying to over-write 1-1 mapping (pfn: %lx)\n", pfn))
+ if (WARN_ONCE(mfn == pfn, "Trying to over-write 1-1 mapping (pfn: %lx)\n", pfn))
continue;
- WARN(mfn != INVALID_P2M_ENTRY, "Trying to remove %lx which has %lx mfn!\n",
- pfn, mfn);
+ WARN_ONCE(mfn != INVALID_P2M_ENTRY, "Trying to remove %lx which has %lx mfn!\n",
+ pfn, mfn);
__set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
}
@@ -469,6 +469,15 @@ char * __init xen_memory_setup(void)
}
/*
+ * Set the rest as identity mapped, in case PCI BARs are
+ * located here.
+ *
+ * PFNs above MAX_P2M_PFN are considered identity mapped as
+ * well.
+ */
+ set_phys_range_identity(map[i-1].addr / PAGE_SIZE, ~0ul);
+
+ /*
* In domU, the ISA region is normal, usable memory, but we
* reserve ISA memory anyway because too many things poke
* about in there.
diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c
index 45329c8c226e..c4df9dbd63b7 100644
--- a/arch/x86/xen/suspend.c
+++ b/arch/x86/xen/suspend.c
@@ -12,8 +12,10 @@
#include "xen-ops.h"
#include "mmu.h"
-void xen_arch_pre_suspend(void)
+static void xen_pv_pre_suspend(void)
{
+ xen_mm_pin_all();
+
xen_start_info->store_mfn = mfn_to_pfn(xen_start_info->store_mfn);
xen_start_info->console.domU.mfn =
mfn_to_pfn(xen_start_info->console.domU.mfn);
@@ -26,7 +28,7 @@ void xen_arch_pre_suspend(void)
BUG();
}
-void xen_arch_hvm_post_suspend(int suspend_cancelled)
+static void xen_hvm_post_suspend(int suspend_cancelled)
{
#ifdef CONFIG_XEN_PVHVM
int cpu;
@@ -41,7 +43,7 @@ void xen_arch_hvm_post_suspend(int suspend_cancelled)
#endif
}
-void xen_arch_post_suspend(int suspend_cancelled)
+static void xen_pv_post_suspend(int suspend_cancelled)
{
xen_build_mfn_list_list();
@@ -60,6 +62,21 @@ void xen_arch_post_suspend(int suspend_cancelled)
xen_vcpu_restore();
}
+ xen_mm_unpin_all();
+}
+
+void xen_arch_pre_suspend(void)
+{
+ if (xen_pv_domain())
+ xen_pv_pre_suspend();
+}
+
+void xen_arch_post_suspend(int cancelled)
+{
+ if (xen_pv_domain())
+ xen_pv_post_suspend(cancelled);
+ else
+ xen_hvm_post_suspend(cancelled);
}
static void xen_vcpu_notify_restore(void *data)
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index 1cb6f4c37300..c834d4b231f0 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -31,6 +31,8 @@ void xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn);
void xen_reserve_top(void);
extern unsigned long xen_max_p2m_pfn;
+void xen_mm_pin_all(void);
+void xen_mm_unpin_all(void);
void xen_set_pat(u64);
char * __init xen_memory_setup(void);
OpenPOWER on IntegriCloud