summaryrefslogtreecommitdiffstats
path: root/arch/x86/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--arch/x86/kernel/Makefile1
-rw-r--r--arch/x86/kernel/acpi/sleep.c10
-rw-r--r--arch/x86/kernel/amd_iommu.c2
-rw-r--r--arch/x86/kernel/amd_iommu_init.c2
-rw-r--r--arch/x86/kernel/aperture_64.c1
-rw-r--r--arch/x86/kernel/apic_32.c175
-rw-r--r--arch/x86/kernel/apic_64.c26
-rw-r--r--arch/x86/kernel/asm-offsets_64.c11
-rw-r--r--arch/x86/kernel/bios_uv.c48
-rw-r--r--arch/x86/kernel/cpu/amd.c2
-rw-r--r--arch/x86/kernel/cpu/amd_64.c2
-rw-r--r--arch/x86/kernel/cpu/bugs.c23
-rw-r--r--arch/x86/kernel/cpu/common_64.c15
-rw-r--r--arch/x86/kernel/cpu/intel.c10
-rw-r--r--arch/x86/kernel/cpu/intel_cacheinfo.c7
-rw-r--r--arch/x86/kernel/cpu/mcheck/p4.c4
-rw-r--r--arch/x86/kernel/e820.c33
-rw-r--r--arch/x86/kernel/early-quirks.c5
-rw-r--r--arch/x86/kernel/entry_32.S24
-rw-r--r--arch/x86/kernel/entry_64.S120
-rw-r--r--arch/x86/kernel/genx2apic_uv_x.c23
-rw-r--r--arch/x86/kernel/head64.c11
-rw-r--r--arch/x86/kernel/head_64.S1
-rw-r--r--arch/x86/kernel/io_apic_32.c53
-rw-r--r--arch/x86/kernel/io_apic_64.c41
-rw-r--r--arch/x86/kernel/io_delay.c3
-rw-r--r--arch/x86/kernel/ipi.c6
-rw-r--r--arch/x86/kernel/irq_32.c7
-rw-r--r--arch/x86/kernel/kdebugfs.c8
-rw-r--r--arch/x86/kernel/kprobes.c1
-rw-r--r--arch/x86/kernel/kvmclock.c2
-rw-r--r--arch/x86/kernel/module_64.c10
-rw-r--r--arch/x86/kernel/mpparse.c208
-rw-r--r--arch/x86/kernel/nmi.c11
-rw-r--r--arch/x86/kernel/numaq_32.c197
-rw-r--r--arch/x86/kernel/paravirt.c5
-rw-r--r--arch/x86/kernel/pci-calgary_64.c2
-rw-r--r--arch/x86/kernel/pci-dma.c17
-rw-r--r--arch/x86/kernel/pci-gart_64.c1
-rw-r--r--arch/x86/kernel/pci-nommu.c2
-rw-r--r--arch/x86/kernel/pci-swiotlb_64.c2
-rw-r--r--arch/x86/kernel/process.c5
-rw-r--r--arch/x86/kernel/process_64.c56
-rw-r--r--arch/x86/kernel/ptrace.c151
-rw-r--r--arch/x86/kernel/reboot.c8
-rw-r--r--arch/x86/kernel/setup.c22
-rw-r--r--arch/x86/kernel/signal_32.c8
-rw-r--r--arch/x86/kernel/signal_64.c6
-rw-r--r--arch/x86/kernel/smpboot.c56
-rw-r--r--arch/x86/kernel/smpcommon_32.c1
-rw-r--r--arch/x86/kernel/step.c35
-rw-r--r--arch/x86/kernel/time_32.c1
-rw-r--r--arch/x86/kernel/traps_32.c118
-rw-r--r--arch/x86/kernel/traps_64.c48
-rw-r--r--arch/x86/kernel/visws_quirks.c42
-rw-r--r--arch/x86/kernel/vmi_32.c1
56 files changed, 906 insertions, 784 deletions
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index da140611bb57..b78a17b12810 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -102,6 +102,7 @@ obj-$(CONFIG_OLPC) += olpc.o
# 64 bit specific files
ifeq ($(CONFIG_X86_64),y)
obj-y += genapic_64.o genapic_flat_64.o genx2apic_uv_x.o tlb_uv.o
+ obj-y += bios_uv.o
obj-$(CONFIG_X86_PM_TIMER) += pmtimer_64.o
obj-$(CONFIG_AUDIT) += audit_64.o
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index 868de3d5c39d..a3ddad18aaa3 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -9,6 +9,7 @@
#include <linux/bootmem.h>
#include <linux/dmi.h>
#include <linux/cpumask.h>
+#include <asm/segment.h>
#include "realmode/wakeup.h"
#include "sleep.h"
@@ -23,15 +24,6 @@ static unsigned long acpi_realmode;
static char temp_stack[10240];
#endif
-/* XXX: this macro should move to asm-x86/segment.h and be shared with the
- boot code... */
-#define GDT_ENTRY(flags, base, limit) \
- (((u64)(base & 0xff000000) << 32) | \
- ((u64)flags << 40) | \
- ((u64)(limit & 0x00ff0000) << 32) | \
- ((u64)(base & 0x00ffffff) << 16) | \
- ((u64)(limit & 0x0000ffff)))
-
/**
* acpi_save_state_mem - save kernel state
*
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 8c3deb027d3a..c25210e6ac88 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -23,7 +23,7 @@
#include <linux/scatterlist.h>
#include <linux/iommu-helper.h>
#include <asm/proto.h>
-#include <asm/gart.h>
+#include <asm/iommu.h>
#include <asm/amd_iommu_types.h>
#include <asm/amd_iommu.h>
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index 7661b02d7208..c9d8ff2eb130 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -25,7 +25,7 @@
#include <asm/pci-direct.h>
#include <asm/amd_iommu_types.h>
#include <asm/amd_iommu.h>
-#include <asm/gart.h>
+#include <asm/iommu.h>
/*
* definitions for the ACPI scanning code
diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c
index 9f907806c1a5..44e21826db11 100644
--- a/arch/x86/kernel/aperture_64.c
+++ b/arch/x86/kernel/aperture_64.c
@@ -21,6 +21,7 @@
#include <linux/suspend.h>
#include <asm/e820.h>
#include <asm/io.h>
+#include <asm/iommu.h>
#include <asm/gart.h>
#include <asm/pci-direct.h>
#include <asm/dma.h>
diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c
index a437d027f20b..d6c898358371 100644
--- a/arch/x86/kernel/apic_32.c
+++ b/arch/x86/kernel/apic_32.c
@@ -75,7 +75,7 @@ char system_vectors[NR_VECTORS] = { [0 ... NR_VECTORS-1] = SYS_VECTOR_FREE};
/*
* Debug level, exported for io_apic.c
*/
-int apic_verbosity;
+unsigned int apic_verbosity;
int pic_mode;
@@ -177,7 +177,7 @@ void __cpuinit enable_NMI_through_LVT0(void)
/* Level triggered for 82489DX */
if (!lapic_is_integrated())
v |= APIC_LVT_LEVEL_TRIGGER;
- apic_write_around(APIC_LVT0, v);
+ apic_write(APIC_LVT0, v);
}
/**
@@ -212,9 +212,6 @@ int lapic_get_maxlvt(void)
* this function twice on the boot CPU, once with a bogus timeout
* value, second time for real. The other (noncalibrating) CPUs
* call this function only once, with the real, calibrated value.
- *
- * We do reads before writes even if unnecessary, to get around the
- * P5 APIC double write bug.
*/
static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen)
{
@@ -229,18 +226,18 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen)
if (!irqen)
lvtt_value |= APIC_LVT_MASKED;
- apic_write_around(APIC_LVTT, lvtt_value);
+ apic_write(APIC_LVTT, lvtt_value);
/*
* Divide PICLK by 16
*/
tmp_value = apic_read(APIC_TDCR);
- apic_write_around(APIC_TDCR, (tmp_value
- & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE))
- | APIC_TDR_DIV_16);
+ apic_write(APIC_TDCR,
+ (tmp_value & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) |
+ APIC_TDR_DIV_16);
if (!oneshot)
- apic_write_around(APIC_TMICT, clocks/APIC_DIVISOR);
+ apic_write(APIC_TMICT, clocks / APIC_DIVISOR);
}
/*
@@ -249,7 +246,7 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen)
static int lapic_next_event(unsigned long delta,
struct clock_event_device *evt)
{
- apic_write_around(APIC_TMICT, delta);
+ apic_write(APIC_TMICT, delta);
return 0;
}
@@ -278,7 +275,7 @@ static void lapic_timer_setup(enum clock_event_mode mode,
case CLOCK_EVT_MODE_SHUTDOWN:
v = apic_read(APIC_LVTT);
v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR);
- apic_write_around(APIC_LVTT, v);
+ apic_write(APIC_LVTT, v);
break;
case CLOCK_EVT_MODE_RESUME:
/* Nothing to do here */
@@ -372,12 +369,7 @@ static void __init lapic_cal_handler(struct clock_event_device *dev)
}
}
-/*
- * Setup the boot APIC
- *
- * Calibrate and verify the result.
- */
-void __init setup_boot_APIC_clock(void)
+static int __init calibrate_APIC_clock(void)
{
struct clock_event_device *levt = &__get_cpu_var(lapic_events);
const long pm_100ms = PMTMR_TICKS_PER_SEC/10;
@@ -387,24 +379,6 @@ void __init setup_boot_APIC_clock(void)
long delta, deltapm;
int pm_referenced = 0;
- /*
- * The local apic timer can be disabled via the kernel
- * commandline or from the CPU detection code. Register the lapic
- * timer as a dummy clock event source on SMP systems, so the
- * broadcast mechanism is used. On UP systems simply ignore it.
- */
- if (local_apic_timer_disabled) {
- /* No broadcast on UP ! */
- if (num_possible_cpus() > 1) {
- lapic_clockevent.mult = 1;
- setup_APIC_timer();
- }
- return;
- }
-
- apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n"
- "calibrating APIC timer ...\n");
-
local_irq_disable();
/* Replace the global interrupt handler */
@@ -489,8 +463,6 @@ void __init setup_boot_APIC_clock(void)
calibration_result / (1000000 / HZ),
calibration_result % (1000000 / HZ));
- local_apic_timer_verify_ok = 1;
-
/*
* Do a sanity check on the APIC calibration result
*/
@@ -498,12 +470,11 @@ void __init setup_boot_APIC_clock(void)
local_irq_enable();
printk(KERN_WARNING
"APIC frequency too slow, disabling apic timer\n");
- /* No broadcast on UP ! */
- if (num_possible_cpus() > 1)
- setup_APIC_timer();
- return;
+ return -1;
}
+ local_apic_timer_verify_ok = 1;
+
/* We trust the pm timer based calibration */
if (!pm_referenced) {
apic_printk(APIC_VERBOSE, "... verify APIC timer\n");
@@ -543,22 +514,55 @@ void __init setup_boot_APIC_clock(void)
if (!local_apic_timer_verify_ok) {
printk(KERN_WARNING
"APIC timer disabled due to verification failure.\n");
+ return -1;
+ }
+
+ return 0;
+}
+
+/*
+ * Setup the boot APIC
+ *
+ * Calibrate and verify the result.
+ */
+void __init setup_boot_APIC_clock(void)
+{
+ /*
+ * The local apic timer can be disabled via the kernel
+ * commandline or from the CPU detection code. Register the lapic
+ * timer as a dummy clock event source on SMP systems, so the
+ * broadcast mechanism is used. On UP systems simply ignore it.
+ */
+ if (local_apic_timer_disabled) {
/* No broadcast on UP ! */
- if (num_possible_cpus() == 1)
- return;
- } else {
- /*
- * If nmi_watchdog is set to IO_APIC, we need the
- * PIT/HPET going. Otherwise register lapic as a dummy
- * device.
- */
- if (nmi_watchdog != NMI_IO_APIC)
- lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY;
- else
- printk(KERN_WARNING "APIC timer registered as dummy,"
- " due to nmi_watchdog=%d!\n", nmi_watchdog);
+ if (num_possible_cpus() > 1) {
+ lapic_clockevent.mult = 1;
+ setup_APIC_timer();
+ }
+ return;
}
+ apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n"
+ "calibrating APIC timer ...\n");
+
+ if (calibrate_APIC_clock()) {
+ /* No broadcast on UP ! */
+ if (num_possible_cpus() > 1)
+ setup_APIC_timer();
+ return;
+ }
+
+ /*
+ * If nmi_watchdog is set to IO_APIC, we need the
+ * PIT/HPET going. Otherwise register lapic as a dummy
+ * device.
+ */
+ if (nmi_watchdog != NMI_IO_APIC)
+ lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY;
+ else
+ printk(KERN_WARNING "APIC timer registered as dummy,"
+ " due to nmi_watchdog=%d!\n", nmi_watchdog);
+
/* Setup the lapic or request the broadcast */
setup_APIC_timer();
}
@@ -693,44 +697,44 @@ void clear_local_APIC(void)
*/
if (maxlvt >= 3) {
v = ERROR_APIC_VECTOR; /* any non-zero vector will do */
- apic_write_around(APIC_LVTERR, v | APIC_LVT_MASKED);
+ apic_write(APIC_LVTERR, v | APIC_LVT_MASKED);
}
/*
* Careful: we have to set masks only first to deassert
* any level-triggered sources.
*/
v = apic_read(APIC_LVTT);
- apic_write_around(APIC_LVTT, v | APIC_LVT_MASKED);
+ apic_write(APIC_LVTT, v | APIC_LVT_MASKED);
v = apic_read(APIC_LVT0);
- apic_write_around(APIC_LVT0, v | APIC_LVT_MASKED);
+ apic_write(APIC_LVT0, v | APIC_LVT_MASKED);
v = apic_read(APIC_LVT1);
- apic_write_around(APIC_LVT1, v | APIC_LVT_MASKED);
+ apic_write(APIC_LVT1, v | APIC_LVT_MASKED);
if (maxlvt >= 4) {
v = apic_read(APIC_LVTPC);
- apic_write_around(APIC_LVTPC, v | APIC_LVT_MASKED);
+ apic_write(APIC_LVTPC, v | APIC_LVT_MASKED);
}
/* lets not touch this if we didn't frob it */
#ifdef CONFIG_X86_MCE_P4THERMAL
if (maxlvt >= 5) {
v = apic_read(APIC_LVTTHMR);
- apic_write_around(APIC_LVTTHMR, v | APIC_LVT_MASKED);
+ apic_write(APIC_LVTTHMR, v | APIC_LVT_MASKED);
}
#endif
/*
* Clean APIC state for other OSs:
*/
- apic_write_around(APIC_LVTT, APIC_LVT_MASKED);
- apic_write_around(APIC_LVT0, APIC_LVT_MASKED);
- apic_write_around(APIC_LVT1, APIC_LVT_MASKED);
+ apic_write(APIC_LVTT, APIC_LVT_MASKED);
+ apic_write(APIC_LVT0, APIC_LVT_MASKED);
+ apic_write(APIC_LVT1, APIC_LVT_MASKED);
if (maxlvt >= 3)
- apic_write_around(APIC_LVTERR, APIC_LVT_MASKED);
+ apic_write(APIC_LVTERR, APIC_LVT_MASKED);
if (maxlvt >= 4)
- apic_write_around(APIC_LVTPC, APIC_LVT_MASKED);
+ apic_write(APIC_LVTPC, APIC_LVT_MASKED);
#ifdef CONFIG_X86_MCE_P4THERMAL
if (maxlvt >= 5)
- apic_write_around(APIC_LVTTHMR, APIC_LVT_MASKED);
+ apic_write(APIC_LVTTHMR, APIC_LVT_MASKED);
#endif
/* Integrated APIC (!82489DX) ? */
if (lapic_is_integrated()) {
@@ -756,7 +760,7 @@ void disable_local_APIC(void)
*/
value = apic_read(APIC_SPIV);
value &= ~APIC_SPIV_APIC_ENABLED;
- apic_write_around(APIC_SPIV, value);
+ apic_write(APIC_SPIV, value);
/*
* When LAPIC was disabled by the BIOS and enabled by the kernel,
@@ -865,8 +869,8 @@ void __init sync_Arb_IDs(void)
apic_wait_icr_idle();
apic_printk(APIC_DEBUG, "Synchronizing Arb IDs.\n");
- apic_write_around(APIC_ICR, APIC_DEST_ALLINC | APIC_INT_LEVELTRIG
- | APIC_DM_INIT);
+ apic_write(APIC_ICR,
+ APIC_DEST_ALLINC | APIC_INT_LEVELTRIG | APIC_DM_INIT);
}
/*
@@ -902,16 +906,16 @@ void __init init_bsp_APIC(void)
else
value |= APIC_SPIV_FOCUS_DISABLED;
value |= SPURIOUS_APIC_VECTOR;
- apic_write_around(APIC_SPIV, value);
+ apic_write(APIC_SPIV, value);
/*
* Set up the virtual wire mode.
*/
- apic_write_around(APIC_LVT0, APIC_DM_EXTINT);
+ apic_write(APIC_LVT0, APIC_DM_EXTINT);
value = APIC_DM_NMI;
if (!lapic_is_integrated()) /* 82489DX */
value |= APIC_LVT_LEVEL_TRIGGER;
- apic_write_around(APIC_LVT1, value);
+ apic_write(APIC_LVT1, value);
}
static void __cpuinit lapic_setup_esr(void)
@@ -926,7 +930,7 @@ static void __cpuinit lapic_setup_esr(void)
/* enables sending errors */
value = ERROR_APIC_VECTOR;
- apic_write_around(APIC_LVTERR, value);
+ apic_write(APIC_LVTERR, value);
/*
* spec says clear errors after enabling vector.
*/
@@ -989,7 +993,7 @@ void __cpuinit setup_local_APIC(void)
*/
value = apic_read(APIC_TASKPRI);
value &= ~APIC_TPRI_MASK;
- apic_write_around(APIC_TASKPRI, value);
+ apic_write(APIC_TASKPRI, value);
/*
* After a crash, we no longer service the interrupts and a pending
@@ -1047,7 +1051,7 @@ void __cpuinit setup_local_APIC(void)
* Set spurious IRQ vector
*/
value |= SPURIOUS_APIC_VECTOR;
- apic_write_around(APIC_SPIV, value);
+ apic_write(APIC_SPIV, value);
/*
* Set up LVT0, LVT1:
@@ -1069,7 +1073,7 @@ void __cpuinit setup_local_APIC(void)
apic_printk(APIC_VERBOSE, "masked ExtINT on CPU#%d\n",
smp_processor_id());
}
- apic_write_around(APIC_LVT0, value);
+ apic_write(APIC_LVT0, value);
/*
* only the BP should see the LINT1 NMI signal, obviously.
@@ -1080,7 +1084,7 @@ void __cpuinit setup_local_APIC(void)
value = APIC_DM_NMI | APIC_LVT_MASKED;
if (!integrated) /* 82489DX */
value |= APIC_LVT_LEVEL_TRIGGER;
- apic_write_around(APIC_LVT1, value);
+ apic_write(APIC_LVT1, value);
}
void __cpuinit end_local_APIC_setup(void)
@@ -1091,7 +1095,7 @@ void __cpuinit end_local_APIC_setup(void)
/* Disable the local apic timer */
value = apic_read(APIC_LVTT);
value |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR);
- apic_write_around(APIC_LVTT, value);
+ apic_write(APIC_LVTT, value);
setup_apic_nmi_watchdog(NULL);
apic_pm_activate();
@@ -1214,9 +1218,6 @@ int apic_version[MAX_APICS];
int __init APIC_init_uniprocessor(void)
{
- if (disable_apic)
- clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
-
if (!smp_found_config && !cpu_has_apic)
return -1;
@@ -1419,7 +1420,7 @@ void disconnect_bsp_APIC(int virt_wire_setup)
value &= ~APIC_VECTOR_MASK;
value |= APIC_SPIV_APIC_ENABLED;
value |= 0xf;
- apic_write_around(APIC_SPIV, value);
+ apic_write(APIC_SPIV, value);
if (!virt_wire_setup) {
/*
@@ -1432,10 +1433,10 @@ void disconnect_bsp_APIC(int virt_wire_setup)
APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED);
value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXTINT);
- apic_write_around(APIC_LVT0, value);
+ apic_write(APIC_LVT0, value);
} else {
/* Disable LVT0 */
- apic_write_around(APIC_LVT0, APIC_LVT_MASKED);
+ apic_write(APIC_LVT0, APIC_LVT_MASKED);
}
/*
@@ -1449,7 +1450,7 @@ void disconnect_bsp_APIC(int virt_wire_setup)
APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED);
value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI);
- apic_write_around(APIC_LVT1, value);
+ apic_write(APIC_LVT1, value);
}
}
@@ -1700,7 +1701,7 @@ early_param("lapic", parse_lapic);
static int __init parse_nolapic(char *arg)
{
disable_apic = 1;
- clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
+ setup_clear_cpu_cap(X86_FEATURE_APIC);
return 0;
}
early_param("nolapic", parse_nolapic);
diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c
index 1e3d32e27c14..7f1f030da7ee 100644
--- a/arch/x86/kernel/apic_64.c
+++ b/arch/x86/kernel/apic_64.c
@@ -54,7 +54,7 @@ EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok);
/*
* Debug level, exported for io_apic.c
*/
-int apic_verbosity;
+unsigned int apic_verbosity;
/* Have we found an MP table */
int smp_found_config;
@@ -314,7 +314,7 @@ static void setup_APIC_timer(void)
#define TICK_COUNT 100000000
-static void __init calibrate_APIC_clock(void)
+static int __init calibrate_APIC_clock(void)
{
unsigned apic, apic_start;
unsigned long tsc, tsc_start;
@@ -368,6 +368,17 @@ static void __init calibrate_APIC_clock(void)
clockevent_delta2ns(0xF, &lapic_clockevent);
calibration_result = result / HZ;
+
+ /*
+ * Do a sanity check on the APIC calibration result
+ */
+ if (calibration_result < (1000000 / HZ)) {
+ printk(KERN_WARNING
+ "APIC frequency too slow, disabling apic timer\n");
+ return -1;
+ }
+
+ return 0;
}
/*
@@ -394,14 +405,7 @@ void __init setup_boot_APIC_clock(void)
}
printk(KERN_INFO "Using local APIC timer interrupts.\n");
- calibrate_APIC_clock();
-
- /*
- * Do a sanity check on the APIC calibration result
- */
- if (calibration_result < (1000000 / HZ)) {
- printk(KERN_WARNING
- "APIC frequency too slow, disabling apic timer\n");
+ if (calibrate_APIC_clock()) {
/* No broadcast on UP ! */
if (num_possible_cpus() > 1)
setup_APIC_timer();
@@ -1337,7 +1341,7 @@ early_param("apic", apic_set_verbosity);
static __init int setup_disableapic(char *str)
{
disable_apic = 1;
- clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
+ setup_clear_cpu_cap(X86_FEATURE_APIC);
return 0;
}
early_param("disableapic", setup_disableapic);
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index bacf5deeec2d..aa89387006fe 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -18,6 +18,8 @@
#include <asm/ia32.h>
#include <asm/bootparam.h>
+#include <xen/interface/xen.h>
+
#define __NO_STUBS 1
#undef __SYSCALL
#undef _ASM_X86_64_UNISTD_H_
@@ -131,5 +133,14 @@ int main(void)
OFFSET(BP_loadflags, boot_params, hdr.loadflags);
OFFSET(BP_hardware_subarch, boot_params, hdr.hardware_subarch);
OFFSET(BP_version, boot_params, hdr.version);
+
+ BLANK();
+ DEFINE(PAGE_SIZE_asm, PAGE_SIZE);
+#ifdef CONFIG_XEN
+ BLANK();
+ OFFSET(XEN_vcpu_info_mask, vcpu_info, evtchn_upcall_mask);
+ OFFSET(XEN_vcpu_info_pending, vcpu_info, evtchn_upcall_pending);
+#undef ENTRY
+#endif
return 0;
}
diff --git a/arch/x86/kernel/bios_uv.c b/arch/x86/kernel/bios_uv.c
new file mode 100644
index 000000000000..c639bd55391c
--- /dev/null
+++ b/arch/x86/kernel/bios_uv.c
@@ -0,0 +1,48 @@
+/*
+ * BIOS run time interface routines.
+ *
+ * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <asm/uv/bios.h>
+
+const char *
+x86_bios_strerror(long status)
+{
+ const char *str;
+ switch (status) {
+ case 0: str = "Call completed without error"; break;
+ case -1: str = "Not implemented"; break;
+ case -2: str = "Invalid argument"; break;
+ case -3: str = "Call completed with error"; break;
+ default: str = "Unknown BIOS status code"; break;
+ }
+ return str;
+}
+
+long
+x86_bios_freq_base(unsigned long which, unsigned long *ticks_per_second,
+ unsigned long *drift_info)
+{
+ struct uv_bios_retval isrv;
+
+ BIOS_CALL(isrv, BIOS_FREQ_BASE, which, 0, 0, 0, 0, 0, 0);
+ *ticks_per_second = isrv.v0;
+ *drift_info = isrv.v1;
+ return isrv.status;
+}
+EXPORT_SYMBOL_GPL(x86_bios_freq_base);
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 81a07ca65d44..cae9cabc3031 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -24,8 +24,6 @@
extern void vide(void);
__asm__(".align 4\nvide: ret");
-int force_mwait __cpuinitdata;
-
static void __cpuinit early_init_amd(struct cpuinfo_x86 *c)
{
if (cpuid_eax(0x80000000) >= 0x80000007) {
diff --git a/arch/x86/kernel/cpu/amd_64.c b/arch/x86/kernel/cpu/amd_64.c
index 7c36fb8a28d4..d1692b2a41ff 100644
--- a/arch/x86/kernel/cpu/amd_64.c
+++ b/arch/x86/kernel/cpu/amd_64.c
@@ -115,6 +115,8 @@ static void __cpuinit early_init_amd(struct cpuinfo_x86 *c)
/* c->x86_power is 8000_0007 edx. Bit 8 is constant TSC */
if (c->x86_power & (1<<8))
set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
+
+ set_cpu_cap(c, X86_FEATURE_SYSCALL32);
}
static void __cpuinit init_amd(struct cpuinfo_x86 *c)
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 1b1c56bb338f..c9b58a806e85 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -131,13 +131,7 @@ static void __init check_popad(void)
* (for due to lack of "invlpg" and working WP on a i386)
* - In order to run on anything without a TSC, we need to be
* compiled for a i486.
- * - In order to support the local APIC on a buggy Pentium machine,
- * we need to be compiled with CONFIG_X86_GOOD_APIC disabled,
- * which happens implicitly if compiled for a Pentium or lower
- * (unless an advanced selection of CPU features is used) as an
- * otherwise config implies a properly working local APIC without
- * the need to do extra reads from the APIC.
-*/
+ */
static void __init check_config(void)
{
@@ -151,21 +145,6 @@ static void __init check_config(void)
if (boot_cpu_data.x86 == 3)
panic("Kernel requires i486+ for 'invlpg' and other features");
#endif
-
-/*
- * If we were told we had a good local APIC, check for buggy Pentia,
- * i.e. all B steppings and the C2 stepping of P54C when using their
- * integrated APIC (see 11AP erratum in "Pentium Processor
- * Specification Update").
- */
-#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_GOOD_APIC)
- if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL
- && cpu_has_apic
- && boot_cpu_data.x86 == 5
- && boot_cpu_data.x86_model == 2
- && (boot_cpu_data.x86_mask < 6 || boot_cpu_data.x86_mask == 11))
- panic("Kernel compiled for PMMX+, assumes a local APIC without the read-before-write bug!");
-#endif
}
diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c
index 7b8cc72feb40..dd6e3f15017e 100644
--- a/arch/x86/kernel/cpu/common_64.c
+++ b/arch/x86/kernel/cpu/common_64.c
@@ -7,15 +7,13 @@
#include <linux/module.h>
#include <linux/kgdb.h>
#include <linux/topology.h>
-#include <linux/string.h>
#include <linux/delay.h>
#include <linux/smp.h>
-#include <linux/module.h>
#include <linux/percpu.h>
-#include <asm/processor.h>
#include <asm/i387.h>
#include <asm/msr.h>
#include <asm/io.h>
+#include <asm/linkage.h>
#include <asm/mmu_context.h>
#include <asm/mtrr.h>
#include <asm/mce.h>
@@ -305,7 +303,6 @@ static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c)
c->x86_capability[2] = cpuid_edx(0x80860001);
}
- c->extended_cpuid_level = cpuid_eax(0x80000000);
if (c->extended_cpuid_level >= 0x80000007)
c->x86_power = cpuid_edx(0x80000007);
@@ -316,18 +313,11 @@ static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c)
c->x86_phys_bits = eax & 0xff;
}
- /* Assume all 64-bit CPUs support 32-bit syscall */
- set_cpu_cap(c, X86_FEATURE_SYSCALL32);
-
if (c->x86_vendor != X86_VENDOR_UNKNOWN &&
cpu_devs[c->x86_vendor]->c_early_init)
cpu_devs[c->x86_vendor]->c_early_init(c);
validate_pat_support(c);
-
- /* early_param could clear that, but recall get it set again */
- if (disable_apic)
- clear_cpu_cap(c, X86_FEATURE_APIC);
}
/*
@@ -517,8 +507,7 @@ void pda_init(int cpu)
}
char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ +
- DEBUG_STKSZ]
-__attribute__((section(".bss.page_aligned")));
+ DEBUG_STKSZ] __page_aligned_bss;
extern asmlinkage void ignore_sysret(void);
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 70609efdf1da..b75f2569b8f8 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -227,6 +227,16 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
if (cpu_has_bts)
ds_init_intel(c);
+ /*
+ * See if we have a good local APIC by checking for buggy Pentia,
+ * i.e. all B steppings and the C2 stepping of P54C when using their
+ * integrated APIC (see 11AP erratum in "Pentium Processor
+ * Specification Update").
+ */
+ if (cpu_has_apic && (c->x86<<8 | c->x86_model<<4) == 0x520 &&
+ (c->x86_mask < 0x6 || c->x86_mask == 0xb))
+ set_cpu_cap(c, X86_FEATURE_11AP);
+
#ifdef CONFIG_X86_NUMAQ
numaq_tsc_disable();
#endif
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 2c8afafa18e8..ff517f0b8cc4 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -780,15 +780,14 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
}
kobject_put(per_cpu(cache_kobject, cpu));
cpuid4_cache_sysfs_exit(cpu);
- break;
+ return retval;
}
kobject_uevent(&(this_object->kobj), KOBJ_ADD);
}
- if (!retval)
- cpu_set(cpu, cache_dev_map);
+ cpu_set(cpu, cache_dev_map);
kobject_uevent(per_cpu(cache_kobject, cpu), KOBJ_ADD);
- return retval;
+ return 0;
}
static void __cpuinit cache_remove_dev(struct sys_device * sys_dev)
diff --git a/arch/x86/kernel/cpu/mcheck/p4.c b/arch/x86/kernel/cpu/mcheck/p4.c
index eef001ad3bde..9b60fce09f75 100644
--- a/arch/x86/kernel/cpu/mcheck/p4.c
+++ b/arch/x86/kernel/cpu/mcheck/p4.c
@@ -102,7 +102,7 @@ static void intel_init_thermal(struct cpuinfo_x86 *c)
/* The temperature transition interrupt handler setup */
h = THERMAL_APIC_VECTOR; /* our delivery vector */
h |= (APIC_DM_FIXED | APIC_LVT_MASKED); /* we'll mask till we're ready */
- apic_write_around(APIC_LVTTHMR, h);
+ apic_write(APIC_LVTTHMR, h);
rdmsr(MSR_IA32_THERM_INTERRUPT, l, h);
wrmsr(MSR_IA32_THERM_INTERRUPT, l | 0x03 , h);
@@ -114,7 +114,7 @@ static void intel_init_thermal(struct cpuinfo_x86 *c)
wrmsr(MSR_IA32_MISC_ENABLE, l | (1<<3), h);
l = apic_read(APIC_LVTTHMR);
- apic_write_around(APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
+ apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
printk(KERN_INFO "CPU%d: Thermal monitoring enabled\n", cpu);
/* enable thermal throttle processing */
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 28c29180b380..9af89078f7bb 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -877,7 +877,8 @@ void __init early_res_to_bootmem(u64 start, u64 end)
for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++)
count++;
- printk(KERN_INFO "(%d early reservations) ==> bootmem\n", count);
+ printk(KERN_INFO "(%d early reservations) ==> bootmem [%010llx - %010llx]\n",
+ count, start, end);
for (i = 0; i < count; i++) {
struct early_res *r = &early_res[i];
printk(KERN_INFO " #%d [%010llx - %010llx] %16s", i,
@@ -1298,11 +1299,6 @@ void __init e820_reserve_resources(void)
}
}
-/*
- * Non-standard memory setup can be specified via this quirk:
- */
-char * (*arch_memory_setup_quirk)(void);
-
char *__init default_machine_specific_memory_setup(void)
{
char *who = "BIOS-e820";
@@ -1343,8 +1339,8 @@ char *__init default_machine_specific_memory_setup(void)
char *__init __attribute__((weak)) machine_specific_memory_setup(void)
{
- if (arch_memory_setup_quirk) {
- char *who = arch_memory_setup_quirk();
+ if (x86_quirks->arch_memory_setup) {
+ char *who = x86_quirks->arch_memory_setup();
if (who)
return who;
@@ -1367,24 +1363,3 @@ void __init setup_memory_map(void)
printk(KERN_INFO "BIOS-provided physical RAM map:\n");
e820_print_map(who);
}
-
-#ifdef CONFIG_X86_64
-int __init arch_get_ram_range(int slot, u64 *addr, u64 *size)
-{
- int i;
-
- if (slot < 0 || slot >= e820.nr_map)
- return -1;
- for (i = slot; i < e820.nr_map; i++) {
- if (e820.map[i].type != E820_RAM)
- continue;
- break;
- }
- if (i == e820.nr_map || e820.map[i].addr > (max_pfn << PAGE_SHIFT))
- return -1;
- *addr = e820.map[i].addr;
- *size = min_t(u64, e820.map[i].size + e820.map[i].addr,
- max_pfn << PAGE_SHIFT) - *addr;
- return i + 1;
-}
-#endif
diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c
index a0e11c0cc872..4353cf5e6fac 100644
--- a/arch/x86/kernel/early-quirks.c
+++ b/arch/x86/kernel/early-quirks.c
@@ -16,10 +16,7 @@
#include <asm/dma.h>
#include <asm/io_apic.h>
#include <asm/apic.h>
-
-#ifdef CONFIG_GART_IOMMU
-#include <asm/gart.h>
-#endif
+#include <asm/iommu.h>
static void __init fix_hypertransport_config(int num, int slot, int func)
{
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 6bc07f0f1202..cdfd94cc6b14 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -332,7 +332,7 @@ sysenter_past_esp:
GET_THREAD_INFO(%ebp)
/* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */
- testw $(_TIF_SYSCALL_EMU|_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT),TI_flags(%ebp)
+ testw $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp)
jnz syscall_trace_entry
cmpl $(nr_syscalls), %eax
jae syscall_badsys
@@ -370,7 +370,7 @@ ENTRY(system_call)
GET_THREAD_INFO(%ebp)
# system call tracing in operation / emulation
/* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */
- testw $(_TIF_SYSCALL_EMU|_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT),TI_flags(%ebp)
+ testw $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp)
jnz syscall_trace_entry
cmpl $(nr_syscalls), %eax
jae syscall_badsys
@@ -383,10 +383,6 @@ syscall_exit:
# setting need_resched or sigpending
# between sampling and the iret
TRACE_IRQS_OFF
- testl $X86_EFLAGS_TF,PT_EFLAGS(%esp) # If tracing set singlestep flag on exit
- jz no_singlestep
- orl $_TIF_SINGLESTEP,TI_flags(%ebp)
-no_singlestep:
movl TI_flags(%ebp), %ecx
testw $_TIF_ALLWORK_MASK, %cx # current->work
jne syscall_exit_work
@@ -514,12 +510,8 @@ END(work_pending)
syscall_trace_entry:
movl $-ENOSYS,PT_EAX(%esp)
movl %esp, %eax
- xorl %edx,%edx
- call do_syscall_trace
- cmpl $0, %eax
- jne resume_userspace # ret != 0 -> running under PTRACE_SYSEMU,
- # so must skip actual syscall
- movl PT_ORIG_EAX(%esp), %eax
+ call syscall_trace_enter
+ /* What it returned is what we'll actually use. */
cmpl $(nr_syscalls), %eax
jnae syscall_call
jmp syscall_exit
@@ -528,14 +520,13 @@ END(syscall_trace_entry)
# perform syscall exit tracing
ALIGN
syscall_exit_work:
- testb $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP), %cl
+ testb $_TIF_WORK_SYSCALL_EXIT, %cl
jz work_pending
TRACE_IRQS_ON
- ENABLE_INTERRUPTS(CLBR_ANY) # could let do_syscall_trace() call
+ ENABLE_INTERRUPTS(CLBR_ANY) # could let syscall_trace_leave() call
# schedule() instead
movl %esp, %eax
- movl $1, %edx
- call do_syscall_trace
+ call syscall_trace_leave
jmp resume_userspace
END(syscall_exit_work)
CFI_ENDPROC
@@ -1024,6 +1015,7 @@ ENDPROC(kernel_thread_helper)
ENTRY(xen_sysenter_target)
RING0_INT_FRAME
addl $5*4, %esp /* remove xen-provided frame */
+ CFI_ADJUST_CFA_OFFSET -5*4
jmp sysenter_past_esp
CFI_ENDPROC
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index ae63e584c340..8410e26f4183 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -349,8 +349,7 @@ ENTRY(system_call_after_swapgs)
movq %rcx,RIP-ARGOFFSET(%rsp)
CFI_REL_OFFSET rip,RIP-ARGOFFSET
GET_THREAD_INFO(%rcx)
- testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP), \
- TI_flags(%rcx)
+ testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%rcx)
jnz tracesys
cmpq $__NR_syscall_max,%rax
ja badsys
@@ -430,7 +429,12 @@ tracesys:
FIXUP_TOP_OF_STACK %rdi
movq %rsp,%rdi
call syscall_trace_enter
- LOAD_ARGS ARGOFFSET /* reload args from stack in case ptrace changed it */
+ /*
+ * Reload arg registers from stack in case ptrace changed them.
+ * We don't reload %rax because syscall_trace_enter() returned
+ * the value it wants us to use in the table lookup.
+ */
+ LOAD_ARGS ARGOFFSET, 1
RESTORE_REST
cmpq $__NR_syscall_max,%rax
ja int_ret_from_sys_call /* RAX(%rsp) set to -ENOSYS above */
@@ -483,7 +487,7 @@ int_very_careful:
ENABLE_INTERRUPTS(CLBR_NONE)
SAVE_REST
/* Check for syscall exit trace */
- testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edx
+ testl $_TIF_WORK_SYSCALL_EXIT,%edx
jz int_signal
pushq %rdi
CFI_ADJUST_CFA_OFFSET 8
@@ -491,7 +495,7 @@ int_very_careful:
call syscall_trace_leave
popq %rdi
CFI_ADJUST_CFA_OFFSET -8
- andl $~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edi
+ andl $~(_TIF_WORK_SYSCALL_EXIT|_TIF_SYSCALL_EMU),%edi
jmp int_restore_rest
int_signal:
@@ -1189,6 +1193,7 @@ END(device_not_available)
/* runs on exception stack */
KPROBE_ENTRY(debug)
INTR_FRAME
+ PARAVIRT_ADJUST_EXCEPTION_FRAME
pushq $0
CFI_ADJUST_CFA_OFFSET 8
paranoidentry do_debug, DEBUG_STACK
@@ -1198,6 +1203,7 @@ KPROBE_END(debug)
/* runs on exception stack */
KPROBE_ENTRY(nmi)
INTR_FRAME
+ PARAVIRT_ADJUST_EXCEPTION_FRAME
pushq $-1
CFI_ADJUST_CFA_OFFSET 8
paranoidentry do_nmi, 0, 0
@@ -1211,6 +1217,7 @@ KPROBE_END(nmi)
KPROBE_ENTRY(int3)
INTR_FRAME
+ PARAVIRT_ADJUST_EXCEPTION_FRAME
pushq $0
CFI_ADJUST_CFA_OFFSET 8
paranoidentry do_int3, DEBUG_STACK
@@ -1237,6 +1244,7 @@ END(coprocessor_segment_overrun)
/* runs on exception stack */
ENTRY(double_fault)
XCPT_FRAME
+ PARAVIRT_ADJUST_EXCEPTION_FRAME
paranoidentry do_double_fault
jmp paranoid_exit1
CFI_ENDPROC
@@ -1253,6 +1261,7 @@ END(segment_not_present)
/* runs on exception stack */
ENTRY(stack_segment)
XCPT_FRAME
+ PARAVIRT_ADJUST_EXCEPTION_FRAME
paranoidentry do_stack_segment
jmp paranoid_exit1
CFI_ENDPROC
@@ -1278,6 +1287,7 @@ END(spurious_interrupt_bug)
/* runs on exception stack */
ENTRY(machine_check)
INTR_FRAME
+ PARAVIRT_ADJUST_EXCEPTION_FRAME
pushq $0
CFI_ADJUST_CFA_OFFSET 8
paranoidentry do_machine_check
@@ -1312,3 +1322,103 @@ KPROBE_ENTRY(ignore_sysret)
sysret
CFI_ENDPROC
ENDPROC(ignore_sysret)
+
+#ifdef CONFIG_XEN
+ENTRY(xen_hypervisor_callback)
+ zeroentry xen_do_hypervisor_callback
+END(xen_hypervisor_callback)
+
+/*
+# A note on the "critical region" in our callback handler.
+# We want to avoid stacking callback handlers due to events occurring
+# during handling of the last event. To do this, we keep events disabled
+# until we've done all processing. HOWEVER, we must enable events before
+# popping the stack frame (can't be done atomically) and so it would still
+# be possible to get enough handler activations to overflow the stack.
+# Although unlikely, bugs of that kind are hard to track down, so we'd
+# like to avoid the possibility.
+# So, on entry to the handler we detect whether we interrupted an
+# existing activation in its critical region -- if so, we pop the current
+# activation and restart the handler using the previous one.
+*/
+ENTRY(xen_do_hypervisor_callback) # do_hypervisor_callback(struct *pt_regs)
+ CFI_STARTPROC
+/* Since we don't modify %rdi, evtchn_do_upall(struct *pt_regs) will
+ see the correct pointer to the pt_regs */
+ movq %rdi, %rsp # we don't return, adjust the stack frame
+ CFI_ENDPROC
+ CFI_DEFAULT_STACK
+11: incl %gs:pda_irqcount
+ movq %rsp,%rbp
+ CFI_DEF_CFA_REGISTER rbp
+ cmovzq %gs:pda_irqstackptr,%rsp
+ pushq %rbp # backlink for old unwinder
+ call xen_evtchn_do_upcall
+ popq %rsp
+ CFI_DEF_CFA_REGISTER rsp
+ decl %gs:pda_irqcount
+ jmp error_exit
+ CFI_ENDPROC
+END(do_hypervisor_callback)
+
+/*
+# Hypervisor uses this for application faults while it executes.
+# We get here for two reasons:
+# 1. Fault while reloading DS, ES, FS or GS
+# 2. Fault while executing IRET
+# Category 1 we do not need to fix up as Xen has already reloaded all segment
+# registers that could be reloaded and zeroed the others.
+# Category 2 we fix up by killing the current process. We cannot use the
+# normal Linux return path in this case because if we use the IRET hypercall
+# to pop the stack frame we end up in an infinite loop of failsafe callbacks.
+# We distinguish between categories by comparing each saved segment register
+# with its current contents: any discrepancy means we in category 1.
+*/
+ENTRY(xen_failsafe_callback)
+ framesz = (RIP-0x30) /* workaround buggy gas */
+ _frame framesz
+ CFI_REL_OFFSET rcx, 0
+ CFI_REL_OFFSET r11, 8
+ movw %ds,%cx
+ cmpw %cx,0x10(%rsp)
+ CFI_REMEMBER_STATE
+ jne 1f
+ movw %es,%cx
+ cmpw %cx,0x18(%rsp)
+ jne 1f
+ movw %fs,%cx
+ cmpw %cx,0x20(%rsp)
+ jne 1f
+ movw %gs,%cx
+ cmpw %cx,0x28(%rsp)
+ jne 1f
+ /* All segments match their saved values => Category 2 (Bad IRET). */
+ movq (%rsp),%rcx
+ CFI_RESTORE rcx
+ movq 8(%rsp),%r11
+ CFI_RESTORE r11
+ addq $0x30,%rsp
+ CFI_ADJUST_CFA_OFFSET -0x30
+ pushq $0
+ CFI_ADJUST_CFA_OFFSET 8
+ pushq %r11
+ CFI_ADJUST_CFA_OFFSET 8
+ pushq %rcx
+ CFI_ADJUST_CFA_OFFSET 8
+ jmp general_protection
+ CFI_RESTORE_STATE
+1: /* Segment mismatch => Category 1 (Bad segment). Retry the IRET. */
+ movq (%rsp),%rcx
+ CFI_RESTORE rcx
+ movq 8(%rsp),%r11
+ CFI_RESTORE r11
+ addq $0x30,%rsp
+ CFI_ADJUST_CFA_OFFSET -0x30
+ pushq $0
+ CFI_ADJUST_CFA_OFFSET 8
+ SAVE_ALL
+ jmp error_exit
+ CFI_ENDPROC
+END(xen_failsafe_callback)
+
+#endif /* CONFIG_XEN */
diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c
index 711f11c30b06..3c3929340692 100644
--- a/arch/x86/kernel/genx2apic_uv_x.c
+++ b/arch/x86/kernel/genx2apic_uv_x.c
@@ -24,6 +24,7 @@
#include <asm/pgtable.h>
#include <asm/uv/uv_mmrs.h>
#include <asm/uv/uv_hub.h>
+#include <asm/uv/bios.h>
DEFINE_PER_CPU(struct uv_hub_info_s, __uv_hub_info);
EXPORT_PER_CPU_SYMBOL_GPL(__uv_hub_info);
@@ -40,6 +41,9 @@ EXPORT_SYMBOL_GPL(uv_cpu_to_blade);
short uv_possible_blades;
EXPORT_SYMBOL_GPL(uv_possible_blades);
+unsigned long sn_rtc_cycles_per_second;
+EXPORT_SYMBOL(sn_rtc_cycles_per_second);
+
/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */
static cpumask_t uv_target_cpus(void)
@@ -272,6 +276,23 @@ static __init void map_mmioh_high(int max_pnode)
map_high("MMIOH", mmioh.s.base, shift, map_uc);
}
+static __init void uv_rtc_init(void)
+{
+ long status, ticks_per_sec, drift;
+
+ status =
+ x86_bios_freq_base(BIOS_FREQ_BASE_REALTIME_CLOCK, &ticks_per_sec,
+ &drift);
+ if (status != 0 || ticks_per_sec < 100000) {
+ printk(KERN_WARNING
+ "unable to determine platform RTC clock frequency, "
+ "guessing.\n");
+ /* BIOS gives wrong value for clock freq. so guess */
+ sn_rtc_cycles_per_second = 1000000000000UL / 30000UL;
+ } else
+ sn_rtc_cycles_per_second = ticks_per_sec;
+}
+
static __init void uv_system_init(void)
{
union uvh_si_addr_map_config_u m_n_config;
@@ -326,6 +347,8 @@ static __init void uv_system_init(void)
gnode_upper = (((unsigned long)node_id.s.node_id) &
~((1 << n_val) - 1)) << m_val;
+ uv_rtc_init();
+
for_each_present_cpu(cpu) {
nid = cpu_to_node(cpu);
pnode = uv_apicid_to_pnode(per_cpu(x86_cpu_to_apicid, cpu));
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index c97819829146..1b318e903bf6 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -39,6 +39,13 @@ static struct x8664_pda *__cpu_pda[NR_CPUS] __initdata;
static struct x8664_pda *__cpu_pda[NR_CPUS] __read_mostly;
#endif
+void __init x86_64_init_pda(void)
+{
+ _cpu_pda = __cpu_pda;
+ cpu_pda(0) = &_boot_cpu_pda;
+ pda_init(0);
+}
+
static void __init zap_identity_mappings(void)
{
pgd_t *pgd = pgd_offset_k(0UL);
@@ -102,9 +109,7 @@ void __init x86_64_start_kernel(char * real_mode_data)
early_printk("Kernel alive\n");
- _cpu_pda = __cpu_pda;
- cpu_pda(0) = &_boot_cpu_pda;
- pda_init(0);
+ x86_64_init_pda();
early_printk("Kernel really alive\n");
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index b07ac7b217cb..db3280afe886 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -407,6 +407,7 @@ ENTRY(phys_base)
/* This must match the first entry in level2_kernel_pgt */
.quad 0x0000000000000000
+#include "../../x86/xen/xen-head.S"
.section .bss, "aw", @nobits
.align L1_CACHE_BYTES
diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c
index 558abf4c796a..de9aa0e3a9c5 100644
--- a/arch/x86/kernel/io_apic_32.c
+++ b/arch/x86/kernel/io_apic_32.c
@@ -756,7 +756,7 @@ void send_IPI_self(int vector)
/*
* Send the IPI. The write to APIC_ICR fires this off.
*/
- apic_write_around(APIC_ICR, cfg);
+ apic_write(APIC_ICR, cfg);
}
#endif /* !CONFIG_SMP */
@@ -2030,7 +2030,7 @@ static void mask_lapic_irq(unsigned int irq)
unsigned long v;
v = apic_read(APIC_LVT0);
- apic_write_around(APIC_LVT0, v | APIC_LVT_MASKED);
+ apic_write(APIC_LVT0, v | APIC_LVT_MASKED);
}
static void unmask_lapic_irq(unsigned int irq)
@@ -2038,7 +2038,7 @@ static void unmask_lapic_irq(unsigned int irq)
unsigned long v;
v = apic_read(APIC_LVT0);
- apic_write_around(APIC_LVT0, v & ~APIC_LVT_MASKED);
+ apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED);
}
static struct irq_chip lapic_chip __read_mostly = {
@@ -2168,7 +2168,7 @@ static inline void __init check_timer(void)
* The AEOI mode will finish them in the 8259A
* automatically.
*/
- apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
+ apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
init_8259A(1);
timer_ack = (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver));
@@ -2177,8 +2177,9 @@ static inline void __init check_timer(void)
pin2 = ioapic_i8259.pin;
apic2 = ioapic_i8259.apic;
- printk(KERN_INFO "..TIMER: vector=0x%02X apic1=%d pin1=%d apic2=%d pin2=%d\n",
- vector, apic1, pin1, apic2, pin2);
+ apic_printk(APIC_QUIET, KERN_INFO "..TIMER: vector=0x%02X "
+ "apic1=%d pin1=%d apic2=%d pin2=%d\n",
+ vector, apic1, pin1, apic2, pin2);
/*
* Some BIOS writers are clueless and report the ExtINTA
@@ -2216,12 +2217,13 @@ static inline void __init check_timer(void)
}
clear_IO_APIC_pin(apic1, pin1);
if (!no_pin1)
- printk(KERN_ERR "..MP-BIOS bug: "
- "8254 timer not connected to IO-APIC\n");
+ apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: "
+ "8254 timer not connected to IO-APIC\n");
- printk(KERN_INFO "...trying to set up timer (IRQ0) "
- "through the 8259A ... ");
- printk("\n..... (found pin %d) ...", pin2);
+ apic_printk(APIC_QUIET, KERN_INFO "...trying to set up timer "
+ "(IRQ0) through the 8259A ...\n");
+ apic_printk(APIC_QUIET, KERN_INFO
+ "..... (found apic %d pin %d) ...\n", apic2, pin2);
/*
* legacy devices should be connected to IO APIC #0
*/
@@ -2230,7 +2232,7 @@ static inline void __init check_timer(void)
unmask_IO_APIC_irq(0);
enable_8259A_irq(0);
if (timer_irq_works()) {
- printk("works.\n");
+ apic_printk(APIC_QUIET, KERN_INFO "....... works.\n");
timer_through_8259 = 1;
if (nmi_watchdog == NMI_IO_APIC) {
disable_8259A_irq(0);
@@ -2244,44 +2246,47 @@ static inline void __init check_timer(void)
*/
disable_8259A_irq(0);
clear_IO_APIC_pin(apic2, pin2);
- printk(" failed.\n");
+ apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n");
}
if (nmi_watchdog == NMI_IO_APIC) {
- printk(KERN_WARNING "timer doesn't work through the IO-APIC - disabling NMI Watchdog!\n");
+ apic_printk(APIC_QUIET, KERN_WARNING "timer doesn't work "
+ "through the IO-APIC - disabling NMI Watchdog!\n");
nmi_watchdog = NMI_NONE;
}
timer_ack = 0;
- printk(KERN_INFO "...trying to set up timer as Virtual Wire IRQ...");
+ apic_printk(APIC_QUIET, KERN_INFO
+ "...trying to set up timer as Virtual Wire IRQ...\n");
lapic_register_intr(0, vector);
- apic_write_around(APIC_LVT0, APIC_DM_FIXED | vector); /* Fixed mode */
+ apic_write(APIC_LVT0, APIC_DM_FIXED | vector); /* Fixed mode */
enable_8259A_irq(0);
if (timer_irq_works()) {
- printk(" works.\n");
+ apic_printk(APIC_QUIET, KERN_INFO "..... works.\n");
goto out;
}
disable_8259A_irq(0);
- apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | vector);
- printk(" failed.\n");
+ apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | vector);
+ apic_printk(APIC_QUIET, KERN_INFO "..... failed.\n");
- printk(KERN_INFO "...trying to set up timer as ExtINT IRQ...");
+ apic_printk(APIC_QUIET, KERN_INFO
+ "...trying to set up timer as ExtINT IRQ...\n");
init_8259A(0);
make_8259A_irq(0);
- apic_write_around(APIC_LVT0, APIC_DM_EXTINT);
+ apic_write(APIC_LVT0, APIC_DM_EXTINT);
unlock_ExtINT_logic();
if (timer_irq_works()) {
- printk(" works.\n");
+ apic_printk(APIC_QUIET, KERN_INFO "..... works.\n");
goto out;
}
- printk(" failed :(.\n");
+ apic_printk(APIC_QUIET, KERN_INFO "..... failed :(.\n");
panic("IO-APIC + timer doesn't work! Boot with apic=debug and send a "
- "report. Then try booting with the 'noapic' option");
+ "report. Then try booting with the 'noapic' option.\n");
out:
local_irq_restore(flags);
}
diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c
index 6510cde36b35..64a46affd858 100644
--- a/arch/x86/kernel/io_apic_64.c
+++ b/arch/x86/kernel/io_apic_64.c
@@ -45,6 +45,7 @@
#include <asm/proto.h>
#include <asm/acpi.h>
#include <asm/dma.h>
+#include <asm/i8259.h>
#include <asm/nmi.h>
#include <asm/msidef.h>
#include <asm/hypertransport.h>
@@ -1696,8 +1697,9 @@ static inline void __init check_timer(void)
pin2 = ioapic_i8259.pin;
apic2 = ioapic_i8259.apic;
- apic_printk(APIC_VERBOSE,KERN_INFO "..TIMER: vector=0x%02X apic1=%d pin1=%d apic2=%d pin2=%d\n",
- cfg->vector, apic1, pin1, apic2, pin2);
+ apic_printk(APIC_QUIET, KERN_INFO "..TIMER: vector=0x%02X "
+ "apic1=%d pin1=%d apic2=%d pin2=%d\n",
+ cfg->vector, apic1, pin1, apic2, pin2);
/*
* Some BIOS writers are clueless and report the ExtINTA
@@ -1735,14 +1737,13 @@ static inline void __init check_timer(void)
}
clear_IO_APIC_pin(apic1, pin1);
if (!no_pin1)
- apic_printk(APIC_QUIET,KERN_ERR "..MP-BIOS bug: "
+ apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: "
"8254 timer not connected to IO-APIC\n");
- apic_printk(APIC_VERBOSE,KERN_INFO
- "...trying to set up timer (IRQ0) "
- "through the 8259A ... ");
- apic_printk(APIC_VERBOSE,"\n..... (found apic %d pin %d) ...",
- apic2, pin2);
+ apic_printk(APIC_QUIET, KERN_INFO "...trying to set up timer "
+ "(IRQ0) through the 8259A ...\n");
+ apic_printk(APIC_QUIET, KERN_INFO
+ "..... (found apic %d pin %d) ...\n", apic2, pin2);
/*
* legacy devices should be connected to IO APIC #0
*/
@@ -1751,7 +1752,7 @@ static inline void __init check_timer(void)
unmask_IO_APIC_irq(0);
enable_8259A_irq(0);
if (timer_irq_works()) {
- apic_printk(APIC_VERBOSE," works.\n");
+ apic_printk(APIC_QUIET, KERN_INFO "....... works.\n");
timer_through_8259 = 1;
if (nmi_watchdog == NMI_IO_APIC) {
disable_8259A_irq(0);
@@ -1765,29 +1766,32 @@ static inline void __init check_timer(void)
*/
disable_8259A_irq(0);
clear_IO_APIC_pin(apic2, pin2);
- apic_printk(APIC_VERBOSE," failed.\n");
+ apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n");
}
if (nmi_watchdog == NMI_IO_APIC) {
- printk(KERN_WARNING "timer doesn't work through the IO-APIC - disabling NMI Watchdog!\n");
+ apic_printk(APIC_QUIET, KERN_WARNING "timer doesn't work "
+ "through the IO-APIC - disabling NMI Watchdog!\n");
nmi_watchdog = NMI_NONE;
}
- apic_printk(APIC_VERBOSE, KERN_INFO "...trying to set up timer as Virtual Wire IRQ...");
+ apic_printk(APIC_QUIET, KERN_INFO
+ "...trying to set up timer as Virtual Wire IRQ...\n");
lapic_register_intr(0);
apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector); /* Fixed mode */
enable_8259A_irq(0);
if (timer_irq_works()) {
- apic_printk(APIC_VERBOSE," works.\n");
+ apic_printk(APIC_QUIET, KERN_INFO "..... works.\n");
goto out;
}
disable_8259A_irq(0);
apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector);
- apic_printk(APIC_VERBOSE," failed.\n");
+ apic_printk(APIC_QUIET, KERN_INFO "..... failed.\n");
- apic_printk(APIC_VERBOSE, KERN_INFO "...trying to set up timer as ExtINT IRQ...");
+ apic_printk(APIC_QUIET, KERN_INFO
+ "...trying to set up timer as ExtINT IRQ...\n");
init_8259A(0);
make_8259A_irq(0);
@@ -1796,11 +1800,12 @@ static inline void __init check_timer(void)
unlock_ExtINT_logic();
if (timer_irq_works()) {
- apic_printk(APIC_VERBOSE," works.\n");
+ apic_printk(APIC_QUIET, KERN_INFO "..... works.\n");
goto out;
}
- apic_printk(APIC_VERBOSE," failed :(.\n");
- panic("IO-APIC + timer doesn't work! Try using the 'noapic' kernel parameter\n");
+ apic_printk(APIC_QUIET, KERN_INFO "..... failed :(.\n");
+ panic("IO-APIC + timer doesn't work! Boot with apic=debug and send a "
+ "report. Then try booting with the 'noapic' option.\n");
out:
local_irq_restore(flags);
}
diff --git a/arch/x86/kernel/io_delay.c b/arch/x86/kernel/io_delay.c
index 5921e5f0a640..1c3a66a67f83 100644
--- a/arch/x86/kernel/io_delay.c
+++ b/arch/x86/kernel/io_delay.c
@@ -103,6 +103,9 @@ void __init io_delay_init(void)
static int __init io_delay_param(char *s)
{
+ if (!s)
+ return -EINVAL;
+
if (!strcmp(s, "0x80"))
io_delay_type = CONFIG_IO_DELAY_TYPE_0X80;
else if (!strcmp(s, "0xed"))
diff --git a/arch/x86/kernel/ipi.c b/arch/x86/kernel/ipi.c
index 9d98cda39ad9..3f7537b669d3 100644
--- a/arch/x86/kernel/ipi.c
+++ b/arch/x86/kernel/ipi.c
@@ -70,7 +70,7 @@ void __send_IPI_shortcut(unsigned int shortcut, int vector)
/*
* Send the IPI. The write to APIC_ICR fires this off.
*/
- apic_write_around(APIC_ICR, cfg);
+ apic_write(APIC_ICR, cfg);
}
void send_IPI_self(int vector)
@@ -98,7 +98,7 @@ static inline void __send_IPI_dest_field(unsigned long mask, int vector)
* prepare target chip field
*/
cfg = __prepare_ICR2(mask);
- apic_write_around(APIC_ICR2, cfg);
+ apic_write(APIC_ICR2, cfg);
/*
* program the ICR
@@ -108,7 +108,7 @@ static inline void __send_IPI_dest_field(unsigned long mask, int vector)
/*
* Send the IPI. The write to APIC_ICR fires this off.
*/
- apic_write_around(APIC_ICR, cfg);
+ apic_write(APIC_ICR, cfg);
}
/*
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index 47a6f6f12478..1cf8c1fcc088 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -83,11 +83,8 @@ union irq_ctx {
static union irq_ctx *hardirq_ctx[NR_CPUS] __read_mostly;
static union irq_ctx *softirq_ctx[NR_CPUS] __read_mostly;
-static char softirq_stack[NR_CPUS * THREAD_SIZE]
- __attribute__((__section__(".bss.page_aligned")));
-
-static char hardirq_stack[NR_CPUS * THREAD_SIZE]
- __attribute__((__section__(".bss.page_aligned")));
+static char softirq_stack[NR_CPUS * THREAD_SIZE] __page_aligned_bss;
+static char hardirq_stack[NR_CPUS * THREAD_SIZE] __page_aligned_bss;
static void call_on_stack(void *func, void *stack)
{
diff --git a/arch/x86/kernel/kdebugfs.c b/arch/x86/kernel/kdebugfs.c
index c03205991718..f2d43bc75514 100644
--- a/arch/x86/kernel/kdebugfs.c
+++ b/arch/x86/kernel/kdebugfs.c
@@ -12,9 +12,13 @@
#include <linux/init.h>
#include <linux/io.h>
#include <linux/mm.h>
+#include <linux/module.h>
#include <asm/setup.h>
+struct dentry *arch_debugfs_dir;
+EXPORT_SYMBOL(arch_debugfs_dir);
+
#ifdef CONFIG_DEBUG_BOOT_PARAMS
struct setup_data_node {
u64 paddr;
@@ -209,6 +213,10 @@ static int __init arch_kdebugfs_init(void)
{
int error = 0;
+ arch_debugfs_dir = debugfs_create_dir("x86", NULL);
+ if (!arch_debugfs_dir)
+ return -ENOMEM;
+
#ifdef CONFIG_DEBUG_BOOT_PARAMS
error = boot_params_kdebugfs_init();
#endif
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index b8c6743a13da..43c019f85f0d 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -860,7 +860,6 @@ static int __kprobes post_kprobe_handler(struct pt_regs *regs)
resume_execution(cur, regs, kcb);
regs->flags |= kcb->kprobe_saved_flags;
- trace_hardirqs_fixup_flags(regs->flags);
if ((kcb->kprobe_status != KPROBE_REENTER) && cur->post_handler) {
kcb->kprobe_status = KPROBE_HIT_SSDONE;
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index 87edf1ceb1df..d02def06ca91 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -113,7 +113,7 @@ static void kvm_setup_secondary_clock(void)
#endif
#ifdef CONFIG_SMP
-void __init kvm_smp_prepare_boot_cpu(void)
+static void __init kvm_smp_prepare_boot_cpu(void)
{
WARN_ON(kvm_register_clock("primary cpu clock"));
native_smp_prepare_boot_cpu();
diff --git a/arch/x86/kernel/module_64.c b/arch/x86/kernel/module_64.c
index a888e67f5874..0e867676b5a5 100644
--- a/arch/x86/kernel/module_64.c
+++ b/arch/x86/kernel/module_64.c
@@ -150,7 +150,8 @@ int module_finalize(const Elf_Ehdr *hdr,
const Elf_Shdr *sechdrs,
struct module *me)
{
- const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL;
+ const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL,
+ *para = NULL;
char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) {
@@ -160,6 +161,8 @@ int module_finalize(const Elf_Ehdr *hdr,
alt = s;
if (!strcmp(".smp_locks", secstrings + s->sh_name))
locks= s;
+ if (!strcmp(".parainstructions", secstrings + s->sh_name))
+ para = s;
}
if (alt) {
@@ -175,6 +178,11 @@ int module_finalize(const Elf_Ehdr *hdr,
tseg, tseg + text->sh_size);
}
+ if (para) {
+ void *pseg = (void *)para->sh_addr;
+ apply_paravirt(pseg, pseg + para->sh_size);
+ }
+
return module_bug_finalize(hdr, sechdrs, me);
}
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 3b25e49380c6..6ae005ccaed8 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -27,6 +27,7 @@
#include <asm/bios_ebda.h>
#include <asm/e820.h>
#include <asm/trampoline.h>
+#include <asm/setup.h>
#include <mach_apic.h>
#ifdef CONFIG_X86_32
@@ -48,76 +49,6 @@ static int __init mpf_checksum(unsigned char *mp, int len)
return sum & 0xFF;
}
-#ifdef CONFIG_X86_NUMAQ
-int found_numaq;
-/*
- * Have to match translation table entries to main table entries by counter
- * hence the mpc_record variable .... can't see a less disgusting way of
- * doing this ....
- */
-struct mpc_config_translation {
- unsigned char mpc_type;
- unsigned char trans_len;
- unsigned char trans_type;
- unsigned char trans_quad;
- unsigned char trans_global;
- unsigned char trans_local;
- unsigned short trans_reserved;
-};
-
-
-static int mpc_record;
-static struct mpc_config_translation *translation_table[MAX_MPC_ENTRY]
- __cpuinitdata;
-
-static inline int generate_logical_apicid(int quad, int phys_apicid)
-{
- return (quad << 4) + (phys_apicid ? phys_apicid << 1 : 1);
-}
-
-
-static inline int mpc_apic_id(struct mpc_config_processor *m,
- struct mpc_config_translation *translation_record)
-{
- int quad = translation_record->trans_quad;
- int logical_apicid = generate_logical_apicid(quad, m->mpc_apicid);
-
- printk(KERN_DEBUG "Processor #%d %u:%u APIC version %d (quad %d, apic %d)\n",
- m->mpc_apicid,
- (m->mpc_cpufeature & CPU_FAMILY_MASK) >> 8,
- (m->mpc_cpufeature & CPU_MODEL_MASK) >> 4,
- m->mpc_apicver, quad, logical_apicid);
- return logical_apicid;
-}
-
-int mp_bus_id_to_node[MAX_MP_BUSSES];
-
-int mp_bus_id_to_local[MAX_MP_BUSSES];
-
-static void mpc_oem_bus_info(struct mpc_config_bus *m, char *name,
- struct mpc_config_translation *translation)
-{
- int quad = translation->trans_quad;
- int local = translation->trans_local;
-
- mp_bus_id_to_node[m->mpc_busid] = quad;
- mp_bus_id_to_local[m->mpc_busid] = local;
- printk(KERN_INFO "Bus #%d is %s (node %d)\n",
- m->mpc_busid, name, quad);
-}
-
-int quad_local_to_mp_bus_id [NR_CPUS/4][4];
-static void mpc_oem_pci_bus(struct mpc_config_bus *m,
- struct mpc_config_translation *translation)
-{
- int quad = translation->trans_quad;
- int local = translation->trans_local;
-
- quad_local_to_mp_bus_id[quad][local] = m->mpc_busid;
-}
-
-#endif
-
static void __cpuinit MP_processor_info(struct mpc_config_processor *m)
{
int apicid;
@@ -127,14 +58,12 @@ static void __cpuinit MP_processor_info(struct mpc_config_processor *m)
disabled_cpus++;
return;
}
-#ifdef CONFIG_X86_NUMAQ
- if (found_numaq)
- apicid = mpc_apic_id(m, translation_table[mpc_record]);
+
+ if (x86_quirks->mpc_apic_id)
+ apicid = x86_quirks->mpc_apic_id(m);
else
apicid = m->mpc_apicid;
-#else
- apicid = m->mpc_apicid;
-#endif
+
if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) {
bootup_cpu = " (Bootup-CPU)";
boot_cpu_physical_apicid = m->mpc_apicid;
@@ -151,12 +80,10 @@ static void __init MP_bus_info(struct mpc_config_bus *m)
memcpy(str, m->mpc_bustype, 6);
str[6] = 0;
-#ifdef CONFIG_X86_NUMAQ
- if (found_numaq)
- mpc_oem_bus_info(m, str, translation_table[mpc_record]);
-#else
- printk(KERN_INFO "Bus #%d is %s\n", m->mpc_busid, str);
-#endif
+ if (x86_quirks->mpc_oem_bus_info)
+ x86_quirks->mpc_oem_bus_info(m, str);
+ else
+ printk(KERN_INFO "Bus #%d is %s\n", m->mpc_busid, str);
#if MAX_MP_BUSSES < 256
if (m->mpc_busid >= MAX_MP_BUSSES) {
@@ -173,10 +100,9 @@ static void __init MP_bus_info(struct mpc_config_bus *m)
mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA;
#endif
} else if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI) - 1) == 0) {
-#ifdef CONFIG_X86_NUMAQ
- if (found_numaq)
- mpc_oem_pci_bus(m, translation_table[mpc_record]);
-#endif
+ if (x86_quirks->mpc_oem_pci_bus)
+ x86_quirks->mpc_oem_pci_bus(m);
+
clear_bit(m->mpc_busid, mp_bus_not_pci);
#if defined(CONFIG_EISA) || defined (CONFIG_MCA)
mp_bus_id_to_type[m->mpc_busid] = MP_BUS_PCI;
@@ -316,83 +242,6 @@ static void __init MP_lintsrc_info(struct mpc_config_lintsrc *m)
m->mpc_srcbusirq, m->mpc_destapic, m->mpc_destapiclint);
}
-#ifdef CONFIG_X86_NUMAQ
-static void __init MP_translation_info(struct mpc_config_translation *m)
-{
- printk(KERN_INFO
- "Translation: record %d, type %d, quad %d, global %d, local %d\n",
- mpc_record, m->trans_type, m->trans_quad, m->trans_global,
- m->trans_local);
-
- if (mpc_record >= MAX_MPC_ENTRY)
- printk(KERN_ERR "MAX_MPC_ENTRY exceeded!\n");
- else
- translation_table[mpc_record] = m; /* stash this for later */
- if (m->trans_quad < MAX_NUMNODES && !node_online(m->trans_quad))
- node_set_online(m->trans_quad);
-}
-
-/*
- * Read/parse the MPC oem tables
- */
-
-static void __init smp_read_mpc_oem(struct mp_config_oemtable *oemtable,
- unsigned short oemsize)
-{
- int count = sizeof(*oemtable); /* the header size */
- unsigned char *oemptr = ((unsigned char *)oemtable) + count;
-
- mpc_record = 0;
- printk(KERN_INFO "Found an OEM MPC table at %8p - parsing it ... \n",
- oemtable);
- if (memcmp(oemtable->oem_signature, MPC_OEM_SIGNATURE, 4)) {
- printk(KERN_WARNING
- "SMP mpc oemtable: bad signature [%c%c%c%c]!\n",
- oemtable->oem_signature[0], oemtable->oem_signature[1],
- oemtable->oem_signature[2], oemtable->oem_signature[3]);
- return;
- }
- if (mpf_checksum((unsigned char *)oemtable, oemtable->oem_length)) {
- printk(KERN_WARNING "SMP oem mptable: checksum error!\n");
- return;
- }
- while (count < oemtable->oem_length) {
- switch (*oemptr) {
- case MP_TRANSLATION:
- {
- struct mpc_config_translation *m =
- (struct mpc_config_translation *)oemptr;
- MP_translation_info(m);
- oemptr += sizeof(*m);
- count += sizeof(*m);
- ++mpc_record;
- break;
- }
- default:
- {
- printk(KERN_WARNING
- "Unrecognised OEM table entry type! - %d\n",
- (int)*oemptr);
- return;
- }
- }
- }
-}
-
-void numaq_mps_oem_check(struct mp_config_table *mpc, char *oem,
- char *productid)
-{
- if (strncmp(oem, "IBM NUMA", 8))
- printk("Warning! Not a NUMA-Q system!\n");
- else
- found_numaq = 1;
-
- if (mpc->mpc_oemptr)
- smp_read_mpc_oem((struct mp_config_oemtable *)mpc->mpc_oemptr,
- mpc->mpc_oemsize);
-}
-#endif /* CONFIG_X86_NUMAQ */
-
/*
* Read/parse the MPC
*/
@@ -457,7 +306,6 @@ static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early)
} else
mps_oem_check(mpc, oem, str);
#endif
-
/* save the local APIC address, it might be non-default */
if (!acpi_lapic)
mp_lapic_addr = mpc->mpc_lapic;
@@ -465,12 +313,17 @@ static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early)
if (early)
return 1;
+ if (mpc->mpc_oemptr && x86_quirks->smp_read_mpc_oem) {
+ struct mp_config_oemtable *oem_table = (struct mp_config_oemtable *)(unsigned long)mpc->mpc_oemptr;
+ x86_quirks->smp_read_mpc_oem(oem_table, mpc->mpc_oemsize);
+ }
+
/*
* Now process the configuration blocks.
*/
-#ifdef CONFIG_X86_NUMAQ
- mpc_record = 0;
-#endif
+ if (x86_quirks->mpc_record)
+ *x86_quirks->mpc_record = 0;
+
while (count < mpc->mpc_length) {
switch (*mpt) {
case MP_PROCESSOR:
@@ -536,9 +389,8 @@ static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early)
count = mpc->mpc_length;
break;
}
-#ifdef CONFIG_X86_NUMAQ
- ++mpc_record;
-#endif
+ if (x86_quirks->mpc_record)
+ (*x86_quirks->mpc_record)++;
}
#ifdef CONFIG_X86_GENERICARCH
@@ -726,20 +578,14 @@ static inline void __init construct_default_ISA_mptable(int mpc_default_type)
static struct intel_mp_floating *mpf_found;
/*
- * Machine specific quirk for finding the SMP config before other setup
- * activities destroy the table:
- */
-int (*mach_get_smp_config_quirk)(unsigned int early);
-
-/*
* Scan the memory blocks for an SMP configuration block.
*/
static void __init __get_smp_config(unsigned int early)
{
struct intel_mp_floating *mpf = mpf_found;
- if (mach_get_smp_config_quirk) {
- if (mach_get_smp_config_quirk(early))
+ if (x86_quirks->mach_get_smp_config) {
+ if (x86_quirks->mach_get_smp_config(early))
return;
}
if (acpi_lapic && early)
@@ -899,14 +745,12 @@ static int __init smp_scan_config(unsigned long base, unsigned long length,
return 0;
}
-int (*mach_find_smp_config_quirk)(unsigned int reserve);
-
static void __init __find_smp_config(unsigned int reserve)
{
unsigned int address;
- if (mach_find_smp_config_quirk) {
- if (mach_find_smp_config_quirk(reserve))
+ if (x86_quirks->mach_find_smp_config) {
+ if (x86_quirks->mach_find_smp_config(reserve))
return;
}
/*
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index ec024b3baad0..ac6d51222e7d 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -263,7 +263,7 @@ late_initcall(init_lapic_nmi_sysfs);
static void __acpi_nmi_enable(void *__unused)
{
- apic_write_around(APIC_LVT0, APIC_DM_NMI);
+ apic_write(APIC_LVT0, APIC_DM_NMI);
}
/*
@@ -277,7 +277,7 @@ void acpi_nmi_enable(void)
static void __acpi_nmi_disable(void *__unused)
{
- apic_write_around(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED);
+ apic_write(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED);
}
/*
@@ -448,6 +448,13 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason)
#ifdef CONFIG_SYSCTL
+static int __init setup_unknown_nmi_panic(char *str)
+{
+ unknown_nmi_panic = 1;
+ return 1;
+}
+__setup("unknown_nmi_panic", setup_unknown_nmi_panic);
+
static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu)
{
unsigned char reason = get_nmi_reason();
diff --git a/arch/x86/kernel/numaq_32.c b/arch/x86/kernel/numaq_32.c
index a23e8233b9ac..b8c45610b20a 100644
--- a/arch/x86/kernel/numaq_32.c
+++ b/arch/x86/kernel/numaq_32.c
@@ -33,6 +33,7 @@
#include <asm/processor.h>
#include <asm/mpspec.h>
#include <asm/e820.h>
+#include <asm/setup.h>
#define MB_TO_PAGES(addr) ((addr) << (20 - PAGE_SHIFT))
@@ -71,6 +72,188 @@ static void __init smp_dump_qct(void)
}
}
+
+void __init numaq_tsc_disable(void)
+{
+ if (!found_numaq)
+ return;
+
+ if (num_online_nodes() > 1) {
+ printk(KERN_DEBUG "NUMAQ: disabling TSC\n");
+ setup_clear_cpu_cap(X86_FEATURE_TSC);
+ }
+}
+
+static int __init numaq_pre_time_init(void)
+{
+ numaq_tsc_disable();
+ return 0;
+}
+
+int found_numaq;
+/*
+ * Have to match translation table entries to main table entries by counter
+ * hence the mpc_record variable .... can't see a less disgusting way of
+ * doing this ....
+ */
+struct mpc_config_translation {
+ unsigned char mpc_type;
+ unsigned char trans_len;
+ unsigned char trans_type;
+ unsigned char trans_quad;
+ unsigned char trans_global;
+ unsigned char trans_local;
+ unsigned short trans_reserved;
+};
+
+/* x86_quirks member */
+static int mpc_record;
+static struct mpc_config_translation *translation_table[MAX_MPC_ENTRY]
+ __cpuinitdata;
+
+static inline int generate_logical_apicid(int quad, int phys_apicid)
+{
+ return (quad << 4) + (phys_apicid ? phys_apicid << 1 : 1);
+}
+
+/* x86_quirks member */
+static int mpc_apic_id(struct mpc_config_processor *m)
+{
+ int quad = translation_table[mpc_record]->trans_quad;
+ int logical_apicid = generate_logical_apicid(quad, m->mpc_apicid);
+
+ printk(KERN_DEBUG "Processor #%d %u:%u APIC version %d (quad %d, apic %d)\n",
+ m->mpc_apicid,
+ (m->mpc_cpufeature & CPU_FAMILY_MASK) >> 8,
+ (m->mpc_cpufeature & CPU_MODEL_MASK) >> 4,
+ m->mpc_apicver, quad, logical_apicid);
+ return logical_apicid;
+}
+
+int mp_bus_id_to_node[MAX_MP_BUSSES];
+
+int mp_bus_id_to_local[MAX_MP_BUSSES];
+
+/* x86_quirks member */
+static void mpc_oem_bus_info(struct mpc_config_bus *m, char *name)
+{
+ int quad = translation_table[mpc_record]->trans_quad;
+ int local = translation_table[mpc_record]->trans_local;
+
+ mp_bus_id_to_node[m->mpc_busid] = quad;
+ mp_bus_id_to_local[m->mpc_busid] = local;
+ printk(KERN_INFO "Bus #%d is %s (node %d)\n",
+ m->mpc_busid, name, quad);
+}
+
+int quad_local_to_mp_bus_id [NR_CPUS/4][4];
+
+/* x86_quirks member */
+static void mpc_oem_pci_bus(struct mpc_config_bus *m)
+{
+ int quad = translation_table[mpc_record]->trans_quad;
+ int local = translation_table[mpc_record]->trans_local;
+
+ quad_local_to_mp_bus_id[quad][local] = m->mpc_busid;
+}
+
+static void __init MP_translation_info(struct mpc_config_translation *m)
+{
+ printk(KERN_INFO
+ "Translation: record %d, type %d, quad %d, global %d, local %d\n",
+ mpc_record, m->trans_type, m->trans_quad, m->trans_global,
+ m->trans_local);
+
+ if (mpc_record >= MAX_MPC_ENTRY)
+ printk(KERN_ERR "MAX_MPC_ENTRY exceeded!\n");
+ else
+ translation_table[mpc_record] = m; /* stash this for later */
+ if (m->trans_quad < MAX_NUMNODES && !node_online(m->trans_quad))
+ node_set_online(m->trans_quad);
+}
+
+static int __init mpf_checksum(unsigned char *mp, int len)
+{
+ int sum = 0;
+
+ while (len--)
+ sum += *mp++;
+
+ return sum & 0xFF;
+}
+
+/*
+ * Read/parse the MPC oem tables
+ */
+
+static void __init smp_read_mpc_oem(struct mp_config_oemtable *oemtable,
+ unsigned short oemsize)
+{
+ int count = sizeof(*oemtable); /* the header size */
+ unsigned char *oemptr = ((unsigned char *)oemtable) + count;
+
+ mpc_record = 0;
+ printk(KERN_INFO "Found an OEM MPC table at %8p - parsing it ... \n",
+ oemtable);
+ if (memcmp(oemtable->oem_signature, MPC_OEM_SIGNATURE, 4)) {
+ printk(KERN_WARNING
+ "SMP mpc oemtable: bad signature [%c%c%c%c]!\n",
+ oemtable->oem_signature[0], oemtable->oem_signature[1],
+ oemtable->oem_signature[2], oemtable->oem_signature[3]);
+ return;
+ }
+ if (mpf_checksum((unsigned char *)oemtable, oemtable->oem_length)) {
+ printk(KERN_WARNING "SMP oem mptable: checksum error!\n");
+ return;
+ }
+ while (count < oemtable->oem_length) {
+ switch (*oemptr) {
+ case MP_TRANSLATION:
+ {
+ struct mpc_config_translation *m =
+ (struct mpc_config_translation *)oemptr;
+ MP_translation_info(m);
+ oemptr += sizeof(*m);
+ count += sizeof(*m);
+ ++mpc_record;
+ break;
+ }
+ default:
+ {
+ printk(KERN_WARNING
+ "Unrecognised OEM table entry type! - %d\n",
+ (int)*oemptr);
+ return;
+ }
+ }
+ }
+}
+
+static struct x86_quirks numaq_x86_quirks __initdata = {
+ .arch_pre_time_init = numaq_pre_time_init,
+ .arch_time_init = NULL,
+ .arch_pre_intr_init = NULL,
+ .arch_memory_setup = NULL,
+ .arch_intr_init = NULL,
+ .arch_trap_init = NULL,
+ .mach_get_smp_config = NULL,
+ .mach_find_smp_config = NULL,
+ .mpc_record = &mpc_record,
+ .mpc_apic_id = mpc_apic_id,
+ .mpc_oem_bus_info = mpc_oem_bus_info,
+ .mpc_oem_pci_bus = mpc_oem_pci_bus,
+ .smp_read_mpc_oem = smp_read_mpc_oem,
+};
+
+void numaq_mps_oem_check(struct mp_config_table *mpc, char *oem,
+ char *productid)
+{
+ if (strncmp(oem, "IBM NUMA", 8))
+ printk("Warning! Not a NUMA-Q system!\n");
+ else
+ found_numaq = 1;
+}
+
static __init void early_check_numaq(void)
{
/*
@@ -82,6 +265,9 @@ static __init void early_check_numaq(void)
*/
if (smp_found_config)
early_get_smp_config();
+
+ if (found_numaq)
+ x86_quirks = &numaq_x86_quirks;
}
int __init get_memcfg_numaq(void)
@@ -92,14 +278,3 @@ int __init get_memcfg_numaq(void)
smp_dump_qct();
return 1;
}
-
-void __init numaq_tsc_disable(void)
-{
- if (!found_numaq)
- return;
-
- if (num_online_nodes() > 1) {
- printk(KERN_DEBUG "NUMAQ: disabling TSC\n");
- setup_clear_cpu_cap(X86_FEATURE_TSC);
- }
-}
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index e0f571d58c19..b4564d089b43 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -29,6 +29,7 @@
#include <asm/desc.h>
#include <asm/setup.h>
#include <asm/arch_hooks.h>
+#include <asm/pgtable.h>
#include <asm/time.h>
#include <asm/pgalloc.h>
#include <asm/irq.h>
@@ -361,7 +362,6 @@ struct pv_cpu_ops pv_cpu_ops = {
struct pv_apic_ops pv_apic_ops = {
#ifdef CONFIG_X86_LOCAL_APIC
.apic_write = native_apic_write,
- .apic_write_atomic = native_apic_write_atomic,
.apic_read = native_apic_read,
.setup_boot_clock = setup_boot_APIC_clock,
.setup_secondary_clock = setup_secondary_APIC_clock,
@@ -373,6 +373,9 @@ struct pv_mmu_ops pv_mmu_ops = {
#ifndef CONFIG_X86_64
.pagetable_setup_start = native_pagetable_setup_start,
.pagetable_setup_done = native_pagetable_setup_done,
+#else
+ .pagetable_setup_start = paravirt_nop,
+ .pagetable_setup_done = paravirt_nop,
#endif
.read_cr2 = native_read_cr2,
diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c
index 6959b5c45df4..151f2d171f7c 100644
--- a/arch/x86/kernel/pci-calgary_64.c
+++ b/arch/x86/kernel/pci-calgary_64.c
@@ -36,7 +36,7 @@
#include <linux/delay.h>
#include <linux/scatterlist.h>
#include <linux/iommu-helper.h>
-#include <asm/gart.h>
+#include <asm/iommu.h>
#include <asm/calgary.h>
#include <asm/tce.h>
#include <asm/pci-direct.h>
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index 8467ec2320f1..a4213c00dffc 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -5,12 +5,11 @@
#include <asm/proto.h>
#include <asm/dma.h>
-#include <asm/gart.h>
+#include <asm/iommu.h>
#include <asm/calgary.h>
#include <asm/amd_iommu.h>
-int forbid_dac __read_mostly;
-EXPORT_SYMBOL(forbid_dac);
+static int forbid_dac __read_mostly;
const struct dma_mapping_ops *dma_ops;
EXPORT_SYMBOL(dma_ops);
@@ -114,21 +113,15 @@ void __init pci_iommu_alloc(void)
* The order of these functions is important for
* fall-back/fail-over reasons
*/
-#ifdef CONFIG_GART_IOMMU
gart_iommu_hole_init();
-#endif
-#ifdef CONFIG_CALGARY_IOMMU
detect_calgary();
-#endif
detect_intel_iommu();
amd_iommu_detect();
-#ifdef CONFIG_SWIOTLB
pci_swiotlb_init();
-#endif
}
#endif
@@ -184,9 +177,7 @@ static __init int iommu_setup(char *p)
swiotlb = 1;
#endif
-#ifdef CONFIG_GART_IOMMU
gart_parse_options(p);
-#endif
#ifdef CONFIG_CALGARY_IOMMU
if (!strncmp(p, "calgary", 7))
@@ -500,17 +491,13 @@ EXPORT_SYMBOL(dma_free_coherent);
static int __init pci_iommu_init(void)
{
-#ifdef CONFIG_CALGARY_IOMMU
calgary_iommu_init();
-#endif
intel_iommu_init();
amd_iommu_init();
-#ifdef CONFIG_GART_IOMMU
gart_iommu_init();
-#endif
no_iommu_init();
return 0;
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index c3fe78406d18..be60961f8695 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -32,6 +32,7 @@
#include <asm/mtrr.h>
#include <asm/pgtable.h>
#include <asm/proto.h>
+#include <asm/iommu.h>
#include <asm/gart.h>
#include <asm/cacheflush.h>
#include <asm/swiotlb.h>
diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c
index aec43d56f49c..792b9179eff3 100644
--- a/arch/x86/kernel/pci-nommu.c
+++ b/arch/x86/kernel/pci-nommu.c
@@ -7,7 +7,7 @@
#include <linux/dma-mapping.h>
#include <linux/scatterlist.h>
-#include <asm/gart.h>
+#include <asm/iommu.h>
#include <asm/processor.h>
#include <asm/dma.h>
diff --git a/arch/x86/kernel/pci-swiotlb_64.c b/arch/x86/kernel/pci-swiotlb_64.c
index 82299cd1d04d..20df839b9c20 100644
--- a/arch/x86/kernel/pci-swiotlb_64.c
+++ b/arch/x86/kernel/pci-swiotlb_64.c
@@ -5,7 +5,7 @@
#include <linux/module.h>
#include <linux/dma-mapping.h>
-#include <asm/gart.h>
+#include <asm/iommu.h>
#include <asm/swiotlb.h>
#include <asm/dma.h>
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 4d629c62f4f8..7fc4d5b0a6a0 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -15,6 +15,7 @@ unsigned long idle_nomwait;
EXPORT_SYMBOL(idle_nomwait);
struct kmem_cache *task_xstate_cachep;
+static int force_mwait __cpuinitdata;
int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
{
@@ -199,6 +200,7 @@ static void poll_idle(void)
*
* idle=mwait overrides this decision and forces the usage of mwait.
*/
+static int __cpuinitdata force_mwait;
#define MWAIT_INFO 0x05
#define MWAIT_ECX_EXTENDED_INFO 0x01
@@ -326,6 +328,9 @@ void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
static int __init idle_setup(char *str)
{
+ if (!str)
+ return -EINVAL;
+
if (!strcmp(str, "poll")) {
printk("using polling idle threads.\n");
pm_idle = poll_idle;
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index a8e53626ac9a..e8a8e1b99817 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -537,8 +537,8 @@ static inline void __switch_to_xtra(struct task_struct *prev_p,
struct task_struct *
__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
{
- struct thread_struct *prev = &prev_p->thread,
- *next = &next_p->thread;
+ struct thread_struct *prev = &prev_p->thread;
+ struct thread_struct *next = &next_p->thread;
int cpu = smp_processor_id();
struct tss_struct *tss = &per_cpu(init_tss, cpu);
unsigned fsindex, gsindex;
@@ -586,35 +586,34 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
/*
* Switch FS and GS.
+ *
+ * Segment register != 0 always requires a reload. Also
+ * reload when it has changed. When prev process used 64bit
+ * base always reload to avoid an information leak.
*/
- {
- /* segment register != 0 always requires a reload.
- also reload when it has changed.
- when prev process used 64bit base always reload
- to avoid an information leak. */
- if (unlikely(fsindex | next->fsindex | prev->fs)) {
- loadsegment(fs, next->fsindex);
- /* check if the user used a selector != 0
- * if yes clear 64bit base, since overloaded base
- * is always mapped to the Null selector
- */
- if (fsindex)
+ if (unlikely(fsindex | next->fsindex | prev->fs)) {
+ loadsegment(fs, next->fsindex);
+ /*
+ * Check if the user used a selector != 0; if yes
+ * clear 64bit base, since overloaded base is always
+ * mapped to the Null selector
+ */
+ if (fsindex)
prev->fs = 0;
- }
- /* when next process has a 64bit base use it */
- if (next->fs)
- wrmsrl(MSR_FS_BASE, next->fs);
- prev->fsindex = fsindex;
-
- if (unlikely(gsindex | next->gsindex | prev->gs)) {
- load_gs_index(next->gsindex);
- if (gsindex)
+ }
+ /* when next process has a 64bit base use it */
+ if (next->fs)
+ wrmsrl(MSR_FS_BASE, next->fs);
+ prev->fsindex = fsindex;
+
+ if (unlikely(gsindex | next->gsindex | prev->gs)) {
+ load_gs_index(next->gsindex);
+ if (gsindex)
prev->gs = 0;
- }
- if (next->gs)
- wrmsrl(MSR_KERNEL_GS_BASE, next->gs);
- prev->gsindex = gsindex;
}
+ if (next->gs)
+ wrmsrl(MSR_KERNEL_GS_BASE, next->gs);
+ prev->gsindex = gsindex;
/* Must be after DS reload */
unlazy_fpu(prev_p);
@@ -627,7 +626,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
write_pda(pcurrent, next_p);
write_pda(kernelstack,
- (unsigned long)task_stack_page(next_p) + THREAD_SIZE - PDA_STACKOFFSET);
+ (unsigned long)task_stack_page(next_p) +
+ THREAD_SIZE - PDA_STACKOFFSET);
#ifdef CONFIG_CC_STACKPROTECTOR
write_pda(stack_canary, next_p->stack_canary);
/*
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 77040b6070e1..e37dccce85db 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -1357,8 +1357,6 @@ const struct user_regset_view *task_user_regset_view(struct task_struct *task)
#endif
}
-#ifdef CONFIG_X86_32
-
void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code)
{
struct siginfo info;
@@ -1377,89 +1375,10 @@ void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code)
force_sig_info(SIGTRAP, &info, tsk);
}
-/* notification of system call entry/exit
- * - triggered by current->work.syscall_trace
- */
-int do_syscall_trace(struct pt_regs *regs, int entryexit)
-{
- int is_sysemu = test_thread_flag(TIF_SYSCALL_EMU);
- /*
- * With TIF_SYSCALL_EMU set we want to ignore TIF_SINGLESTEP for syscall
- * interception
- */
- int is_singlestep = !is_sysemu && test_thread_flag(TIF_SINGLESTEP);
- int ret = 0;
-
- /* do the secure computing check first */
- if (!entryexit)
- secure_computing(regs->orig_ax);
-
- if (unlikely(current->audit_context)) {
- if (entryexit)
- audit_syscall_exit(AUDITSC_RESULT(regs->ax),
- regs->ax);
- /* Debug traps, when using PTRACE_SINGLESTEP, must be sent only
- * on the syscall exit path. Normally, when TIF_SYSCALL_AUDIT is
- * not used, entry.S will call us only on syscall exit, not
- * entry; so when TIF_SYSCALL_AUDIT is used we must avoid
- * calling send_sigtrap() on syscall entry.
- *
- * Note that when PTRACE_SYSEMU_SINGLESTEP is used,
- * is_singlestep is false, despite his name, so we will still do
- * the correct thing.
- */
- else if (is_singlestep)
- goto out;
- }
-
- if (!(current->ptrace & PT_PTRACED))
- goto out;
-
- /* If a process stops on the 1st tracepoint with SYSCALL_TRACE
- * and then is resumed with SYSEMU_SINGLESTEP, it will come in
- * here. We have to check this and return */
- if (is_sysemu && entryexit)
- return 0;
-
- /* Fake a debug trap */
- if (is_singlestep)
- send_sigtrap(current, regs, 0);
-
- if (!test_thread_flag(TIF_SYSCALL_TRACE) && !is_sysemu)
- goto out;
-
- /* the 0x80 provides a way for the tracing parent to distinguish
- between a syscall stop and SIGTRAP delivery */
- /* Note that the debugger could change the result of test_thread_flag!*/
- ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD) ? 0x80:0));
-
- /*
- * this isn't the same as continuing with a signal, but it will do
- * for normal use. strace only continues with a signal if the
- * stopping signal is not SIGTRAP. -brl
- */
- if (current->exit_code) {
- send_sig(current->exit_code, current, 1);
- current->exit_code = 0;
- }
- ret = is_sysemu;
-out:
- if (unlikely(current->audit_context) && !entryexit)
- audit_syscall_entry(AUDIT_ARCH_I386, regs->orig_ax,
- regs->bx, regs->cx, regs->dx, regs->si);
- if (ret == 0)
- return 0;
-
- regs->orig_ax = -1; /* force skip of syscall restarting */
- if (unlikely(current->audit_context))
- audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax);
- return 1;
-}
-
-#else /* CONFIG_X86_64 */
-
static void syscall_trace(struct pt_regs *regs)
{
+ if (!(current->ptrace & PT_PTRACED))
+ return;
#if 0
printk("trace %s ip %lx sp %lx ax %d origrax %d caller %lx tiflags %x ptrace %x\n",
@@ -1481,39 +1400,81 @@ static void syscall_trace(struct pt_regs *regs)
}
}
-asmlinkage void syscall_trace_enter(struct pt_regs *regs)
+#ifdef CONFIG_X86_32
+# define IS_IA32 1
+#elif defined CONFIG_IA32_EMULATION
+# define IS_IA32 test_thread_flag(TIF_IA32)
+#else
+# define IS_IA32 0
+#endif
+
+/*
+ * We must return the syscall number to actually look up in the table.
+ * This can be -1L to skip running any syscall at all.
+ */
+asmregparm long syscall_trace_enter(struct pt_regs *regs)
{
+ long ret = 0;
+
+ /*
+ * If we stepped into a sysenter/syscall insn, it trapped in
+ * kernel mode; do_debug() cleared TF and set TIF_SINGLESTEP.
+ * If user-mode had set TF itself, then it's still clear from
+ * do_debug() and we need to set it again to restore the user
+ * state. If we entered on the slow path, TF was already set.
+ */
+ if (test_thread_flag(TIF_SINGLESTEP))
+ regs->flags |= X86_EFLAGS_TF;
+
/* do the secure computing check first */
secure_computing(regs->orig_ax);
- if (test_thread_flag(TIF_SYSCALL_TRACE)
- && (current->ptrace & PT_PTRACED))
+ if (unlikely(test_thread_flag(TIF_SYSCALL_EMU)))
+ ret = -1L;
+
+ if (ret || test_thread_flag(TIF_SYSCALL_TRACE))
syscall_trace(regs);
if (unlikely(current->audit_context)) {
- if (test_thread_flag(TIF_IA32)) {
+ if (IS_IA32)
audit_syscall_entry(AUDIT_ARCH_I386,
regs->orig_ax,
regs->bx, regs->cx,
regs->dx, regs->si);
- } else {
+#ifdef CONFIG_X86_64
+ else
audit_syscall_entry(AUDIT_ARCH_X86_64,
regs->orig_ax,
regs->di, regs->si,
regs->dx, regs->r10);
- }
+#endif
}
+
+ return ret ?: regs->orig_ax;
}
-asmlinkage void syscall_trace_leave(struct pt_regs *regs)
+asmregparm void syscall_trace_leave(struct pt_regs *regs)
{
if (unlikely(current->audit_context))
audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax);
- if ((test_thread_flag(TIF_SYSCALL_TRACE)
- || test_thread_flag(TIF_SINGLESTEP))
- && (current->ptrace & PT_PTRACED))
+ if (test_thread_flag(TIF_SYSCALL_TRACE))
syscall_trace(regs);
-}
-#endif /* CONFIG_X86_32 */
+ /*
+ * If TIF_SYSCALL_EMU is set, we only get here because of
+ * TIF_SINGLESTEP (i.e. this is PTRACE_SYSEMU_SINGLESTEP).
+ * We already reported this syscall instruction in
+ * syscall_trace_enter(), so don't do any more now.
+ */
+ if (unlikely(test_thread_flag(TIF_SYSCALL_EMU)))
+ return;
+
+ /*
+ * If we are single-stepping, synthesize a trap to follow the
+ * system call instruction.
+ */
+ if (test_thread_flag(TIF_SINGLESTEP) &&
+ (current->ptrace & PT_PTRACED))
+ send_sigtrap(current, regs, 0);
+}
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index f8a62160e151..9dcf39c02972 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -177,6 +177,14 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {
DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge 2400"),
},
},
+ { /* Handle problems with rebooting on Dell T5400's */
+ .callback = set_bios_reboot,
+ .ident = "Dell Precision T5400",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "Precision WorkStation T5400"),
+ },
+ },
{ /* Handle problems with rebooting on HP laptops */
.callback = set_bios_reboot,
.ident = "HP Compaq Laptop",
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 531b55b8e81a..ec952aa5394a 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -57,12 +57,8 @@
#include <linux/slab.h>
#include <linux/user.h>
#include <linux/delay.h>
-#include <linux/highmem.h>
#include <linux/kallsyms.h>
-#include <linux/edd.h>
-#include <linux/iscsi_ibft.h>
-#include <linux/kexec.h>
#include <linux/cpufreq.h>
#include <linux/dma-mapping.h>
#include <linux/ctype.h>
@@ -96,7 +92,7 @@
#include <asm/smp.h>
#include <asm/desc.h>
#include <asm/dma.h>
-#include <asm/gart.h>
+#include <asm/iommu.h>
#include <asm/mmu_context.h>
#include <asm/proto.h>
@@ -104,7 +100,6 @@
#include <asm/paravirt.h>
#include <asm/percpu.h>
-#include <asm/sections.h>
#include <asm/topology.h>
#include <asm/apicdef.h>
#ifdef CONFIG_X86_64
@@ -579,6 +574,10 @@ static int __init setup_elfcorehdr(char *arg)
early_param("elfcorehdr", setup_elfcorehdr);
#endif
+static struct x86_quirks default_x86_quirks __initdata;
+
+struct x86_quirks *x86_quirks __initdata = &default_x86_quirks;
+
/*
* Determine if we were loaded by an EFI loader. If so, then we have also been
* passed the efi memmap, systab, etc., so we should use these data structures
@@ -824,7 +823,10 @@ void __init setup_arch(char **cmdline_p)
vmi_init();
#endif
+ paravirt_pagetable_setup_start(swapper_pg_dir);
paging_init();
+ paravirt_pagetable_setup_done(swapper_pg_dir);
+ paravirt_post_allocator_init();
#ifdef CONFIG_X86_64
map_vsyscall();
@@ -854,14 +856,6 @@ void __init setup_arch(char **cmdline_p)
init_cpu_to_node();
#endif
-#ifdef CONFIG_X86_NUMAQ
- /*
- * need to check online nodes num, call it
- * here before time_init/tsc_init
- */
- numaq_tsc_disable();
-#endif
-
init_apic_mappings();
ioapic_init_mappings();
diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c
index d92373630963..07faaa5109cb 100644
--- a/arch/x86/kernel/signal_32.c
+++ b/arch/x86/kernel/signal_32.c
@@ -212,7 +212,7 @@ asmlinkage unsigned long sys_sigreturn(unsigned long __unused)
badframe:
if (show_unhandled_signals && printk_ratelimit()) {
- printk(KERN_INFO "%s%s[%d] bad frame in sigreturn frame:"
+ printk("%s%s[%d] bad frame in sigreturn frame:"
"%p ip:%lx sp:%lx oeax:%lx",
task_pid_nr(current) > 1 ? KERN_INFO : KERN_EMERG,
current->comm, task_pid_nr(current), frame, regs->ip,
@@ -657,12 +657,6 @@ static void do_signal(struct pt_regs *regs)
void
do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
{
- /* Pending single-step? */
- if (thread_info_flags & _TIF_SINGLESTEP) {
- regs->flags |= X86_EFLAGS_TF;
- clear_thread_flag(TIF_SINGLESTEP);
- }
-
/* deal with pending signal delivery */
if (thread_info_flags & _TIF_SIGPENDING)
do_signal(regs);
diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c
index e53b267662e7..bf87684474f1 100644
--- a/arch/x86/kernel/signal_64.c
+++ b/arch/x86/kernel/signal_64.c
@@ -487,12 +487,6 @@ static void do_signal(struct pt_regs *regs)
void do_notify_resume(struct pt_regs *regs, void *unused,
__u32 thread_info_flags)
{
- /* Pending single-step? */
- if (thread_info_flags & _TIF_SINGLESTEP) {
- regs->flags |= X86_EFLAGS_TF;
- clear_thread_flag(TIF_SINGLESTEP);
- }
-
#ifdef CONFIG_X86_MCE
/* notify userspace of pending MCEs */
if (thread_info_flags & _TIF_MCE_NOTIFY)
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 687376ab07e8..27640196eb7c 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -546,8 +546,8 @@ static inline void __inquire_remote_apic(int apicid)
printk(KERN_CONT
"a previous APIC delivery may have failed\n");
- apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(apicid));
- apic_write_around(APIC_ICR, APIC_DM_REMRD | regs[i]);
+ apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(apicid));
+ apic_write(APIC_ICR, APIC_DM_REMRD | regs[i]);
timeout = 0;
do {
@@ -579,11 +579,11 @@ wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip)
int maxlvt;
/* Target chip */
- apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(logical_apicid));
+ apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(logical_apicid));
/* Boot on the stack */
/* Kick the second */
- apic_write_around(APIC_ICR, APIC_DM_NMI | APIC_DEST_LOGICAL);
+ apic_write(APIC_ICR, APIC_DM_NMI | APIC_DEST_LOGICAL);
Dprintk("Waiting for send to finish...\n");
send_status = safe_apic_wait_icr_idle();
@@ -592,14 +592,9 @@ wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip)
* Give the other CPU some time to accept the IPI.
*/
udelay(200);
- /*
- * Due to the Pentium erratum 3AP.
- */
maxlvt = lapic_get_maxlvt();
- if (maxlvt > 3) {
- apic_read_around(APIC_SPIV);
+ if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */
apic_write(APIC_ESR, 0);
- }
accept_status = (apic_read(APIC_ESR) & 0xEF);
Dprintk("NMI sent.\n");
@@ -625,12 +620,14 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
return send_status;
}
+ maxlvt = lapic_get_maxlvt();
+
/*
* Be paranoid about clearing APIC errors.
*/
if (APIC_INTEGRATED(apic_version[phys_apicid])) {
- apic_read_around(APIC_SPIV);
- apic_write(APIC_ESR, 0);
+ if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */
+ apic_write(APIC_ESR, 0);
apic_read(APIC_ESR);
}
@@ -639,13 +636,13 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
/*
* Turn INIT on target chip
*/
- apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
+ apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
/*
* Send IPI
*/
- apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_INT_ASSERT
- | APIC_DM_INIT);
+ apic_write(APIC_ICR,
+ APIC_INT_LEVELTRIG | APIC_INT_ASSERT | APIC_DM_INIT);
Dprintk("Waiting for send to finish...\n");
send_status = safe_apic_wait_icr_idle();
@@ -655,10 +652,10 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
Dprintk("Deasserting INIT.\n");
/* Target chip */
- apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
+ apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
/* Send IPI */
- apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT);
+ apic_write(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT);
Dprintk("Waiting for send to finish...\n");
send_status = safe_apic_wait_icr_idle();
@@ -689,12 +686,10 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
*/
Dprintk("#startup loops: %d.\n", num_starts);
- maxlvt = lapic_get_maxlvt();
-
for (j = 1; j <= num_starts; j++) {
Dprintk("Sending STARTUP #%d.\n", j);
- apic_read_around(APIC_SPIV);
- apic_write(APIC_ESR, 0);
+ if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */
+ apic_write(APIC_ESR, 0);
apic_read(APIC_ESR);
Dprintk("After apic_write.\n");
@@ -703,12 +698,11 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
*/
/* Target chip */
- apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
+ apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
/* Boot on the stack */
/* Kick the second */
- apic_write_around(APIC_ICR, APIC_DM_STARTUP
- | (start_eip >> 12));
+ apic_write(APIC_ICR, APIC_DM_STARTUP | (start_eip >> 12));
/*
* Give the other CPU some time to accept the IPI.
@@ -724,13 +718,8 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
* Give the other CPU some time to accept the IPI.
*/
udelay(200);
- /*
- * Due to the Pentium erratum 3AP.
- */
- if (maxlvt > 3) {
- apic_read_around(APIC_SPIV);
+ if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */
apic_write(APIC_ESR, 0);
- }
accept_status = (apic_read(APIC_ESR) & 0xEF);
if (send_status || accept_status)
break;
@@ -768,7 +757,7 @@ static void __cpuinit do_fork_idle(struct work_struct *work)
*
* Must be called after the _cpu_pda pointer table is initialized.
*/
-static int __cpuinit get_local_pda(int cpu)
+int __cpuinit get_local_pda(int cpu)
{
struct x8664_pda *oldpda, *newpda;
unsigned long size = sizeof(struct x8664_pda);
@@ -1311,7 +1300,7 @@ static void __ref remove_cpu_from_maps(int cpu)
cpu_clear(cpu, cpu_callout_map);
cpu_clear(cpu, cpu_callin_map);
/* was set by cpu_init() */
- clear_bit(cpu, (unsigned long *)&cpu_initialized);
+ cpu_clear(cpu, cpu_initialized);
numa_remove_cpu(cpu);
}
@@ -1390,7 +1379,8 @@ static int __init parse_maxcpus(char *arg)
{
extern unsigned int maxcpus;
- maxcpus = simple_strtoul(arg, NULL, 0);
+ if (arg)
+ maxcpus = simple_strtoul(arg, NULL, 0);
return 0;
}
early_param("maxcpus", parse_maxcpus);
diff --git a/arch/x86/kernel/smpcommon_32.c b/arch/x86/kernel/smpcommon_32.c
deleted file mode 100644
index 8b137891791f..000000000000
--- a/arch/x86/kernel/smpcommon_32.c
+++ /dev/null
@@ -1 +0,0 @@
-
diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c
index 92c20fee6781..e8b9863ef8c4 100644
--- a/arch/x86/kernel/step.c
+++ b/arch/x86/kernel/step.c
@@ -105,6 +105,20 @@ static int is_setting_trap_flag(struct task_struct *child, struct pt_regs *regs)
static int enable_single_step(struct task_struct *child)
{
struct pt_regs *regs = task_pt_regs(child);
+ unsigned long oflags;
+
+ /*
+ * If we stepped into a sysenter/syscall insn, it trapped in
+ * kernel mode; do_debug() cleared TF and set TIF_SINGLESTEP.
+ * If user-mode had set TF itself, then it's still clear from
+ * do_debug() and we need to set it again to restore the user
+ * state so we don't wrongly set TIF_FORCED_TF below.
+ * If enable_single_step() was used last and that is what
+ * set TIF_SINGLESTEP, then both TF and TIF_FORCED_TF are
+ * already set and our bookkeeping is fine.
+ */
+ if (unlikely(test_tsk_thread_flag(child, TIF_SINGLESTEP)))
+ regs->flags |= X86_EFLAGS_TF;
/*
* Always set TIF_SINGLESTEP - this guarantees that
@@ -113,11 +127,7 @@ static int enable_single_step(struct task_struct *child)
*/
set_tsk_thread_flag(child, TIF_SINGLESTEP);
- /*
- * If TF was already set, don't do anything else
- */
- if (regs->flags & X86_EFLAGS_TF)
- return 0;
+ oflags = regs->flags;
/* Set TF on the kernel stack.. */
regs->flags |= X86_EFLAGS_TF;
@@ -126,9 +136,22 @@ static int enable_single_step(struct task_struct *child)
* ..but if TF is changed by the instruction we will trace,
* don't mark it as being "us" that set it, so that we
* won't clear it by hand later.
+ *
+ * Note that if we don't actually execute the popf because
+ * of a signal arriving right now or suchlike, we will lose
+ * track of the fact that it really was "us" that set it.
*/
- if (is_setting_trap_flag(child, regs))
+ if (is_setting_trap_flag(child, regs)) {
+ clear_tsk_thread_flag(child, TIF_FORCED_TF);
return 0;
+ }
+
+ /*
+ * If TF was already set, check whether it was us who set it.
+ * If not, we should never attempt a block step.
+ */
+ if (oflags & X86_EFLAGS_TF)
+ return test_tsk_thread_flag(child, TIF_FORCED_TF);
set_tsk_thread_flag(child, TIF_FORCED_TF);
diff --git a/arch/x86/kernel/time_32.c b/arch/x86/kernel/time_32.c
index 059ca6ee59b4..ffe3c664afc0 100644
--- a/arch/x86/kernel/time_32.c
+++ b/arch/x86/kernel/time_32.c
@@ -129,6 +129,7 @@ void __init hpet_time_init(void)
*/
void __init time_init(void)
{
+ pre_time_init_hook();
tsc_init();
late_time_init = choose_time_init();
}
diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c
index 8a768973c4f0..03df8e45e5a1 100644
--- a/arch/x86/kernel/traps_32.c
+++ b/arch/x86/kernel/traps_32.c
@@ -58,6 +58,7 @@
#include <asm/nmi.h>
#include <asm/smp.h>
#include <asm/io.h>
+#include <asm/traps.h>
#include "mach_traps.h"
@@ -77,26 +78,6 @@ char ignore_fpu_irq;
gate_desc idt_table[256]
__attribute__((__section__(".data.idt"))) = { { { { 0, 0 } } }, };
-asmlinkage void divide_error(void);
-asmlinkage void debug(void);
-asmlinkage void nmi(void);
-asmlinkage void int3(void);
-asmlinkage void overflow(void);
-asmlinkage void bounds(void);
-asmlinkage void invalid_op(void);
-asmlinkage void device_not_available(void);
-asmlinkage void coprocessor_segment_overrun(void);
-asmlinkage void invalid_TSS(void);
-asmlinkage void segment_not_present(void);
-asmlinkage void stack_segment(void);
-asmlinkage void general_protection(void);
-asmlinkage void page_fault(void);
-asmlinkage void coprocessor_error(void);
-asmlinkage void simd_coprocessor_error(void);
-asmlinkage void alignment_check(void);
-asmlinkage void spurious_interrupt_bug(void);
-asmlinkage void machine_check(void);
-
int panic_on_unrecovered_nmi;
int kstack_depth_to_print = 24;
static unsigned int code_bytes = 64;
@@ -256,7 +237,7 @@ static const struct stacktrace_ops print_trace_ops = {
static void
show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
- unsigned long *stack, unsigned long bp, char *log_lvl)
+ unsigned long *stack, unsigned long bp, char *log_lvl)
{
dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl);
printk("%s =======================\n", log_lvl);
@@ -383,6 +364,54 @@ int is_valid_bugaddr(unsigned long ip)
return ud2 == 0x0b0f;
}
+static raw_spinlock_t die_lock = __RAW_SPIN_LOCK_UNLOCKED;
+static int die_owner = -1;
+static unsigned int die_nest_count;
+
+unsigned __kprobes long oops_begin(void)
+{
+ unsigned long flags;
+
+ oops_enter();
+
+ if (die_owner != raw_smp_processor_id()) {
+ console_verbose();
+ raw_local_irq_save(flags);
+ __raw_spin_lock(&die_lock);
+ die_owner = smp_processor_id();
+ die_nest_count = 0;
+ bust_spinlocks(1);
+ } else {
+ raw_local_irq_save(flags);
+ }
+ die_nest_count++;
+ return flags;
+}
+
+void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr)
+{
+ bust_spinlocks(0);
+ die_owner = -1;
+ add_taint(TAINT_DIE);
+ __raw_spin_unlock(&die_lock);
+ raw_local_irq_restore(flags);
+
+ if (!regs)
+ return;
+
+ if (kexec_should_crash(current))
+ crash_kexec(regs);
+
+ if (in_interrupt())
+ panic("Fatal exception in interrupt");
+
+ if (panic_on_oops)
+ panic("Fatal exception");
+
+ oops_exit();
+ do_exit(signr);
+}
+
int __kprobes __die(const char *str, struct pt_regs *regs, long err)
{
unsigned short ss;
@@ -423,31 +452,9 @@ int __kprobes __die(const char *str, struct pt_regs *regs, long err)
*/
void die(const char *str, struct pt_regs *regs, long err)
{
- static struct {
- raw_spinlock_t lock;
- u32 lock_owner;
- int lock_owner_depth;
- } die = {
- .lock = __RAW_SPIN_LOCK_UNLOCKED,
- .lock_owner = -1,
- .lock_owner_depth = 0
- };
- unsigned long flags;
-
- oops_enter();
-
- if (die.lock_owner != raw_smp_processor_id()) {
- console_verbose();
- raw_local_irq_save(flags);
- __raw_spin_lock(&die.lock);
- die.lock_owner = smp_processor_id();
- die.lock_owner_depth = 0;
- bust_spinlocks(1);
- } else {
- raw_local_irq_save(flags);
- }
+ unsigned long flags = oops_begin();
- if (++die.lock_owner_depth < 3) {
+ if (die_nest_count < 3) {
report_bug(regs->ip, regs);
if (__die(str, regs, err))
@@ -456,26 +463,7 @@ void die(const char *str, struct pt_regs *regs, long err)
printk(KERN_EMERG "Recursive die() failure, output suppressed\n");
}
- bust_spinlocks(0);
- die.lock_owner = -1;
- add_taint(TAINT_DIE);
- __raw_spin_unlock(&die.lock);
- raw_local_irq_restore(flags);
-
- if (!regs)
- return;
-
- if (kexec_should_crash(current))
- crash_kexec(regs);
-
- if (in_interrupt())
- panic("Fatal exception in interrupt");
-
- if (panic_on_oops)
- panic("Fatal exception");
-
- oops_exit();
- do_exit(SIGSEGV);
+ oops_end(flags, regs, SIGSEGV);
}
static inline void
diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c
index 2696a6837782..3f18d73f420c 100644
--- a/arch/x86/kernel/traps_64.c
+++ b/arch/x86/kernel/traps_64.c
@@ -51,30 +51,10 @@
#include <asm/pgalloc.h>
#include <asm/proto.h>
#include <asm/pda.h>
+#include <asm/traps.h>
#include <mach_traps.h>
-asmlinkage void divide_error(void);
-asmlinkage void debug(void);
-asmlinkage void nmi(void);
-asmlinkage void int3(void);
-asmlinkage void overflow(void);
-asmlinkage void bounds(void);
-asmlinkage void invalid_op(void);
-asmlinkage void device_not_available(void);
-asmlinkage void double_fault(void);
-asmlinkage void coprocessor_segment_overrun(void);
-asmlinkage void invalid_TSS(void);
-asmlinkage void segment_not_present(void);
-asmlinkage void stack_segment(void);
-asmlinkage void general_protection(void);
-asmlinkage void page_fault(void);
-asmlinkage void coprocessor_error(void);
-asmlinkage void simd_coprocessor_error(void);
-asmlinkage void alignment_check(void);
-asmlinkage void spurious_interrupt_bug(void);
-asmlinkage void machine_check(void);
-
int panic_on_unrecovered_nmi;
int kstack_depth_to_print = 12;
static unsigned int code_bytes = 64;
@@ -355,17 +335,24 @@ static const struct stacktrace_ops print_trace_ops = {
.address = print_trace_address,
};
-void show_trace(struct task_struct *task, struct pt_regs *regs,
- unsigned long *stack, unsigned long bp)
+static void
+show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
+ unsigned long *stack, unsigned long bp, char *log_lvl)
{
printk("\nCall Trace:\n");
- dump_trace(task, regs, stack, bp, &print_trace_ops, NULL);
+ dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl);
printk("\n");
}
+void show_trace(struct task_struct *task, struct pt_regs *regs,
+ unsigned long *stack, unsigned long bp)
+{
+ show_trace_log_lvl(task, regs, stack, bp, "");
+}
+
static void
-_show_stack(struct task_struct *task, struct pt_regs *regs,
- unsigned long *sp, unsigned long bp)
+show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
+ unsigned long *sp, unsigned long bp, char *log_lvl)
{
unsigned long *stack;
int i;
@@ -399,12 +386,12 @@ _show_stack(struct task_struct *task, struct pt_regs *regs,
printk(" %016lx", *stack++);
touch_nmi_watchdog();
}
- show_trace(task, regs, sp, bp);
+ show_trace_log_lvl(task, regs, sp, bp, log_lvl);
}
void show_stack(struct task_struct *task, unsigned long *sp)
{
- _show_stack(task, NULL, sp, 0);
+ show_stack_log_lvl(task, NULL, sp, 0, "");
}
/*
@@ -454,7 +441,8 @@ void show_registers(struct pt_regs *regs)
u8 *ip;
printk("Stack: ");
- _show_stack(NULL, regs, (unsigned long *)sp, regs->bp);
+ show_stack_log_lvl(NULL, regs, (unsigned long *)sp,
+ regs->bp, "");
printk("\n");
printk(KERN_EMERG "Code: ");
@@ -518,7 +506,7 @@ unsigned __kprobes long oops_begin(void)
}
void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr)
-{
+{
die_owner = -1;
bust_spinlocks(0);
die_nest_count--;
diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c
index e94bdb6add1d..41e01b145c48 100644
--- a/arch/x86/kernel/visws_quirks.c
+++ b/arch/x86/kernel/visws_quirks.c
@@ -73,7 +73,7 @@ int is_visws_box(void)
return visws_board_type >= 0;
}
-static int __init visws_time_init_quirk(void)
+static int __init visws_time_init(void)
{
printk(KERN_INFO "Starting Cobalt Timer system clock\n");
@@ -93,7 +93,7 @@ static int __init visws_time_init_quirk(void)
return 0;
}
-static int __init visws_pre_intr_init_quirk(void)
+static int __init visws_pre_intr_init(void)
{
init_VISWS_APIC_irqs();
@@ -114,7 +114,7 @@ EXPORT_SYMBOL(sgivwfb_mem_size);
long long mem_size __initdata = 0;
-static char * __init visws_memory_setup_quirk(void)
+static char * __init visws_memory_setup(void)
{
long long gfx_mem_size = 8 * MB;
@@ -176,7 +176,7 @@ static void visws_machine_power_off(void)
outl(PIIX_SPECIAL_STOP, 0xCFC);
}
-static int __init visws_get_smp_config_quirk(unsigned int early)
+static int __init visws_get_smp_config(unsigned int early)
{
/*
* Prevent MP-table parsing by the generic code:
@@ -192,7 +192,7 @@ extern unsigned int __cpuinitdata maxcpus;
* No problem for Linux.
*/
-static void __init MP_processor_info (struct mpc_config_processor *m)
+static void __init MP_processor_info(struct mpc_config_processor *m)
{
int ver, logical_apicid;
physid_mask_t apic_cpus;
@@ -232,7 +232,7 @@ static void __init MP_processor_info (struct mpc_config_processor *m)
apic_version[m->mpc_apicid] = ver;
}
-int __init visws_find_smp_config_quirk(unsigned int reserve)
+static int __init visws_find_smp_config(unsigned int reserve)
{
struct mpc_config_processor *mp = phys_to_virt(CO_CPU_TAB_PHYS);
unsigned short ncpus = readw(phys_to_virt(CO_CPU_NUM_PHYS));
@@ -258,7 +258,17 @@ int __init visws_find_smp_config_quirk(unsigned int reserve)
return 1;
}
-extern int visws_trap_init_quirk(void);
+static int visws_trap_init(void);
+
+static struct x86_quirks visws_x86_quirks __initdata = {
+ .arch_time_init = visws_time_init,
+ .arch_pre_intr_init = visws_pre_intr_init,
+ .arch_memory_setup = visws_memory_setup,
+ .arch_intr_init = NULL,
+ .arch_trap_init = visws_trap_init,
+ .mach_get_smp_config = visws_get_smp_config,
+ .mach_find_smp_config = visws_find_smp_config,
+};
void __init visws_early_detect(void)
{
@@ -272,16 +282,10 @@ void __init visws_early_detect(void)
/*
* Install special quirks for timer, interrupt and memory setup:
- */
- arch_time_init_quirk = visws_time_init_quirk;
- arch_pre_intr_init_quirk = visws_pre_intr_init_quirk;
- arch_memory_setup_quirk = visws_memory_setup_quirk;
-
- /*
* Fall back to generic behavior for traps:
+ * Override generic MP-table parsing:
*/
- arch_intr_init_quirk = NULL;
- arch_trap_init_quirk = visws_trap_init_quirk;
+ x86_quirks = &visws_x86_quirks;
/*
* Install reboot quirks:
@@ -294,12 +298,6 @@ void __init visws_early_detect(void)
*/
no_broadcast = 0;
- /*
- * Override generic MP-table parsing:
- */
- mach_get_smp_config_quirk = visws_get_smp_config_quirk;
- mach_find_smp_config_quirk = visws_find_smp_config_quirk;
-
#ifdef CONFIG_X86_IO_APIC
/*
* Turn off IO-APIC detection and initialization:
@@ -426,7 +424,7 @@ static __init void cobalt_init(void)
co_apic_read(CO_APIC_ID));
}
-int __init visws_trap_init_quirk(void)
+static int __init visws_trap_init(void)
{
lithium_init();
cobalt_init();
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c
index b15346092b7b..0a1b1a9d922d 100644
--- a/arch/x86/kernel/vmi_32.c
+++ b/arch/x86/kernel/vmi_32.c
@@ -906,7 +906,6 @@ static inline int __init activate_vmi(void)
#ifdef CONFIG_X86_LOCAL_APIC
para_fill(pv_apic_ops.apic_read, APICRead);
para_fill(pv_apic_ops.apic_write, APICWrite);
- para_fill(pv_apic_ops.apic_write_atomic, APICWrite);
#endif
/*
OpenPOWER on IntegriCloud