summaryrefslogtreecommitdiffstats
path: root/arch/i386/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'arch/i386/kernel')
-rw-r--r--arch/i386/kernel/Makefile1
-rw-r--r--arch/i386/kernel/acpi/boot.c220
-rw-r--r--arch/i386/kernel/acpi/sleep.c27
-rw-r--r--arch/i386/kernel/apic.c80
-rw-r--r--arch/i386/kernel/apm.c8
-rw-r--r--arch/i386/kernel/cpu/common.c45
-rw-r--r--arch/i386/kernel/cpu/cpufreq/powernow-k7.c4
-rw-r--r--arch/i386/kernel/cpu/intel.c12
-rw-r--r--arch/i386/kernel/cpu/intel_cacheinfo.c4
-rw-r--r--arch/i386/kernel/cpu/mcheck/k7.c2
-rw-r--r--arch/i386/kernel/cpu/mcheck/mce.c4
-rw-r--r--arch/i386/kernel/cpu/mcheck/p4.c4
-rw-r--r--arch/i386/kernel/cpu/mcheck/p5.c2
-rw-r--r--arch/i386/kernel/cpu/mcheck/p6.c2
-rw-r--r--arch/i386/kernel/cpu/mcheck/winchip.c2
-rw-r--r--arch/i386/kernel/cpu/mtrr/generic.c3
-rw-r--r--arch/i386/kernel/crash.c223
-rw-r--r--arch/i386/kernel/dmi_scan.c391
-rw-r--r--arch/i386/kernel/efi.c4
-rw-r--r--arch/i386/kernel/head.S6
-rw-r--r--arch/i386/kernel/i8259.c12
-rw-r--r--arch/i386/kernel/io_apic.c43
-rw-r--r--arch/i386/kernel/irq.c72
-rw-r--r--arch/i386/kernel/machine_kexec.c226
-rw-r--r--arch/i386/kernel/mpparse.c2
-rw-r--r--arch/i386/kernel/process.c39
-rw-r--r--arch/i386/kernel/reboot.c82
-rw-r--r--arch/i386/kernel/relocate_kernel.S120
-rw-r--r--arch/i386/kernel/setup.c69
-rw-r--r--arch/i386/kernel/signal.c4
-rw-r--r--arch/i386/kernel/smp.c34
-rw-r--r--arch/i386/kernel/smpboot.c349
-rw-r--r--arch/i386/kernel/syscall_table.S2
-rw-r--r--arch/i386/kernel/sysenter.c12
-rw-r--r--arch/i386/kernel/time_hpet.c2
-rw-r--r--arch/i386/kernel/timers/common.c2
-rw-r--r--arch/i386/kernel/timers/timer_tsc.c2
-rw-r--r--arch/i386/kernel/traps.c51
-rw-r--r--arch/i386/kernel/vmlinux.lds.S59
39 files changed, 1632 insertions, 594 deletions
diff --git a/arch/i386/kernel/Makefile b/arch/i386/kernel/Makefile
index 51ecd512603d..4cc83b322b36 100644
--- a/arch/i386/kernel/Makefile
+++ b/arch/i386/kernel/Makefile
@@ -24,6 +24,7 @@ obj-$(CONFIG_X86_MPPARSE) += mpparse.o
obj-$(CONFIG_X86_LOCAL_APIC) += apic.o nmi.o
obj-$(CONFIG_X86_IO_APIC) += io_apic.o
obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups.o
+obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o crash.o
obj-$(CONFIG_X86_NUMAQ) += numaq.o
obj-$(CONFIG_X86_SUMMIT_NUMA) += summit.o
obj-$(CONFIG_KPROBES) += kprobes.o
diff --git a/arch/i386/kernel/acpi/boot.c b/arch/i386/kernel/acpi/boot.c
index 848bb97af7ca..9f63ae0f404b 100644
--- a/arch/i386/kernel/acpi/boot.c
+++ b/arch/i386/kernel/acpi/boot.c
@@ -29,6 +29,7 @@
#include <linux/efi.h>
#include <linux/irq.h>
#include <linux/module.h>
+#include <linux/dmi.h>
#include <asm/pgtable.h>
#include <asm/io_apic.h>
@@ -815,6 +816,219 @@ acpi_process_madt(void)
return;
}
+extern int acpi_force;
+
+#ifdef __i386__
+
+#ifdef CONFIG_ACPI_PCI
+static int __init disable_acpi_irq(struct dmi_system_id *d)
+{
+ if (!acpi_force) {
+ printk(KERN_NOTICE "%s detected: force use of acpi=noirq\n",
+ d->ident);
+ acpi_noirq_set();
+ }
+ return 0;
+}
+
+static int __init disable_acpi_pci(struct dmi_system_id *d)
+{
+ if (!acpi_force) {
+ printk(KERN_NOTICE "%s detected: force use of pci=noacpi\n",
+ d->ident);
+ acpi_disable_pci();
+ }
+ return 0;
+}
+#endif
+
+static int __init dmi_disable_acpi(struct dmi_system_id *d)
+{
+ if (!acpi_force) {
+ printk(KERN_NOTICE "%s detected: acpi off\n",d->ident);
+ disable_acpi();
+ } else {
+ printk(KERN_NOTICE
+ "Warning: DMI blacklist says broken, but acpi forced\n");
+ }
+ return 0;
+}
+
+/*
+ * Limit ACPI to CPU enumeration for HT
+ */
+static int __init force_acpi_ht(struct dmi_system_id *d)
+{
+ if (!acpi_force) {
+ printk(KERN_NOTICE "%s detected: force use of acpi=ht\n", d->ident);
+ disable_acpi();
+ acpi_ht = 1;
+ } else {
+ printk(KERN_NOTICE
+ "Warning: acpi=force overrules DMI blacklist: acpi=ht\n");
+ }
+ return 0;
+}
+
+/*
+ * If your system is blacklisted here, but you find that acpi=force
+ * works for you, please contact acpi-devel@sourceforge.net
+ */
+static struct dmi_system_id __initdata acpi_dmi_table[] = {
+ /*
+ * Boxes that need ACPI disabled
+ */
+ {
+ .callback = dmi_disable_acpi,
+ .ident = "IBM Thinkpad",
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "IBM"),
+ DMI_MATCH(DMI_BOARD_NAME, "2629H1G"),
+ },
+ },
+
+ /*
+ * Boxes that need acpi=ht
+ */
+ {
+ .callback = force_acpi_ht,
+ .ident = "FSC Primergy T850",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU SIEMENS"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "PRIMERGY T850"),
+ },
+ },
+ {
+ .callback = force_acpi_ht,
+ .ident = "DELL GX240",
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "Dell Computer Corporation"),
+ DMI_MATCH(DMI_BOARD_NAME, "OptiPlex GX240"),
+ },
+ },
+ {
+ .callback = force_acpi_ht,
+ .ident = "HP VISUALIZE NT Workstation",
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "Hewlett-Packard"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "HP VISUALIZE NT Workstation"),
+ },
+ },
+ {
+ .callback = force_acpi_ht,
+ .ident = "Compaq Workstation W8000",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Compaq"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "Workstation W8000"),
+ },
+ },
+ {
+ .callback = force_acpi_ht,
+ .ident = "ASUS P4B266",
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."),
+ DMI_MATCH(DMI_BOARD_NAME, "P4B266"),
+ },
+ },
+ {
+ .callback = force_acpi_ht,
+ .ident = "ASUS P2B-DS",
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."),
+ DMI_MATCH(DMI_BOARD_NAME, "P2B-DS"),
+ },
+ },
+ {
+ .callback = force_acpi_ht,
+ .ident = "ASUS CUR-DLS",
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."),
+ DMI_MATCH(DMI_BOARD_NAME, "CUR-DLS"),
+ },
+ },
+ {
+ .callback = force_acpi_ht,
+ .ident = "ABIT i440BX-W83977",
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "ABIT <http://www.abit.com>"),
+ DMI_MATCH(DMI_BOARD_NAME, "i440BX-W83977 (BP6)"),
+ },
+ },
+ {
+ .callback = force_acpi_ht,
+ .ident = "IBM Bladecenter",
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "IBM"),
+ DMI_MATCH(DMI_BOARD_NAME, "IBM eServer BladeCenter HS20"),
+ },
+ },
+ {
+ .callback = force_acpi_ht,
+ .ident = "IBM eServer xSeries 360",
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "IBM"),
+ DMI_MATCH(DMI_BOARD_NAME, "eServer xSeries 360"),
+ },
+ },
+ {
+ .callback = force_acpi_ht,
+ .ident = "IBM eserver xSeries 330",
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "IBM"),
+ DMI_MATCH(DMI_BOARD_NAME, "eserver xSeries 330"),
+ },
+ },
+ {
+ .callback = force_acpi_ht,
+ .ident = "IBM eserver xSeries 440",
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "IBM"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "eserver xSeries 440"),
+ },
+ },
+
+#ifdef CONFIG_ACPI_PCI
+ /*
+ * Boxes that need ACPI PCI IRQ routing disabled
+ */
+ {
+ .callback = disable_acpi_irq,
+ .ident = "ASUS A7V",
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC"),
+ DMI_MATCH(DMI_BOARD_NAME, "<A7V>"),
+ /* newer BIOS, Revision 1011, does work */
+ DMI_MATCH(DMI_BIOS_VERSION, "ASUS A7V ACPI BIOS Revision 1007"),
+ },
+ },
+
+ /*
+ * Boxes that need ACPI PCI IRQ routing and PCI scan disabled
+ */
+ { /* _BBN 0 bug */
+ .callback = disable_acpi_pci,
+ .ident = "ASUS PR-DLS",
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."),
+ DMI_MATCH(DMI_BOARD_NAME, "PR-DLS"),
+ DMI_MATCH(DMI_BIOS_VERSION, "ASUS PR-DLS ACPI BIOS Revision 1010"),
+ DMI_MATCH(DMI_BIOS_DATE, "03/21/2003")
+ },
+ },
+ {
+ .callback = disable_acpi_pci,
+ .ident = "Acer TravelMate 36x Laptop",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "TravelMate 360"),
+ },
+ },
+#endif
+ { }
+};
+
+#endif /* __i386__ */
+
/*
* acpi_boot_table_init() and acpi_boot_init()
* called from setup_arch(), always.
@@ -843,6 +1057,10 @@ acpi_boot_table_init(void)
{
int error;
+#ifdef __i386__
+ dmi_check_system(acpi_dmi_table);
+#endif
+
/*
* If acpi_disabled, bail out
* One exception: acpi=ht continues far enough to enumerate LAPICs
@@ -870,8 +1088,6 @@ acpi_boot_table_init(void)
*/
error = acpi_blacklisted();
if (error) {
- extern int acpi_force;
-
if (acpi_force) {
printk(KERN_WARNING PREFIX "acpi=force override\n");
} else {
diff --git a/arch/i386/kernel/acpi/sleep.c b/arch/i386/kernel/acpi/sleep.c
index 28bb0514bb6e..c1af93032ff3 100644
--- a/arch/i386/kernel/acpi/sleep.c
+++ b/arch/i386/kernel/acpi/sleep.c
@@ -7,6 +7,7 @@
#include <linux/acpi.h>
#include <linux/bootmem.h>
+#include <linux/dmi.h>
#include <asm/smp.h>
#include <asm/tlbflush.h>
@@ -91,3 +92,29 @@ static int __init acpi_sleep_setup(char *str)
__setup("acpi_sleep=", acpi_sleep_setup);
+
+
+static __init int reset_videomode_after_s3(struct dmi_system_id *d)
+{
+ acpi_video_flags |= 2;
+ return 0;
+}
+
+static __initdata struct dmi_system_id acpisleep_dmi_table[] = {
+ { /* Reset video mode after returning from ACPI S3 sleep */
+ .callback = reset_videomode_after_s3,
+ .ident = "Toshiba Satellite 4030cdt",
+ .matches = {
+ DMI_MATCH(DMI_PRODUCT_NAME, "S4030CDT/4.3"),
+ },
+ },
+ { }
+};
+
+static int __init acpisleep_dmi_init(void)
+{
+ dmi_check_system(acpisleep_dmi_table);
+ return 0;
+}
+
+core_initcall(acpisleep_dmi_init);
diff --git a/arch/i386/kernel/apic.c b/arch/i386/kernel/apic.c
index 8d993fa71754..93df90bbb87e 100644
--- a/arch/i386/kernel/apic.c
+++ b/arch/i386/kernel/apic.c
@@ -26,6 +26,7 @@
#include <linux/mc146818rtc.h>
#include <linux/kernel_stat.h>
#include <linux/sysdev.h>
+#include <linux/cpu.h>
#include <asm/atomic.h>
#include <asm/smp.h>
@@ -40,6 +41,11 @@
#include "io_ports.h"
/*
+ * Knob to control our willingness to enable the local APIC.
+ */
+int enable_local_apic __initdata = 0; /* -1=force-disable, +1=force-enable */
+
+/*
* Debug level
*/
int apic_verbosity;
@@ -205,7 +211,7 @@ void __init connect_bsp_APIC(void)
enable_apic_mode();
}
-void disconnect_bsp_APIC(void)
+void disconnect_bsp_APIC(int virt_wire_setup)
{
if (pic_mode) {
/*
@@ -219,6 +225,42 @@ void disconnect_bsp_APIC(void)
outb(0x70, 0x22);
outb(0x00, 0x23);
}
+ else {
+ /* Go back to Virtual Wire compatibility mode */
+ unsigned long value;
+
+ /* For the spurious interrupt use vector F, and enable it */
+ value = apic_read(APIC_SPIV);
+ value &= ~APIC_VECTOR_MASK;
+ value |= APIC_SPIV_APIC_ENABLED;
+ value |= 0xf;
+ apic_write_around(APIC_SPIV, value);
+
+ if (!virt_wire_setup) {
+ /* For LVT0 make it edge triggered, active high, external and enabled */
+ value = apic_read(APIC_LVT0);
+ value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING |
+ APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
+ APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED );
+ value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
+ value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXTINT);
+ apic_write_around(APIC_LVT0, value);
+ }
+ else {
+ /* Disable LVT0 */
+ apic_write_around(APIC_LVT0, APIC_LVT_MASKED);
+ }
+
+ /* For LVT1 make it edge triggered, active high, nmi and enabled */
+ value = apic_read(APIC_LVT1);
+ value &= ~(
+ APIC_MODE_MASK | APIC_SEND_PENDING |
+ APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
+ APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED);
+ value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
+ value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI);
+ apic_write_around(APIC_LVT1, value);
+ }
}
void disable_local_APIC(void)
@@ -363,7 +405,7 @@ void __init init_bsp_APIC(void)
apic_write_around(APIC_LVT1, value);
}
-void __init setup_local_APIC (void)
+void __devinit setup_local_APIC(void)
{
unsigned long oldvalue, value, ver, maxlvt;
@@ -634,7 +676,7 @@ static struct sys_device device_lapic = {
.cls = &lapic_sysclass,
};
-static void __init apic_pm_activate(void)
+static void __devinit apic_pm_activate(void)
{
apic_pm_state.active = 1;
}
@@ -665,26 +707,6 @@ static void apic_pm_activate(void) { }
* Original code written by Keir Fraser.
*/
-/*
- * Knob to control our willingness to enable the local APIC.
- */
-int enable_local_apic __initdata = 0; /* -1=force-disable, +1=force-enable */
-
-static int __init lapic_disable(char *str)
-{
- enable_local_apic = -1;
- clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability);
- return 0;
-}
-__setup("nolapic", lapic_disable);
-
-static int __init lapic_enable(char *str)
-{
- enable_local_apic = 1;
- return 0;
-}
-__setup("lapic", lapic_enable);
-
static int __init apic_set_verbosity(char *str)
{
if (strcmp("debug", str) == 0)
@@ -855,7 +877,7 @@ fake_ioapic_page:
* but we do not accept timer interrupts yet. We only allow the BP
* to calibrate.
*/
-static unsigned int __init get_8254_timer_count(void)
+static unsigned int __devinit get_8254_timer_count(void)
{
extern spinlock_t i8253_lock;
unsigned long flags;
@@ -874,7 +896,7 @@ static unsigned int __init get_8254_timer_count(void)
}
/* next tick in 8254 can be caught by catching timer wraparound */
-static void __init wait_8254_wraparound(void)
+static void __devinit wait_8254_wraparound(void)
{
unsigned int curr_count, prev_count;
@@ -894,7 +916,7 @@ static void __init wait_8254_wraparound(void)
* Default initialization for 8254 timers. If we use other timers like HPET,
* we override this later
*/
-void (*wait_timer_tick)(void) __initdata = wait_8254_wraparound;
+void (*wait_timer_tick)(void) __devinitdata = wait_8254_wraparound;
/*
* This function sets up the local APIC timer, with a timeout of
@@ -930,7 +952,7 @@ static void __setup_APIC_LVTT(unsigned int clocks)
apic_write_around(APIC_TMICT, clocks/APIC_DIVISOR);
}
-static void __init setup_APIC_timer(unsigned int clocks)
+static void __devinit setup_APIC_timer(unsigned int clocks)
{
unsigned long flags;
@@ -1043,12 +1065,12 @@ void __init setup_boot_APIC_clock(void)
local_irq_enable();
}
-void __init setup_secondary_APIC_clock(void)
+void __devinit setup_secondary_APIC_clock(void)
{
setup_APIC_timer(calibration_result);
}
-void __init disable_APIC_timer(void)
+void __devinit disable_APIC_timer(void)
{
if (using_apic_timer) {
unsigned long v;
diff --git a/arch/i386/kernel/apm.c b/arch/i386/kernel/apm.c
index 0ff65abcd56c..d48ce9290963 100644
--- a/arch/i386/kernel/apm.c
+++ b/arch/i386/kernel/apm.c
@@ -346,10 +346,10 @@ extern int (*console_blank_hook)(int);
struct apm_user {
int magic;
struct apm_user * next;
- int suser: 1;
- int writer: 1;
- int reader: 1;
- int suspend_wait: 1;
+ unsigned int suser: 1;
+ unsigned int writer: 1;
+ unsigned int reader: 1;
+ unsigned int suspend_wait: 1;
int suspend_result;
int suspends_pending;
int standbys_pending;
diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c
index b9954248d0aa..2203a9d20212 100644
--- a/arch/i386/kernel/cpu/common.c
+++ b/arch/i386/kernel/cpu/common.c
@@ -24,9 +24,9 @@ EXPORT_PER_CPU_SYMBOL(cpu_gdt_table);
DEFINE_PER_CPU(unsigned char, cpu_16bit_stack[CPU_16BIT_STACK_SIZE]);
EXPORT_PER_CPU_SYMBOL(cpu_16bit_stack);
-static int cachesize_override __initdata = -1;
-static int disable_x86_fxsr __initdata = 0;
-static int disable_x86_serial_nr __initdata = 1;
+static int cachesize_override __devinitdata = -1;
+static int disable_x86_fxsr __devinitdata = 0;
+static int disable_x86_serial_nr __devinitdata = 1;
struct cpu_dev * cpu_devs[X86_VENDOR_NUM] = {};
@@ -59,7 +59,7 @@ static int __init cachesize_setup(char *str)
}
__setup("cachesize=", cachesize_setup);
-int __init get_model_name(struct cpuinfo_x86 *c)
+int __devinit get_model_name(struct cpuinfo_x86 *c)
{
unsigned int *v;
char *p, *q;
@@ -89,7 +89,7 @@ int __init get_model_name(struct cpuinfo_x86 *c)
}
-void __init display_cacheinfo(struct cpuinfo_x86 *c)
+void __devinit display_cacheinfo(struct cpuinfo_x86 *c)
{
unsigned int n, dummy, ecx, edx, l2size;
@@ -130,7 +130,7 @@ void __init display_cacheinfo(struct cpuinfo_x86 *c)
/* in particular, if CPUID levels 0x80000002..4 are supported, this isn't used */
/* Look up CPU names by table lookup. */
-static char __init *table_lookup_model(struct cpuinfo_x86 *c)
+static char __devinit *table_lookup_model(struct cpuinfo_x86 *c)
{
struct cpu_model_info *info;
@@ -151,7 +151,7 @@ static char __init *table_lookup_model(struct cpuinfo_x86 *c)
}
-void __init get_cpu_vendor(struct cpuinfo_x86 *c, int early)
+void __devinit get_cpu_vendor(struct cpuinfo_x86 *c, int early)
{
char *v = c->x86_vendor_id;
int i;
@@ -202,7 +202,7 @@ static inline int flag_is_changeable_p(u32 flag)
/* Probe for the CPUID instruction */
-static int __init have_cpuid_p(void)
+static int __devinit have_cpuid_p(void)
{
return flag_is_changeable_p(X86_EFLAGS_ID);
}
@@ -249,7 +249,7 @@ static void __init early_cpu_detect(void)
#endif
}
-void __init generic_identify(struct cpuinfo_x86 * c)
+void __devinit generic_identify(struct cpuinfo_x86 * c)
{
u32 tfms, xlvl;
int junk;
@@ -296,7 +296,7 @@ void __init generic_identify(struct cpuinfo_x86 * c)
}
}
-static void __init squash_the_stupid_serial_number(struct cpuinfo_x86 *c)
+static void __devinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c)
{
if (cpu_has(c, X86_FEATURE_PN) && disable_x86_serial_nr ) {
/* Disable processor serial number */
@@ -324,7 +324,7 @@ __setup("serialnumber", x86_serial_nr_setup);
/*
* This does the hard work of actually picking apart the CPU stuff...
*/
-void __init identify_cpu(struct cpuinfo_x86 *c)
+void __devinit identify_cpu(struct cpuinfo_x86 *c)
{
int i;
@@ -432,10 +432,13 @@ void __init identify_cpu(struct cpuinfo_x86 *c)
#ifdef CONFIG_X86_MCE
mcheck_init(c);
#endif
+ if (c == &boot_cpu_data)
+ sysenter_setup();
+ enable_sep_cpu();
}
#ifdef CONFIG_X86_HT
-void __init detect_ht(struct cpuinfo_x86 *c)
+void __devinit detect_ht(struct cpuinfo_x86 *c)
{
u32 eax, ebx, ecx, edx;
int index_msb, tmp;
@@ -490,7 +493,7 @@ void __init detect_ht(struct cpuinfo_x86 *c)
}
#endif
-void __init print_cpu_info(struct cpuinfo_x86 *c)
+void __devinit print_cpu_info(struct cpuinfo_x86 *c)
{
char *vendor = NULL;
@@ -513,7 +516,7 @@ void __init print_cpu_info(struct cpuinfo_x86 *c)
printk("\n");
}
-cpumask_t cpu_initialized __initdata = CPU_MASK_NONE;
+cpumask_t cpu_initialized __devinitdata = CPU_MASK_NONE;
/* This is hacky. :)
* We're emulating future behavior.
@@ -560,7 +563,7 @@ void __init early_cpu_init(void)
* and IDT. We reload them nevertheless, this function acts as a
* 'CPU state barrier', nothing should get across.
*/
-void __init cpu_init (void)
+void __devinit cpu_init(void)
{
int cpu = smp_processor_id();
struct tss_struct * t = &per_cpu(init_tss, cpu);
@@ -648,3 +651,15 @@ void __init cpu_init (void)
clear_used_math();
mxcsr_feature_mask_init();
}
+
+#ifdef CONFIG_HOTPLUG_CPU
+void __devinit cpu_uninit(void)
+{
+ int cpu = raw_smp_processor_id();
+ cpu_clear(cpu, cpu_initialized);
+
+ /* lazy TLB state */
+ per_cpu(cpu_tlbstate, cpu).state = 0;
+ per_cpu(cpu_tlbstate, cpu).active_mm = &init_mm;
+}
+#endif
diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k7.c b/arch/i386/kernel/cpu/cpufreq/powernow-k7.c
index 5c530064eb74..73a5dc5b26b8 100644
--- a/arch/i386/kernel/cpu/cpufreq/powernow-k7.c
+++ b/arch/i386/kernel/cpu/cpufreq/powernow-k7.c
@@ -648,9 +648,7 @@ static int powernow_cpu_exit (struct cpufreq_policy *policy) {
}
#endif
- if (powernow_table)
- kfree(powernow_table);
-
+ kfree(powernow_table);
return 0;
}
diff --git a/arch/i386/kernel/cpu/intel.c b/arch/i386/kernel/cpu/intel.c
index 121aa2176e69..96a75d045835 100644
--- a/arch/i386/kernel/cpu/intel.c
+++ b/arch/i386/kernel/cpu/intel.c
@@ -28,7 +28,7 @@ extern int trap_init_f00f_bug(void);
struct movsl_mask movsl_mask;
#endif
-void __init early_intel_workaround(struct cpuinfo_x86 *c)
+void __devinit early_intel_workaround(struct cpuinfo_x86 *c)
{
if (c->x86_vendor != X86_VENDOR_INTEL)
return;
@@ -43,7 +43,7 @@ void __init early_intel_workaround(struct cpuinfo_x86 *c)
* This is called before we do cpu ident work
*/
-int __init ppro_with_ram_bug(void)
+int __devinit ppro_with_ram_bug(void)
{
/* Uses data from early_cpu_detect now */
if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
@@ -61,7 +61,7 @@ int __init ppro_with_ram_bug(void)
* P4 Xeon errata 037 workaround.
* Hardware prefetcher may cause stale data to be loaded into the cache.
*/
-static void __init Intel_errata_workarounds(struct cpuinfo_x86 *c)
+static void __devinit Intel_errata_workarounds(struct cpuinfo_x86 *c)
{
unsigned long lo, hi;
@@ -80,7 +80,7 @@ static void __init Intel_errata_workarounds(struct cpuinfo_x86 *c)
/*
* find out the number of processor cores on the die
*/
-static int __init num_cpu_cores(struct cpuinfo_x86 *c)
+static int __devinit num_cpu_cores(struct cpuinfo_x86 *c)
{
unsigned int eax;
@@ -98,7 +98,7 @@ static int __init num_cpu_cores(struct cpuinfo_x86 *c)
return 1;
}
-static void __init init_intel(struct cpuinfo_x86 *c)
+static void __devinit init_intel(struct cpuinfo_x86 *c)
{
unsigned int l2 = 0;
char *p = NULL;
@@ -204,7 +204,7 @@ static unsigned int intel_size_cache(struct cpuinfo_x86 * c, unsigned int size)
return size;
}
-static struct cpu_dev intel_cpu_dev __initdata = {
+static struct cpu_dev intel_cpu_dev __devinitdata = {
.c_vendor = "Intel",
.c_ident = { "GenuineIntel" },
.c_models = {
diff --git a/arch/i386/kernel/cpu/intel_cacheinfo.c b/arch/i386/kernel/cpu/intel_cacheinfo.c
index a710dc4eb20e..1d768b263269 100644
--- a/arch/i386/kernel/cpu/intel_cacheinfo.c
+++ b/arch/i386/kernel/cpu/intel_cacheinfo.c
@@ -28,7 +28,7 @@ struct _cache_table
};
/* all the cache descriptor types we care about (no TLB or trace cache entries) */
-static struct _cache_table cache_table[] __initdata =
+static struct _cache_table cache_table[] __devinitdata =
{
{ 0x06, LVL_1_INST, 8 }, /* 4-way set assoc, 32 byte line size */
{ 0x08, LVL_1_INST, 16 }, /* 4-way set assoc, 32 byte line size */
@@ -160,7 +160,7 @@ static int __init find_num_cache_leaves(void)
return retval;
}
-unsigned int __init init_intel_cacheinfo(struct cpuinfo_x86 *c)
+unsigned int __devinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
{
unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0; /* Cache sizes */
unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */
diff --git a/arch/i386/kernel/cpu/mcheck/k7.c b/arch/i386/kernel/cpu/mcheck/k7.c
index 8df52e86c4d2..c4abe7657397 100644
--- a/arch/i386/kernel/cpu/mcheck/k7.c
+++ b/arch/i386/kernel/cpu/mcheck/k7.c
@@ -69,7 +69,7 @@ static fastcall void k7_machine_check(struct pt_regs * regs, long error_code)
/* AMD K7 machine check is Intel like */
-void __init amd_mcheck_init(struct cpuinfo_x86 *c)
+void __devinit amd_mcheck_init(struct cpuinfo_x86 *c)
{
u32 l, h;
int i;
diff --git a/arch/i386/kernel/cpu/mcheck/mce.c b/arch/i386/kernel/cpu/mcheck/mce.c
index bf6d1aefafc0..2cf25d2ba0f1 100644
--- a/arch/i386/kernel/cpu/mcheck/mce.c
+++ b/arch/i386/kernel/cpu/mcheck/mce.c
@@ -16,7 +16,7 @@
#include "mce.h"
-int mce_disabled __initdata = 0;
+int mce_disabled __devinitdata = 0;
int nr_mce_banks;
EXPORT_SYMBOL_GPL(nr_mce_banks); /* non-fatal.o */
@@ -31,7 +31,7 @@ static fastcall void unexpected_machine_check(struct pt_regs * regs, long error_
void fastcall (*machine_check_vector)(struct pt_regs *, long error_code) = unexpected_machine_check;
/* This has to be run for each processor */
-void __init mcheck_init(struct cpuinfo_x86 *c)
+void __devinit mcheck_init(struct cpuinfo_x86 *c)
{
if (mce_disabled==1)
return;
diff --git a/arch/i386/kernel/cpu/mcheck/p4.c b/arch/i386/kernel/cpu/mcheck/p4.c
index 8b16ceb929b4..0abccb6fdf9e 100644
--- a/arch/i386/kernel/cpu/mcheck/p4.c
+++ b/arch/i386/kernel/cpu/mcheck/p4.c
@@ -78,7 +78,7 @@ fastcall void smp_thermal_interrupt(struct pt_regs *regs)
}
/* P4/Xeon Thermal regulation detect and init */
-static void __init intel_init_thermal(struct cpuinfo_x86 *c)
+static void __devinit intel_init_thermal(struct cpuinfo_x86 *c)
{
u32 l, h;
unsigned int cpu = smp_processor_id();
@@ -232,7 +232,7 @@ static fastcall void intel_machine_check(struct pt_regs * regs, long error_code)
}
-void __init intel_p4_mcheck_init(struct cpuinfo_x86 *c)
+void __devinit intel_p4_mcheck_init(struct cpuinfo_x86 *c)
{
u32 l, h;
int i;
diff --git a/arch/i386/kernel/cpu/mcheck/p5.c b/arch/i386/kernel/cpu/mcheck/p5.c
index c45a1b485c80..ec0614cd2925 100644
--- a/arch/i386/kernel/cpu/mcheck/p5.c
+++ b/arch/i386/kernel/cpu/mcheck/p5.c
@@ -29,7 +29,7 @@ static fastcall void pentium_machine_check(struct pt_regs * regs, long error_cod
}
/* Set up machine check reporting for processors with Intel style MCE */
-void __init intel_p5_mcheck_init(struct cpuinfo_x86 *c)
+void __devinit intel_p5_mcheck_init(struct cpuinfo_x86 *c)
{
u32 l, h;
diff --git a/arch/i386/kernel/cpu/mcheck/p6.c b/arch/i386/kernel/cpu/mcheck/p6.c
index 46640f8c2494..f01b73f947e1 100644
--- a/arch/i386/kernel/cpu/mcheck/p6.c
+++ b/arch/i386/kernel/cpu/mcheck/p6.c
@@ -80,7 +80,7 @@ static fastcall void intel_machine_check(struct pt_regs * regs, long error_code)
}
/* Set up machine check reporting for processors with Intel style MCE */
-void __init intel_p6_mcheck_init(struct cpuinfo_x86 *c)
+void __devinit intel_p6_mcheck_init(struct cpuinfo_x86 *c)
{
u32 l, h;
int i;
diff --git a/arch/i386/kernel/cpu/mcheck/winchip.c b/arch/i386/kernel/cpu/mcheck/winchip.c
index 753fa7acb984..7bae68fa168f 100644
--- a/arch/i386/kernel/cpu/mcheck/winchip.c
+++ b/arch/i386/kernel/cpu/mcheck/winchip.c
@@ -23,7 +23,7 @@ static fastcall void winchip_machine_check(struct pt_regs * regs, long error_cod
}
/* Set up machine check reporting on the Winchip C6 series */
-void __init winchip_mcheck_init(struct cpuinfo_x86 *c)
+void __devinit winchip_mcheck_init(struct cpuinfo_x86 *c)
{
u32 lo, hi;
machine_check_vector = winchip_machine_check;
diff --git a/arch/i386/kernel/cpu/mtrr/generic.c b/arch/i386/kernel/cpu/mtrr/generic.c
index f468a979e9aa..64d91f73a0a4 100644
--- a/arch/i386/kernel/cpu/mtrr/generic.c
+++ b/arch/i386/kernel/cpu/mtrr/generic.c
@@ -70,8 +70,7 @@ void __init get_mtrr_state(void)
/* Free resources associated with a struct mtrr_state */
void __init finalize_mtrr_state(void)
{
- if (mtrr_state.var_ranges)
- kfree(mtrr_state.var_ranges);
+ kfree(mtrr_state.var_ranges);
mtrr_state.var_ranges = NULL;
}
diff --git a/arch/i386/kernel/crash.c b/arch/i386/kernel/crash.c
new file mode 100644
index 000000000000..e5fab12f7926
--- /dev/null
+++ b/arch/i386/kernel/crash.c
@@ -0,0 +1,223 @@
+/*
+ * Architecture specific (i386) functions for kexec based crash dumps.
+ *
+ * Created by: Hariprasad Nellitheertha (hari@in.ibm.com)
+ *
+ * Copyright (C) IBM Corporation, 2004. All rights reserved.
+ *
+ */
+
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/smp.h>
+#include <linux/irq.h>
+#include <linux/reboot.h>
+#include <linux/kexec.h>
+#include <linux/irq.h>
+#include <linux/delay.h>
+#include <linux/elf.h>
+#include <linux/elfcore.h>
+
+#include <asm/processor.h>
+#include <asm/hardirq.h>
+#include <asm/nmi.h>
+#include <asm/hw_irq.h>
+#include <asm/apic.h>
+#include <mach_ipi.h>
+
+
+note_buf_t crash_notes[NR_CPUS];
+/* This keeps a track of which one is crashing cpu. */
+static int crashing_cpu;
+
+static u32 *append_elf_note(u32 *buf, char *name, unsigned type, void *data,
+ size_t data_len)
+{
+ struct elf_note note;
+
+ note.n_namesz = strlen(name) + 1;
+ note.n_descsz = data_len;
+ note.n_type = type;
+ memcpy(buf, &note, sizeof(note));
+ buf += (sizeof(note) +3)/4;
+ memcpy(buf, name, note.n_namesz);
+ buf += (note.n_namesz + 3)/4;
+ memcpy(buf, data, note.n_descsz);
+ buf += (note.n_descsz + 3)/4;
+
+ return buf;
+}
+
+static void final_note(u32 *buf)
+{
+ struct elf_note note;
+
+ note.n_namesz = 0;
+ note.n_descsz = 0;
+ note.n_type = 0;
+ memcpy(buf, &note, sizeof(note));
+}
+
+static void crash_save_this_cpu(struct pt_regs *regs, int cpu)
+{
+ struct elf_prstatus prstatus;
+ u32 *buf;
+
+ if ((cpu < 0) || (cpu >= NR_CPUS))
+ return;
+
+ /* Using ELF notes here is opportunistic.
+ * I need a well defined structure format
+ * for the data I pass, and I need tags
+ * on the data to indicate what information I have
+ * squirrelled away. ELF notes happen to provide
+ * all of that that no need to invent something new.
+ */
+ buf = &crash_notes[cpu][0];
+ memset(&prstatus, 0, sizeof(prstatus));
+ prstatus.pr_pid = current->pid;
+ elf_core_copy_regs(&prstatus.pr_reg, regs);
+ buf = append_elf_note(buf, "CORE", NT_PRSTATUS, &prstatus,
+ sizeof(prstatus));
+ final_note(buf);
+}
+
+static void crash_get_current_regs(struct pt_regs *regs)
+{
+ __asm__ __volatile__("movl %%ebx,%0" : "=m"(regs->ebx));
+ __asm__ __volatile__("movl %%ecx,%0" : "=m"(regs->ecx));
+ __asm__ __volatile__("movl %%edx,%0" : "=m"(regs->edx));
+ __asm__ __volatile__("movl %%esi,%0" : "=m"(regs->esi));
+ __asm__ __volatile__("movl %%edi,%0" : "=m"(regs->edi));
+ __asm__ __volatile__("movl %%ebp,%0" : "=m"(regs->ebp));
+ __asm__ __volatile__("movl %%eax,%0" : "=m"(regs->eax));
+ __asm__ __volatile__("movl %%esp,%0" : "=m"(regs->esp));
+ __asm__ __volatile__("movw %%ss, %%ax;" :"=a"(regs->xss));
+ __asm__ __volatile__("movw %%cs, %%ax;" :"=a"(regs->xcs));
+ __asm__ __volatile__("movw %%ds, %%ax;" :"=a"(regs->xds));
+ __asm__ __volatile__("movw %%es, %%ax;" :"=a"(regs->xes));
+ __asm__ __volatile__("pushfl; popl %0" :"=m"(regs->eflags));
+
+ regs->eip = (unsigned long)current_text_addr();
+}
+
+/* CPU does not save ss and esp on stack if execution is already
+ * running in kernel mode at the time of NMI occurrence. This code
+ * fixes it.
+ */
+static void crash_setup_regs(struct pt_regs *newregs, struct pt_regs *oldregs)
+{
+ memcpy(newregs, oldregs, sizeof(*newregs));
+ newregs->esp = (unsigned long)&(oldregs->esp);
+ __asm__ __volatile__("xorl %eax, %eax;");
+ __asm__ __volatile__ ("movw %%ss, %%ax;" :"=a"(newregs->xss));
+}
+
+/* We may have saved_regs from where the error came from
+ * or it is NULL if via a direct panic().
+ */
+static void crash_save_self(struct pt_regs *saved_regs)
+{
+ struct pt_regs regs;
+ int cpu;
+
+ cpu = smp_processor_id();
+ if (saved_regs)
+ crash_setup_regs(&regs, saved_regs);
+ else
+ crash_get_current_regs(&regs);
+ crash_save_this_cpu(&regs, cpu);
+}
+
+#ifdef CONFIG_SMP
+static atomic_t waiting_for_crash_ipi;
+
+static int crash_nmi_callback(struct pt_regs *regs, int cpu)
+{
+ struct pt_regs fixed_regs;
+
+ /* Don't do anything if this handler is invoked on crashing cpu.
+ * Otherwise, system will completely hang. Crashing cpu can get
+ * an NMI if system was initially booted with nmi_watchdog parameter.
+ */
+ if (cpu == crashing_cpu)
+ return 1;
+ local_irq_disable();
+
+ if (!user_mode(regs)) {
+ crash_setup_regs(&fixed_regs, regs);
+ regs = &fixed_regs;
+ }
+ crash_save_this_cpu(regs, cpu);
+ disable_local_APIC();
+ atomic_dec(&waiting_for_crash_ipi);
+ /* Assume hlt works */
+ __asm__("hlt");
+ for(;;);
+
+ return 1;
+}
+
+/*
+ * By using the NMI code instead of a vector we just sneak thru the
+ * word generator coming out with just what we want. AND it does
+ * not matter if clustered_apic_mode is set or not.
+ */
+static void smp_send_nmi_allbutself(void)
+{
+ send_IPI_allbutself(APIC_DM_NMI);
+}
+
+static void nmi_shootdown_cpus(void)
+{
+ unsigned long msecs;
+
+ atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1);
+ /* Would it be better to replace the trap vector here? */
+ set_nmi_callback(crash_nmi_callback);
+ /* Ensure the new callback function is set before sending
+ * out the NMI
+ */
+ wmb();
+
+ smp_send_nmi_allbutself();
+
+ msecs = 1000; /* Wait at most a second for the other cpus to stop */
+ while ((atomic_read(&waiting_for_crash_ipi) > 0) && msecs) {
+ mdelay(1);
+ msecs--;
+ }
+
+ /* Leave the nmi callback set */
+ disable_local_APIC();
+}
+#else
+static void nmi_shootdown_cpus(void)
+{
+ /* There are no cpus to shootdown */
+}
+#endif
+
+void machine_crash_shutdown(struct pt_regs *regs)
+{
+ /* This function is only called after the system
+ * has paniced or is otherwise in a critical state.
+ * The minimum amount of code to allow a kexec'd kernel
+ * to run successfully needs to happen here.
+ *
+ * In practice this means shooting down the other cpus in
+ * an SMP system.
+ */
+ /* The kernel is broken so disable interrupts */
+ local_irq_disable();
+
+ /* Make a note of crashing cpu. Will be used in NMI callback.*/
+ crashing_cpu = smp_processor_id();
+ nmi_shootdown_cpus();
+ lapic_shutdown();
+#if defined(CONFIG_X86_IO_APIC)
+ disable_IO_APIC();
+#endif
+ crash_save_self(regs);
+}
diff --git a/arch/i386/kernel/dmi_scan.c b/arch/i386/kernel/dmi_scan.c
index 6ed7e28f306c..a3cdf894302b 100644
--- a/arch/i386/kernel/dmi_scan.c
+++ b/arch/i386/kernel/dmi_scan.c
@@ -1,22 +1,15 @@
#include <linux/types.h>
-#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/init.h>
#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/acpi.h>
-#include <asm/io.h>
-#include <linux/pm.h>
-#include <asm/system.h>
#include <linux/dmi.h>
#include <linux/bootmem.h>
-struct dmi_header
-{
- u8 type;
- u8 length;
- u16 handle;
+struct dmi_header {
+ u8 type;
+ u8 length;
+ u16 handle;
};
#undef DMI_DEBUG
@@ -29,15 +22,13 @@ struct dmi_header
static char * __init dmi_string(struct dmi_header *dm, u8 s)
{
- u8 *bp=(u8 *)dm;
- bp+=dm->length;
- if(!s)
+ u8 *bp = ((u8 *) dm) + dm->length;
+
+ if (!s)
return "";
s--;
- while(s>0 && *bp)
- {
- bp+=strlen(bp);
- bp++;
+ while (s > 0 && *bp) {
+ bp += strlen(bp) + 1;
s--;
}
return bp;
@@ -47,16 +38,14 @@ static char * __init dmi_string(struct dmi_header *dm, u8 s)
* We have to be cautious here. We have seen BIOSes with DMI pointers
* pointing to completely the wrong place for example
*/
-
-static int __init dmi_table(u32 base, int len, int num, void (*decode)(struct dmi_header *))
+static int __init dmi_table(u32 base, int len, int num,
+ void (*decode)(struct dmi_header *))
{
- u8 *buf;
- struct dmi_header *dm;
- u8 *data;
- int i=0;
+ u8 *buf, *data;
+ int i = 0;
buf = bt_ioremap(base, len);
- if(buf==NULL)
+ if (buf == NULL)
return -1;
data = buf;
@@ -65,36 +54,34 @@ static int __init dmi_table(u32 base, int len, int num, void (*decode)(struct dm
* Stop when we see all the items the table claimed to have
* OR we run off the end of the table (also happens)
*/
-
- while(i<num && data-buf+sizeof(struct dmi_header)<=len)
- {
- dm=(struct dmi_header *)data;
+ while ((i < num) && (data - buf + sizeof(struct dmi_header)) <= len) {
+ struct dmi_header *dm = (struct dmi_header *)data;
/*
* We want to know the total length (formated area and strings)
* before decoding to make sure we won't run off the table in
* dmi_decode or dmi_string
*/
- data+=dm->length;
- while(data-buf<len-1 && (data[0] || data[1]))
+ data += dm->length;
+ while ((data - buf < len - 1) && (data[0] || data[1]))
data++;
- if(data-buf<len-1)
+ if (data - buf < len - 1)
decode(dm);
- data+=2;
+ data += 2;
i++;
}
bt_iounmap(buf, len);
return 0;
}
-
-inline static int __init dmi_checksum(u8 *buf)
+static int __init dmi_checksum(u8 *buf)
{
- u8 sum=0;
+ u8 sum = 0;
int a;
- for(a=0; a<15; a++)
- sum+=buf[a];
- return (sum==0);
+ for (a = 0; a < 15; a++)
+ sum += buf[a];
+
+ return sum == 0;
}
static int __init dmi_iterate(void (*decode)(struct dmi_header *))
@@ -110,28 +97,30 @@ static int __init dmi_iterate(void (*decode)(struct dmi_header *))
p = ioremap(0xF0000, 0x10000);
if (p == NULL)
return -1;
+
for (q = p; q < p + 0x10000; q += 16) {
memcpy_fromio(buf, q, 15);
- if(memcmp(buf, "_DMI_", 5)==0 && dmi_checksum(buf))
- {
- u16 num=buf[13]<<8|buf[12];
- u16 len=buf[7]<<8|buf[6];
- u32 base=buf[11]<<24|buf[10]<<16|buf[9]<<8|buf[8];
+ if ((memcmp(buf, "_DMI_", 5) == 0) && dmi_checksum(buf)) {
+ u16 num = (buf[13] << 8) | buf[12];
+ u16 len = (buf[7] << 8) | buf[6];
+ u32 base = (buf[11] << 24) | (buf[10] << 16) |
+ (buf[9] << 8) | buf[8];
/*
* DMI version 0.0 means that the real version is taken from
* the SMBIOS version, which we don't know at this point.
*/
- if(buf[14]!=0)
+ if (buf[14] != 0)
printk(KERN_INFO "DMI %d.%d present.\n",
- buf[14]>>4, buf[14]&0x0F);
+ buf[14] >> 4, buf[14] & 0xF);
else
printk(KERN_INFO "DMI present.\n");
+
dmi_printk((KERN_INFO "%d structures occupying %d bytes.\n",
num, len));
- dmi_printk((KERN_INFO "DMI table at 0x%08X.\n",
- base));
- if(dmi_table(base,len, num, decode)==0)
+ dmi_printk((KERN_INFO "DMI table at 0x%08X.\n", base));
+
+ if (dmi_table(base,len, num, decode) == 0)
return 0;
}
}
@@ -143,16 +132,17 @@ static char *dmi_ident[DMI_STRING_MAX];
/*
* Save a DMI string
*/
-
static void __init dmi_save_ident(struct dmi_header *dm, int slot, int string)
{
char *d = (char*)dm;
char *p = dmi_string(dm, d[string]);
- if(p==NULL || *p == 0)
+
+ if (p == NULL || *p == 0)
return;
if (dmi_ident[slot])
return;
- dmi_ident[slot] = alloc_bootmem(strlen(p)+1);
+
+ dmi_ident[slot] = alloc_bootmem(strlen(p) + 1);
if(dmi_ident[slot])
strcpy(dmi_ident[slot], p);
else
@@ -160,281 +150,47 @@ static void __init dmi_save_ident(struct dmi_header *dm, int slot, int string)
}
/*
- * Ugly compatibility crap.
- */
-#define dmi_blacklist dmi_system_id
-#define NO_MATCH { DMI_NONE, NULL}
-#define MATCH DMI_MATCH
-
-/*
- * Toshiba keyboard likes to repeat keys when they are not repeated.
- */
-
-static __init int broken_toshiba_keyboard(struct dmi_blacklist *d)
-{
- printk(KERN_WARNING "Toshiba with broken keyboard detected. If your keyboard sometimes generates 3 keypresses instead of one, see http://davyd.ucc.asn.au/projects/toshiba/README\n");
- return 0;
-}
-
-
-#ifdef CONFIG_ACPI_SLEEP
-static __init int reset_videomode_after_s3(struct dmi_blacklist *d)
-{
- /* See acpi_wakeup.S */
- extern long acpi_video_flags;
- acpi_video_flags |= 2;
- return 0;
-}
-#endif
-
-
-#ifdef CONFIG_ACPI_BOOT
-extern int acpi_force;
-
-static __init __attribute__((unused)) int dmi_disable_acpi(struct dmi_blacklist *d)
-{
- if (!acpi_force) {
- printk(KERN_NOTICE "%s detected: acpi off\n",d->ident);
- disable_acpi();
- } else {
- printk(KERN_NOTICE
- "Warning: DMI blacklist says broken, but acpi forced\n");
- }
- return 0;
-}
-
-/*
- * Limit ACPI to CPU enumeration for HT
- */
-static __init __attribute__((unused)) int force_acpi_ht(struct dmi_blacklist *d)
-{
- if (!acpi_force) {
- printk(KERN_NOTICE "%s detected: force use of acpi=ht\n", d->ident);
- disable_acpi();
- acpi_ht = 1;
- } else {
- printk(KERN_NOTICE
- "Warning: acpi=force overrules DMI blacklist: acpi=ht\n");
- }
- return 0;
-}
-#endif
-
-#ifdef CONFIG_ACPI_PCI
-static __init int disable_acpi_irq(struct dmi_blacklist *d)
-{
- if (!acpi_force) {
- printk(KERN_NOTICE "%s detected: force use of acpi=noirq\n",
- d->ident);
- acpi_noirq_set();
- }
- return 0;
-}
-static __init int disable_acpi_pci(struct dmi_blacklist *d)
-{
- if (!acpi_force) {
- printk(KERN_NOTICE "%s detected: force use of pci=noacpi\n",
- d->ident);
- acpi_disable_pci();
- }
- return 0;
-}
-#endif
-
-/*
- * Process the DMI blacklists
- */
-
-
-/*
- * This will be expanded over time to force things like the APM
- * interrupt mask settings according to the laptop
- */
-
-static __initdata struct dmi_blacklist dmi_blacklist[]={
-
- { broken_toshiba_keyboard, "Toshiba Satellite 4030cdt", { /* Keyboard generates spurious repeats */
- MATCH(DMI_PRODUCT_NAME, "S4030CDT/4.3"),
- NO_MATCH, NO_MATCH, NO_MATCH
- } },
-#ifdef CONFIG_ACPI_SLEEP
- { reset_videomode_after_s3, "Toshiba Satellite 4030cdt", { /* Reset video mode after returning from ACPI S3 sleep */
- MATCH(DMI_PRODUCT_NAME, "S4030CDT/4.3"),
- NO_MATCH, NO_MATCH, NO_MATCH
- } },
-#endif
-
-#ifdef CONFIG_ACPI_BOOT
- /*
- * If your system is blacklisted here, but you find that acpi=force
- * works for you, please contact acpi-devel@sourceforge.net
- */
-
- /*
- * Boxes that need ACPI disabled
- */
-
- { dmi_disable_acpi, "IBM Thinkpad", {
- MATCH(DMI_BOARD_VENDOR, "IBM"),
- MATCH(DMI_BOARD_NAME, "2629H1G"),
- NO_MATCH, NO_MATCH }},
-
- /*
- * Boxes that need acpi=ht
- */
-
- { force_acpi_ht, "FSC Primergy T850", {
- MATCH(DMI_SYS_VENDOR, "FUJITSU SIEMENS"),
- MATCH(DMI_PRODUCT_NAME, "PRIMERGY T850"),
- NO_MATCH, NO_MATCH }},
-
- { force_acpi_ht, "DELL GX240", {
- MATCH(DMI_BOARD_VENDOR, "Dell Computer Corporation"),
- MATCH(DMI_BOARD_NAME, "OptiPlex GX240"),
- NO_MATCH, NO_MATCH }},
-
- { force_acpi_ht, "HP VISUALIZE NT Workstation", {
- MATCH(DMI_BOARD_VENDOR, "Hewlett-Packard"),
- MATCH(DMI_PRODUCT_NAME, "HP VISUALIZE NT Workstation"),
- NO_MATCH, NO_MATCH }},
-
- { force_acpi_ht, "Compaq Workstation W8000", {
- MATCH(DMI_SYS_VENDOR, "Compaq"),
- MATCH(DMI_PRODUCT_NAME, "Workstation W8000"),
- NO_MATCH, NO_MATCH }},
-
- { force_acpi_ht, "ASUS P4B266", {
- MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."),
- MATCH(DMI_BOARD_NAME, "P4B266"),
- NO_MATCH, NO_MATCH }},
-
- { force_acpi_ht, "ASUS P2B-DS", {
- MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."),
- MATCH(DMI_BOARD_NAME, "P2B-DS"),
- NO_MATCH, NO_MATCH }},
-
- { force_acpi_ht, "ASUS CUR-DLS", {
- MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."),
- MATCH(DMI_BOARD_NAME, "CUR-DLS"),
- NO_MATCH, NO_MATCH }},
-
- { force_acpi_ht, "ABIT i440BX-W83977", {
- MATCH(DMI_BOARD_VENDOR, "ABIT <http://www.abit.com>"),
- MATCH(DMI_BOARD_NAME, "i440BX-W83977 (BP6)"),
- NO_MATCH, NO_MATCH }},
-
- { force_acpi_ht, "IBM Bladecenter", {
- MATCH(DMI_BOARD_VENDOR, "IBM"),
- MATCH(DMI_BOARD_NAME, "IBM eServer BladeCenter HS20"),
- NO_MATCH, NO_MATCH }},
-
- { force_acpi_ht, "IBM eServer xSeries 360", {
- MATCH(DMI_BOARD_VENDOR, "IBM"),
- MATCH(DMI_BOARD_NAME, "eServer xSeries 360"),
- NO_MATCH, NO_MATCH }},
-
- { force_acpi_ht, "IBM eserver xSeries 330", {
- MATCH(DMI_BOARD_VENDOR, "IBM"),
- MATCH(DMI_BOARD_NAME, "eserver xSeries 330"),
- NO_MATCH, NO_MATCH }},
-
- { force_acpi_ht, "IBM eserver xSeries 440", {
- MATCH(DMI_BOARD_VENDOR, "IBM"),
- MATCH(DMI_PRODUCT_NAME, "eserver xSeries 440"),
- NO_MATCH, NO_MATCH }},
-
-#endif // CONFIG_ACPI_BOOT
-
-#ifdef CONFIG_ACPI_PCI
- /*
- * Boxes that need ACPI PCI IRQ routing disabled
- */
-
- { disable_acpi_irq, "ASUS A7V", {
- MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC"),
- MATCH(DMI_BOARD_NAME, "<A7V>"),
- /* newer BIOS, Revision 1011, does work */
- MATCH(DMI_BIOS_VERSION, "ASUS A7V ACPI BIOS Revision 1007"),
- NO_MATCH }},
-
- /*
- * Boxes that need ACPI PCI IRQ routing and PCI scan disabled
- */
- { disable_acpi_pci, "ASUS PR-DLS", { /* _BBN 0 bug */
- MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."),
- MATCH(DMI_BOARD_NAME, "PR-DLS"),
- MATCH(DMI_BIOS_VERSION, "ASUS PR-DLS ACPI BIOS Revision 1010"),
- MATCH(DMI_BIOS_DATE, "03/21/2003") }},
-
- { disable_acpi_pci, "Acer TravelMate 36x Laptop", {
- MATCH(DMI_SYS_VENDOR, "Acer"),
- MATCH(DMI_PRODUCT_NAME, "TravelMate 360"),
- NO_MATCH, NO_MATCH
- } },
-
-#endif
-
- { NULL, }
-};
-
-/*
* Process a DMI table entry. Right now all we care about are the BIOS
* and machine entries. For 2.5 we should pull the smbus controller info
* out of here.
*/
-
static void __init dmi_decode(struct dmi_header *dm)
{
-#ifdef DMI_DEBUG
- u8 *data = (u8 *)dm;
-#endif
+ u8 *data __attribute__((__unused__)) = (u8 *)dm;
- switch(dm->type)
- {
- case 0:
- dmi_printk(("BIOS Vendor: %s\n",
- dmi_string(dm, data[4])));
- dmi_save_ident(dm, DMI_BIOS_VENDOR, 4);
- dmi_printk(("BIOS Version: %s\n",
- dmi_string(dm, data[5])));
- dmi_save_ident(dm, DMI_BIOS_VERSION, 5);
- dmi_printk(("BIOS Release: %s\n",
- dmi_string(dm, data[8])));
- dmi_save_ident(dm, DMI_BIOS_DATE, 8);
- break;
- case 1:
- dmi_printk(("System Vendor: %s\n",
- dmi_string(dm, data[4])));
- dmi_save_ident(dm, DMI_SYS_VENDOR, 4);
- dmi_printk(("Product Name: %s\n",
- dmi_string(dm, data[5])));
- dmi_save_ident(dm, DMI_PRODUCT_NAME, 5);
- dmi_printk(("Version: %s\n",
- dmi_string(dm, data[6])));
- dmi_save_ident(dm, DMI_PRODUCT_VERSION, 6);
- dmi_printk(("Serial Number: %s\n",
- dmi_string(dm, data[7])));
- break;
- case 2:
- dmi_printk(("Board Vendor: %s\n",
- dmi_string(dm, data[4])));
- dmi_save_ident(dm, DMI_BOARD_VENDOR, 4);
- dmi_printk(("Board Name: %s\n",
- dmi_string(dm, data[5])));
- dmi_save_ident(dm, DMI_BOARD_NAME, 5);
- dmi_printk(("Board Version: %s\n",
- dmi_string(dm, data[6])));
- dmi_save_ident(dm, DMI_BOARD_VERSION, 6);
- break;
+ switch(dm->type) {
+ case 0:
+ dmi_printk(("BIOS Vendor: %s\n", dmi_string(dm, data[4])));
+ dmi_save_ident(dm, DMI_BIOS_VENDOR, 4);
+ dmi_printk(("BIOS Version: %s\n", dmi_string(dm, data[5])));
+ dmi_save_ident(dm, DMI_BIOS_VERSION, 5);
+ dmi_printk(("BIOS Release: %s\n", dmi_string(dm, data[8])));
+ dmi_save_ident(dm, DMI_BIOS_DATE, 8);
+ break;
+ case 1:
+ dmi_printk(("System Vendor: %s\n", dmi_string(dm, data[4])));
+ dmi_save_ident(dm, DMI_SYS_VENDOR, 4);
+ dmi_printk(("Product Name: %s\n", dmi_string(dm, data[5])));
+ dmi_save_ident(dm, DMI_PRODUCT_NAME, 5);
+ dmi_printk(("Version: %s\n", dmi_string(dm, data[6])));
+ dmi_save_ident(dm, DMI_PRODUCT_VERSION, 6);
+ dmi_printk(("Serial Number: %s\n", dmi_string(dm, data[7])));
+ dmi_save_ident(dm, DMI_PRODUCT_SERIAL, 7);
+ break;
+ case 2:
+ dmi_printk(("Board Vendor: %s\n", dmi_string(dm, data[4])));
+ dmi_save_ident(dm, DMI_BOARD_VENDOR, 4);
+ dmi_printk(("Board Name: %s\n", dmi_string(dm, data[5])));
+ dmi_save_ident(dm, DMI_BOARD_NAME, 5);
+ dmi_printk(("Board Version: %s\n", dmi_string(dm, data[6])));
+ dmi_save_ident(dm, DMI_BOARD_VERSION, 6);
+ break;
}
}
void __init dmi_scan_machine(void)
{
- int err = dmi_iterate(dmi_decode);
- if(err == 0)
- dmi_check_system(dmi_blacklist);
- else
+ if (dmi_iterate(dmi_decode))
printk(KERN_INFO "DMI not present.\n");
}
@@ -470,7 +226,6 @@ fail: d++;
return count;
}
-
EXPORT_SYMBOL(dmi_check_system);
/**
@@ -480,8 +235,8 @@ EXPORT_SYMBOL(dmi_check_system);
* Returns one DMI data value, can be used to perform
* complex DMI data checks.
*/
-char * dmi_get_system_info(int field)
+char *dmi_get_system_info(int field)
{
return dmi_ident[field];
}
-
+EXPORT_SYMBOL(dmi_get_system_info);
diff --git a/arch/i386/kernel/efi.c b/arch/i386/kernel/efi.c
index f732f427b418..385883ea8c19 100644
--- a/arch/i386/kernel/efi.c
+++ b/arch/i386/kernel/efi.c
@@ -30,6 +30,7 @@
#include <linux/ioport.h>
#include <linux/module.h>
#include <linux/efi.h>
+#include <linux/kexec.h>
#include <asm/setup.h>
#include <asm/io.h>
@@ -598,6 +599,9 @@ efi_initialize_iomem_resources(struct resource *code_resource,
if (md->type == EFI_CONVENTIONAL_MEMORY) {
request_resource(res, code_resource);
request_resource(res, data_resource);
+#ifdef CONFIG_KEXEC
+ request_resource(res, &crashk_res);
+#endif
}
}
}
diff --git a/arch/i386/kernel/head.S b/arch/i386/kernel/head.S
index e966fc8c44c4..4477bb107098 100644
--- a/arch/i386/kernel/head.S
+++ b/arch/i386/kernel/head.S
@@ -299,7 +299,6 @@ is386: movl $2,%ecx # set MP
movl %eax,%cr0
call check_x87
- incb ready
lgdt cpu_gdt_descr
lidt idt_descr
ljmp $(__KERNEL_CS),$1f
@@ -316,8 +315,9 @@ is386: movl $2,%ecx # set MP
lldt %ax
cld # gcc2 wants the direction flag cleared at all times
#ifdef CONFIG_SMP
- movb ready, %cl
- cmpb $1,%cl
+ movb ready, %cl
+ movb $1, ready
+ cmpb $0,%cl
je 1f # the first CPU calls start_kernel
# all other CPUs call initialize_secondary
call initialize_secondary
diff --git a/arch/i386/kernel/i8259.c b/arch/i386/kernel/i8259.c
index 2c4813b47e57..178f4e9bac9d 100644
--- a/arch/i386/kernel/i8259.c
+++ b/arch/i386/kernel/i8259.c
@@ -268,10 +268,22 @@ static int i8259A_suspend(struct sys_device *dev, pm_message_t state)
return 0;
}
+static int i8259A_shutdown(struct sys_device *dev)
+{
+ /* Put the i8259A into a quiescent state that
+ * the kernel initialization code can get it
+ * out of.
+ */
+ outb(0xff, 0x21); /* mask all of 8259A-1 */
+ outb(0xff, 0xA1); /* mask all of 8259A-1 */
+ return 0;
+}
+
static struct sysdev_class i8259_sysdev_class = {
set_kset_name("i8259"),
.suspend = i8259A_suspend,
.resume = i8259A_resume,
+ .shutdown = i8259A_shutdown,
};
static struct sys_device device_i8259A = {
diff --git a/arch/i386/kernel/io_apic.c b/arch/i386/kernel/io_apic.c
index 08540bc4ba3e..35eb8e29c485 100644
--- a/arch/i386/kernel/io_apic.c
+++ b/arch/i386/kernel/io_apic.c
@@ -573,12 +573,14 @@ static int balanced_irq(void *unused)
for ( ; ; ) {
set_current_state(TASK_INTERRUPTIBLE);
time_remaining = schedule_timeout(time_remaining);
- try_to_freeze(PF_FREEZE);
+ try_to_freeze();
if (time_after(jiffies,
prev_balance_time+balanced_irq_interval)) {
+ preempt_disable();
do_irq_balance();
prev_balance_time = jiffies;
time_remaining = balanced_irq_interval;
+ preempt_enable();
}
}
return 0;
@@ -630,10 +632,8 @@ static int __init balanced_irq_init(void)
printk(KERN_ERR "balanced_irq_init: failed to spawn balanced_irq");
failed:
for (i = 0; i < NR_CPUS; i++) {
- if(irq_cpu_data[i].irq_delta)
- kfree(irq_cpu_data[i].irq_delta);
- if(irq_cpu_data[i].last_irq)
- kfree(irq_cpu_data[i].last_irq);
+ kfree(irq_cpu_data[i].irq_delta);
+ kfree(irq_cpu_data[i].last_irq);
}
return 0;
}
@@ -1634,12 +1634,43 @@ static void __init enable_IO_APIC(void)
*/
void disable_IO_APIC(void)
{
+ int pin;
/*
* Clear the IO-APIC before rebooting:
*/
clear_IO_APIC();
- disconnect_bsp_APIC();
+ /*
+ * If the i82559 is routed through an IOAPIC
+ * Put that IOAPIC in virtual wire mode
+ * so legacy interrups can be delivered.
+ */
+ pin = find_isa_irq_pin(0, mp_ExtINT);
+ if (pin != -1) {
+ struct IO_APIC_route_entry entry;
+ unsigned long flags;
+
+ memset(&entry, 0, sizeof(entry));
+ entry.mask = 0; /* Enabled */
+ entry.trigger = 0; /* Edge */
+ entry.irr = 0;
+ entry.polarity = 0; /* High */
+ entry.delivery_status = 0;
+ entry.dest_mode = 0; /* Physical */
+ entry.delivery_mode = 7; /* ExtInt */
+ entry.vector = 0;
+ entry.dest.physical.physical_dest = 0;
+
+
+ /*
+ * Add it to the IO-APIC irq-routing table:
+ */
+ spin_lock_irqsave(&ioapic_lock, flags);
+ io_apic_write(0, 0x11+2*pin, *(((int *)&entry)+1));
+ io_apic_write(0, 0x10+2*pin, *(((int *)&entry)+0));
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+ }
+ disconnect_bsp_APIC(pin != -1);
}
/*
diff --git a/arch/i386/kernel/irq.c b/arch/i386/kernel/irq.c
index 73945a3c53c4..ce66dcc26d90 100644
--- a/arch/i386/kernel/irq.c
+++ b/arch/i386/kernel/irq.c
@@ -15,6 +15,9 @@
#include <linux/seq_file.h>
#include <linux/interrupt.h>
#include <linux/kernel_stat.h>
+#include <linux/notifier.h>
+#include <linux/cpu.h>
+#include <linux/delay.h>
DEFINE_PER_CPU(irq_cpustat_t, irq_stat) ____cacheline_maxaligned_in_smp;
EXPORT_PER_CPU_SYMBOL(irq_stat);
@@ -153,6 +156,11 @@ void irq_ctx_init(int cpu)
cpu,hardirq_ctx[cpu],softirq_ctx[cpu]);
}
+void irq_ctx_exit(int cpu)
+{
+ hardirq_ctx[cpu] = NULL;
+}
+
extern asmlinkage void __do_softirq(void);
asmlinkage void do_softirq(void)
@@ -210,9 +218,8 @@ int show_interrupts(struct seq_file *p, void *v)
if (i == 0) {
seq_printf(p, " ");
- for (j=0; j<NR_CPUS; j++)
- if (cpu_online(j))
- seq_printf(p, "CPU%d ",j);
+ for_each_cpu(j)
+ seq_printf(p, "CPU%d ",j);
seq_putc(p, '\n');
}
@@ -225,9 +232,8 @@ int show_interrupts(struct seq_file *p, void *v)
#ifndef CONFIG_SMP
seq_printf(p, "%10u ", kstat_irqs(i));
#else
- for (j = 0; j < NR_CPUS; j++)
- if (cpu_online(j))
- seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
+ for_each_cpu(j)
+ seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
#endif
seq_printf(p, " %14s", irq_desc[i].handler->typename);
seq_printf(p, " %s", action->name);
@@ -240,16 +246,14 @@ skip:
spin_unlock_irqrestore(&irq_desc[i].lock, flags);
} else if (i == NR_IRQS) {
seq_printf(p, "NMI: ");
- for (j = 0; j < NR_CPUS; j++)
- if (cpu_online(j))
- seq_printf(p, "%10u ", nmi_count(j));
+ for_each_cpu(j)
+ seq_printf(p, "%10u ", nmi_count(j));
seq_putc(p, '\n');
#ifdef CONFIG_X86_LOCAL_APIC
seq_printf(p, "LOC: ");
- for (j = 0; j < NR_CPUS; j++)
- if (cpu_online(j))
- seq_printf(p, "%10u ",
- per_cpu(irq_stat,j).apic_timer_irqs);
+ for_each_cpu(j)
+ seq_printf(p, "%10u ",
+ per_cpu(irq_stat,j).apic_timer_irqs);
seq_putc(p, '\n');
#endif
seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
@@ -259,3 +263,45 @@ skip:
}
return 0;
}
+
+#ifdef CONFIG_HOTPLUG_CPU
+#include <mach_apic.h>
+
+void fixup_irqs(cpumask_t map)
+{
+ unsigned int irq;
+ static int warned;
+
+ for (irq = 0; irq < NR_IRQS; irq++) {
+ cpumask_t mask;
+ if (irq == 2)
+ continue;
+
+ cpus_and(mask, irq_affinity[irq], map);
+ if (any_online_cpu(mask) == NR_CPUS) {
+ printk("Breaking affinity for irq %i\n", irq);
+ mask = map;
+ }
+ if (irq_desc[irq].handler->set_affinity)
+ irq_desc[irq].handler->set_affinity(irq, mask);
+ else if (irq_desc[irq].action && !(warned++))
+ printk("Cannot set affinity for irq %i\n", irq);
+ }
+
+#if 0
+ barrier();
+ /* Ingo Molnar says: "after the IO-APIC masks have been redirected
+ [note the nop - the interrupt-enable boundary on x86 is two
+ instructions from sti] - to flush out pending hardirqs and
+ IPIs. After this point nothing is supposed to reach this CPU." */
+ __asm__ __volatile__("sti; nop; cli");
+ barrier();
+#else
+ /* That doesn't seem sufficient. Give it 1ms. */
+ local_irq_enable();
+ mdelay(1);
+ local_irq_disable();
+#endif
+}
+#endif
+
diff --git a/arch/i386/kernel/machine_kexec.c b/arch/i386/kernel/machine_kexec.c
new file mode 100644
index 000000000000..52ed18d8b511
--- /dev/null
+++ b/arch/i386/kernel/machine_kexec.c
@@ -0,0 +1,226 @@
+/*
+ * machine_kexec.c - handle transition of Linux booting another kernel
+ * Copyright (C) 2002-2005 Eric Biederman <ebiederm@xmission.com>
+ *
+ * This source code is licensed under the GNU General Public License,
+ * Version 2. See the file COPYING for more details.
+ */
+
+#include <linux/mm.h>
+#include <linux/kexec.h>
+#include <linux/delay.h>
+#include <asm/pgtable.h>
+#include <asm/pgalloc.h>
+#include <asm/tlbflush.h>
+#include <asm/mmu_context.h>
+#include <asm/io.h>
+#include <asm/apic.h>
+#include <asm/cpufeature.h>
+
+static inline unsigned long read_cr3(void)
+{
+ unsigned long cr3;
+ asm volatile("movl %%cr3,%0": "=r"(cr3));
+ return cr3;
+}
+
+#define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE)))
+
+#define L0_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
+#define L1_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
+#define L2_ATTR (_PAGE_PRESENT)
+
+#define LEVEL0_SIZE (1UL << 12UL)
+
+#ifndef CONFIG_X86_PAE
+#define LEVEL1_SIZE (1UL << 22UL)
+static u32 pgtable_level1[1024] PAGE_ALIGNED;
+
+static void identity_map_page(unsigned long address)
+{
+ unsigned long level1_index, level2_index;
+ u32 *pgtable_level2;
+
+ /* Find the current page table */
+ pgtable_level2 = __va(read_cr3());
+
+ /* Find the indexes of the physical address to identity map */
+ level1_index = (address % LEVEL1_SIZE)/LEVEL0_SIZE;
+ level2_index = address / LEVEL1_SIZE;
+
+ /* Identity map the page table entry */
+ pgtable_level1[level1_index] = address | L0_ATTR;
+ pgtable_level2[level2_index] = __pa(pgtable_level1) | L1_ATTR;
+
+ /* Flush the tlb so the new mapping takes effect.
+ * Global tlb entries are not flushed but that is not an issue.
+ */
+ load_cr3(pgtable_level2);
+}
+
+#else
+#define LEVEL1_SIZE (1UL << 21UL)
+#define LEVEL2_SIZE (1UL << 30UL)
+static u64 pgtable_level1[512] PAGE_ALIGNED;
+static u64 pgtable_level2[512] PAGE_ALIGNED;
+
+static void identity_map_page(unsigned long address)
+{
+ unsigned long level1_index, level2_index, level3_index;
+ u64 *pgtable_level3;
+
+ /* Find the current page table */
+ pgtable_level3 = __va(read_cr3());
+
+ /* Find the indexes of the physical address to identity map */
+ level1_index = (address % LEVEL1_SIZE)/LEVEL0_SIZE;
+ level2_index = (address % LEVEL2_SIZE)/LEVEL1_SIZE;
+ level3_index = address / LEVEL2_SIZE;
+
+ /* Identity map the page table entry */
+ pgtable_level1[level1_index] = address | L0_ATTR;
+ pgtable_level2[level2_index] = __pa(pgtable_level1) | L1_ATTR;
+ set_64bit(&pgtable_level3[level3_index],
+ __pa(pgtable_level2) | L2_ATTR);
+
+ /* Flush the tlb so the new mapping takes effect.
+ * Global tlb entries are not flushed but that is not an issue.
+ */
+ load_cr3(pgtable_level3);
+}
+#endif
+
+
+static void set_idt(void *newidt, __u16 limit)
+{
+ unsigned char curidt[6];
+
+ /* ia32 supports unaliged loads & stores */
+ (*(__u16 *)(curidt)) = limit;
+ (*(__u32 *)(curidt +2)) = (unsigned long)(newidt);
+
+ __asm__ __volatile__ (
+ "lidt %0\n"
+ : "=m" (curidt)
+ );
+};
+
+
+static void set_gdt(void *newgdt, __u16 limit)
+{
+ unsigned char curgdt[6];
+
+ /* ia32 supports unaligned loads & stores */
+ (*(__u16 *)(curgdt)) = limit;
+ (*(__u32 *)(curgdt +2)) = (unsigned long)(newgdt);
+
+ __asm__ __volatile__ (
+ "lgdt %0\n"
+ : "=m" (curgdt)
+ );
+};
+
+static void load_segments(void)
+{
+#define __STR(X) #X
+#define STR(X) __STR(X)
+
+ __asm__ __volatile__ (
+ "\tljmp $"STR(__KERNEL_CS)",$1f\n"
+ "\t1:\n"
+ "\tmovl $"STR(__KERNEL_DS)",%eax\n"
+ "\tmovl %eax,%ds\n"
+ "\tmovl %eax,%es\n"
+ "\tmovl %eax,%fs\n"
+ "\tmovl %eax,%gs\n"
+ "\tmovl %eax,%ss\n"
+ );
+#undef STR
+#undef __STR
+}
+
+typedef asmlinkage NORET_TYPE void (*relocate_new_kernel_t)(
+ unsigned long indirection_page,
+ unsigned long reboot_code_buffer,
+ unsigned long start_address,
+ unsigned int has_pae) ATTRIB_NORET;
+
+const extern unsigned char relocate_new_kernel[];
+extern void relocate_new_kernel_end(void);
+const extern unsigned int relocate_new_kernel_size;
+
+/*
+ * A architecture hook called to validate the
+ * proposed image and prepare the control pages
+ * as needed. The pages for KEXEC_CONTROL_CODE_SIZE
+ * have been allocated, but the segments have yet
+ * been copied into the kernel.
+ *
+ * Do what every setup is needed on image and the
+ * reboot code buffer to allow us to avoid allocations
+ * later.
+ *
+ * Currently nothing.
+ */
+int machine_kexec_prepare(struct kimage *image)
+{
+ return 0;
+}
+
+/*
+ * Undo anything leftover by machine_kexec_prepare
+ * when an image is freed.
+ */
+void machine_kexec_cleanup(struct kimage *image)
+{
+}
+
+/*
+ * Do not allocate memory (or fail in any way) in machine_kexec().
+ * We are past the point of no return, committed to rebooting now.
+ */
+NORET_TYPE void machine_kexec(struct kimage *image)
+{
+ unsigned long page_list;
+ unsigned long reboot_code_buffer;
+
+ relocate_new_kernel_t rnk;
+
+ /* Interrupts aren't acceptable while we reboot */
+ local_irq_disable();
+
+ /* Compute some offsets */
+ reboot_code_buffer = page_to_pfn(image->control_code_page)
+ << PAGE_SHIFT;
+ page_list = image->head;
+
+ /* Set up an identity mapping for the reboot_code_buffer */
+ identity_map_page(reboot_code_buffer);
+
+ /* copy it out */
+ memcpy((void *)reboot_code_buffer, relocate_new_kernel,
+ relocate_new_kernel_size);
+
+ /* The segment registers are funny things, they are
+ * automatically loaded from a table, in memory wherever you
+ * set them to a specific selector, but this table is never
+ * accessed again you set the segment to a different selector.
+ *
+ * The more common model is are caches where the behide
+ * the scenes work is done, but is also dropped at arbitrary
+ * times.
+ *
+ * I take advantage of this here by force loading the
+ * segments, before I zap the gdt with an invalid value.
+ */
+ load_segments();
+ /* The gdt & idt are now invalid.
+ * If you want to load them you must set up your own idt & gdt.
+ */
+ set_gdt(phys_to_virt(0),0);
+ set_idt(phys_to_virt(0),0);
+
+ /* now call it */
+ rnk = (relocate_new_kernel_t) reboot_code_buffer;
+ (*rnk)(page_list, reboot_code_buffer, image->start, cpu_has_pae);
+}
diff --git a/arch/i386/kernel/mpparse.c b/arch/i386/kernel/mpparse.c
index 383a11600d2c..af917f609c7d 100644
--- a/arch/i386/kernel/mpparse.c
+++ b/arch/i386/kernel/mpparse.c
@@ -67,7 +67,6 @@ unsigned long mp_lapic_addr;
/* Processor that is doing the boot up */
unsigned int boot_cpu_physical_apicid = -1U;
-unsigned int boot_cpu_logical_apicid = -1U;
/* Internal processor count */
static unsigned int __initdata num_processors;
@@ -180,7 +179,6 @@ static void __init MP_processor_info (struct mpc_config_processor *m)
if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) {
Dprintk(" Bootup CPU\n");
boot_cpu_physical_apicid = m->mpc_apicid;
- boot_cpu_logical_apicid = apicid;
}
if (num_processors >= NR_CPUS) {
diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c
index aea2ce1145df..5f8cfa6b7940 100644
--- a/arch/i386/kernel/process.c
+++ b/arch/i386/kernel/process.c
@@ -13,6 +13,7 @@
#include <stdarg.h>
+#include <linux/cpu.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/fs.h>
@@ -55,6 +56,9 @@
#include <linux/irq.h>
#include <linux/err.h>
+#include <asm/tlbflush.h>
+#include <asm/cpu.h>
+
asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
static int hlt_counter;
@@ -143,14 +147,42 @@ static void poll_idle (void)
}
}
+#ifdef CONFIG_HOTPLUG_CPU
+#include <asm/nmi.h>
+/* We don't actually take CPU down, just spin without interrupts. */
+static inline void play_dead(void)
+{
+ /* This must be done before dead CPU ack */
+ cpu_exit_clear();
+ wbinvd();
+ mb();
+ /* Ack it */
+ __get_cpu_var(cpu_state) = CPU_DEAD;
+
+ /*
+ * With physical CPU hotplug, we should halt the cpu
+ */
+ local_irq_disable();
+ while (1)
+ __asm__ __volatile__("hlt":::"memory");
+}
+#else
+static inline void play_dead(void)
+{
+ BUG();
+}
+#endif /* CONFIG_HOTPLUG_CPU */
+
/*
* The idle thread. There's no useful work to be
* done, so just try to conserve power and have a
* low exit latency (ie sit in a loop waiting for
* somebody to say that they'd like to reschedule)
*/
-void cpu_idle (void)
+void cpu_idle(void)
{
+ int cpu = raw_smp_processor_id();
+
/* endless idle loop with no priority at all */
while (1) {
while (!need_resched()) {
@@ -165,6 +197,9 @@ void cpu_idle (void)
if (!idle)
idle = default_idle;
+ if (cpu_is_offline(cpu))
+ play_dead();
+
__get_cpu_var(irq_stat).idle_timestamp = jiffies;
idle();
}
@@ -223,7 +258,7 @@ static void mwait_idle(void)
}
}
-void __init select_idle_routine(const struct cpuinfo_x86 *c)
+void __devinit select_idle_routine(const struct cpuinfo_x86 *c)
{
if (cpu_has(c, X86_FEATURE_MWAIT)) {
printk("monitor/mwait feature present.\n");
diff --git a/arch/i386/kernel/reboot.c b/arch/i386/kernel/reboot.c
index db912209a8d3..b3e584849961 100644
--- a/arch/i386/kernel/reboot.c
+++ b/arch/i386/kernel/reboot.c
@@ -26,7 +26,6 @@ static int reboot_mode;
static int reboot_thru_bios;
#ifdef CONFIG_SMP
-int reboot_smp = 0;
static int reboot_cpu = -1;
/* shamelessly grabbed from lib/vsprintf.c for readability */
#define is_digit(c) ((c) >= '0' && (c) <= '9')
@@ -49,7 +48,6 @@ static int __init reboot_setup(char *str)
break;
#ifdef CONFIG_SMP
case 's': /* "smp" reboot by executing reset on BSP or other CPU*/
- reboot_smp = 1;
if (is_digit(*(str+1))) {
reboot_cpu = (int) (*(str+1) - '0');
if (is_digit(*(str+2)))
@@ -88,33 +86,9 @@ static int __init set_bios_reboot(struct dmi_system_id *d)
return 0;
}
-/*
- * Some machines require the "reboot=s" commandline option, this quirk makes that automatic.
- */
-static int __init set_smp_reboot(struct dmi_system_id *d)
-{
-#ifdef CONFIG_SMP
- if (!reboot_smp) {
- reboot_smp = 1;
- printk(KERN_INFO "%s series board detected. Selecting SMP-method for reboots.\n", d->ident);
- }
-#endif
- return 0;
-}
-
-/*
- * Some machines require the "reboot=b,s" commandline option, this quirk makes that automatic.
- */
-static int __init set_smp_bios_reboot(struct dmi_system_id *d)
-{
- set_smp_reboot(d);
- set_bios_reboot(d);
- return 0;
-}
-
static struct dmi_system_id __initdata reboot_dmi_table[] = {
{ /* Handle problems with rebooting on Dell 1300's */
- .callback = set_smp_bios_reboot,
+ .callback = set_bios_reboot,
.ident = "Dell PowerEdge 1300",
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"),
@@ -301,41 +275,32 @@ void machine_real_restart(unsigned char *code, int length)
EXPORT_SYMBOL(machine_real_restart);
#endif
-void machine_restart(char * __unused)
+void machine_shutdown(void)
{
#ifdef CONFIG_SMP
- int cpuid;
-
- cpuid = GET_APIC_ID(apic_read(APIC_ID));
-
- if (reboot_smp) {
-
- /* check to see if reboot_cpu is valid
- if its not, default to the BSP */
- if ((reboot_cpu == -1) ||
- (reboot_cpu > (NR_CPUS -1)) ||
- !physid_isset(cpuid, phys_cpu_present_map))
- reboot_cpu = boot_cpu_physical_apicid;
-
- reboot_smp = 0; /* use this as a flag to only go through this once*/
- /* re-run this function on the other CPUs
- it will fall though this section since we have
- cleared reboot_smp, and do the reboot if it is the
- correct CPU, otherwise it halts. */
- if (reboot_cpu != cpuid)
- smp_call_function((void *)machine_restart , NULL, 1, 0);
+ int reboot_cpu_id;
+
+ /* The boot cpu is always logical cpu 0 */
+ reboot_cpu_id = 0;
+
+ /* See if there has been given a command line override */
+ if ((reboot_cpu_id != -1) && (reboot_cpu < NR_CPUS) &&
+ cpu_isset(reboot_cpu, cpu_online_map)) {
+ reboot_cpu_id = reboot_cpu;
}
- /* if reboot_cpu is still -1, then we want a tradional reboot,
- and if we are not running on the reboot_cpu,, halt */
- if ((reboot_cpu != -1) && (cpuid != reboot_cpu)) {
- for (;;)
- __asm__ __volatile__ ("hlt");
+ /* Make certain the cpu I'm rebooting on is online */
+ if (!cpu_isset(reboot_cpu_id, cpu_online_map)) {
+ reboot_cpu_id = smp_processor_id();
}
- /*
- * Stop all CPUs and turn off local APICs and the IO-APIC, so
- * other OSs see a clean IRQ state.
+
+ /* Make certain I only run on the appropriate processor */
+ set_cpus_allowed(current, cpumask_of_cpu(reboot_cpu_id));
+
+ /* O.K. Now that I'm on the appropriate processor, stop
+ * all of the others, and disable their local APICs.
*/
+
smp_send_stop();
#endif /* CONFIG_SMP */
@@ -344,6 +309,11 @@ void machine_restart(char * __unused)
#ifdef CONFIG_X86_IO_APIC
disable_IO_APIC();
#endif
+}
+
+void machine_restart(char * __unused)
+{
+ machine_shutdown();
if (!reboot_thru_bios) {
if (efi_enabled) {
diff --git a/arch/i386/kernel/relocate_kernel.S b/arch/i386/kernel/relocate_kernel.S
new file mode 100644
index 000000000000..d312616effa1
--- /dev/null
+++ b/arch/i386/kernel/relocate_kernel.S
@@ -0,0 +1,120 @@
+/*
+ * relocate_kernel.S - put the kernel image in place to boot
+ * Copyright (C) 2002-2004 Eric Biederman <ebiederm@xmission.com>
+ *
+ * This source code is licensed under the GNU General Public License,
+ * Version 2. See the file COPYING for more details.
+ */
+
+#include <linux/linkage.h>
+
+ /*
+ * Must be relocatable PIC code callable as a C function, that once
+ * it starts can not use the previous processes stack.
+ */
+ .globl relocate_new_kernel
+relocate_new_kernel:
+ /* read the arguments and say goodbye to the stack */
+ movl 4(%esp), %ebx /* page_list */
+ movl 8(%esp), %ebp /* reboot_code_buffer */
+ movl 12(%esp), %edx /* start address */
+ movl 16(%esp), %ecx /* cpu_has_pae */
+
+ /* zero out flags, and disable interrupts */
+ pushl $0
+ popfl
+
+ /* set a new stack at the bottom of our page... */
+ lea 4096(%ebp), %esp
+
+ /* store the parameters back on the stack */
+ pushl %edx /* store the start address */
+
+ /* Set cr0 to a known state:
+ * 31 0 == Paging disabled
+ * 18 0 == Alignment check disabled
+ * 16 0 == Write protect disabled
+ * 3 0 == No task switch
+ * 2 0 == Don't do FP software emulation.
+ * 0 1 == Proctected mode enabled
+ */
+ movl %cr0, %eax
+ andl $~((1<<31)|(1<<18)|(1<<16)|(1<<3)|(1<<2)), %eax
+ orl $(1<<0), %eax
+ movl %eax, %cr0
+
+ /* clear cr4 if applicable */
+ testl %ecx, %ecx
+ jz 1f
+ /* Set cr4 to a known state:
+ * Setting everything to zero seems safe.
+ */
+ movl %cr4, %eax
+ andl $0, %eax
+ movl %eax, %cr4
+
+ jmp 1f
+1:
+
+ /* Flush the TLB (needed?) */
+ xorl %eax, %eax
+ movl %eax, %cr3
+
+ /* Do the copies */
+ movl %ebx, %ecx
+ jmp 1f
+
+0: /* top, read another word from the indirection page */
+ movl (%ebx), %ecx
+ addl $4, %ebx
+1:
+ testl $0x1, %ecx /* is it a destination page */
+ jz 2f
+ movl %ecx, %edi
+ andl $0xfffff000, %edi
+ jmp 0b
+2:
+ testl $0x2, %ecx /* is it an indirection page */
+ jz 2f
+ movl %ecx, %ebx
+ andl $0xfffff000, %ebx
+ jmp 0b
+2:
+ testl $0x4, %ecx /* is it the done indicator */
+ jz 2f
+ jmp 3f
+2:
+ testl $0x8, %ecx /* is it the source indicator */
+ jz 0b /* Ignore it otherwise */
+ movl %ecx, %esi /* For every source page do a copy */
+ andl $0xfffff000, %esi
+
+ movl $1024, %ecx
+ rep ; movsl
+ jmp 0b
+
+3:
+
+ /* To be certain of avoiding problems with self-modifying code
+ * I need to execute a serializing instruction here.
+ * So I flush the TLB, it's handy, and not processor dependent.
+ */
+ xorl %eax, %eax
+ movl %eax, %cr3
+
+ /* set all of the registers to known values */
+ /* leave %esp alone */
+
+ xorl %eax, %eax
+ xorl %ebx, %ebx
+ xorl %ecx, %ecx
+ xorl %edx, %edx
+ xorl %esi, %esi
+ xorl %edi, %edi
+ xorl %ebp, %ebp
+ ret
+relocate_new_kernel_end:
+
+ .globl relocate_new_kernel_size
+relocate_new_kernel_size:
+ .long relocate_new_kernel_end - relocate_new_kernel
diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c
index 30406fd0b64c..7306353c520e 100644
--- a/arch/i386/kernel/setup.c
+++ b/arch/i386/kernel/setup.c
@@ -43,7 +43,12 @@
#include <linux/init.h>
#include <linux/edd.h>
#include <linux/nodemask.h>
+#include <linux/kexec.h>
+#include <linux/crash_dump.h>
+
#include <video/edid.h>
+
+#include <asm/apic.h>
#include <asm/e820.h>
#include <asm/mpspec.h>
#include <asm/setup.h>
@@ -55,12 +60,15 @@
#include "setup_arch_pre.h"
#include <bios_ebda.h>
+/* Forward Declaration. */
+void __init find_max_pfn(void);
+
/* This value is set up by the early boot code to point to the value
immediately after the boot time page tables. It contains a *physical*
address, and must not be in the .bss segment! */
unsigned long init_pg_tables_end __initdata = ~0UL;
-int disable_pse __initdata = 0;
+int disable_pse __devinitdata = 0;
/*
* Machine setup..
@@ -732,6 +740,15 @@ static void __init parse_cmdline_early (char ** cmdline_p)
if (to != command_line)
to--;
if (!memcmp(from+7, "exactmap", 8)) {
+#ifdef CONFIG_CRASH_DUMP
+ /* If we are doing a crash dump, we
+ * still need to know the real mem
+ * size before original memory map is
+ * reset.
+ */
+ find_max_pfn();
+ saved_max_pfn = max_pfn;
+#endif
from += 8+7;
e820.nr_map = 0;
userdef = 1;
@@ -835,6 +852,44 @@ static void __init parse_cmdline_early (char ** cmdline_p)
#endif /* CONFIG_X86_LOCAL_APIC */
#endif /* CONFIG_ACPI_BOOT */
+#ifdef CONFIG_X86_LOCAL_APIC
+ /* enable local APIC */
+ else if (!memcmp(from, "lapic", 5))
+ lapic_enable();
+
+ /* disable local APIC */
+ else if (!memcmp(from, "nolapic", 6))
+ lapic_disable();
+#endif /* CONFIG_X86_LOCAL_APIC */
+
+#ifdef CONFIG_KEXEC
+ /* crashkernel=size@addr specifies the location to reserve for
+ * a crash kernel. By reserving this memory we guarantee
+ * that linux never set's it up as a DMA target.
+ * Useful for holding code to do something appropriate
+ * after a kernel panic.
+ */
+ else if (!memcmp(from, "crashkernel=", 12)) {
+ unsigned long size, base;
+ size = memparse(from+12, &from);
+ if (*from == '@') {
+ base = memparse(from+1, &from);
+ /* FIXME: Do I want a sanity check
+ * to validate the memory range?
+ */
+ crashk_res.start = base;
+ crashk_res.end = base + size - 1;
+ }
+ }
+#endif
+#ifdef CONFIG_CRASH_DUMP
+ /* elfcorehdr= specifies the location of elf core header
+ * stored by the crashed kernel.
+ */
+ else if (!memcmp(from, "elfcorehdr=", 11))
+ elfcorehdr_addr = memparse(from+11, &from);
+#endif
+
/*
* highmem=size forces highmem to be exactly 'size' bytes.
* This works even on boxes that have no highmem otherwise.
@@ -1113,8 +1168,8 @@ void __init setup_bootmem_allocator(void)
* the (very unlikely) case of us accidentally initializing the
* bootmem allocator with an invalid RAM area.
*/
- reserve_bootmem(HIGH_MEMORY, (PFN_PHYS(min_low_pfn) +
- bootmap_size + PAGE_SIZE-1) - (HIGH_MEMORY));
+ reserve_bootmem(__PHYSICAL_START, (PFN_PHYS(min_low_pfn) +
+ bootmap_size + PAGE_SIZE-1) - (__PHYSICAL_START));
/*
* reserve physical page 0 - it's a special BIOS page on many boxes,
@@ -1170,6 +1225,11 @@ void __init setup_bootmem_allocator(void)
}
}
#endif
+#ifdef CONFIG_KEXEC
+ if (crashk_res.start != crashk_res.end)
+ reserve_bootmem(crashk_res.start,
+ crashk_res.end - crashk_res.start + 1);
+#endif
}
/*
@@ -1223,6 +1283,9 @@ legacy_init_iomem_resources(struct resource *code_resource, struct resource *dat
*/
request_resource(res, code_resource);
request_resource(res, data_resource);
+#ifdef CONFIG_KEXEC
+ request_resource(res, &crashk_res);
+#endif
}
}
}
diff --git a/arch/i386/kernel/signal.c b/arch/i386/kernel/signal.c
index b9b8f4e20fad..89ef7adc63a4 100644
--- a/arch/i386/kernel/signal.c
+++ b/arch/i386/kernel/signal.c
@@ -608,10 +608,8 @@ int fastcall do_signal(struct pt_regs *regs, sigset_t *oldset)
if (!user_mode(regs))
return 1;
- if (current->flags & PF_FREEZE) {
- refrigerator(0);
+ if (try_to_freeze())
goto no_signal;
- }
if (!oldset)
oldset = &current->blocked;
diff --git a/arch/i386/kernel/smp.c b/arch/i386/kernel/smp.c
index 68be7d0c7238..cec4bde67161 100644
--- a/arch/i386/kernel/smp.c
+++ b/arch/i386/kernel/smp.c
@@ -19,6 +19,7 @@
#include <linux/mc146818rtc.h>
#include <linux/cache.h>
#include <linux/interrupt.h>
+#include <linux/cpu.h>
#include <linux/module.h>
#include <asm/mtrr.h>
@@ -164,7 +165,7 @@ void send_IPI_mask_bitmask(cpumask_t cpumask, int vector)
unsigned long flags;
local_irq_save(flags);
-
+ WARN_ON(mask & ~cpus_addr(cpu_online_map)[0]);
/*
* Wait for idle.
*/
@@ -346,21 +347,21 @@ out:
static void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm,
unsigned long va)
{
- cpumask_t tmp;
/*
* A couple of (to be removed) sanity checks:
*
- * - we do not send IPIs to not-yet booted CPUs.
* - current CPU must not be in mask
* - mask must exist :)
*/
BUG_ON(cpus_empty(cpumask));
-
- cpus_and(tmp, cpumask, cpu_online_map);
- BUG_ON(!cpus_equal(cpumask, tmp));
BUG_ON(cpu_isset(smp_processor_id(), cpumask));
BUG_ON(!mm);
+ /* If a CPU which we ran on has gone down, OK. */
+ cpus_and(cpumask, cpumask, cpu_online_map);
+ if (cpus_empty(cpumask))
+ return;
+
/*
* i'm not happy about this global shared spinlock in the
* MM hot path, but we'll see how contended it is.
@@ -476,6 +477,7 @@ void flush_tlb_all(void)
*/
void smp_send_reschedule(int cpu)
{
+ WARN_ON(cpu_is_offline(cpu));
send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR);
}
@@ -493,6 +495,16 @@ struct call_data_struct {
int wait;
};
+void lock_ipi_call_lock(void)
+{
+ spin_lock_irq(&call_lock);
+}
+
+void unlock_ipi_call_lock(void)
+{
+ spin_unlock_irq(&call_lock);
+}
+
static struct call_data_struct * call_data;
/*
@@ -516,10 +528,15 @@ int smp_call_function (void (*func) (void *info), void *info, int nonatomic,
*/
{
struct call_data_struct data;
- int cpus = num_online_cpus()-1;
+ int cpus;
- if (!cpus)
+ /* Holding any lock stops cpus from going down. */
+ spin_lock(&call_lock);
+ cpus = num_online_cpus() - 1;
+ if (!cpus) {
+ spin_unlock(&call_lock);
return 0;
+ }
/* Can deadlock when called with interrupts disabled */
WARN_ON(irqs_disabled());
@@ -531,7 +548,6 @@ int smp_call_function (void (*func) (void *info), void *info, int nonatomic,
if (wait)
atomic_set(&data.finished, 0);
- spin_lock(&call_lock);
call_data = &data;
mb();
diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c
index c20d96d5c15c..d66bf489a2e9 100644
--- a/arch/i386/kernel/smpboot.c
+++ b/arch/i386/kernel/smpboot.c
@@ -44,6 +44,9 @@
#include <linux/smp_lock.h>
#include <linux/irq.h>
#include <linux/bootmem.h>
+#include <linux/notifier.h>
+#include <linux/cpu.h>
+#include <linux/percpu.h>
#include <linux/delay.h>
#include <linux/mc146818rtc.h>
@@ -56,18 +59,28 @@
#include <smpboot_hooks.h>
/* Set if we find a B stepping CPU */
-static int __initdata smp_b_stepping;
+static int __devinitdata smp_b_stepping;
/* Number of siblings per CPU package */
int smp_num_siblings = 1;
#ifdef CONFIG_X86_HT
EXPORT_SYMBOL(smp_num_siblings);
#endif
-int phys_proc_id[NR_CPUS]; /* Package ID of each logical CPU */
+
+/* Package ID of each logical CPU */
+int phys_proc_id[NR_CPUS] = {[0 ... NR_CPUS-1] = BAD_APICID};
EXPORT_SYMBOL(phys_proc_id);
-int cpu_core_id[NR_CPUS]; /* Core ID of each logical CPU */
+
+/* Core ID of each logical CPU */
+int cpu_core_id[NR_CPUS] = {[0 ... NR_CPUS-1] = BAD_APICID};
EXPORT_SYMBOL(cpu_core_id);
+cpumask_t cpu_sibling_map[NR_CPUS];
+EXPORT_SYMBOL(cpu_sibling_map);
+
+cpumask_t cpu_core_map[NR_CPUS];
+EXPORT_SYMBOL(cpu_core_map);
+
/* bitmap of online cpus */
cpumask_t cpu_online_map;
EXPORT_SYMBOL(cpu_online_map);
@@ -77,6 +90,12 @@ cpumask_t cpu_callout_map;
EXPORT_SYMBOL(cpu_callout_map);
static cpumask_t smp_commenced_mask;
+/* TSC's upper 32 bits can't be written in eariler CPU (before prescott), there
+ * is no way to resync one AP against BP. TBD: for prescott and above, we
+ * should use IA64's algorithm
+ */
+static int __devinitdata tsc_sync_disabled;
+
/* Per CPU bogomips and other parameters */
struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned;
EXPORT_SYMBOL(cpu_data);
@@ -96,13 +115,16 @@ static int trampoline_exec;
static void map_cpu_to_logical_apicid(void);
+/* State of each CPU. */
+DEFINE_PER_CPU(int, cpu_state) = { 0 };
+
/*
* Currently trivial. Write the real->protected mode
* bootstrap into the page concerned. The caller
* has made sure it's suitably aligned.
*/
-static unsigned long __init setup_trampoline(void)
+static unsigned long __devinit setup_trampoline(void)
{
memcpy(trampoline_base, trampoline_data, trampoline_end - trampoline_data);
return virt_to_phys(trampoline_base);
@@ -132,7 +154,7 @@ void __init smp_alloc_memory(void)
* a given CPU
*/
-static void __init smp_store_cpu_info(int id)
+static void __devinit smp_store_cpu_info(int id)
{
struct cpuinfo_x86 *c = cpu_data + id;
@@ -326,7 +348,7 @@ extern void calibrate_delay(void);
static atomic_t init_deasserted;
-static void __init smp_callin(void)
+static void __devinit smp_callin(void)
{
int cpuid, phys_id;
unsigned long timeout;
@@ -411,16 +433,48 @@ static void __init smp_callin(void)
/*
* Synchronize the TSC with the BP
*/
- if (cpu_has_tsc && cpu_khz)
+ if (cpu_has_tsc && cpu_khz && !tsc_sync_disabled)
synchronize_tsc_ap();
}
static int cpucount;
+static inline void
+set_cpu_sibling_map(int cpu)
+{
+ int i;
+
+ if (smp_num_siblings > 1) {
+ for (i = 0; i < NR_CPUS; i++) {
+ if (!cpu_isset(i, cpu_callout_map))
+ continue;
+ if (cpu_core_id[cpu] == cpu_core_id[i]) {
+ cpu_set(i, cpu_sibling_map[cpu]);
+ cpu_set(cpu, cpu_sibling_map[i]);
+ }
+ }
+ } else {
+ cpu_set(cpu, cpu_sibling_map[cpu]);
+ }
+
+ if (current_cpu_data.x86_num_cores > 1) {
+ for (i = 0; i < NR_CPUS; i++) {
+ if (!cpu_isset(i, cpu_callout_map))
+ continue;
+ if (phys_proc_id[cpu] == phys_proc_id[i]) {
+ cpu_set(i, cpu_core_map[cpu]);
+ cpu_set(cpu, cpu_core_map[i]);
+ }
+ }
+ } else {
+ cpu_core_map[cpu] = cpu_sibling_map[cpu];
+ }
+}
+
/*
* Activate a secondary processor.
*/
-static void __init start_secondary(void *unused)
+static void __devinit start_secondary(void *unused)
{
/*
* Dont put anything before smp_callin(), SMP
@@ -443,7 +497,23 @@ static void __init start_secondary(void *unused)
* the local TLBs too.
*/
local_flush_tlb();
+
+ /* This must be done before setting cpu_online_map */
+ set_cpu_sibling_map(raw_smp_processor_id());
+ wmb();
+
+ /*
+ * We need to hold call_lock, so there is no inconsistency
+ * between the time smp_call_function() determines number of
+ * IPI receipients, and the time when the determination is made
+ * for which cpus receive the IPI. Holding this
+ * lock helps us to not include this cpu in a currently in progress
+ * smp_call_function().
+ */
+ lock_ipi_call_lock();
cpu_set(smp_processor_id(), cpu_online_map);
+ unlock_ipi_call_lock();
+ per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;
/* We can take interrupts now: we're officially "up". */
local_irq_enable();
@@ -458,7 +528,7 @@ static void __init start_secondary(void *unused)
* from the task structure
* This function must not return.
*/
-void __init initialize_secondary(void)
+void __devinit initialize_secondary(void)
{
/*
* We don't actually need to load the full TSS,
@@ -572,7 +642,7 @@ static inline void __inquire_remote_apic(int apicid)
* INIT, INIT, STARTUP sequence will reset the chip hard for us, and this
* won't ... remember to clear down the APIC, etc later.
*/
-static int __init
+static int __devinit
wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip)
{
unsigned long send_status = 0, accept_status = 0;
@@ -618,7 +688,7 @@ wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip)
#endif /* WAKE_SECONDARY_VIA_NMI */
#ifdef WAKE_SECONDARY_VIA_INIT
-static int __init
+static int __devinit
wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
{
unsigned long send_status = 0, accept_status = 0;
@@ -753,8 +823,43 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
#endif /* WAKE_SECONDARY_VIA_INIT */
extern cpumask_t cpu_initialized;
+static inline int alloc_cpu_id(void)
+{
+ cpumask_t tmp_map;
+ int cpu;
+ cpus_complement(tmp_map, cpu_present_map);
+ cpu = first_cpu(tmp_map);
+ if (cpu >= NR_CPUS)
+ return -ENODEV;
+ return cpu;
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+static struct task_struct * __devinitdata cpu_idle_tasks[NR_CPUS];
+static inline struct task_struct * alloc_idle_task(int cpu)
+{
+ struct task_struct *idle;
+
+ if ((idle = cpu_idle_tasks[cpu]) != NULL) {
+ /* initialize thread_struct. we really want to avoid destroy
+ * idle tread
+ */
+ idle->thread.esp = (unsigned long)(((struct pt_regs *)
+ (THREAD_SIZE + (unsigned long) idle->thread_info)) - 1);
+ init_idle(idle, cpu);
+ return idle;
+ }
+ idle = fork_idle(cpu);
+
+ if (!IS_ERR(idle))
+ cpu_idle_tasks[cpu] = idle;
+ return idle;
+}
+#else
+#define alloc_idle_task(cpu) fork_idle(cpu)
+#endif
-static int __init do_boot_cpu(int apicid)
+static int __devinit do_boot_cpu(int apicid, int cpu)
/*
* NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad
* (ie clustered apic addressing mode), this is a LOGICAL apic ID.
@@ -763,16 +868,17 @@ static int __init do_boot_cpu(int apicid)
{
struct task_struct *idle;
unsigned long boot_error;
- int timeout, cpu;
+ int timeout;
unsigned long start_eip;
unsigned short nmi_high = 0, nmi_low = 0;
- cpu = ++cpucount;
+ ++cpucount;
+
/*
* We can't use kernel_thread since we must avoid to
* reschedule the child.
*/
- idle = fork_idle(cpu);
+ idle = alloc_idle_task(cpu);
if (IS_ERR(idle))
panic("failed fork for CPU %d", cpu);
idle->thread.eip = (unsigned long) start_secondary;
@@ -839,13 +945,16 @@ static int __init do_boot_cpu(int apicid)
inquire_remote_apic(apicid);
}
}
- x86_cpu_to_apicid[cpu] = apicid;
+
if (boot_error) {
/* Try to put things back the way they were before ... */
unmap_cpu_to_logical_apicid(cpu);
cpu_clear(cpu, cpu_callout_map); /* was set here (do_boot_cpu()) */
cpu_clear(cpu, cpu_initialized); /* was set by cpu_init() */
cpucount--;
+ } else {
+ x86_cpu_to_apicid[cpu] = apicid;
+ cpu_set(cpu, cpu_present_map);
}
/* mark "stuck" area as not stuck */
@@ -854,6 +963,75 @@ static int __init do_boot_cpu(int apicid)
return boot_error;
}
+#ifdef CONFIG_HOTPLUG_CPU
+void cpu_exit_clear(void)
+{
+ int cpu = raw_smp_processor_id();
+
+ idle_task_exit();
+
+ cpucount --;
+ cpu_uninit();
+ irq_ctx_exit(cpu);
+
+ cpu_clear(cpu, cpu_callout_map);
+ cpu_clear(cpu, cpu_callin_map);
+ cpu_clear(cpu, cpu_present_map);
+
+ cpu_clear(cpu, smp_commenced_mask);
+ unmap_cpu_to_logical_apicid(cpu);
+}
+
+struct warm_boot_cpu_info {
+ struct completion *complete;
+ int apicid;
+ int cpu;
+};
+
+static void __devinit do_warm_boot_cpu(void *p)
+{
+ struct warm_boot_cpu_info *info = p;
+ do_boot_cpu(info->apicid, info->cpu);
+ complete(info->complete);
+}
+
+int __devinit smp_prepare_cpu(int cpu)
+{
+ DECLARE_COMPLETION(done);
+ struct warm_boot_cpu_info info;
+ struct work_struct task;
+ int apicid, ret;
+
+ lock_cpu_hotplug();
+ apicid = x86_cpu_to_apicid[cpu];
+ if (apicid == BAD_APICID) {
+ ret = -ENODEV;
+ goto exit;
+ }
+
+ info.complete = &done;
+ info.apicid = apicid;
+ info.cpu = cpu;
+ INIT_WORK(&task, do_warm_boot_cpu, &info);
+
+ tsc_sync_disabled = 1;
+
+ /* init low mem mapping */
+ memcpy(swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS,
+ sizeof(swapper_pg_dir[0]) * KERNEL_PGD_PTRS);
+ flush_tlb_all();
+ schedule_work(&task);
+ wait_for_completion(&done);
+
+ tsc_sync_disabled = 0;
+ zap_low_mappings();
+ ret = 0;
+exit:
+ unlock_cpu_hotplug();
+ return ret;
+}
+#endif
+
static void smp_tune_scheduling (void)
{
unsigned long cachesize; /* kB */
@@ -895,13 +1073,6 @@ void *xquad_portio;
EXPORT_SYMBOL(xquad_portio);
#endif
-cpumask_t cpu_sibling_map[NR_CPUS] __cacheline_aligned;
-#ifdef CONFIG_X86_HT
-EXPORT_SYMBOL(cpu_sibling_map);
-#endif
-cpumask_t cpu_core_map[NR_CPUS] __cacheline_aligned;
-EXPORT_SYMBOL(cpu_core_map);
-
static void __init smp_boot_cpus(unsigned int max_cpus)
{
int apicid, cpu, bit, kicked;
@@ -1013,7 +1184,7 @@ static void __init smp_boot_cpus(unsigned int max_cpus)
if (max_cpus <= cpucount+1)
continue;
- if (do_boot_cpu(apicid))
+ if (((cpu = alloc_cpu_id()) <= 0) || do_boot_cpu(apicid, cpu))
printk("CPU #%d not responding - cannot use it.\n",
apicid);
else
@@ -1065,44 +1236,8 @@ static void __init smp_boot_cpus(unsigned int max_cpus)
cpus_clear(cpu_core_map[cpu]);
}
- for (cpu = 0; cpu < NR_CPUS; cpu++) {
- struct cpuinfo_x86 *c = cpu_data + cpu;
- int siblings = 0;
- int i;
- if (!cpu_isset(cpu, cpu_callout_map))
- continue;
-
- if (smp_num_siblings > 1) {
- for (i = 0; i < NR_CPUS; i++) {
- if (!cpu_isset(i, cpu_callout_map))
- continue;
- if (cpu_core_id[cpu] == cpu_core_id[i]) {
- siblings++;
- cpu_set(i, cpu_sibling_map[cpu]);
- }
- }
- } else {
- siblings++;
- cpu_set(cpu, cpu_sibling_map[cpu]);
- }
-
- if (siblings != smp_num_siblings) {
- printk(KERN_WARNING "WARNING: %d siblings found for CPU%d, should be %d\n", siblings, cpu, smp_num_siblings);
- smp_num_siblings = siblings;
- }
-
- if (c->x86_num_cores > 1) {
- for (i = 0; i < NR_CPUS; i++) {
- if (!cpu_isset(i, cpu_callout_map))
- continue;
- if (phys_proc_id[cpu] == phys_proc_id[i]) {
- cpu_set(i, cpu_core_map[cpu]);
- }
- }
- } else {
- cpu_core_map[cpu] = cpu_sibling_map[cpu];
- }
- }
+ cpu_set(0, cpu_sibling_map[0]);
+ cpu_set(0, cpu_core_map[0]);
smpboot_setup_io_apic();
@@ -1119,6 +1254,9 @@ static void __init smp_boot_cpus(unsigned int max_cpus)
who understands all this stuff should rewrite it properly. --RR 15/Jul/02 */
void __init smp_prepare_cpus(unsigned int max_cpus)
{
+ smp_commenced_mask = cpumask_of_cpu(0);
+ cpu_callin_map = cpumask_of_cpu(0);
+ mb();
smp_boot_cpus(max_cpus);
}
@@ -1126,23 +1264,98 @@ void __devinit smp_prepare_boot_cpu(void)
{
cpu_set(smp_processor_id(), cpu_online_map);
cpu_set(smp_processor_id(), cpu_callout_map);
+ cpu_set(smp_processor_id(), cpu_present_map);
+ per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;
}
-int __devinit __cpu_up(unsigned int cpu)
+#ifdef CONFIG_HOTPLUG_CPU
+static void
+remove_siblinginfo(int cpu)
{
- /* This only works at boot for x86. See "rewrite" above. */
- if (cpu_isset(cpu, smp_commenced_mask)) {
- local_irq_enable();
- return -ENOSYS;
+ int sibling;
+
+ for_each_cpu_mask(sibling, cpu_sibling_map[cpu])
+ cpu_clear(cpu, cpu_sibling_map[sibling]);
+ for_each_cpu_mask(sibling, cpu_core_map[cpu])
+ cpu_clear(cpu, cpu_core_map[sibling]);
+ cpus_clear(cpu_sibling_map[cpu]);
+ cpus_clear(cpu_core_map[cpu]);
+ phys_proc_id[cpu] = BAD_APICID;
+ cpu_core_id[cpu] = BAD_APICID;
+}
+
+int __cpu_disable(void)
+{
+ cpumask_t map = cpu_online_map;
+ int cpu = smp_processor_id();
+
+ /*
+ * Perhaps use cpufreq to drop frequency, but that could go
+ * into generic code.
+ *
+ * We won't take down the boot processor on i386 due to some
+ * interrupts only being able to be serviced by the BSP.
+ * Especially so if we're not using an IOAPIC -zwane
+ */
+ if (cpu == 0)
+ return -EBUSY;
+
+ /* We enable the timer again on the exit path of the death loop */
+ disable_APIC_timer();
+ /* Allow any queued timer interrupts to get serviced */
+ local_irq_enable();
+ mdelay(1);
+ local_irq_disable();
+
+ remove_siblinginfo(cpu);
+
+ cpu_clear(cpu, map);
+ fixup_irqs(map);
+ /* It's now safe to remove this processor from the online map */
+ cpu_clear(cpu, cpu_online_map);
+ return 0;
+}
+
+void __cpu_die(unsigned int cpu)
+{
+ /* We don't do anything here: idle task is faking death itself. */
+ unsigned int i;
+
+ for (i = 0; i < 10; i++) {
+ /* They ack this in play_dead by setting CPU_DEAD */
+ if (per_cpu(cpu_state, cpu) == CPU_DEAD) {
+ printk ("CPU %d is now offline\n", cpu);
+ return;
+ }
+ current->state = TASK_UNINTERRUPTIBLE;
+ schedule_timeout(HZ/10);
}
+ printk(KERN_ERR "CPU %u didn't die...\n", cpu);
+}
+#else /* ... !CONFIG_HOTPLUG_CPU */
+int __cpu_disable(void)
+{
+ return -ENOSYS;
+}
+
+void __cpu_die(unsigned int cpu)
+{
+ /* We said "no" in __cpu_disable */
+ BUG();
+}
+#endif /* CONFIG_HOTPLUG_CPU */
+int __devinit __cpu_up(unsigned int cpu)
+{
/* In case one didn't come up */
if (!cpu_isset(cpu, cpu_callin_map)) {
+ printk(KERN_DEBUG "skipping cpu%d, didn't come online\n", cpu);
local_irq_enable();
return -EIO;
}
local_irq_enable();
+ per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;
/* Unleash the CPU! */
cpu_set(cpu, smp_commenced_mask);
while (!cpu_isset(cpu, cpu_online_map))
@@ -1156,10 +1369,12 @@ void __init smp_cpus_done(unsigned int max_cpus)
setup_ioapic_dest();
#endif
zap_low_mappings();
+#ifndef CONFIG_HOTPLUG_CPU
/*
* Disable executability of the SMP trampoline:
*/
set_kernel_exec((unsigned long)trampoline_base, trampoline_exec);
+#endif
}
void __init smp_intr_init(void)
diff --git a/arch/i386/kernel/syscall_table.S b/arch/i386/kernel/syscall_table.S
index d408afaf6495..442a6e937b19 100644
--- a/arch/i386/kernel/syscall_table.S
+++ b/arch/i386/kernel/syscall_table.S
@@ -283,7 +283,7 @@ ENTRY(sys_call_table)
.long sys_mq_timedreceive /* 280 */
.long sys_mq_notify
.long sys_mq_getsetattr
- .long sys_ni_syscall /* reserved for kexec */
+ .long sys_kexec_load
.long sys_waitid
.long sys_ni_syscall /* 285 */ /* available */
.long sys_add_key
diff --git a/arch/i386/kernel/sysenter.c b/arch/i386/kernel/sysenter.c
index 960d8bd137d0..0bada1870bdf 100644
--- a/arch/i386/kernel/sysenter.c
+++ b/arch/i386/kernel/sysenter.c
@@ -21,11 +21,16 @@
extern asmlinkage void sysenter_entry(void);
-void enable_sep_cpu(void *info)
+void enable_sep_cpu(void)
{
int cpu = get_cpu();
struct tss_struct *tss = &per_cpu(init_tss, cpu);
+ if (!boot_cpu_has(X86_FEATURE_SEP)) {
+ put_cpu();
+ return;
+ }
+
tss->ss1 = __KERNEL_CS;
tss->esp1 = sizeof(struct tss_struct) + (unsigned long) tss;
wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0);
@@ -41,7 +46,7 @@ void enable_sep_cpu(void *info)
extern const char vsyscall_int80_start, vsyscall_int80_end;
extern const char vsyscall_sysenter_start, vsyscall_sysenter_end;
-static int __init sysenter_setup(void)
+int __init sysenter_setup(void)
{
void *page = (void *)get_zeroed_page(GFP_ATOMIC);
@@ -58,8 +63,5 @@ static int __init sysenter_setup(void)
&vsyscall_sysenter_start,
&vsyscall_sysenter_end - &vsyscall_sysenter_start);
- on_each_cpu(enable_sep_cpu, NULL, 1, 1);
return 0;
}
-
-__initcall(sysenter_setup);
diff --git a/arch/i386/kernel/time_hpet.c b/arch/i386/kernel/time_hpet.c
index 10a0cbb88e75..658c0629ba6a 100644
--- a/arch/i386/kernel/time_hpet.c
+++ b/arch/i386/kernel/time_hpet.c
@@ -50,7 +50,7 @@ static void hpet_writel(unsigned long d, unsigned long a)
* comparator value and continue. Next tick can be caught by checking
* for a change in the comparator value. Used in apic.c.
*/
-static void __init wait_hpet_tick(void)
+static void __devinit wait_hpet_tick(void)
{
unsigned int start_cmp_val, end_cmp_val;
diff --git a/arch/i386/kernel/timers/common.c b/arch/i386/kernel/timers/common.c
index 37353bd31803..8163fe0cf1f0 100644
--- a/arch/i386/kernel/timers/common.c
+++ b/arch/i386/kernel/timers/common.c
@@ -86,7 +86,7 @@ bad_ctc:
#define CALIBRATE_CNT_HPET (5 * hpet_tick)
#define CALIBRATE_TIME_HPET (5 * KERNEL_TICK_USEC)
-unsigned long __init calibrate_tsc_hpet(unsigned long *tsc_hpet_quotient_ptr)
+unsigned long __devinit calibrate_tsc_hpet(unsigned long *tsc_hpet_quotient_ptr)
{
unsigned long tsc_startlow, tsc_starthigh;
unsigned long tsc_endlow, tsc_endhigh;
diff --git a/arch/i386/kernel/timers/timer_tsc.c b/arch/i386/kernel/timers/timer_tsc.c
index 54c36b182021..f46e625bab67 100644
--- a/arch/i386/kernel/timers/timer_tsc.c
+++ b/arch/i386/kernel/timers/timer_tsc.c
@@ -33,7 +33,7 @@ static struct timer_opts timer_tsc;
static inline void cpufreq_delayed_get(void);
-int tsc_disable __initdata = 0;
+int tsc_disable __devinitdata = 0;
extern spinlock_t i8253_lock;
diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c
index e4d4e2162c7a..a61f33d06ea3 100644
--- a/arch/i386/kernel/traps.c
+++ b/arch/i386/kernel/traps.c
@@ -27,6 +27,7 @@
#include <linux/ptrace.h>
#include <linux/utsname.h>
#include <linux/kprobes.h>
+#include <linux/kexec.h>
#ifdef CONFIG_EISA
#include <linux/ioport.h>
@@ -234,22 +235,22 @@ void show_registers(struct pt_regs *regs)
* time of the fault..
*/
if (in_kernel) {
- u8 *eip;
+ u8 __user *eip;
printk("\nStack: ");
show_stack(NULL, (unsigned long*)esp);
printk("Code: ");
- eip = (u8 *)regs->eip - 43;
+ eip = (u8 __user *)regs->eip - 43;
for (i = 0; i < 64; i++, eip++) {
unsigned char c;
- if (eip < (u8 *)PAGE_OFFSET || __get_user(c, eip)) {
+ if (eip < (u8 __user *)PAGE_OFFSET || __get_user(c, eip)) {
printk(" Bad EIP value.");
break;
}
- if (eip == (u8 *)regs->eip)
+ if (eip == (u8 __user *)regs->eip)
printk("<%02x> ", c);
else
printk("%02x ", c);
@@ -273,13 +274,13 @@ static void handle_BUG(struct pt_regs *regs)
if (eip < PAGE_OFFSET)
goto no_bug;
- if (__get_user(ud2, (unsigned short *)eip))
+ if (__get_user(ud2, (unsigned short __user *)eip))
goto no_bug;
if (ud2 != 0x0b0f)
goto no_bug;
- if (__get_user(line, (unsigned short *)(eip + 2)))
+ if (__get_user(line, (unsigned short __user *)(eip + 2)))
goto bug;
- if (__get_user(file, (char **)(eip + 4)) ||
+ if (__get_user(file, (char * __user *)(eip + 4)) ||
(unsigned long)file < PAGE_OFFSET || __get_user(c, file))
file = "<bad filename>";
@@ -294,6 +295,9 @@ bug:
printk("Kernel BUG\n");
}
+/* This is gone through when something in the kernel
+ * has done something bad and is about to be terminated.
+*/
void die(const char * str, struct pt_regs * regs, long err)
{
static struct {
@@ -341,6 +345,10 @@ void die(const char * str, struct pt_regs * regs, long err)
bust_spinlocks(0);
die.lock_owner = -1;
spin_unlock_irq(&die.lock);
+
+ if (kexec_should_crash(current))
+ crash_kexec(regs);
+
if (in_interrupt())
panic("Fatal exception in interrupt");
@@ -361,6 +369,10 @@ static inline void die_if_kernel(const char * str, struct pt_regs * regs, long e
static void do_trap(int trapnr, int signr, char *str, int vm86,
struct pt_regs * regs, long error_code, siginfo_t *info)
{
+ struct task_struct *tsk = current;
+ tsk->thread.error_code = error_code;
+ tsk->thread.trap_no = trapnr;
+
if (regs->eflags & VM_MASK) {
if (vm86)
goto vm86_trap;
@@ -371,9 +383,6 @@ static void do_trap(int trapnr, int signr, char *str, int vm86,
goto kernel_trap;
trap_signal: {
- struct task_struct *tsk = current;
- tsk->thread.error_code = error_code;
- tsk->thread.trap_no = trapnr;
if (info)
force_sig_info(signr, info, tsk);
else
@@ -486,6 +495,9 @@ fastcall void do_general_protection(struct pt_regs * regs, long error_code)
}
put_cpu();
+ current->thread.error_code = error_code;
+ current->thread.trap_no = 13;
+
if (regs->eflags & VM_MASK)
goto gp_in_vm86;
@@ -570,6 +582,15 @@ void die_nmi (struct pt_regs *regs, const char *msg)
console_silent();
spin_unlock(&nmi_print_lock);
bust_spinlocks(0);
+
+ /* If we are in kernel we are probably nested up pretty bad
+ * and might aswell get out now while we still can.
+ */
+ if (!user_mode(regs)) {
+ current->thread.trap_no = 2;
+ crash_kexec(regs);
+ }
+
do_exit(SIGSEGV);
}
@@ -625,6 +646,14 @@ fastcall void do_nmi(struct pt_regs * regs, long error_code)
nmi_enter();
cpu = smp_processor_id();
+
+#ifdef CONFIG_HOTPLUG_CPU
+ if (!cpu_online(cpu)) {
+ nmi_exit();
+ return;
+ }
+#endif
+
++nmi_count(cpu);
if (!nmi_callback(regs, cpu))
@@ -872,9 +901,9 @@ fastcall void do_simd_coprocessor_error(struct pt_regs * regs,
error_code);
return;
}
- die_if_kernel("cache flush denied", regs, error_code);
current->thread.trap_no = 19;
current->thread.error_code = error_code;
+ die_if_kernel("cache flush denied", regs, error_code);
force_sig(SIGSEGV, current);
}
}
diff --git a/arch/i386/kernel/vmlinux.lds.S b/arch/i386/kernel/vmlinux.lds.S
index e0512cc8bea7..7e01a528a83a 100644
--- a/arch/i386/kernel/vmlinux.lds.S
+++ b/arch/i386/kernel/vmlinux.lds.S
@@ -2,20 +2,23 @@
* Written by Martin Mares <mj@atrey.karlin.mff.cuni.cz>;
*/
+#define LOAD_OFFSET __PAGE_OFFSET
+
#include <asm-generic/vmlinux.lds.h>
#include <asm/thread_info.h>
#include <asm/page.h>
OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386")
OUTPUT_ARCH(i386)
-ENTRY(startup_32)
+ENTRY(phys_startup_32)
jiffies = jiffies_64;
SECTIONS
{
- . = __PAGE_OFFSET + 0x100000;
+ . = __KERNEL_START;
+ phys_startup_32 = startup_32 - LOAD_OFFSET;
/* read-only */
_text = .; /* Text and read-only data */
- .text : {
+ .text : AT(ADDR(.text) - LOAD_OFFSET) {
*(.text)
SCHED_TEXT
LOCK_TEXT
@@ -27,49 +30,55 @@ SECTIONS
. = ALIGN(16); /* Exception table */
__start___ex_table = .;
- __ex_table : { *(__ex_table) }
+ __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) { *(__ex_table) }
__stop___ex_table = .;
RODATA
/* writeable */
- .data : { /* Data */
+ .data : AT(ADDR(.data) - LOAD_OFFSET) { /* Data */
*(.data)
CONSTRUCTORS
}
. = ALIGN(4096);
__nosave_begin = .;
- .data_nosave : { *(.data.nosave) }
+ .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) { *(.data.nosave) }
. = ALIGN(4096);
__nosave_end = .;
. = ALIGN(4096);
- .data.page_aligned : { *(.data.idt) }
+ .data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET) {
+ *(.data.idt)
+ }
. = ALIGN(32);
- .data.cacheline_aligned : { *(.data.cacheline_aligned) }
+ .data.cacheline_aligned : AT(ADDR(.data.cacheline_aligned) - LOAD_OFFSET) {
+ *(.data.cacheline_aligned)
+ }
_edata = .; /* End of data section */
. = ALIGN(THREAD_SIZE); /* init_task */
- .data.init_task : { *(.data.init_task) }
+ .data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) {
+ *(.data.init_task)
+ }
/* will be freed after init */
. = ALIGN(4096); /* Init code and data */
__init_begin = .;
- .init.text : {
+ .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) {
_sinittext = .;
*(.init.text)
_einittext = .;
}
- .init.data : { *(.init.data) }
+ .init.data : AT(ADDR(.init.data) - LOAD_OFFSET) { *(.init.data) }
. = ALIGN(16);
__setup_start = .;
- .init.setup : { *(.init.setup) }
+ .init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET) { *(.init.setup) }
__setup_end = .;
__initcall_start = .;
- .initcall.init : {
+ .initcall.init : AT(ADDR(.initcall.init) - LOAD_OFFSET) {
*(.initcall1.init)
*(.initcall2.init)
*(.initcall3.init)
@@ -80,33 +89,41 @@ SECTIONS
}
__initcall_end = .;
__con_initcall_start = .;
- .con_initcall.init : { *(.con_initcall.init) }
+ .con_initcall.init : AT(ADDR(.con_initcall.init) - LOAD_OFFSET) {
+ *(.con_initcall.init)
+ }
__con_initcall_end = .;
SECURITY_INIT
. = ALIGN(4);
__alt_instructions = .;
- .altinstructions : { *(.altinstructions) }
+ .altinstructions : AT(ADDR(.altinstructions) - LOAD_OFFSET) {
+ *(.altinstructions)
+ }
__alt_instructions_end = .;
- .altinstr_replacement : { *(.altinstr_replacement) }
+ .altinstr_replacement : AT(ADDR(.altinstr_replacement) - LOAD_OFFSET) {
+ *(.altinstr_replacement)
+ }
/* .exit.text is discard at runtime, not link time, to deal with references
from .altinstructions and .eh_frame */
- .exit.text : { *(.exit.text) }
- .exit.data : { *(.exit.data) }
+ .exit.text : AT(ADDR(.exit.text) - LOAD_OFFSET) { *(.exit.text) }
+ .exit.data : AT(ADDR(.exit.data) - LOAD_OFFSET) { *(.exit.data) }
. = ALIGN(4096);
__initramfs_start = .;
- .init.ramfs : { *(.init.ramfs) }
+ .init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET) { *(.init.ramfs) }
__initramfs_end = .;
. = ALIGN(32);
__per_cpu_start = .;
- .data.percpu : { *(.data.percpu) }
+ .data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) { *(.data.percpu) }
__per_cpu_end = .;
. = ALIGN(4096);
__init_end = .;
/* freed after init ends here */
__bss_start = .; /* BSS */
- .bss : {
+ .bss.page_aligned : AT(ADDR(.bss.page_aligned) - LOAD_OFFSET) {
*(.bss.page_aligned)
+ }
+ .bss : AT(ADDR(.bss) - LOAD_OFFSET) {
*(.bss)
}
. = ALIGN(4);
OpenPOWER on IntegriCloud