| author | Steve French <sfrench@us.ibm.com> | 2008-04-28 04:01:34 +0000 |
|---|---|---|
| committer | Steve French <sfrench@us.ibm.com> | 2008-04-28 04:01:34 +0000 |
| commit | 1dbbb6077426f8ce63d6a59c5ac6613e1689cbde (patch) | |
| tree | 6141d4d7a8eb7c557705bdfa764137d4fd2e4924 /arch/x86/kernel | |
| parent | d09e860cf07e7c9ee12920a09f5080e30a12a23a (diff) | |
| parent | 064922a805ec7aadfafdd27aa6b4908d737c3c1d (diff) | |
Merge branch 'master' of /pub/scm/linux/kernel/git/torvalds/linux-2.6
Diffstat (limited to 'arch/x86/kernel')
39 files changed, 919 insertions, 508 deletions
```diff
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 90e092d0af0c..fa19c3819540 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -80,6 +80,8 @@ obj-$(CONFIG_DEBUG_RODATA_TEST) += test_rodata.o
 obj-$(CONFIG_DEBUG_NX_TEST) += test_nx.o
 obj-$(CONFIG_VMI) += vmi_32.o vmiclock_32.o
+obj-$(CONFIG_KVM_GUEST) += kvm.o
+obj-$(CONFIG_KVM_CLOCK) += kvmclock.o
 obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch_$(BITS).o
 
 ifdef CONFIG_INPUT_PCSPKR
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 057ccf1d5ad4..977ed5cdeaa3 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -697,10 +697,6 @@ static int __init acpi_parse_hpet(struct acpi_table_header *table)
 #define HPET_RESOURCE_NAME_SIZE 9
     hpet_res = alloc_bootmem(sizeof(*hpet_res) + HPET_RESOURCE_NAME_SIZE);
 
-    if (!hpet_res)
-        return 0;
-
-    memset(hpet_res, 0, sizeof(*hpet_res));
     hpet_res->name = (void *)&hpet_res[1];
     hpet_res->flags = IORESOURCE_MEM;
     snprintf((char *)hpet_res->name, HPET_RESOURCE_NAME_SIZE, "HPET %u",
diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c
index 8317401170b8..4b99b1bdeb6c 100644
--- a/arch/x86/kernel/apic_32.c
+++ b/arch/x86/kernel/apic_32.c
@@ -451,7 +451,8 @@ void __init setup_boot_APIC_clock(void)
     }
 
     /* Calculate the scaled math multiplication factor */
-    lapic_clockevent.mult = div_sc(delta, TICK_NSEC * LAPIC_CAL_LOOPS, 32);
+    lapic_clockevent.mult = div_sc(delta, TICK_NSEC * LAPIC_CAL_LOOPS,
+                                   lapic_clockevent.shift);
     lapic_clockevent.max_delta_ns =
         clockevent_delta2ns(0x7FFFFF, &lapic_clockevent);
     lapic_clockevent.min_delta_ns =
diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c
index bf83157337e4..5910020c3f24 100644
--- a/arch/x86/kernel/apic_64.c
+++ b/arch/x86/kernel/apic_64.c
@@ -360,7 +360,8 @@ static void __init calibrate_APIC_clock(void)
         result / 1000 / 1000, result / 1000 % 1000);
 
     /* Calculate the scaled math multiplication factor */
-    lapic_clockevent.mult = div_sc(result, NSEC_PER_SEC, 32);
+    lapic_clockevent.mult = div_sc(result, NSEC_PER_SEC,
+                                   lapic_clockevent.shift);
     lapic_clockevent.max_delta_ns =
         clockevent_delta2ns(0x7FFFFF, &lapic_clockevent);
     lapic_clockevent.min_delta_ns =
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index f0030a0999c7..e4ea362e8480 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -904,6 +904,7 @@ recalc:
             original_pm_idle();
         else
             default_idle();
+        local_irq_disable();
         jiffies_since_last_check = jiffies - last_jiffies;
         if (jiffies_since_last_check > idle_period)
             goto recalc;
@@ -911,6 +912,8 @@ recalc:
 
     if (apm_idle_done)
         apm_do_busy();
+
+    local_irq_enable();
 }
 
 /**
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index ee7c45235e54..a0c6f8190887 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -11,7 +11,6 @@ obj-$(CONFIG_X86_32) += cyrix.o
 obj-$(CONFIG_X86_32) += centaur.o
 obj-$(CONFIG_X86_32) += transmeta.o
 obj-$(CONFIG_X86_32) += intel.o
-obj-$(CONFIG_X86_32) += nexgen.o
 obj-$(CONFIG_X86_32) += umc.o
 
 obj-$(CONFIG_X86_MCE) += mcheck/
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 0173065dc3b7..245866828294 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -343,10 +343,4 @@ static struct cpu_dev amd_cpu_dev __cpuinitdata = {
     .c_size_cache = amd_size_cache,
 };
 
-int __init amd_init_cpu(void)
-{
-    cpu_devs[X86_VENDOR_AMD] = &amd_cpu_dev;
-    return 0;
-}
-
 cpu_vendor_dev_register(X86_VENDOR_AMD, &amd_cpu_dev);
diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c
index 9a699ed03598..e07e8c068ae0 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_64.c
@@ -49,7 +49,7 @@ static int banks;
 static unsigned long bank[NR_BANKS] = { [0 ... NR_BANKS-1] = ~0UL };
 static unsigned long notify_user;
 static int rip_msr;
-static int mce_bootlog = 1;
+static int mce_bootlog = -1;
 static atomic_t mce_events;
 
 static char trigger[128];
@@ -471,13 +471,15 @@ static void mce_init(void *dummy)
 static void __cpuinit mce_cpu_quirks(struct cpuinfo_x86 *c)
 {
     /* This should be disabled by the BIOS, but isn't always */
-    if (c->x86_vendor == X86_VENDOR_AMD && c->x86 == 15) {
-        /* disable GART TBL walk error reporting, which trips off
-           incorrectly with the IOMMU & 3ware & Cerberus. */
-        clear_bit(10, &bank[4]);
-        /* Lots of broken BIOS around that don't clear them
-           by default and leave crap in there. Don't log. */
-        mce_bootlog = 0;
+    if (c->x86_vendor == X86_VENDOR_AMD) {
+        if(c->x86 == 15)
+            /* disable GART TBL walk error reporting, which trips off
+               incorrectly with the IOMMU & 3ware & Cerberus. */
+            clear_bit(10, &bank[4]);
+        if(c->x86 <= 17 && mce_bootlog < 0)
+            /* Lots of broken BIOS around that don't clear them
+               by default and leave crap in there. Don't log. */
+            mce_bootlog = 0;
     }
 }
diff --git a/arch/x86/kernel/cpu/nexgen.c b/arch/x86/kernel/cpu/nexgen.c
deleted file mode 100644
index 5d5e1c134123..000000000000
--- a/arch/x86/kernel/cpu/nexgen.c
+++ /dev/null
@@ -1,59 +0,0 @@
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/string.h>
-#include <asm/processor.h>
-
-#include "cpu.h"
-
-/*
- * Detect a NexGen CPU running without BIOS hypercode new enough
- * to have CPUID. (Thanks to Herbert Oppmann)
- */
-
-static int __cpuinit deep_magic_nexgen_probe(void)
-{
-    int ret;
-
-    __asm__ __volatile__ (
-        "    movw    $0x5555, %%ax\n"
-        "    xorw    %%dx,%%dx\n"
-        "    movw    $2, %%cx\n"
-        "    divw    %%cx\n"
-        "    movl    $0, %%eax\n"
-        "    jnz    1f\n"
-        "    movl    $1, %%eax\n"
-        "1:\n"
-        : "=a" (ret) : : "cx", "dx");
-    return ret;
-}
-
-static void __cpuinit init_nexgen(struct cpuinfo_x86 *c)
-{
-    c->x86_cache_size = 256;    /* A few had 1 MB... */
-}
-
-static void __cpuinit nexgen_identify(struct cpuinfo_x86 *c)
-{
-    /* Detect NexGen with old hypercode */
-    if (deep_magic_nexgen_probe())
-        strcpy(c->x86_vendor_id, "NexGenDriven");
-}
-
-static struct cpu_dev nexgen_cpu_dev __cpuinitdata = {
-    .c_vendor    = "Nexgen",
-    .c_ident    = { "NexGenDriven" },
-    .c_models = {
-        { .vendor = X86_VENDOR_NEXGEN,
-          .family = 5,
-          .model_names = { [1] = "Nx586" }
-        },
-    },
-    .c_init        = init_nexgen,
-    .c_identify    = nexgen_identify,
-};
-
-int __init nexgen_init_cpu(void)
-{
-    cpu_devs[X86_VENDOR_NEXGEN] = &nexgen_cpu_dev;
-    return 0;
-}
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c
index b943e10ad814..f9ae93adffe5 100644
--- a/arch/x86/kernel/cpu/perfctr-watchdog.c
+++ b/arch/x86/kernel/cpu/perfctr-watchdog.c
@@ -614,16 +614,6 @@ static struct wd_ops intel_arch_wd_ops __read_mostly = {
     .evntsel = MSR_ARCH_PERFMON_EVENTSEL1,
 };
 
-static struct wd_ops coreduo_wd_ops = {
-    .reserve = single_msr_reserve,
-    .unreserve = single_msr_unreserve,
-    .setup = setup_intel_arch_watchdog,
-    .rearm = p6_rearm,
-    .stop = single_msr_stop_watchdog,
-    .perfctr = MSR_ARCH_PERFMON_PERFCTR0,
-    .evntsel = MSR_ARCH_PERFMON_EVENTSEL0,
-};
-
 static void probe_nmi_watchdog(void)
 {
     switch (boot_cpu_data.x86_vendor) {
@@ -637,8 +627,8 @@ static void probe_nmi_watchdog(void)
         /* Work around Core Duo (Yonah) errata AE49 where perfctr1
            doesn't have a working enable bit. */
         if (boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 14) {
-            wd_ops = &coreduo_wd_ops;
-            break;
+            intel_arch_wd_ops.perfctr = MSR_ARCH_PERFMON_PERFCTR0;
+            intel_arch_wd_ops.evntsel = MSR_ARCH_PERFMON_EVENTSEL0;
         }
         if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
             wd_ops = &intel_arch_wd_ops;
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index 2251d0ae9570..268553817909 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -25,6 +25,7 @@
 #include <asm/hpet.h>
 #include <linux/kdebug.h>
 #include <asm/smp.h>
+#include <asm/reboot.h>
 
 #include <mach_ipi.h>
 
@@ -117,7 +118,7 @@ static void nmi_shootdown_cpus(void)
 }
 #endif
 
-void machine_crash_shutdown(struct pt_regs *regs)
+void native_machine_crash_shutdown(struct pt_regs *regs)
 {
     /* This function is only called after the system
      * has panicked or is otherwise in a critical state.
```
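Several hunks in this merge (apic_32.c and apic_64.c above, and hpet.c, i8253.c and mfgpt_32.c below) replace a hardcoded scale of 32 with the clockevent device's own `.shift` when deriving `.mult` via `div_sc()`. The pair implements a fixed-point conversion from nanoseconds to device cycles, `cycles = (ns * mult) >> shift`, so `mult` is only meaningful when it is computed against the same `shift` the device declares. A minimal standalone sketch of that convention (the rate and 26-bit shift are hypothetical examples; this is not the kernel's actual `div_sc()` implementation):

```c
#include <stdint.h>
#include <stdio.h>

#define NSEC_PER_SEC 1000000000ULL

/* mult = (rate_hz << shift) / NSEC_PER_SEC -- the quantity div_sc()
 * computes for a clockevent device, here in plain C. */
static uint32_t make_mult(uint64_t rate_hz, uint32_t shift)
{
    return (uint32_t)((rate_hz << shift) / NSEC_PER_SEC);
}

int main(void)
{
    uint32_t shift = 26;        /* hypothetical device .shift */
    uint64_t rate  = 14318180;  /* hypothetical device rate in Hz */
    uint32_t mult  = make_mult(rate, shift);

    /* ns -> cycles, the direction clockevent programming uses */
    uint64_t ns     = 1000000;  /* 1 ms */
    uint64_t cycles = (ns * mult) >> shift;

    printf("mult=%u  1ms ~= %llu cycles\n",
           mult, (unsigned long long)cycles);
    return 0;
}
```

The point of the change is consistency: if a device's `.shift` ever differs from 32, a `mult` still derived with the literal 32 would silently mis-scale every programmed event.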
```diff
diff --git a/arch/x86/kernel/e820_64.c b/arch/x86/kernel/e820_64.c
index cbd42e51cb08..645ee5e32a27 100644
--- a/arch/x86/kernel/e820_64.c
+++ b/arch/x86/kernel/e820_64.c
@@ -84,14 +84,41 @@ void __init reserve_early(unsigned long start, unsigned long end, char *name)
         strncpy(r->name, name, sizeof(r->name) - 1);
 }
 
-void __init early_res_to_bootmem(void)
+void __init free_early(unsigned long start, unsigned long end)
+{
+    struct early_res *r;
+    int i, j;
+
+    for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
+        r = &early_res[i];
+        if (start == r->start && end == r->end)
+            break;
+    }
+    if (i >= MAX_EARLY_RES || !early_res[i].end)
+        panic("free_early on not reserved area: %lx-%lx!", start, end);
+
+    for (j = i + 1; j < MAX_EARLY_RES && early_res[j].end; j++)
+        ;
+
+    memcpy(&early_res[i], &early_res[i + 1],
+           (j - 1 - i) * sizeof(struct early_res));
+
+    early_res[j - 1].end = 0;
+}
+
+void __init early_res_to_bootmem(unsigned long start, unsigned long end)
 {
     int i;
+    unsigned long final_start, final_end;
+
     for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
         struct early_res *r = &early_res[i];
-        printk(KERN_INFO "early res: %d [%lx-%lx] %s\n", i,
-            r->start, r->end - 1, r->name);
-        reserve_bootmem_generic(r->start, r->end - r->start);
+        final_start = max(start, r->start);
+        final_end = min(end, r->end);
+        if (final_start >= final_end)
+            continue;
+        printk(KERN_INFO "  early res: %d [%lx-%lx] %s\n", i,
+            final_start, final_end - 1, r->name);
+        reserve_bootmem_generic(final_start, final_end - final_start);
     }
 }
diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c
index 9546ef408b92..021624c83583 100644
--- a/arch/x86/kernel/genapic_64.c
+++ b/arch/x86/kernel/genapic_64.c
@@ -51,7 +51,7 @@ void __init setup_apic_routing(void)
     else
 #endif
 
-    if (cpus_weight(cpu_possible_map) <= 8)
+    if (num_possible_cpus() <= 8)
         genapic = &apic_flat;
     else
         genapic = &apic_physflat;
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 993c76773256..e25c57b8aa84 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -11,6 +11,7 @@
 #include <linux/string.h>
 #include <linux/percpu.h>
 #include <linux/start_kernel.h>
+#include <linux/io.h>
 
 #include <asm/processor.h>
 #include <asm/proto.h>
@@ -22,6 +23,7 @@
 #include <asm/sections.h>
 #include <asm/kdebug.h>
 #include <asm/e820.h>
+#include <asm/bios_ebda.h>
 
 static void __init zap_identity_mappings(void)
 {
@@ -49,7 +51,6 @@ static void __init copy_bootdata(char *real_mode_data)
     }
 }
 
-#define BIOS_EBDA_SEGMENT 0x40E
 #define BIOS_LOWMEM_KILOBYTES 0x413
 
 /*
@@ -80,8 +81,7 @@ static void __init reserve_ebda_region(void)
     lowmem <<= 10;
 
     /* start of EBDA area */
-    ebda_addr = *(unsigned short *)__va(BIOS_EBDA_SEGMENT);
-    ebda_addr <<= 4;
+    ebda_addr = get_bios_ebda();
 
     /* Fixup: bios puts an EBDA in the top 64K segment */
     /* of conventional memory, but does not adjust lowmem. */
@@ -101,6 +101,24 @@ static void __init reserve_ebda_region(void)
     reserve_early(lowmem, 0x100000, "BIOS reserved");
 }
 
+static void __init reserve_setup_data(void)
+{
+    struct setup_data *data;
+    unsigned long pa_data;
+    char buf[32];
+
+    if (boot_params.hdr.version < 0x0209)
+        return;
+    pa_data = boot_params.hdr.setup_data;
+    while (pa_data) {
+        data = early_ioremap(pa_data, sizeof(*data));
+        sprintf(buf, "setup data %x", data->type);
+        reserve_early(pa_data, pa_data+sizeof(*data)+data->len, buf);
+        pa_data = data->next;
+        early_iounmap(data, sizeof(*data));
+    }
+}
+
 void __init x86_64_start_kernel(char * real_mode_data)
 {
     int i;
@@ -157,6 +175,7 @@ void __init x86_64_start_kernel(char * real_mode_data)
 #endif
 
     reserve_ebda_region();
+    reserve_setup_data();
 
     /*
      * At this point everything still needed from the boot loader
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 36652ea1a265..9007f9ea64ee 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -218,7 +218,7 @@ static void hpet_legacy_clockevent_register(void)
     hpet_freq = 1000000000000000ULL;
     do_div(hpet_freq, hpet_period);
     hpet_clockevent.mult = div_sc((unsigned long) hpet_freq,
-                      NSEC_PER_SEC, 32);
+                      NSEC_PER_SEC, hpet_clockevent.shift);
     /* Calculate the min / max delta */
     hpet_clockevent.max_delta_ns = clockevent_delta2ns(0x7FFFFFFF,
                                &hpet_clockevent);
diff --git a/arch/x86/kernel/i8253.c b/arch/x86/kernel/i8253.c
index 8540abe86ade..c1b5e3ece1f2 100644
--- a/arch/x86/kernel/i8253.c
+++ b/arch/x86/kernel/i8253.c
@@ -115,7 +115,8 @@ void __init setup_pit_timer(void)
      * IO_APIC has been initialized.
      */
     pit_clockevent.cpumask = cpumask_of_cpu(smp_processor_id());
-    pit_clockevent.mult = div_sc(CLOCK_TICK_RATE, NSEC_PER_SEC, 32);
+    pit_clockevent.mult = div_sc(CLOCK_TICK_RATE, NSEC_PER_SEC,
+                     pit_clockevent.shift);
     pit_clockevent.max_delta_ns =
         clockevent_delta2ns(0x7FFF, &pit_clockevent);
     pit_clockevent.min_delta_ns =
@@ -224,7 +225,8 @@ static int __init init_pit_clocksource(void)
         pit_clockevent.mode != CLOCK_EVT_MODE_PERIODIC)
         return 0;
 
-    clocksource_pit.mult = clocksource_hz2mult(CLOCK_TICK_RATE, 20);
+    clocksource_pit.mult = clocksource_hz2mult(CLOCK_TICK_RATE,
+                           clocksource_pit.shift);
     return clocksource_register(&clocksource_pit);
 }
 arch_initcall(init_pit_clocksource);
diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c
index 2e2f42074e18..696b8e4e66bb 100644
--- a/arch/x86/kernel/io_apic_32.c
+++ b/arch/x86/kernel/io_apic_32.c
@@ -2068,7 +2068,7 @@ static void __init setup_nmi(void)
  * cycles as some i82489DX-based boards have glue logic that keeps the
  * 8259A interrupt line asserted until INTA.  --macro
  */
-static inline void unlock_ExtINT_logic(void)
+static inline void __init unlock_ExtINT_logic(void)
 {
     int apic, pin, i;
     struct IO_APIC_route_entry entry0, entry1;
diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c
index 9ba11d07920f..ef1a8dfcc529 100644
--- a/arch/x86/kernel/io_apic_64.c
+++ b/arch/x86/kernel/io_apic_64.c
@@ -1599,7 +1599,7 @@ static void __init setup_nmi(void)
  * cycles as some i82489DX-based boards have glue logic that keeps the
  * 8259A interrupt line asserted until INTA.  --macro
  */
-static inline void unlock_ExtINT_logic(void)
+static inline void __init unlock_ExtINT_logic(void)
 {
     int apic, pin, i;
     struct IO_APIC_route_entry entry0, entry1;
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index 6ea67b76a214..00bda7bcda63 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -134,7 +134,7 @@ unsigned int do_IRQ(struct pt_regs *regs)
             : "=a" (arg1), "=d" (arg2), "=b" (bx)
             :  "0" (irq),   "1" (desc),  "2" (isp),
                "D" (desc->handle_irq)
-            : "memory", "cc"
+            : "memory", "cc", "ecx"
         );
     } else
 #endif
diff --git a/arch/x86/kernel/kdebugfs.c b/arch/x86/kernel/kdebugfs.c
index 73354302fda7..c03205991718 100644
--- a/arch/x86/kernel/kdebugfs.c
+++ b/arch/x86/kernel/kdebugfs.c
@@ -6,23 +6,171 @@
  *
  * This file is released under the GPLv2.
  */
-
 #include <linux/debugfs.h>
+#include <linux/uaccess.h>
 #include <linux/stat.h>
 #include <linux/init.h>
+#include <linux/io.h>
+#include <linux/mm.h>
 
 #include <asm/setup.h>
 
 #ifdef CONFIG_DEBUG_BOOT_PARAMS
+struct setup_data_node {
+    u64 paddr;
+    u32 type;
+    u32 len;
+};
+
+static ssize_t
+setup_data_read(struct file *file, char __user *user_buf, size_t count,
+        loff_t *ppos)
+{
+    struct setup_data_node *node = file->private_data;
+    unsigned long remain;
+    loff_t pos = *ppos;
+    struct page *pg;
+    void *p;
+    u64 pa;
+
+    if (pos < 0)
+        return -EINVAL;
+    if (pos >= node->len)
+        return 0;
+
+    if (count > node->len - pos)
+        count = node->len - pos;
+    pa = node->paddr + sizeof(struct setup_data) + pos;
+    pg = pfn_to_page((pa + count - 1) >> PAGE_SHIFT);
+    if (PageHighMem(pg)) {
+        p = ioremap_cache(pa, count);
+        if (!p)
+            return -ENXIO;
+    } else {
+        p = __va(pa);
+    }
+
+    remain = copy_to_user(user_buf, p, count);
+
+    if (PageHighMem(pg))
+        iounmap(p);
+
+    if (remain)
+        return -EFAULT;
+
+    *ppos = pos + count;
+
+    return count;
+}
+
+static int setup_data_open(struct inode *inode, struct file *file)
+{
+    file->private_data = inode->i_private;
+    return 0;
+}
+
+static const struct file_operations fops_setup_data = {
+    .read = setup_data_read,
+    .open = setup_data_open,
+};
+
+static int __init
+create_setup_data_node(struct dentry *parent, int no,
+               struct setup_data_node *node)
+{
+    struct dentry *d, *type, *data;
+    char buf[16];
+    int error;
+
+    sprintf(buf, "%d", no);
+    d = debugfs_create_dir(buf, parent);
+    if (!d) {
+        error = -ENOMEM;
+        goto err_return;
+    }
+    type = debugfs_create_x32("type", S_IRUGO, d, &node->type);
+    if (!type) {
+        error = -ENOMEM;
+        goto err_dir;
+    }
+    data = debugfs_create_file("data", S_IRUGO, d, node, &fops_setup_data);
+    if (!data) {
+        error = -ENOMEM;
+        goto err_type;
+    }
+    return 0;
+
+err_type:
+    debugfs_remove(type);
+err_dir:
+    debugfs_remove(d);
+err_return:
+    return error;
+}
+
+static int __init create_setup_data_nodes(struct dentry *parent)
+{
+    struct setup_data_node *node;
+    struct setup_data *data;
+    int error, no = 0;
+    struct dentry *d;
+    struct page *pg;
+    u64 pa_data;
+
+    d = debugfs_create_dir("setup_data", parent);
+    if (!d) {
+        error = -ENOMEM;
+        goto err_return;
+    }
+
+    pa_data = boot_params.hdr.setup_data;
+
+    while (pa_data) {
+        node = kmalloc(sizeof(*node), GFP_KERNEL);
+        if (!node) {
+            error = -ENOMEM;
+            goto err_dir;
+        }
+        pg = pfn_to_page((pa_data+sizeof(*data)-1) >> PAGE_SHIFT);
+        if (PageHighMem(pg)) {
+            data = ioremap_cache(pa_data, sizeof(*data));
+            if (!data) {
+                error = -ENXIO;
+                goto err_dir;
+            }
+        } else {
+            data = __va(pa_data);
+        }
+
+        node->paddr = pa_data;
+        node->type = data->type;
+        node->len = data->len;
+        error = create_setup_data_node(d, no, node);
+        pa_data = data->next;
+
+        if (PageHighMem(pg))
+            iounmap(data);
+        if (error)
+            goto err_dir;
+        no++;
+    }
+    return 0;
+
+err_dir:
+    debugfs_remove(d);
+err_return:
+    return error;
+}
+
 static struct debugfs_blob_wrapper boot_params_blob = {
-    .data = &boot_params,
-    .size = sizeof(boot_params),
+    .data       = &boot_params,
+    .size       = sizeof(boot_params),
 };
 
 static int __init boot_params_kdebugfs_init(void)
 {
-    int error;
     struct dentry *dbp, *version, *data;
+    int error;
 
     dbp = debugfs_create_dir("boot_params", NULL);
     if (!dbp) {
@@ -41,7 +189,13 @@ static int __init boot_params_kdebugfs_init(void)
         error = -ENOMEM;
         goto err_version;
     }
+    error = create_setup_data_nodes(dbp);
+    if (error)
+        goto err_data;
     return 0;
+
+err_data:
+    debugfs_remove(data);
 err_version:
     debugfs_remove(version);
 err_dir:
@@ -61,5 +215,4 @@ static int __init arch_kdebugfs_init(void)
 
     return error;
 }
-
 arch_initcall(arch_kdebugfs_init);
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
new file mode 100644
index 000000000000..8b7a3cf37d2b
--- /dev/null
+++ b/arch/x86/kernel/kvm.c
@@ -0,0 +1,248 @@
+/*
+ * KVM paravirt_ops implementation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ *
+ * Copyright (C) 2007, Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
+ * Copyright IBM Corporation, 2007
+ *   Authors: Anthony Liguori <aliguori@us.ibm.com>
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/kvm_para.h>
+#include <linux/cpu.h>
+#include <linux/mm.h>
+#include <linux/highmem.h>
+#include <linux/hardirq.h>
+
+#define MMU_QUEUE_SIZE 1024
+
+struct kvm_para_state {
+    u8 mmu_queue[MMU_QUEUE_SIZE];
+    int mmu_queue_len;
+    enum paravirt_lazy_mode mode;
+};
+
+static DEFINE_PER_CPU(struct kvm_para_state, para_state);
+
+static struct kvm_para_state *kvm_para_state(void)
+{
+    return &per_cpu(para_state, raw_smp_processor_id());
+}
+
+/*
+ * No need for any "IO delay" on KVM
+ */
+static void kvm_io_delay(void)
+{
+}
+
+static void kvm_mmu_op(void *buffer, unsigned len)
+{
+    int r;
+    unsigned long a1, a2;
+
+    do {
+        a1 = __pa(buffer);
+        a2 = 0;   /* on i386 __pa() always returns <4G */
+        r = kvm_hypercall3(KVM_HC_MMU_OP, len, a1, a2);
+        buffer += r;
+        len -= r;
+    } while (len);
+}
+
+static void mmu_queue_flush(struct kvm_para_state *state)
+{
+    if (state->mmu_queue_len) {
+        kvm_mmu_op(state->mmu_queue, state->mmu_queue_len);
+        state->mmu_queue_len = 0;
+    }
+}
+
+static void kvm_deferred_mmu_op(void *buffer, int len)
+{
+    struct kvm_para_state *state = kvm_para_state();
+
+    if (state->mode != PARAVIRT_LAZY_MMU) {
+        kvm_mmu_op(buffer, len);
+        return;
+    }
+    if (state->mmu_queue_len + len > sizeof state->mmu_queue)
+        mmu_queue_flush(state);
+    memcpy(state->mmu_queue + state->mmu_queue_len, buffer, len);
+    state->mmu_queue_len += len;
+}
+
+static void kvm_mmu_write(void *dest, u64 val)
+{
+    __u64 pte_phys;
+    struct kvm_mmu_op_write_pte wpte;
+
+#ifdef CONFIG_HIGHPTE
+    struct page *page;
+    unsigned long dst = (unsigned long) dest;
+
+    page = kmap_atomic_to_page(dest);
+    pte_phys = page_to_pfn(page);
+    pte_phys <<= PAGE_SHIFT;
+    pte_phys += (dst & ~(PAGE_MASK));
+#else
+    pte_phys = (unsigned long)__pa(dest);
+#endif
+    wpte.header.op = KVM_MMU_OP_WRITE_PTE;
+    wpte.pte_val = val;
+    wpte.pte_phys = pte_phys;
+
+    kvm_deferred_mmu_op(&wpte, sizeof wpte);
+}
+
+/*
+ * We only need to hook operations that are MMU writes.  We hook these so that
+ * we can use lazy MMU mode to batch these operations.  We could probably
+ * improve the performance of the host code if we used some of the information
+ * here to simplify processing of batched writes.
+ */
+static void kvm_set_pte(pte_t *ptep, pte_t pte)
+{
+    kvm_mmu_write(ptep, pte_val(pte));
+}
+
+static void kvm_set_pte_at(struct mm_struct *mm, unsigned long addr,
+               pte_t *ptep, pte_t pte)
+{
+    kvm_mmu_write(ptep, pte_val(pte));
+}
+
+static void kvm_set_pmd(pmd_t *pmdp, pmd_t pmd)
+{
+    kvm_mmu_write(pmdp, pmd_val(pmd));
+}
+
+#if PAGETABLE_LEVELS >= 3
+#ifdef CONFIG_X86_PAE
+static void kvm_set_pte_atomic(pte_t *ptep, pte_t pte)
+{
+    kvm_mmu_write(ptep, pte_val(pte));
+}
+
+static void kvm_set_pte_present(struct mm_struct *mm, unsigned long addr,
+                pte_t *ptep, pte_t pte)
+{
+    kvm_mmu_write(ptep, pte_val(pte));
+}
+
+static void kvm_pte_clear(struct mm_struct *mm,
+              unsigned long addr, pte_t *ptep)
+{
+    kvm_mmu_write(ptep, 0);
+}
+
+static void kvm_pmd_clear(pmd_t *pmdp)
+{
+    kvm_mmu_write(pmdp, 0);
+}
+#endif
+
+static void kvm_set_pud(pud_t *pudp, pud_t pud)
+{
+    kvm_mmu_write(pudp, pud_val(pud));
+}
+
+#if PAGETABLE_LEVELS == 4
+static void kvm_set_pgd(pgd_t *pgdp, pgd_t pgd)
+{
+    kvm_mmu_write(pgdp, pgd_val(pgd));
+}
+#endif
+#endif /* PAGETABLE_LEVELS >= 3 */
+
+static void kvm_flush_tlb(void)
+{
+    struct kvm_mmu_op_flush_tlb ftlb = {
+        .header.op = KVM_MMU_OP_FLUSH_TLB,
+    };
+
+    kvm_deferred_mmu_op(&ftlb, sizeof ftlb);
+}
+
+static void kvm_release_pt(u32 pfn)
+{
+    struct kvm_mmu_op_release_pt rpt = {
+        .header.op = KVM_MMU_OP_RELEASE_PT,
+        .pt_phys = (u64)pfn << PAGE_SHIFT,
+    };
+
+    kvm_mmu_op(&rpt, sizeof rpt);
+}
+
+static void kvm_enter_lazy_mmu(void)
+{
+    struct kvm_para_state *state = kvm_para_state();
+
+    paravirt_enter_lazy_mmu();
+    state->mode = paravirt_get_lazy_mode();
+}
+
+static void kvm_leave_lazy_mmu(void)
+{
+    struct kvm_para_state *state = kvm_para_state();
+
+    mmu_queue_flush(state);
+    paravirt_leave_lazy(paravirt_get_lazy_mode());
+    state->mode = paravirt_get_lazy_mode();
+}
+
+static void paravirt_ops_setup(void)
+{
+    pv_info.name = "KVM";
+    pv_info.paravirt_enabled = 1;
+
+    if (kvm_para_has_feature(KVM_FEATURE_NOP_IO_DELAY))
+        pv_cpu_ops.io_delay = kvm_io_delay;
+
+    if (kvm_para_has_feature(KVM_FEATURE_MMU_OP)) {
+        pv_mmu_ops.set_pte = kvm_set_pte;
+        pv_mmu_ops.set_pte_at = kvm_set_pte_at;
+        pv_mmu_ops.set_pmd = kvm_set_pmd;
+#if PAGETABLE_LEVELS >= 3
+#ifdef CONFIG_X86_PAE
+        pv_mmu_ops.set_pte_atomic = kvm_set_pte_atomic;
+        pv_mmu_ops.set_pte_present = kvm_set_pte_present;
+        pv_mmu_ops.pte_clear = kvm_pte_clear;
+        pv_mmu_ops.pmd_clear = kvm_pmd_clear;
+#endif
+        pv_mmu_ops.set_pud = kvm_set_pud;
+#if PAGETABLE_LEVELS == 4
+        pv_mmu_ops.set_pgd = kvm_set_pgd;
+#endif
+#endif
+        pv_mmu_ops.flush_tlb_user = kvm_flush_tlb;
+        pv_mmu_ops.release_pte = kvm_release_pt;
+        pv_mmu_ops.release_pmd = kvm_release_pt;
+        pv_mmu_ops.release_pud = kvm_release_pt;
+
+        pv_mmu_ops.lazy_mode.enter = kvm_enter_lazy_mmu;
+        pv_mmu_ops.lazy_mode.leave = kvm_leave_lazy_mmu;
+    }
+}
+
+void __init kvm_guest_init(void)
+{
+    if (!kvm_para_available())
+        return;
+
+    paravirt_ops_setup();
+}
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
new file mode 100644
index 000000000000..ddee04043aeb
--- /dev/null
+++ b/arch/x86/kernel/kvmclock.c
@@ -0,0 +1,187 @@
+/*  KVM paravirtual clock driver. A clocksource implementation
+    Copyright (C) 2008 Glauber de Oliveira Costa, Red Hat Inc.
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+*/
+
+#include <linux/clocksource.h>
+#include <linux/kvm_para.h>
+#include <asm/arch_hooks.h>
+#include <asm/msr.h>
+#include <asm/apic.h>
+#include <linux/percpu.h>
+#include <asm/reboot.h>
+
+#define KVM_SCALE 22
+
+static int kvmclock = 1;
+
+static int parse_no_kvmclock(char *arg)
+{
+    kvmclock = 0;
+    return 0;
+}
+early_param("no-kvmclock", parse_no_kvmclock);
+
+/* The hypervisor will put information about time periodically here */
+static DEFINE_PER_CPU_SHARED_ALIGNED(struct kvm_vcpu_time_info, hv_clock);
+#define get_clock(cpu, field) per_cpu(hv_clock, cpu).field
+
+static inline u64 kvm_get_delta(u64 last_tsc)
+{
+    int cpu = smp_processor_id();
+    u64 delta = native_read_tsc() - last_tsc;
+    return (delta * get_clock(cpu, tsc_to_system_mul)) >> KVM_SCALE;
+}
+
+static struct kvm_wall_clock wall_clock;
+static cycle_t kvm_clock_read(void);
+
+/*
+ * The wallclock is the time of day when we booted. Since then, some time may
+ * have elapsed since the hypervisor wrote the data. So we try to account for
+ * that with system time
+ */
+unsigned long kvm_get_wallclock(void)
+{
+    u32 wc_sec, wc_nsec;
+    u64 delta;
+    struct timespec ts;
+    int version, nsec;
+    int low, high;
+
+    low = (int)__pa(&wall_clock);
+    high = ((u64)__pa(&wall_clock) >> 32);
+
+    delta = kvm_clock_read();
+
+    native_write_msr(MSR_KVM_WALL_CLOCK, low, high);
+    do {
+        version = wall_clock.wc_version;
+        rmb();
+        wc_sec = wall_clock.wc_sec;
+        wc_nsec = wall_clock.wc_nsec;
+        rmb();
+    } while ((wall_clock.wc_version != version) || (version & 1));
+
+    delta = kvm_clock_read() - delta;
+    delta += wc_nsec;
+    nsec = do_div(delta, NSEC_PER_SEC);
+    set_normalized_timespec(&ts, wc_sec + delta, nsec);
+    /*
+     * Of all mechanisms of time adjustment I've tested, this one
+     * was the champion!
+     */
+    return ts.tv_sec + 1;
+}
+
+int kvm_set_wallclock(unsigned long now)
+{
+    return 0;
+}
+
+/*
+ * This is our read_clock function. The host puts an tsc timestamp each time
+ * it updates a new time. Without the tsc adjustment, we can have a situation
+ * in which a vcpu starts to run earlier (smaller system_time), but probes
+ * time later (compared to another vcpu), leading to backwards time
+ */
+static cycle_t kvm_clock_read(void)
+{
+    u64 last_tsc, now;
+    int cpu;
+
+    preempt_disable();
+    cpu = smp_processor_id();
+
+    last_tsc = get_clock(cpu, tsc_timestamp);
+    now = get_clock(cpu, system_time);
+
+    now += kvm_get_delta(last_tsc);
+    preempt_enable();
+
+    return now;
+}
+
+static struct clocksource kvm_clock = {
+    .name = "kvm-clock",
+    .read = kvm_clock_read,
+    .rating = 400,
+    .mask = CLOCKSOURCE_MASK(64),
+    .mult = 1 << KVM_SCALE,
+    .shift = KVM_SCALE,
+    .flags = CLOCK_SOURCE_IS_CONTINUOUS,
+};
+
+static int kvm_register_clock(void)
+{
+    int cpu = smp_processor_id();
+    int low, high;
+    low = (int)__pa(&per_cpu(hv_clock, cpu)) | 1;
+    high = ((u64)__pa(&per_cpu(hv_clock, cpu)) >> 32);
+
+    return native_write_msr_safe(MSR_KVM_SYSTEM_TIME, low, high);
+}
+
+static void kvm_setup_secondary_clock(void)
+{
+    /*
+     * Now that the first cpu already had this clocksource initialized,
+     * we shouldn't fail.
+     */
+    WARN_ON(kvm_register_clock());
+    /* ok, done with our trickery, call native */
+    setup_secondary_APIC_clock();
+}
+
+/*
+ * After the clock is registered, the host will keep writing to the
+ * registered memory location. If the guest happens to shutdown, this memory
+ * won't be valid. In cases like kexec, in which you install a new kernel, this
+ * means a random memory location will be kept being written. So before any
+ * kind of shutdown from our side, we unregister the clock by writting anything
+ * that does not have the 'enable' bit set in the msr
+ */
+#ifdef CONFIG_KEXEC
+static void kvm_crash_shutdown(struct pt_regs *regs)
+{
+    native_write_msr_safe(MSR_KVM_SYSTEM_TIME, 0, 0);
+    native_machine_crash_shutdown(regs);
+}
+#endif
+
+static void kvm_shutdown(void)
+{
+    native_write_msr_safe(MSR_KVM_SYSTEM_TIME, 0, 0);
+    native_machine_shutdown();
+}
+
+void __init kvmclock_init(void)
+{
+    if (!kvm_para_available())
+        return;
+
+    if (kvmclock && kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE)) {
+        if (kvm_register_clock())
+            return;
+        pv_time_ops.get_wallclock = kvm_get_wallclock;
+        pv_time_ops.set_wallclock = kvm_set_wallclock;
+        pv_time_ops.sched_clock = kvm_clock_read;
+        pv_apic_ops.setup_secondary_clock = kvm_setup_secondary_clock;
+        machine_ops.shutdown = kvm_shutdown;
+#ifdef CONFIG_KEXEC
+        machine_ops.crash_shutdown = kvm_crash_shutdown;
+#endif
+        clocksource_register(&kvm_clock);
+    }
+}
diff --git a/arch/x86/kernel/mfgpt_32.c b/arch/x86/kernel/mfgpt_32.c
index b402c0f3f192..cfc2648d25ff 100644
--- a/arch/x86/kernel/mfgpt_32.c
+++ b/arch/x86/kernel/mfgpt_32.c
@@ -364,7 +364,8 @@ int __init mfgpt_timer_setup(void)
     geode_mfgpt_write(mfgpt_event_clock, MFGPT_REG_SETUP, val);
 
     /* Set up the clock event */
-    mfgpt_clockevent.mult = div_sc(MFGPT_HZ, NSEC_PER_SEC, 32);
+    mfgpt_clockevent.mult = div_sc(MFGPT_HZ, NSEC_PER_SEC,
+                       mfgpt_clockevent.shift);
     mfgpt_clockevent.min_delta_ns = clockevent_delta2ns(0xF,
                                 &mfgpt_clockevent);
     mfgpt_clockevent.max_delta_ns = clockevent_delta2ns(0xFFFE,
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 70744e344fa1..3e2c54dc8b29 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -686,13 +686,11 @@ void __init get_smp_config(void)
 static int __init smp_scan_config(unsigned long base, unsigned long length,
                   unsigned reserve)
 {
-    extern void __bad_mpf_size(void);
     unsigned int *bp = phys_to_virt(base);
     struct intel_mp_floating *mpf;
 
     Dprintk("Scan SMP from %p for %ld bytes.\n", bp, length);
-    if (sizeof(*mpf) != 16)
-        __bad_mpf_size();
+    BUILD_BUG_ON(sizeof(*mpf) != 16);
 
     while (length > 0) {
         mpf = (struct intel_mp_floating *)bp;
@@ -801,7 +799,6 @@ void __init find_smp_config(void)
 #ifdef CONFIG_X86_IO_APIC
 
 #define MP_ISA_BUS        0
-#define MP_MAX_IOAPIC_PIN    127
 
 extern struct mp_ioapic_routing mp_ioapic_routing[MAX_IO_APICS];
 
@@ -820,7 +817,7 @@ static int mp_find_ioapic(int gsi)
     return -1;
 }
 
-static u8 uniq_ioapic_id(u8 id)
+static u8 __init uniq_ioapic_id(u8 id)
 {
 #ifdef CONFIG_X86_32
     if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
@@ -909,14 +906,7 @@ void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi)
     intsrc.mpc_dstapic = mp_ioapics[ioapic].mpc_apicid; /* APIC ID */
     intsrc.mpc_dstirq = pin;    /* INTIN# */
 
-    Dprintk("Int: type %d, pol %d, trig %d, bus %d, irq %d, %d-%d\n",
-        intsrc.mpc_irqtype, intsrc.mpc_irqflag & 3,
-        (intsrc.mpc_irqflag >> 2) & 3, intsrc.mpc_srcbus,
-        intsrc.mpc_srcbusirq, intsrc.mpc_dstapic, intsrc.mpc_dstirq);
-
-    mp_irqs[mp_irq_entries] = intsrc;
-    if (++mp_irq_entries == MAX_IRQ_SOURCES)
-        panic("Max # of irq sources exceeded!\n");
+    MP_intsrc_info(&intsrc);
 }
 
 int es7000_plat;
@@ -985,23 +975,14 @@ void __init mp_config_acpi_legacy_irqs(void)
         intsrc.mpc_srcbusirq = i;    /* Identity mapped */
         intsrc.mpc_dstirq = i;
 
-        Dprintk("Int: type %d, pol %d, trig %d, bus %d, irq %d, "
-            "%d-%d\n", intsrc.mpc_irqtype, intsrc.mpc_irqflag & 3,
-            (intsrc.mpc_irqflag >> 2) & 3, intsrc.mpc_srcbus,
-            intsrc.mpc_srcbusirq, intsrc.mpc_dstapic,
-            intsrc.mpc_dstirq);
-
-        mp_irqs[mp_irq_entries] = intsrc;
-        if (++mp_irq_entries == MAX_IRQ_SOURCES)
-            panic("Max # of irq sources exceeded!\n");
+        MP_intsrc_info(&intsrc);
     }
 }
 
 int mp_register_gsi(u32 gsi, int triggering, int polarity)
 {
-    int ioapic = -1;
-    int ioapic_pin = 0;
-    int idx, bit = 0;
+    int ioapic;
+    int ioapic_pin;
 #ifdef CONFIG_X86_32
 #define MAX_GSI_NUM    4096
 #define IRQ_COMPRESSION_START    64
@@ -1041,15 +1022,13 @@ int mp_register_gsi(u32 gsi, int triggering, int polarity)
      * with redundant pin->gsi mappings (but unique PCI devices);
      * we only program the IOAPIC on the first.
      */
-    bit = ioapic_pin % 32;
-    idx = (ioapic_pin < 32) ? 0 : (ioapic_pin / 32);
-    if (idx > 3) {
+    if (ioapic_pin > MP_MAX_IOAPIC_PIN) {
         printk(KERN_ERR "Invalid reference to IOAPIC pin "
                "%d-%d\n", mp_ioapic_routing[ioapic].apic_id,
                ioapic_pin);
         return gsi;
     }
-    if ((1 << bit) & mp_ioapic_routing[ioapic].pin_programmed[idx]) {
+    if (test_bit(ioapic_pin, mp_ioapic_routing[ioapic].pin_programmed)) {
         Dprintk(KERN_DEBUG "Pin %d-%d already programmed\n",
             mp_ioapic_routing[ioapic].apic_id, ioapic_pin);
 #ifdef CONFIG_X86_32
@@ -1059,7 +1038,7 @@ int mp_register_gsi(u32 gsi, int triggering, int polarity)
 #endif
     }
 
-    mp_ioapic_routing[ioapic].pin_programmed[idx] |= (1 << bit);
+    set_bit(ioapic_pin, mp_ioapic_routing[ioapic].pin_programmed);
 #ifdef CONFIG_X86_32
     /*
      * For GSI >= 64, use IRQ compression
diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c
index 2edee22e9c30..e28ec497e142 100644
--- a/arch/x86/kernel/pci-calgary_64.c
+++ b/arch/x86/kernel/pci-calgary_64.c
@@ -43,6 +43,7 @@
 #include <asm/system.h>
 #include <asm/dma.h>
 #include <asm/rio.h>
+#include <asm/bios_ebda.h>
 
 #ifdef CONFIG_CALGARY_IOMMU_ENABLED_BY_DEFAULT
 int use_calgary __read_mostly = 1;
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 3004d716539d..67e9b4a1e89d 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -4,6 +4,8 @@
 #include <linux/smp.h>
 #include <linux/slab.h>
 #include <linux/sched.h>
+#include <linux/module.h>
+#include <linux/pm.h>
 
 struct kmem_cache *task_xstate_cachep;
 
@@ -42,3 +44,118 @@ void arch_task_cache_init(void)
                   __alignof__(union thread_xstate),
                   SLAB_PANIC, NULL);
 }
+
+static void do_nothing(void *unused)
+{
+}
+
+/*
+ * cpu_idle_wait - Used to ensure that all the CPUs discard old value of
+ * pm_idle and update to new pm_idle value. Required while changing pm_idle
+ * handler on SMP systems.
+ *
+ * Caller must have changed pm_idle to the new value before the call. Old
+ * pm_idle value will not be used by any CPU after the return of this function.
+ */
+void cpu_idle_wait(void)
+{
+    smp_mb();
+    /* kick all the CPUs so that they exit out of pm_idle */
+    smp_call_function(do_nothing, NULL, 0, 1);
+}
+EXPORT_SYMBOL_GPL(cpu_idle_wait);
+
+/*
+ * This uses new MONITOR/MWAIT instructions on P4 processors with PNI,
+ * which can obviate IPI to trigger checking of need_resched.
+ * We execute MONITOR against need_resched and enter optimized wait state
+ * through MWAIT. Whenever someone changes need_resched, we would be woken
+ * up from MWAIT (without an IPI).
+ *
+ * New with Core Duo processors, MWAIT can take some hints based on CPU
+ * capability.
+ */
+void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
+{
+    if (!need_resched()) {
+        __monitor((void *)&current_thread_info()->flags, 0, 0);
+        smp_mb();
+        if (!need_resched())
+            __mwait(ax, cx);
+    }
+}
+
+/* Default MONITOR/MWAIT with no hints, used for default C1 state */
+static void mwait_idle(void)
+{
+    if (!need_resched()) {
+        __monitor((void *)&current_thread_info()->flags, 0, 0);
+        smp_mb();
+        if (!need_resched())
+            __sti_mwait(0, 0);
+        else
+            local_irq_enable();
+    } else
+        local_irq_enable();
+}
+
+
+static int __cpuinit mwait_usable(const struct cpuinfo_x86 *c)
+{
+    if (force_mwait)
+        return 1;
+    /* Any C1 states supported? */
+    return c->cpuid_level >= 5 && ((cpuid_edx(5) >> 4) & 0xf) > 0;
+}
+
+/*
+ * On SMP it's slightly faster (but much more power-consuming!)
+ * to poll the ->work.need_resched flag instead of waiting for the
+ * cross-CPU IPI to arrive. Use this option with caution.
+ */
+static void poll_idle(void)
+{
+    local_irq_enable();
+    cpu_relax();
+}
+
+void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
+{
+    static int selected;
+
+    if (selected)
+        return;
+#ifdef CONFIG_X86_SMP
+    if (pm_idle == poll_idle && smp_num_siblings > 1) {
+        printk(KERN_WARNING "WARNING: polling idle and HT enabled,"
+            " performance may degrade.\n");
+    }
+#endif
+    if (cpu_has(c, X86_FEATURE_MWAIT) && mwait_usable(c)) {
+        /*
+         * Skip, if setup has overridden idle.
+         * One CPU supports mwait => All CPUs supports mwait
+         */
+        if (!pm_idle) {
+            printk(KERN_INFO "using mwait in idle threads.\n");
+            pm_idle = mwait_idle;
+        }
+    }
+    selected = 1;
+}
+
+static int __init idle_setup(char *str)
+{
+    if (!strcmp(str, "poll")) {
+        printk("using polling idle threads.\n");
+        pm_idle = poll_idle;
+    } else if (!strcmp(str, "mwait"))
+        force_mwait = 1;
+    else
+        return -1;
+
+    boot_option_idle_override = 1;
+    return 0;
+}
+early_param("idle", idle_setup);
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 77de848bd1fb..f8476dfbb60d 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -111,12 +111,10 @@ void default_idle(void)
          */
         smp_mb();
 
-        local_irq_disable();
-        if (!need_resched()) {
+        if (!need_resched())
             safe_halt();    /* enables interrupts racelessly */
-            local_irq_disable();
-        }
-        local_irq_enable();
+        else
+            local_irq_enable();
         current_thread_info()->status |= TS_POLLING;
     } else {
         local_irq_enable();
@@ -128,17 +126,6 @@ void default_idle(void)
 EXPORT_SYMBOL(default_idle);
 #endif
 
-/*
- * On SMP it's slightly faster (but much more power-consuming!)
- * to poll the ->work.need_resched flag instead of waiting for the
- * cross-CPU IPI to arrive. Use this option with caution.
- */
-static void poll_idle(void)
-{
-    local_irq_enable();
-    cpu_relax();
-}
-
 #ifdef CONFIG_HOTPLUG_CPU
 #include <asm/nmi.h>
 /* We don't actually take CPU down, just spin without interrupts. */
@@ -196,6 +183,7 @@ void cpu_idle(void)
             if (cpu_is_offline(cpu))
                 play_dead();
 
+            local_irq_disable();
             __get_cpu_var(irq_stat).idle_timestamp = jiffies;
             idle();
         }
@@ -206,104 +194,6 @@ void cpu_idle(void)
     }
 }
 
-static void do_nothing(void *unused)
-{
-}
-
-/*
- * cpu_idle_wait - Used to ensure that all the CPUs discard old value of
- * pm_idle and update to new pm_idle value. Required while changing pm_idle
- * handler on SMP systems.
- *
- * Caller must have changed pm_idle to the new value before the call. Old
- * pm_idle value will not be used by any CPU after the return of this function.
- */
-void cpu_idle_wait(void)
-{
-    smp_mb();
-    /* kick all the CPUs so that they exit out of pm_idle */
-    smp_call_function(do_nothing, NULL, 0, 1);
-}
-EXPORT_SYMBOL_GPL(cpu_idle_wait);
-
-/*
- * This uses new MONITOR/MWAIT instructions on P4 processors with PNI,
- * which can obviate IPI to trigger checking of need_resched.
- * We execute MONITOR against need_resched and enter optimized wait state
- * through MWAIT. Whenever someone changes need_resched, we would be woken
- * up from MWAIT (without an IPI).
- *
- * New with Core Duo processors, MWAIT can take some hints based on CPU
- * capability.
- */
-void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
-{
-    if (!need_resched()) {
-        __monitor((void *)&current_thread_info()->flags, 0, 0);
-        smp_mb();
-        if (!need_resched())
-            __sti_mwait(ax, cx);
-        else
-            local_irq_enable();
-    } else
-        local_irq_enable();
-}
-
-/* Default MONITOR/MWAIT with no hints, used for default C1 state */
-static void mwait_idle(void)
-{
-    local_irq_enable();
-    mwait_idle_with_hints(0, 0);
-}
-
-static int __cpuinit mwait_usable(const struct cpuinfo_x86 *c)
-{
-    if (force_mwait)
-        return 1;
-    /* Any C1 states supported? */
-    return c->cpuid_level >= 5 && ((cpuid_edx(5) >> 4) & 0xf) > 0;
-}
-
-void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
-{
-    static int selected;
-
-    if (selected)
-        return;
-#ifdef CONFIG_X86_SMP
-    if (pm_idle == poll_idle && smp_num_siblings > 1) {
-        printk(KERN_WARNING "WARNING: polling idle and HT enabled,"
-            " performance may degrade.\n");
-    }
-#endif
-    if (cpu_has(c, X86_FEATURE_MWAIT) && mwait_usable(c)) {
-        /*
-         * Skip, if setup has overridden idle.
-         * One CPU supports mwait => All CPUs supports mwait
-         */
-        if (!pm_idle) {
-            printk(KERN_INFO "using mwait in idle threads.\n");
-            pm_idle = mwait_idle;
-        }
-    }
-    selected = 1;
-}
-
-static int __init idle_setup(char *str)
-{
-    if (!strcmp(str, "poll")) {
-        printk("using polling idle threads.\n");
-        pm_idle = poll_idle;
-    } else if (!strcmp(str, "mwait"))
-        force_mwait = 1;
-    else
-        return -1;
-
-    boot_option_idle_override = 1;
-    return 0;
-}
-early_param("idle", idle_setup);
-
 void __show_registers(struct pt_regs *regs, int all)
 {
     unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L;
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 131c2ee7ac56..e2319f39988b 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -106,26 +106,13 @@ void default_idle(void)
      * test NEED_RESCHED:
      */
     smp_mb();
-    local_irq_disable();
-    if (!need_resched()) {
+    if (!need_resched())
         safe_halt();    /* enables interrupts racelessly */
-        local_irq_disable();
-    }
-    local_irq_enable();
+    else
+        local_irq_enable();
     current_thread_info()->status |= TS_POLLING;
 }
 
-/*
- * On SMP it's slightly faster (but much more power-consuming!)
- * to poll the ->need_resched flag instead of waiting for the
- * cross-CPU IPI to arrive. Use this option with caution.
- */
-static void poll_idle(void)
-{
-    local_irq_enable();
-    cpu_relax();
-}
-
 #ifdef CONFIG_HOTPLUG_CPU
 DECLARE_PER_CPU(int, cpu_state);
 
@@ -192,110 +179,6 @@ void cpu_idle(void)
     }
 }
 
-static void do_nothing(void *unused)
-{
-}
-
-/*
- * cpu_idle_wait - Used to ensure that all the CPUs discard old value of
- * pm_idle and update to new pm_idle value. Required while changing pm_idle
- * handler on SMP systems.
- *
- * Caller must have changed pm_idle to the new value before the call. Old
- * pm_idle value will not be used by any CPU after the return of this function.
- */
-void cpu_idle_wait(void)
-{
-    smp_mb();
-    /* kick all the CPUs so that they exit out of pm_idle */
-    smp_call_function(do_nothing, NULL, 0, 1);
-}
-EXPORT_SYMBOL_GPL(cpu_idle_wait);
-
-/*
- * This uses new MONITOR/MWAIT instructions on P4 processors with PNI,
- * which can obviate IPI to trigger checking of need_resched.
- * We execute MONITOR against need_resched and enter optimized wait state
- * through MWAIT. Whenever someone changes need_resched, we would be woken
- * up from MWAIT (without an IPI).
- *
- * New with Core Duo processors, MWAIT can take some hints based on CPU
- * capability.
- */
-void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
-{
-    if (!need_resched()) {
-        __monitor((void *)&current_thread_info()->flags, 0, 0);
-        smp_mb();
-        if (!need_resched())
-            __mwait(ax, cx);
-    }
-}
-
-/* Default MONITOR/MWAIT with no hints, used for default C1 state */
-static void mwait_idle(void)
-{
-    if (!need_resched()) {
-        __monitor((void *)&current_thread_info()->flags, 0, 0);
-        smp_mb();
-        if (!need_resched())
-            __sti_mwait(0, 0);
-        else
-            local_irq_enable();
-    } else {
-        local_irq_enable();
-    }
-}
-
-
-static int __cpuinit mwait_usable(const struct cpuinfo_x86 *c)
-{
-    if (force_mwait)
-        return 1;
-    /* Any C1 states supported? */
-    return c->cpuid_level >= 5 && ((cpuid_edx(5) >> 4) & 0xf) > 0;
-}
-
-void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
-{
-    static int selected;
-
-    if (selected)
-        return;
-#ifdef CONFIG_X86_SMP
-    if (pm_idle == poll_idle && smp_num_siblings > 1) {
-        printk(KERN_WARNING "WARNING: polling idle and HT enabled,"
-            " performance may degrade.\n");
-    }
-#endif
-    if (cpu_has(c, X86_FEATURE_MWAIT) && mwait_usable(c)) {
-        /*
-         * Skip, if setup has overridden idle.
-         * One CPU supports mwait => All CPUs supports mwait
-         */
-        if (!pm_idle) {
-            printk(KERN_INFO "using mwait in idle threads.\n");
-            pm_idle = mwait_idle;
-        }
-    }
-    selected = 1;
-}
-
-static int __init idle_setup(char *str)
-{
-    if (!strcmp(str, "poll")) {
-        printk("using polling idle threads.\n");
-        pm_idle = poll_idle;
-    } else if (!strcmp(str, "mwait"))
-        force_mwait = 1;
-    else
-        return -1;
-
-    boot_option_idle_override = 1;
-    return 0;
-}
-early_param("idle", idle_setup);
-
 /* Prints also some state that isn't saved in the pt_regs */
 void __show_regs(struct pt_regs * regs)
 {
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 559c1b027417..fb03ef380f0e 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -1207,97 +1207,16 @@ static int genregs32_set(struct task_struct *target,
     return ret;
 }
 
-static long ptrace32_siginfo(unsigned request, u32 pid, u32 addr, u32 data)
+long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
+            compat_ulong_t caddr, compat_ulong_t cdata)
 {
-    siginfo_t __user *si = compat_alloc_user_space(sizeof(siginfo_t));
-    compat_siginfo_t __user *si32 = compat_ptr(data);
-    siginfo_t ssi;
-    int ret;
-
-    if (request == PTRACE_SETSIGINFO) {
-        memset(&ssi, 0, sizeof(siginfo_t));
-        ret = copy_siginfo_from_user32(&ssi, si32);
-        if (ret)
-            return ret;
-        if (copy_to_user(si, &ssi, sizeof(siginfo_t)))
-            return -EFAULT;
-    }
-    ret = sys_ptrace(request, pid, addr, (unsigned long)si);
-    if (ret)
-        return ret;
-    if (request == PTRACE_GETSIGINFO) {
-        if (copy_from_user(&ssi, si, sizeof(siginfo_t)))
-            return -EFAULT;
-        ret = copy_siginfo_to_user32(si32, &ssi);
-    }
-    return ret;
-}
-
-asmlinkage long sys32_ptrace(long request, u32 pid, u32 addr, u32 data)
-{
-    struct task_struct *child;
-    struct pt_regs *childregs;
+    unsigned long addr = caddr;
+    unsigned long data = cdata;
     void __user *datap = compat_ptr(data);
     int ret;
     __u32 val;
 
     switch (request) {
-    case PTRACE_TRACEME:
-    case PTRACE_ATTACH:
-    case PTRACE_KILL:
-    case PTRACE_CONT:
-    case PTRACE_SINGLESTEP:
-    case PTRACE_SINGLEBLOCK:
-    case PTRACE_DETACH:
-    case PTRACE_SYSCALL:
-    case PTRACE_OLDSETOPTIONS:
-    case PTRACE_SETOPTIONS:
-    case PTRACE_SET_THREAD_AREA:
-    case PTRACE_GET_THREAD_AREA:
-#ifdef X86_BTS
-    case PTRACE_BTS_CONFIG:
-    case PTRACE_BTS_STATUS:
-    case PTRACE_BTS_SIZE:
-    case PTRACE_BTS_GET:
-    case PTRACE_BTS_CLEAR:
-    case PTRACE_BTS_DRAIN:
-#endif
-        return sys_ptrace(request, pid, addr, data);
-
-    default:
-        return -EINVAL;
-
-    case PTRACE_PEEKTEXT:
-    case PTRACE_PEEKDATA:
-    case PTRACE_POKEDATA:
-    case PTRACE_POKETEXT:
-    case PTRACE_POKEUSR:
-    case PTRACE_PEEKUSR:
-    case PTRACE_GETREGS:
-    case PTRACE_SETREGS:
-    case PTRACE_SETFPREGS:
-    case PTRACE_GETFPREGS:
-    case PTRACE_SETFPXREGS:
-    case PTRACE_GETFPXREGS:
-    case PTRACE_GETEVENTMSG:
-        break;
-
-    case PTRACE_SETSIGINFO:
-    case PTRACE_GETSIGINFO:
-        return ptrace32_siginfo(request, pid, addr, data);
-    }
-
-    child = ptrace_get_task_struct(pid);
-    if (IS_ERR(child))
-        return PTR_ERR(child);
-
-    ret = ptrace_check_attach(child, request == PTRACE_KILL);
-    if (ret < 0)
-        goto out;
-
-    childregs = task_pt_regs(child);
-
-    switch (request) {
     case PTRACE_PEEKUSR:
         ret = getreg32(child, addr, &val);
         if (ret == 0)
@@ -1343,12 +1262,14 @@ asmlinkage long sys32_ptrace(long request, u32 pid, u32 addr, u32 data)
                    sizeof(struct user32_fxsr_struct),
                    datap);
 
+    case PTRACE_GET_THREAD_AREA:
+    case PTRACE_SET_THREAD_AREA:
+        return arch_ptrace(child, request, addr, data);
+
     default:
        return compat_ptrace_request(child, request, addr, data);
     }
 
- out:
-    put_task_struct(child);
     return ret;
 }
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 1791a751a772..a4a838306b2c 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -399,7 +399,7 @@ static void native_machine_emergency_restart(void)
     }
 }
 
-static void native_machine_shutdown(void)
+void native_machine_shutdown(void)
 {
     /* Stop the cpus and apics */
 #ifdef CONFIG_SMP
@@ -470,7 +470,10 @@ struct machine_ops machine_ops = {
     .shutdown = native_machine_shutdown,
     .emergency_restart = native_machine_emergency_restart,
     .restart = native_machine_restart,
-    .halt = native_machine_halt
+    .halt = native_machine_halt,
+#ifdef CONFIG_KEXEC
+    .crash_shutdown = native_machine_crash_shutdown,
+#endif
 };
 
 void machine_power_off(void)
@@ -498,3 +501,9 @@ void machine_halt(void)
     machine_ops.halt();
 }
 
+#ifdef CONFIG_KEXEC
+void machine_crash_shutdown(struct pt_regs *regs)
+{
+    machine_ops.crash_shutdown(regs);
+}
+#endif
diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c
index 455d3c80960b..2283422af794 100644
--- a/arch/x86/kernel/setup_32.c
+++ b/arch/x86/kernel/setup_32.c
@@ -47,6 +47,7 @@
 #include <linux/pfn.h>
 #include <linux/pci.h>
 #include <linux/init_ohci1394_dma.h>
+#include <linux/kvm_para.h>
 
 #include <video/edid.h>
 
@@ -389,7 +390,6 @@ unsigned long __init find_max_low_pfn(void)
     return max_low_pfn;
 }
 
-#define BIOS_EBDA_SEGMENT 0x40E
 #define BIOS_LOWMEM_KILOBYTES 0x413
 
 /*
@@ -420,8 +420,7 @@ static void __init reserve_ebda_region(void)
     lowmem <<= 10;
 
     /* start of EBDA area */
-    ebda_addr = *(unsigned short *)__va(BIOS_EBDA_SEGMENT);
-    ebda_addr <<= 4;
+    ebda_addr = get_bios_ebda();
 
     /* Fixup: bios puts an EBDA in the top 64K segment */
     /* of conventional memory, but does not adjust lowmem. */
@@ -822,6 +821,10 @@ void __init setup_arch(char **cmdline_p)
 
     max_low_pfn = setup_memory();
 
+#ifdef CONFIG_KVM_CLOCK
+    kvmclock_init();
+#endif
+
 #ifdef CONFIG_VMI
     /*
      * Must be after max_low_pfn is determined, and before kernel
@@ -829,6 +832,7 @@ void __init setup_arch(char **cmdline_p)
      */
     vmi_init();
 #endif
+    kvm_guest_init();
 
     /*
      * NOTE: before this point _nobody_ is allowed to allocate
diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c
index c2ec3dcb6b99..a94fb959a87a 100644
--- a/arch/x86/kernel/setup_64.c
+++ b/arch/x86/kernel/setup_64.c
@@ -42,6 +42,7 @@
 #include <linux/ctype.h>
 #include <linux/uaccess.h>
 #include <linux/init_ohci1394_dma.h>
+#include <linux/kvm_para.h>
 
 #include <asm/mtrr.h>
 #include <asm/uaccess.h>
@@ -116,7 +117,7 @@ extern int root_mountflags;
 
 char __initdata command_line[COMMAND_LINE_SIZE];
 
-struct resource standard_io_resources[] = {
+static struct resource standard_io_resources[] = {
     { .name = "dma1", .start = 0x00, .end = 0x1f,
         .flags = IORESOURCE_BUSY | IORESOURCE_IO },
     { .name = "pic1", .start = 0x20, .end = 0x21,
@@ -190,6 +191,7 @@ contig_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
     bootmap_size = init_bootmem(bootmap >> PAGE_SHIFT, end_pfn);
     e820_register_active_regions(0, start_pfn, end_pfn);
     free_bootmem_with_active_regions(0, end_pfn);
+    early_res_to_bootmem(0, end_pfn<<PAGE_SHIFT);
     reserve_bootmem(bootmap, bootmap_size, BOOTMEM_DEFAULT);
 }
 #endif
@@ -264,6 +266,28 @@ void __attribute__((weak)) __init memory_setup(void)
     machine_specific_memory_setup();
 }
 
+static void __init parse_setup_data(void)
+{
+    struct setup_data *data;
+    unsigned long pa_data;
+
+    if (boot_params.hdr.version < 0x0209)
+        return;
+    pa_data = boot_params.hdr.setup_data;
+    while (pa_data) {
+        data = early_ioremap(pa_data, PAGE_SIZE);
+        switch (data->type) {
+        default:
+            break;
+        }
+#ifndef CONFIG_DEBUG_BOOT_PARAMS
+        free_early(pa_data, pa_data+sizeof(*data)+data->len);
+#endif
+        pa_data = data->next;
+        early_iounmap(data, PAGE_SIZE);
+    }
+}
+
 /*
  * setup_arch - architecture-specific boot-time initializations
  *
@@ -316,6 +340,8 @@ void __init setup_arch(char **cmdline_p)
     strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE);
     *cmdline_p = command_line;
 
+    parse_setup_data();
+
     parse_early_param();
 
 #ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT
@@ -359,6 +385,10 @@ void __init setup_arch(char **cmdline_p)
 
     io_delay_init();
 
+#ifdef CONFIG_KVM_CLOCK
+    kvmclock_init();
+#endif
+
 #ifdef CONFIG_SMP
     /* setup to use the early static init tables during kernel startup */
     x86_cpu_to_apicid_early_ptr = (void *)x86_cpu_to_apicid_init;
@@ -397,8 +427,6 @@ void __init setup_arch(char **cmdline_p)
     contig_initmem_init(0, end_pfn);
 #endif
 
-    early_res_to_bootmem();
-
     dma32_reserve_bootmem();
 
 #ifdef CONFIG_ACPI_SLEEP
@@ -465,6 +493,8 @@ void __init setup_arch(char **cmdline_p)
     init_apic_mappings();
     ioapic_init_mappings();
 
+    kvm_guest_init();
+
     /*
      * We trust e820 completely. No explicit ROM probing in memory.
      */
diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c
index f1b117930837..8e05e7f7bd40 100644
--- a/arch/x86/kernel/signal_32.c
+++ b/arch/x86/kernel/signal_32.c
@@ -413,16 +413,6 @@ setup_frame(int sig, struct k_sigaction *ka, sigset_t *set,
     regs->ss = __USER_DS;
     regs->cs = __USER_CS;
 
-    /*
-     * Clear TF when entering the signal handler, but
-     * notify any tracer that was single-stepping it.
-     * The tracer may want to single-step inside the
-     * handler too.
-     */
-    regs->flags &= ~(X86_EFLAGS_TF | X86_EFLAGS_DF);
-    if (test_thread_flag(TIF_SINGLESTEP))
-        ptrace_notify(SIGTRAP);
-
     return 0;
 
 give_sigsegv:
@@ -501,16 +491,6 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
     regs->ss = __USER_DS;
     regs->cs = __USER_CS;
 
-    /*
-     * Clear TF when entering the signal handler, but
-     * notify any tracer that was single-stepping it.
-     * The tracer may want to single-step inside the
-     * handler too.
-     */
-    regs->flags &= ~(X86_EFLAGS_TF | X86_EFLAGS_DF);
-    if (test_thread_flag(TIF_SINGLESTEP))
-        ptrace_notify(SIGTRAP);
-
     return 0;
 
 give_sigsegv:
@@ -566,6 +546,21 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
     if (ret)
         return ret;
 
+    /*
+     * Clear the direction flag as per the ABI for function entry.
+     */
+    regs->flags &= ~X86_EFLAGS_DF;
+
+    /*
+     * Clear TF when entering the signal handler, but
+     * notify any tracer that was single-stepping it.
+     * The tracer may want to single-step inside the
+     * handler too.
+     */
+    regs->flags &= ~X86_EFLAGS_TF;
+    if (test_thread_flag(TIF_SINGLESTEP))
+        ptrace_notify(SIGTRAP);
+
     spin_lock_irq(&current->sighand->siglock);
     sigorsets(&current->blocked, &current->blocked, &ka->sa.sa_mask);
     if (!(ka->sa.sa_flags & SA_NODEFER))
diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c
index 827179c5b32a..ccb2a4560c2d 100644
--- a/arch/x86/kernel/signal_64.c
+++ b/arch/x86/kernel/signal_64.c
@@ -285,14 +285,6 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
        even if the handler happens to be interrupting 32-bit code. */
     regs->cs = __USER_CS;
 
-    /* This, by contrast, has nothing to do with segment registers -
-       see include/asm-x86_64/uaccess.h for details. */
-    set_fs(USER_DS);
-
-    regs->flags &= ~(X86_EFLAGS_TF | X86_EFLAGS_DF);
-    if (test_thread_flag(TIF_SINGLESTEP))
-        ptrace_notify(SIGTRAP);
-
     return 0;
 
 give_sigsegv:
@@ -380,6 +372,28 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
     ret = setup_rt_frame(sig, ka, info, oldset, regs);
 
     if (ret == 0) {
+        /*
+         * This has nothing to do with segment registers,
+         * despite the name.  This magic affects uaccess.h
+         * macros' behavior.  Reset it to the normal setting.
+         */
+        set_fs(USER_DS);
+
+        /*
+         * Clear the direction flag as per the ABI for function entry.
+         */
+        regs->flags &= ~X86_EFLAGS_DF;
+
+        /*
+         * Clear TF when entering the signal handler, but
+         * notify any tracer that was single-stepping it.
+         * The tracer may want to single-step inside the
+         * handler too.
+         */
+        regs->flags &= ~X86_EFLAGS_TF;
+        if (test_thread_flag(TIF_SINGLESTEP))
+            ptrace_notify(SIGTRAP);
+
         spin_lock_irq(&current->sighand->siglock);
         sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
         if (!(ka->sa.sa_flags & SA_NODEFER))
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index eef79e84145f..04c662ba18f1 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1058,7 +1058,7 @@ int __cpuinit native_cpu_up(unsigned int cpu)
         check_tsc_sync_source(cpu);
     local_irq_restore(flags);
 
-    while (!cpu_isset(cpu, cpu_online_map)) {
+    while (!cpu_online(cpu)) {
         cpu_relax();
         touch_nmi_watchdog();
     }
@@ -1168,7 +1168,7 @@ static void __init smp_cpu_index_default(void)
     int i;
     struct cpuinfo_x86 *c;
 
-    for_each_cpu_mask(i, cpu_possible_map) {
+    for_each_possible_cpu(i) {
         c = &cpu_data(i);
         /* mark all to hotplug */
         c->cpu_index = NR_CPUS;
diff --git a/arch/x86/kernel/summit_32.c b/arch/x86/kernel/summit_32.c
index 6878a9c2df5d..ae751094eba9 100644
--- a/arch/x86/kernel/summit_32.c
+++ b/arch/x86/kernel/summit_32.c
@@ -29,6 +29,7 @@
 #include <linux/mm.h>
 #include <linux/init.h>
 #include <asm/io.h>
+#include <asm/bios_ebda.h>
 #include <asm/mach-summit/mach_mpparse.h>
 
 static struct rio_table_hdr *rio_table_hdr __initdata;
@@ -140,8 +141,8 @@ void __init setup_summit(void)
     int i, next_wpeg, next_bus = 0;
 
     /* The pointer to the EBDA is stored in the word @ phys 0x40E(40:0E) */
-    ptr = *(unsigned short *)phys_to_virt(0x40Eul);
-    ptr = (unsigned long)phys_to_virt(ptr << 4);
+    ptr = get_bios_ebda();
+    ptr = (unsigned long)phys_to_virt(ptr);
 
     rio_table_hdr = NULL;
     offset = 0x180;
diff --git a/arch/x86/kernel/tlb_64.c b/arch/x86/kernel/tlb_64.c
index df224a8774cb..a1f07d793202 100644
--- a/arch/x86/kernel/tlb_64.c
+++ b/arch/x86/kernel/tlb_64.c
@@ -195,9 +195,9 @@ static int __cpuinit init_smp_flush(void)
 {
     int i;
 
-    for_each_cpu_mask(i, cpu_possible_map) {
+    for_each_possible_cpu(i)
         spin_lock_init(&per_cpu(flush_state, i).tlbstate_lock);
-    }
+
     return 0;
 }
 core_initcall(init_smp_flush);
diff --git a/arch/x86/kernel/trampoline_32.S b/arch/x86/kernel/trampoline_32.S
index 64580679861e..d8ccc3c6552f 100644
--- a/arch/x86/kernel/trampoline_32.S
+++ b/arch/x86/kernel/trampoline_32.S
@@ -33,7 +33,7 @@
 
 /* We can free up trampoline after bootup if cpu hotplug is not supported. */
 #ifndef CONFIG_HOTPLUG_CPU
-.section ".init.data","aw",@progbits
+.section ".cpuinit.data","aw",@progbits
 #else
 .section .rodata,"a",@progbits
 #endif
diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c
index 471e694d6713..bde6f63e15d5 100644
--- a/arch/x86/kernel/traps_32.c
+++ b/arch/x86/kernel/traps_32.c
@@ -602,7 +602,7 @@ DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS)
 DO_ERROR(11, SIGBUS, "segment not present", segment_not_present)
 DO_ERROR(12, SIGBUS, "stack segment", stack_segment)
 DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0, 0)
-DO_ERROR_INFO(32, SIGSEGV, "iret exception", iret_error, ILL_BADSTK, 0, 1)
+DO_ERROR_INFO(32, SIGILL, "iret exception", iret_error, ILL_BADSTK, 0, 1)
 
 void __kprobes do_general_protection(struct pt_regs *regs, long error_code)
 {
```
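The version handshake in `kvm_get_wallclock()` above is a lockless read of a structure the host updates asynchronously: the writer makes `wc_version` odd while an update is in flight and even when it completes, so the reader retries whenever the version is odd or changed between its two reads. A minimal user-space sketch of the same seqcount-style protocol (the `shared_time` struct is hypothetical; the acquire fence plays the role of the kernel's `rmb()`):

```c
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-in for the shared wall-clock area the host updates. */
struct shared_time {
    _Atomic uint32_t version; /* odd while the writer is mid-update */
    uint32_t sec;
    uint32_t nsec;
};

/* Reader: retry until a stable, even version brackets the snapshot. */
static void read_time(struct shared_time *st, uint32_t *sec, uint32_t *nsec)
{
    uint32_t v;

    do {
        v = atomic_load_explicit(&st->version, memory_order_acquire);
        *sec  = st->sec;   /* snapshot the payload */
        *nsec = st->nsec;
        atomic_thread_fence(memory_order_acquire); /* kernel: rmb() */
    } while (atomic_load_explicit(&st->version,
                      memory_order_relaxed) != v || (v & 1));
}

int main(void)
{
    struct shared_time st = { .version = 2, .sec = 1209271294, .nsec = 500 };
    uint32_t sec, nsec;

    read_time(&st, &sec, &nsec);
    printf("%u.%09u\n", sec, nsec);
    return 0;
}
```

This is the same pattern as the kernel's seqlock: a torn read is never returned, only retried, which is why the guest can safely consume a page the hypervisor rewrites at any time.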