diff options
Diffstat (limited to 'arch/i386')
47 files changed, 587 insertions, 643 deletions
diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig index f7db71d0b913..8dfa3054f10f 100644 --- a/arch/i386/Kconfig +++ b/arch/i386/Kconfig @@ -53,6 +53,35 @@ source "init/Kconfig" menu "Processor type and features" +config SMP + bool "Symmetric multi-processing support" + ---help--- + This enables support for systems with more than one CPU. If you have + a system with only one CPU, like most personal computers, say N. If + you have a system with more than one CPU, say Y. + + If you say N here, the kernel will run on single and multiprocessor + machines, but will use only one CPU of a multiprocessor machine. If + you say Y here, the kernel will run on many, but not all, + singleprocessor machines. On a singleprocessor machine, the kernel + will run faster if you say N here. + + Note that if you say Y here and choose architecture "586" or + "Pentium" under "Processor family", the kernel will not work on 486 + architectures. Similarly, multiprocessor kernels for the "PPro" + architecture may not work on all Pentium based boards. + + People using multiprocessor machines who say Y here should also say + Y to "Enhanced Real Time Clock Support", below. The "Advanced Power + Management" code will be disabled if you say Y here. + + See also the <file:Documentation/smp.txt>, + <file:Documentation/i386/IO-APIC.txt>, + <file:Documentation/nmi_watchdog.txt> and the SMP-HOWTO available at + <http://www.tldp.org/docs.html#howto>. + + If you don't know what to do here, say N. + choice prompt "Subarchitecture Type" default X86_PC @@ -178,35 +207,6 @@ config HPET_EMULATE_RTC depends on HPET_TIMER && RTC=y default y -config SMP - bool "Symmetric multi-processing support" - ---help--- - This enables support for systems with more than one CPU. If you have - a system with only one CPU, like most personal computers, say N. If - you have a system with more than one CPU, say Y. - - If you say N here, the kernel will run on single and multiprocessor - machines, but will use only one CPU of a multiprocessor machine. If - you say Y here, the kernel will run on many, but not all, - singleprocessor machines. On a singleprocessor machine, the kernel - will run faster if you say N here. - - Note that if you say Y here and choose architecture "586" or - "Pentium" under "Processor family", the kernel will not work on 486 - architectures. Similarly, multiprocessor kernels for the "PPro" - architecture may not work on all Pentium based boards. - - People using multiprocessor machines who say Y here should also say - Y to "Enhanced Real Time Clock Support", below. The "Advanced Power - Management" code will be disabled if you say Y here. - - See also the <file:Documentation/smp.txt>, - <file:Documentation/i386/IO-APIC.txt>, - <file:Documentation/nmi_watchdog.txt> and the SMP-HOWTO available at - <http://www.tldp.org/docs.html#howto>. - - If you don't know what to do here, say N. - config NR_CPUS int "Maximum number of CPUs (2-255)" range 2 255 @@ -231,6 +231,15 @@ config SCHED_SMT cost of slightly increased overhead in some places. If unsure say N here. +config SCHED_MC + bool "Multi-core scheduler support" + depends on SMP + default y + help + Multi-core scheduler support improves the CPU scheduler's decision + making when dealing with multi-core CPU chips at a cost of slightly + increased overhead in some places. If unsure say N here. + source "kernel/Kconfig.preempt" config X86_UP_APIC @@ -458,7 +467,7 @@ endchoice choice depends on EXPERIMENTAL && !X86_PAE - prompt "Memory split" + prompt "Memory split" if EMBEDDED default VMSPLIT_3G help Select the desired split between kernel and user memory. @@ -513,6 +522,12 @@ config NUMA comment "NUMA (Summit) requires SMP, 64GB highmem support, ACPI" depends on X86_SUMMIT && (!HIGHMEM64G || !ACPI) +config NODES_SHIFT + int + default "4" if X86_NUMAQ + default "3" + depends on NEED_MULTIPLE_NODES + config HAVE_ARCH_BOOTMEM_NODE bool depends on NUMA @@ -741,21 +756,12 @@ config PHYSICAL_START config HOTPLUG_CPU bool "Support for hot-pluggable CPUs (EXPERIMENTAL)" - depends on SMP && HOTPLUG && EXPERIMENTAL && !X86_VOYAGER && !X86_PC + depends on SMP && HOTPLUG && EXPERIMENTAL && !X86_VOYAGER ---help--- - Say Y here to experiment with turning CPUs off and on. CPUs - can be controlled through /sys/devices/system/cpu. - - Say N. + Say Y here to experiment with turning CPUs off and on, and to + enable suspend on SMP systems. CPUs can be controlled through + /sys/devices/system/cpu. -config DOUBLEFAULT - default y - bool "Enable doublefault exception handler" if EMBEDDED - help - This option allows trapping of rare doublefault exceptions that - would otherwise cause a system to silently reboot. Disabling this - option saves about 4k and might cause you much additional grey - hair. endmenu diff --git a/arch/i386/Kconfig.cpu b/arch/i386/Kconfig.cpu index 79603b3471f9..eb130482ba18 100644 --- a/arch/i386/Kconfig.cpu +++ b/arch/i386/Kconfig.cpu @@ -311,5 +311,5 @@ config X86_OOSTORE config X86_TSC bool - depends on (MWINCHIP3D || MWINCHIP2 || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MGEODEGX1) && !X86_NUMAQ + depends on (MWINCHIP3D || MWINCHIP2 || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MGEODEGX1 || MGEODE_LX) && !X86_NUMAQ default y diff --git a/arch/i386/Kconfig.debug b/arch/i386/Kconfig.debug index 6e97df6979e8..c92191b1fb67 100644 --- a/arch/i386/Kconfig.debug +++ b/arch/i386/Kconfig.debug @@ -81,4 +81,13 @@ config X86_MPPARSE depends on X86_LOCAL_APIC && !X86_VISWS default y +config DOUBLEFAULT + default y + bool "Enable doublefault exception handler" if EMBEDDED + help + This option allows trapping of rare doublefault exceptions that + would otherwise cause a system to silently reboot. Disabling this + option saves about 4k and might cause you much additional grey + hair. + endmenu diff --git a/arch/i386/boot/video.S b/arch/i386/boot/video.S index 2ac40c8244c4..c9343c3a8082 100644 --- a/arch/i386/boot/video.S +++ b/arch/i386/boot/video.S @@ -97,6 +97,7 @@ #define PARAM_VESAPM_OFF 0x30 #define PARAM_LFB_PAGES 0x32 #define PARAM_VESA_ATTRIB 0x34 +#define PARAM_CAPABILITIES 0x36 /* Define DO_STORE according to CONFIG_VIDEO_RETAIN */ #ifdef CONFIG_VIDEO_RETAIN @@ -233,6 +234,10 @@ mopar_gr: movw 18(%di), %ax movl %eax, %fs:(PARAM_LFB_SIZE) +# store mode capabilities + movl 10(%di), %eax + movl %eax, %fs:(PARAM_CAPABILITIES) + # switching the DAC to 8-bit is for <= 8 bpp only movw %fs:(PARAM_LFB_DEPTH), %ax cmpw $8, %ax @@ -1924,6 +1929,7 @@ skip10: movb %ah, %al ret store_edid: +#ifdef CONFIG_FB_FIRMWARE_EDID pushw %es # just save all registers pushw %ax pushw %bx @@ -1954,6 +1960,7 @@ store_edid: popw %bx popw %ax popw %es +#endif ret # VIDEO_SELECT-only variables diff --git a/arch/i386/kernel/Makefile b/arch/i386/kernel/Makefile index 5b9ed21216cf..96fb8a020af2 100644 --- a/arch/i386/kernel/Makefile +++ b/arch/i386/kernel/Makefile @@ -6,7 +6,7 @@ extra-y := head.o init_task.o vmlinux.lds obj-y := process.o semaphore.o signal.o entry.o traps.o irq.o \ ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_i386.o \ - pci-dma.o i386_ksyms.o i387.o dmi_scan.o bootflag.o \ + pci-dma.o i386_ksyms.o i387.o bootflag.o \ quirks.o i8237.o topology.o alternative.o obj-y += cpu/ diff --git a/arch/i386/kernel/acpi/boot.c b/arch/i386/kernel/acpi/boot.c index d0980c79a5d2..5ccbf58ec94f 100644 --- a/arch/i386/kernel/acpi/boot.c +++ b/arch/i386/kernel/acpi/boot.c @@ -621,9 +621,9 @@ extern u32 pmtmr_ioport; static int __init acpi_parse_fadt(unsigned long phys, unsigned long size) { - struct fadt_descriptor_rev2 *fadt = NULL; + struct fadt_descriptor *fadt = NULL; - fadt = (struct fadt_descriptor_rev2 *)__acpi_map_table(phys, size); + fadt = (struct fadt_descriptor *)__acpi_map_table(phys, size); if (!fadt) { printk(KERN_WARNING PREFIX "Unable to map FADT\n"); return 0; @@ -693,6 +693,9 @@ static int __init acpi_parse_madt_lapic_entries(void) { int count; + if (!cpu_has_apic) + return -ENODEV; + /* * Note that the LAPIC address is obtained from the MADT (32-bit value) * and (optionally) overriden by a LAPIC_ADDR_OVR entry (64-bit value). diff --git a/arch/i386/kernel/acpi/earlyquirk.c b/arch/i386/kernel/acpi/earlyquirk.c index 2e3b643a4dc4..1649a175a206 100644 --- a/arch/i386/kernel/acpi/earlyquirk.c +++ b/arch/i386/kernel/acpi/earlyquirk.c @@ -5,17 +5,34 @@ #include <linux/init.h> #include <linux/kernel.h> #include <linux/pci.h> +#include <linux/acpi.h> + #include <asm/pci-direct.h> #include <asm/acpi.h> #include <asm/apic.h> +#ifdef CONFIG_ACPI + +static int nvidia_hpet_detected __initdata; + +static int __init nvidia_hpet_check(unsigned long phys, unsigned long size) +{ + nvidia_hpet_detected = 1; + return 0; +} +#endif + static int __init check_bridge(int vendor, int device) { #ifdef CONFIG_ACPI - /* According to Nvidia all timer overrides are bogus. Just ignore - them all. */ + /* According to Nvidia all timer overrides are bogus unless HPET + is enabled. */ if (vendor == PCI_VENDOR_ID_NVIDIA) { - acpi_skip_timer_override = 1; + nvidia_hpet_detected = 0; + acpi_table_parse(ACPI_HPET, nvidia_hpet_check); + if (nvidia_hpet_detected == 0) { + acpi_skip_timer_override = 1; + } } #endif if (vendor == PCI_VENDOR_ID_ATI && timer_over_8254 == 1) { diff --git a/arch/i386/kernel/apic.c b/arch/i386/kernel/apic.c index eb5279d23b7f..3d4b2f3d116a 100644 --- a/arch/i386/kernel/apic.c +++ b/arch/i386/kernel/apic.c @@ -62,6 +62,18 @@ int apic_verbosity; static void apic_pm_activate(void); +int modern_apic(void) +{ + unsigned int lvr, version; + /* AMD systems use old APIC versions, so check the CPU */ + if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD && + boot_cpu_data.x86 >= 0xf) + return 1; + lvr = apic_read(APIC_LVR); + version = GET_APIC_VERSION(lvr); + return version >= 0x14; +} + /* * 'what should we do if we get a hw irq event on an illegal vector'. * each architecture has to answer this themselves. @@ -119,10 +131,7 @@ void enable_NMI_through_LVT0 (void * dummy) int get_physical_broadcast(void) { - unsigned int lvr, version; - lvr = apic_read(APIC_LVR); - version = GET_APIC_VERSION(lvr); - if (!APIC_INTEGRATED(version) || version >= 0x14) + if (modern_apic()) return 0xff; else return 0xf; @@ -349,9 +358,9 @@ int __init verify_local_APIC(void) void __init sync_Arb_IDs(void) { - /* Unsupported on P4 - see Intel Dev. Manual Vol. 3, Ch. 8.6.1 */ - unsigned int ver = GET_APIC_VERSION(apic_read(APIC_LVR)); - if (ver >= 0x14) /* P4 or higher */ + /* Unsupported on P4 - see Intel Dev. Manual Vol. 3, Ch. 8.6.1 + And not needed on AMD */ + if (modern_apic()) return; /* * Wait for idle. @@ -415,6 +424,7 @@ void __init init_bsp_APIC(void) void __devinit setup_local_APIC(void) { unsigned long oldvalue, value, ver, maxlvt; + int i, j; /* Pound the ESR really hard over the head with a big hammer - mbligh */ if (esr_disable) { @@ -452,6 +462,25 @@ void __devinit setup_local_APIC(void) apic_write_around(APIC_TASKPRI, value); /* + * After a crash, we no longer service the interrupts and a pending + * interrupt from previous kernel might still have ISR bit set. + * + * Most probably by now CPU has serviced that pending interrupt and + * it might not have done the ack_APIC_irq() because it thought, + * interrupt came from i8259 as ExtInt. LAPIC did not get EOI so it + * does not clear the ISR bit and cpu thinks it has already serivced + * the interrupt. Hence a vector might get locked. It was noticed + * for timer irq (vector 0x31). Issue an extra EOI to clear ISR. + */ + for (i = APIC_ISR_NR - 1; i >= 0; i--) { + value = apic_read(APIC_ISR + i*0x10); + for (j = 31; j >= 0; j--) { + if (value & (1<<j)) + ack_APIC_irq(); + } + } + + /* * Now that we are all set up, enable the APIC */ value = apic_read(APIC_SPIV); @@ -728,11 +757,7 @@ static int __init apic_set_verbosity(char *str) apic_verbosity = APIC_DEBUG; else if (strcmp("verbose", str) == 0) apic_verbosity = APIC_VERBOSE; - else - printk(KERN_WARNING "APIC Verbosity level %s not recognised" - " use apic=verbose or apic=debug\n", str); - - return 0; + return 1; } __setup("apic=", apic_set_verbosity); @@ -1316,6 +1341,14 @@ int __init APIC_init_uniprocessor (void) connect_bsp_APIC(); + /* + * Hack: In case of kdump, after a crash, kernel might be booting + * on a cpu with non-zero lapic id. But boot_cpu_physical_apicid + * might be zero if read from MP tables. Get it from LAPIC. + */ +#ifdef CONFIG_CRASH_DUMP + boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID)); +#endif phys_cpu_present_map = physid_mask_of_physid(boot_cpu_physical_apicid); setup_local_APIC(); diff --git a/arch/i386/kernel/apm.c b/arch/i386/kernel/apm.c index da30a374dd4e..df0e1745f189 100644 --- a/arch/i386/kernel/apm.c +++ b/arch/i386/kernel/apm.c @@ -1079,7 +1079,7 @@ static int apm_console_blank(int blank) break; } - if (error == APM_NOT_ENGAGED && state != APM_STATE_READY) { + if (error == APM_NOT_ENGAGED) { static int tried; int eng_error; if (tried++ == 0) { diff --git a/arch/i386/kernel/cpu/amd.c b/arch/i386/kernel/cpu/amd.c index 0810f81f2a05..786d1a57048b 100644 --- a/arch/i386/kernel/cpu/amd.c +++ b/arch/i386/kernel/cpu/amd.c @@ -207,13 +207,13 @@ static void __init init_amd(struct cpuinfo_x86 *c) set_bit(X86_FEATURE_K7, c->x86_capability); break; } + if (c->x86 >= 6) + set_bit(X86_FEATURE_FXSAVE_LEAK, c->x86_capability); display_cacheinfo(c); if (cpuid_eax(0x80000000) >= 0x80000008) { c->x86_max_cores = (cpuid_ecx(0x80000008) & 0xff) + 1; - if (c->x86_max_cores & (c->x86_max_cores - 1)) - c->x86_max_cores = 1; } if (cpuid_eax(0x80000000) >= 0x80000007) { diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c index 7e3d6b6a4e96..a06a49075f10 100644 --- a/arch/i386/kernel/cpu/common.c +++ b/arch/i386/kernel/cpu/common.c @@ -266,7 +266,7 @@ static void __init early_cpu_detect(void) void __cpuinit generic_identify(struct cpuinfo_x86 * c) { u32 tfms, xlvl; - int junk; + int ebx; if (have_cpuid_p()) { /* Get vendor name */ @@ -282,7 +282,7 @@ void __cpuinit generic_identify(struct cpuinfo_x86 * c) /* Intel-defined flags: level 0x00000001 */ if ( c->cpuid_level >= 0x00000001 ) { u32 capability, excap; - cpuid(0x00000001, &tfms, &junk, &excap, &capability); + cpuid(0x00000001, &tfms, &ebx, &excap, &capability); c->x86_capability[0] = capability; c->x86_capability[4] = excap; c->x86 = (tfms >> 8) & 15; @@ -292,6 +292,11 @@ void __cpuinit generic_identify(struct cpuinfo_x86 * c) if (c->x86 >= 0x6) c->x86_model += ((tfms >> 16) & 0xF) << 4; c->x86_mask = tfms & 15; +#ifdef CONFIG_SMP + c->apicid = phys_pkg_id((ebx >> 24) & 0xFF, 0); +#else + c->apicid = (ebx >> 24) & 0xFF; +#endif } else { /* Have CPUID level 0 only - unheard of */ c->x86 = 4; @@ -474,7 +479,6 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c) cpuid(1, &eax, &ebx, &ecx, &edx); - c->apicid = phys_pkg_id((ebx >> 24) & 0xFF, 0); if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY)) return; diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k8.c b/arch/i386/kernel/cpu/cpufreq/powernow-k8.c index 1e70823e1cb5..71fffa174425 100644 --- a/arch/i386/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/i386/kernel/cpu/cpufreq/powernow-k8.c @@ -46,7 +46,7 @@ #define PFX "powernow-k8: " #define BFX PFX "BIOS error: " -#define VERSION "version 1.60.1" +#define VERSION "version 1.60.2" #include "powernow-k8.h" /* serialize freq changes */ @@ -55,7 +55,7 @@ static DEFINE_MUTEX(fidvid_mutex); static struct powernow_k8_data *powernow_data[NR_CPUS]; #ifndef CONFIG_SMP -static cpumask_t cpu_core_map[1] = { CPU_MASK_ALL }; +static cpumask_t cpu_core_map[1]; #endif /* Return a frequency in MHz, given an input fid */ @@ -905,11 +905,17 @@ static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsi { cpumask_t oldmask = CPU_MASK_ALL; struct powernow_k8_data *data = powernow_data[pol->cpu]; - u32 checkfid = data->currfid; - u32 checkvid = data->currvid; + u32 checkfid; + u32 checkvid; unsigned int newstate; int ret = -EIO; + if (!data) + return -EINVAL; + + checkfid = data->currfid; + checkvid = data->currvid; + /* only run on specific CPU from here on */ oldmask = current->cpus_allowed; set_cpus_allowed(current, cpumask_of_cpu(pol->cpu)); @@ -969,6 +975,9 @@ static int powernowk8_verify(struct cpufreq_policy *pol) { struct powernow_k8_data *data = powernow_data[pol->cpu]; + if (!data) + return -EINVAL; + return cpufreq_frequency_table_verify(pol, data->powernow_table); } @@ -977,7 +986,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) { struct powernow_k8_data *data; cpumask_t oldmask = CPU_MASK_ALL; - int rc, i; + int rc; if (!cpu_online(pol->cpu)) return -ENODEV; @@ -1063,8 +1072,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) printk("cpu_init done, current fid 0x%x, vid 0x%x\n", data->currfid, data->currvid); - for_each_cpu_mask(i, cpu_core_map[pol->cpu]) - powernow_data[i] = data; + powernow_data[pol->cpu] = data; return 0; @@ -1095,10 +1103,15 @@ static int __devexit powernowk8_cpu_exit (struct cpufreq_policy *pol) static unsigned int powernowk8_get (unsigned int cpu) { - struct powernow_k8_data *data = powernow_data[cpu]; + struct powernow_k8_data *data; cpumask_t oldmask = current->cpus_allowed; unsigned int khz = 0; + data = powernow_data[first_cpu(cpu_core_map[cpu])]; + + if (!data) + return -EINVAL; + set_cpus_allowed(current, cpumask_of_cpu(cpu)); if (smp_processor_id() != cpu) { printk(KERN_ERR PFX "limiting to CPU %d failed in powernowk8_get\n", cpu); diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k8.h b/arch/i386/kernel/cpu/cpufreq/powernow-k8.h index 00ea899c17e1..79a7c5c87edc 100644 --- a/arch/i386/kernel/cpu/cpufreq/powernow-k8.h +++ b/arch/i386/kernel/cpu/cpufreq/powernow-k8.h @@ -182,10 +182,6 @@ static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid); static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned int index); -#ifndef for_each_cpu_mask -#define for_each_cpu_mask(i,mask) for (i=0;i<1;i++) -#endif - #ifdef CONFIG_SMP static inline void define_siblings(int cpu, cpumask_t cpu_sharedcore_mask[]) { diff --git a/arch/i386/kernel/cpu/intel_cacheinfo.c b/arch/i386/kernel/cpu/intel_cacheinfo.c index ce61921369e5..c8547a6fa7e6 100644 --- a/arch/i386/kernel/cpu/intel_cacheinfo.c +++ b/arch/i386/kernel/cpu/intel_cacheinfo.c @@ -173,6 +173,10 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c) unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0; /* Cache sizes */ unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */ unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */ + unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb; +#ifdef CONFIG_SMP + unsigned int cpu = (c == &boot_cpu_data) ? 0 : (c - cpu_data); +#endif if (c->cpuid_level > 3) { static int is_initialized; @@ -205,9 +209,15 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c) break; case 2: new_l2 = this_leaf.size/1024; + num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing; + index_msb = get_count_order(num_threads_sharing); + l2_id = c->apicid >> index_msb; break; case 3: new_l3 = this_leaf.size/1024; + num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing; + index_msb = get_count_order(num_threads_sharing); + l3_id = c->apicid >> index_msb; break; default: break; @@ -215,11 +225,19 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c) } } } - if (c->cpuid_level > 1) { + /* + * Don't use cpuid2 if cpuid4 is supported. For P4, we use cpuid2 for + * trace cache + */ + if ((num_cache_leaves == 0 || c->x86 == 15) && c->cpuid_level > 1) { /* supports eax=2 call */ int i, j, n; int regs[4]; unsigned char *dp = (unsigned char *)regs; + int only_trace = 0; + + if (num_cache_leaves != 0 && c->x86 == 15) + only_trace = 1; /* Number of times to iterate */ n = cpuid_eax(2) & 0xFF; @@ -241,6 +259,8 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c) while (cache_table[k].descriptor != 0) { if (cache_table[k].descriptor == des) { + if (only_trace && cache_table[k].cache_type != LVL_TRACE) + break; switch (cache_table[k].cache_type) { case LVL_1_INST: l1i += cache_table[k].size; @@ -266,34 +286,45 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c) } } } + } - if (new_l1d) - l1d = new_l1d; + if (new_l1d) + l1d = new_l1d; - if (new_l1i) - l1i = new_l1i; + if (new_l1i) + l1i = new_l1i; - if (new_l2) - l2 = new_l2; + if (new_l2) { + l2 = new_l2; +#ifdef CONFIG_SMP + cpu_llc_id[cpu] = l2_id; +#endif + } - if (new_l3) - l3 = new_l3; + if (new_l3) { + l3 = new_l3; +#ifdef CONFIG_SMP + cpu_llc_id[cpu] = l3_id; +#endif + } - if ( trace ) - printk (KERN_INFO "CPU: Trace cache: %dK uops", trace); - else if ( l1i ) - printk (KERN_INFO "CPU: L1 I cache: %dK", l1i); - if ( l1d ) - printk(", L1 D cache: %dK\n", l1d); - else - printk("\n"); - if ( l2 ) - printk(KERN_INFO "CPU: L2 cache: %dK\n", l2); - if ( l3 ) - printk(KERN_INFO "CPU: L3 cache: %dK\n", l3); + if (trace) + printk (KERN_INFO "CPU: Trace cache: %dK uops", trace); + else if ( l1i ) + printk (KERN_INFO "CPU: L1 I cache: %dK", l1i); - c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d)); - } + if (l1d) + printk(", L1 D cache: %dK\n", l1d); + else + printk("\n"); + + if (l2) + printk(KERN_INFO "CPU: L2 cache: %dK\n", l2); + + if (l3) + printk(KERN_INFO "CPU: L3 cache: %dK\n", l3); + + c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d)); return l2; } @@ -611,7 +642,7 @@ static void __cpuexit cache_remove_dev(struct sys_device * sys_dev) return; } -static int __cpuinit cacheinfo_cpu_callback(struct notifier_block *nfb, +static int cacheinfo_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) { unsigned int cpu = (unsigned long)hcpu; diff --git a/arch/i386/kernel/cpu/mcheck/mce.c b/arch/i386/kernel/cpu/mcheck/mce.c index 6170af3c271a..afa0888f9a1e 100644 --- a/arch/i386/kernel/cpu/mcheck/mce.c +++ b/arch/i386/kernel/cpu/mcheck/mce.c @@ -64,13 +64,13 @@ void mcheck_init(struct cpuinfo_x86 *c) static int __init mcheck_disable(char *str) { mce_disabled = 1; - return 0; + return 1; } static int __init mcheck_enable(char *str) { mce_disabled = -1; - return 0; + return 1; } __setup("nomce", mcheck_disable); diff --git a/arch/i386/kernel/cpuid.c b/arch/i386/kernel/cpuid.c index 006141d1c12a..1d9a4abcdfc7 100644 --- a/arch/i386/kernel/cpuid.c +++ b/arch/i386/kernel/cpuid.c @@ -168,7 +168,7 @@ static int cpuid_class_device_create(int i) return err; } -static int __devinit cpuid_class_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) +static int cpuid_class_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) { unsigned int cpu = (unsigned long)hcpu; diff --git a/arch/i386/kernel/crash.c b/arch/i386/kernel/crash.c index e3c5fca0aa8a..2b0cfce24a61 100644 --- a/arch/i386/kernel/crash.c +++ b/arch/i386/kernel/crash.c @@ -69,7 +69,7 @@ static void crash_save_this_cpu(struct pt_regs *regs, int cpu) * for the data I pass, and I need tags * on the data to indicate what information I have * squirrelled away. ELF notes happen to provide - * all of that that no need to invent something new. + * all of that, so there is no need to invent something new. */ buf = (u32*)per_cpu_ptr(crash_notes, cpu); if (!buf) diff --git a/arch/i386/kernel/dmi_scan.c b/arch/i386/kernel/dmi_scan.c deleted file mode 100644 index 5efceebc48dc..000000000000 --- a/arch/i386/kernel/dmi_scan.c +++ /dev/null @@ -1,358 +0,0 @@ -#include <linux/types.h> -#include <linux/string.h> -#include <linux/init.h> -#include <linux/module.h> -#include <linux/dmi.h> -#include <linux/efi.h> -#include <linux/bootmem.h> -#include <linux/slab.h> -#include <asm/dmi.h> - -static char * __init dmi_string(struct dmi_header *dm, u8 s) -{ - u8 *bp = ((u8 *) dm) + dm->length; - char *str = ""; - - if (s) { - s--; - while (s > 0 && *bp) { - bp += strlen(bp) + 1; - s--; - } - - if (*bp != 0) { - str = dmi_alloc(strlen(bp) + 1); - if (str != NULL) - strcpy(str, bp); - else - printk(KERN_ERR "dmi_string: out of memory.\n"); - } - } - - return str; -} - -/* - * We have to be cautious here. We have seen BIOSes with DMI pointers - * pointing to completely the wrong place for example - */ -static int __init dmi_table(u32 base, int len, int num, - void (*decode)(struct dmi_header *)) -{ - u8 *buf, *data; - int i = 0; - - buf = dmi_ioremap(base, len); - if (buf == NULL) - return -1; - - data = buf; - - /* - * Stop when we see all the items the table claimed to have - * OR we run off the end of the table (also happens) - */ - while ((i < num) && (data - buf + sizeof(struct dmi_header)) <= len) { - struct dmi_header *dm = (struct dmi_header *)data; - /* - * We want to know the total length (formated area and strings) - * before decoding to make sure we won't run off the table in - * dmi_decode or dmi_string - */ - data += dm->length; - while ((data - buf < len - 1) && (data[0] || data[1])) - data++; - if (data - buf < len - 1) - decode(dm); - data += 2; - i++; - } - dmi_iounmap(buf, len); - return 0; -} - -static int __init dmi_checksum(u8 *buf) -{ - u8 sum = 0; - int a; - - for (a = 0; a < 15; a++) - sum += buf[a]; - - return sum == 0; -} - -static char *dmi_ident[DMI_STRING_MAX]; -static LIST_HEAD(dmi_devices); - -/* - * Save a DMI string - */ -static void __init dmi_save_ident(struct dmi_header *dm, int slot, int string) -{ - char *p, *d = (char*) dm; - - if (dmi_ident[slot]) - return; - - p = dmi_string(dm, d[string]); - if (p == NULL) - return; - - dmi_ident[slot] = p; -} - -static void __init dmi_save_devices(struct dmi_header *dm) -{ - int i, count = (dm->length - sizeof(struct dmi_header)) / 2; - struct dmi_device *dev; - - for (i = 0; i < count; i++) { - char *d = (char *)(dm + 1) + (i * 2); - - /* Skip disabled device */ - if ((*d & 0x80) == 0) - continue; - - dev = dmi_alloc(sizeof(*dev)); - if (!dev) { - printk(KERN_ERR "dmi_save_devices: out of memory.\n"); - break; - } - - dev->type = *d++ & 0x7f; - dev->name = dmi_string(dm, *d); - dev->device_data = NULL; - - list_add(&dev->list, &dmi_devices); - } -} - -static void __init dmi_save_ipmi_device(struct dmi_header *dm) -{ - struct dmi_device *dev; - void * data; - - data = dmi_alloc(dm->length); - if (data == NULL) { - printk(KERN_ERR "dmi_save_ipmi_device: out of memory.\n"); - return; - } - - memcpy(data, dm, dm->length); - - dev = dmi_alloc(sizeof(*dev)); - if (!dev) { - printk(KERN_ERR "dmi_save_ipmi_device: out of memory.\n"); - return; - } - - dev->type = DMI_DEV_TYPE_IPMI; - dev->name = "IPMI controller"; - dev->device_data = data; - - list_add(&dev->list, &dmi_devices); -} - -/* - * Process a DMI table entry. Right now all we care about are the BIOS - * and machine entries. For 2.5 we should pull the smbus controller info - * out of here. - */ -static void __init dmi_decode(struct dmi_header *dm) -{ - switch(dm->type) { - case 0: /* BIOS Information */ - dmi_save_ident(dm, DMI_BIOS_VENDOR, 4); - dmi_save_ident(dm, DMI_BIOS_VERSION, 5); - dmi_save_ident(dm, DMI_BIOS_DATE, 8); - break; - case 1: /* System Information */ - dmi_save_ident(dm, DMI_SYS_VENDOR, 4); - dmi_save_ident(dm, DMI_PRODUCT_NAME, 5); - dmi_save_ident(dm, DMI_PRODUCT_VERSION, 6); - dmi_save_ident(dm, DMI_PRODUCT_SERIAL, 7); - break; - case 2: /* Base Board Information */ - dmi_save_ident(dm, DMI_BOARD_VENDOR, 4); - dmi_save_ident(dm, DMI_BOARD_NAME, 5); - dmi_save_ident(dm, DMI_BOARD_VERSION, 6); - break; - case 10: /* Onboard Devices Information */ - dmi_save_devices(dm); - break; - case 38: /* IPMI Device Information */ - dmi_save_ipmi_device(dm); - } -} - -static int __init dmi_present(char __iomem *p) -{ - u8 buf[15]; - memcpy_fromio(buf, p, 15); - if ((memcmp(buf, "_DMI_", 5) == 0) && dmi_checksum(buf)) { - u16 num = (buf[13] << 8) | buf[12]; - u16 len = (buf[7] << 8) | buf[6]; - u32 base = (buf[11] << 24) | (buf[10] << 16) | - (buf[9] << 8) | buf[8]; - - /* - * DMI version 0.0 means that the real version is taken from - * the SMBIOS version, which we don't know at this point. - */ - if (buf[14] != 0) - printk(KERN_INFO "DMI %d.%d present.\n", - buf[14] >> 4, buf[14] & 0xF); - else - printk(KERN_INFO "DMI present.\n"); - if (dmi_table(base,len, num, dmi_decode) == 0) - return 0; - } - return 1; -} - -void __init dmi_scan_machine(void) -{ - char __iomem *p, *q; - int rc; - - if (efi_enabled) { - if (efi.smbios == EFI_INVALID_TABLE_ADDR) - goto out; - - /* This is called as a core_initcall() because it isn't - * needed during early boot. This also means we can - * iounmap the space when we're done with it. - */ - p = dmi_ioremap(efi.smbios, 32); - if (p == NULL) - goto out; - - rc = dmi_present(p + 0x10); /* offset of _DMI_ string */ - dmi_iounmap(p, 32); - if (!rc) - return; - } - else { - /* - * no iounmap() for that ioremap(); it would be a no-op, but - * it's so early in setup that sucker gets confused into doing - * what it shouldn't if we actually call it. - */ - p = dmi_ioremap(0xF0000, 0x10000); - if (p == NULL) - goto out; - - for (q = p; q < p + 0x10000; q += 16) { - rc = dmi_present(q); - if (!rc) - return; - } - } - out: printk(KERN_INFO "DMI not present or invalid.\n"); -} - -/** - * dmi_check_system - check system DMI data - * @list: array of dmi_system_id structures to match against - * - * Walk the blacklist table running matching functions until someone - * returns non zero or we hit the end. Callback function is called for - * each successfull match. Returns the number of matches. - */ -int dmi_check_system(struct dmi_system_id *list) -{ - int i, count = 0; - struct dmi_system_id *d = list; - - while (d->ident) { - for (i = 0; i < ARRAY_SIZE(d->matches); i++) { - int s = d->matches[i].slot; - if (s == DMI_NONE) - continue; - if (dmi_ident[s] && strstr(dmi_ident[s], d->matches[i].substr)) - continue; - /* No match */ - goto fail; - } - count++; - if (d->callback && d->callback(d)) - break; -fail: d++; - } - - return count; -} -EXPORT_SYMBOL(dmi_check_system); - -/** - * dmi_get_system_info - return DMI data value - * @field: data index (see enum dmi_filed) - * - * Returns one DMI data value, can be used to perform - * complex DMI data checks. - */ -char *dmi_get_system_info(int field) -{ - return dmi_ident[field]; -} -EXPORT_SYMBOL(dmi_get_system_info); - -/** - * dmi_find_device - find onboard device by type/name - * @type: device type or %DMI_DEV_TYPE_ANY to match all device types - * @desc: device name string or %NULL to match all - * @from: previous device found in search, or %NULL for new search. - * - * Iterates through the list of known onboard devices. If a device is - * found with a matching @vendor and @device, a pointer to its device - * structure is returned. Otherwise, %NULL is returned. - * A new search is initiated by passing %NULL to the @from argument. - * If @from is not %NULL, searches continue from next device. - */ -struct dmi_device * dmi_find_device(int type, const char *name, - struct dmi_device *from) -{ - struct list_head *d, *head = from ? &from->list : &dmi_devices; - - for(d = head->next; d != &dmi_devices; d = d->next) { - struct dmi_device *dev = list_entry(d, struct dmi_device, list); - - if (((type == DMI_DEV_TYPE_ANY) || (dev->type == type)) && - ((name == NULL) || (strcmp(dev->name, name) == 0))) - return dev; - } - - return NULL; -} -EXPORT_SYMBOL(dmi_find_device); - -/** - * dmi_get_year - Return year of a DMI date - * @field: data index (like dmi_get_system_info) - * - * Returns -1 when the field doesn't exist. 0 when it is broken. - */ -int dmi_get_year(int field) -{ - int year; - char *s = dmi_get_system_info(field); - - if (!s) - return -1; - if (*s == '\0') - return 0; - s = strrchr(s, '/'); - if (!s) - return 0; - - s += 1; - year = simple_strtoul(s, NULL, 0); - if (year && year < 100) { /* 2-digit year */ - year += 1900; - if (year < 1996) /* no dates < spec 1.0 */ - year += 100; - } - - return year; -} diff --git a/arch/i386/kernel/i386_ksyms.c b/arch/i386/kernel/i386_ksyms.c index 055325056a74..036a9857936f 100644 --- a/arch/i386/kernel/i386_ksyms.c +++ b/arch/i386/kernel/i386_ksyms.c @@ -19,7 +19,6 @@ EXPORT_SYMBOL(__put_user_2); EXPORT_SYMBOL(__put_user_4); EXPORT_SYMBOL(__put_user_8); -EXPORT_SYMBOL(strpbrk); EXPORT_SYMBOL(strstr); #ifdef CONFIG_SMP diff --git a/arch/i386/kernel/io_apic.c b/arch/i386/kernel/io_apic.c index 311b4e7266f1..d70f2ade5cde 100644 --- a/arch/i386/kernel/io_apic.c +++ b/arch/i386/kernel/io_apic.c @@ -381,7 +381,7 @@ static void do_irq_balance(void) unsigned long imbalance = 0; cpumask_t allowed_mask, target_cpu_mask, tmp; - for_each_cpu(i) { + for_each_possible_cpu(i) { int package_index; CPU_IRQ(i) = 0; if (!cpu_online(i)) @@ -632,7 +632,7 @@ static int __init balanced_irq_init(void) else printk(KERN_ERR "balanced_irq_init: failed to spawn balanced_irq"); failed: - for_each_cpu(i) { + for_each_possible_cpu(i) { kfree(irq_cpu_data[i].irq_delta); irq_cpu_data[i].irq_delta = NULL; kfree(irq_cpu_data[i].last_irq); @@ -644,7 +644,7 @@ failed: int __init irqbalance_disable(char *str) { irqbalance_disabled = 1; - return 0; + return 1; } __setup("noirqbalance", irqbalance_disable); @@ -2238,6 +2238,8 @@ static inline void unlock_ExtINT_logic(void) spin_unlock_irqrestore(&ioapic_lock, flags); } +int timer_uses_ioapic_pin_0; + /* * This code may look a bit paranoid, but it's supposed to cooperate with * a wide range of boards and BIOS bugs. Fortunately only the timer IRQ @@ -2274,6 +2276,9 @@ static inline void check_timer(void) pin2 = ioapic_i8259.pin; apic2 = ioapic_i8259.apic; + if (pin1 == 0) + timer_uses_ioapic_pin_0 = 1; + printk(KERN_INFO "..TIMER: vector=0x%02X apic1=%d pin1=%d apic2=%d pin2=%d\n", vector, apic1, pin1, apic2, pin2); diff --git a/arch/i386/kernel/kprobes.c b/arch/i386/kernel/kprobes.c index f19768789e8a..38806f427849 100644 --- a/arch/i386/kernel/kprobes.c +++ b/arch/i386/kernel/kprobes.c @@ -43,7 +43,7 @@ DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL; DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); /* insert a jmp code */ -static inline void set_jmp_op(void *from, void *to) +static __always_inline void set_jmp_op(void *from, void *to) { struct __arch_jmp_op { char op; @@ -57,7 +57,7 @@ static inline void set_jmp_op(void *from, void *to) /* * returns non-zero if opcodes can be boosted. */ -static inline int can_boost(kprobe_opcode_t opcode) +static __always_inline int can_boost(kprobe_opcode_t opcode) { switch (opcode & 0xf0 ) { case 0x70: @@ -88,7 +88,7 @@ static inline int can_boost(kprobe_opcode_t opcode) /* * returns non-zero if opcode modifies the interrupt flag. */ -static inline int is_IF_modifier(kprobe_opcode_t opcode) +static int __kprobes is_IF_modifier(kprobe_opcode_t opcode) { switch (opcode) { case 0xfa: /* cli */ @@ -138,7 +138,7 @@ void __kprobes arch_remove_kprobe(struct kprobe *p) mutex_unlock(&kprobe_mutex); } -static inline void save_previous_kprobe(struct kprobe_ctlblk *kcb) +static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb) { kcb->prev_kprobe.kp = kprobe_running(); kcb->prev_kprobe.status = kcb->kprobe_status; @@ -146,7 +146,7 @@ static inline void save_previous_kprobe(struct kprobe_ctlblk *kcb) kcb->prev_kprobe.saved_eflags = kcb->kprobe_saved_eflags; } -static inline void restore_previous_kprobe(struct kprobe_ctlblk *kcb) +static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb) { __get_cpu_var(current_kprobe) = kcb->prev_kprobe.kp; kcb->kprobe_status = kcb->prev_kprobe.status; @@ -154,7 +154,7 @@ static inline void restore_previous_kprobe(struct kprobe_ctlblk *kcb) kcb->kprobe_saved_eflags = kcb->prev_kprobe.saved_eflags; } -static inline void set_current_kprobe(struct kprobe *p, struct pt_regs *regs, +static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb) { __get_cpu_var(current_kprobe) = p; @@ -164,7 +164,7 @@ static inline void set_current_kprobe(struct kprobe *p, struct pt_regs *regs, kcb->kprobe_saved_eflags &= ~IF_MASK; } -static inline void prepare_singlestep(struct kprobe *p, struct pt_regs *regs) +static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs) { regs->eflags |= TF_MASK; regs->eflags &= ~IF_MASK; @@ -242,10 +242,6 @@ static int __kprobes kprobe_handler(struct pt_regs *regs) kcb->kprobe_status = KPROBE_REENTER; return 1; } else { - if (regs->eflags & VM_MASK) { - /* We are in virtual-8086 mode. Return 0 */ - goto no_kprobe; - } if (*addr != BREAKPOINT_INSTRUCTION) { /* The breakpoint instruction was removed by * another cpu right after we hit, no further @@ -265,11 +261,6 @@ static int __kprobes kprobe_handler(struct pt_regs *regs) p = get_kprobe(addr); if (!p) { - if (regs->eflags & VM_MASK) { - /* We are in virtual-8086 mode. Return 0 */ - goto no_kprobe; - } - if (*addr != BREAKPOINT_INSTRUCTION) { /* * The breakpoint instruction was removed right @@ -452,10 +443,11 @@ static void __kprobes resume_execution(struct kprobe *p, *tos &= ~(TF_MASK | IF_MASK); *tos |= kcb->kprobe_old_eflags; break; - case 0xc3: /* ret/lret */ - case 0xcb: - case 0xc2: + case 0xc2: /* iret/ret/lret */ + case 0xc3: case 0xca: + case 0xcb: + case 0xcf: case 0xea: /* jmp absolute -- eip is correct */ /* eip is already adjusted, no more changes required */ p->ainsn.boostable = 1; @@ -463,10 +455,13 @@ static void __kprobes resume_execution(struct kprobe *p, case 0xe8: /* call relative - Fix return addr */ *tos = orig_eip + (*tos - copy_eip); break; + case 0x9a: /* call absolute -- same as call absolute, indirect */ + *tos = orig_eip + (*tos - copy_eip); + goto no_change; case 0xff: if ((p->ainsn.insn[1] & 0x30) == 0x10) { - /* call absolute, indirect */ /* + * call absolute, indirect * Fix return addr; eip is correct. * But this is not boostable */ @@ -507,7 +502,7 @@ no_change: * Interrupts are disabled on entry as trap1 is an interrupt gate and they * remain disabled thoroughout this function. */ -static inline int post_kprobe_handler(struct pt_regs *regs) +static int __kprobes post_kprobe_handler(struct pt_regs *regs) { struct kprobe *cur = kprobe_running(); struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); @@ -543,7 +538,7 @@ out: return 1; } -static inline int kprobe_fault_handler(struct pt_regs *regs, int trapnr) +static int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr) { struct kprobe *cur = kprobe_running(); struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); diff --git a/arch/i386/kernel/microcode.c b/arch/i386/kernel/microcode.c index dd780a00553f..e7c138f66c5a 100644 --- a/arch/i386/kernel/microcode.c +++ b/arch/i386/kernel/microcode.c @@ -459,26 +459,9 @@ static ssize_t microcode_write (struct file *file, const char __user *buf, size_ return ret; } -static int microcode_ioctl (struct inode *inode, struct file *file, - unsigned int cmd, unsigned long arg) -{ - switch (cmd) { - /* - * XXX: will be removed after microcode_ctl - * is updated to ignore failure of this ioctl() - */ - case MICROCODE_IOCFREE: - return 0; - default: - return -EINVAL; - } - return -EINVAL; -} - static struct file_operations microcode_fops = { .owner = THIS_MODULE, .write = microcode_write, - .ioctl = microcode_ioctl, .open = microcode_open, }; diff --git a/arch/i386/kernel/mpparse.c b/arch/i386/kernel/mpparse.c index 8d8aa9d1796d..6b1392d33ed5 100644 --- a/arch/i386/kernel/mpparse.c +++ b/arch/i386/kernel/mpparse.c @@ -38,12 +38,6 @@ int smp_found_config; unsigned int __initdata maxcpus = NR_CPUS; -#ifdef CONFIG_HOTPLUG_CPU -#define CPU_HOTPLUG_ENABLED (1) -#else -#define CPU_HOTPLUG_ENABLED (0) -#endif - /* * Various Linux-internal data structures created from the * MP-table. @@ -110,21 +104,6 @@ static int __init mpf_checksum(unsigned char *mp, int len) static int mpc_record; static struct mpc_config_translation *translation_table[MAX_MPC_ENTRY] __initdata; -#ifdef CONFIG_X86_NUMAQ -static int MP_valid_apicid(int apicid, int version) -{ - return hweight_long(apicid & 0xf) == 1 && (apicid >> 4) != 0xf; -} -#else -static int MP_valid_apicid(int apicid, int version) -{ - if (version >= 0x14) - return apicid < 0xff; - else - return apicid < 0xf; -} -#endif - static void __devinit MP_processor_info (struct mpc_config_processor *m) { int ver, apicid; @@ -190,12 +169,6 @@ static void __devinit MP_processor_info (struct mpc_config_processor *m) ver = m->mpc_apicver; - if (!MP_valid_apicid(apicid, ver)) { - printk(KERN_WARNING "Processor #%d INVALID. (Max ID: %d).\n", - m->mpc_apicid, MAX_APICS); - return; - } - /* * Validate version */ @@ -225,7 +198,14 @@ static void __devinit MP_processor_info (struct mpc_config_processor *m) cpu_set(num_processors, cpu_possible_map); num_processors++; - if (CPU_HOTPLUG_ENABLED || (num_processors > 8)) { + /* + * Would be preferable to switch to bigsmp when CONFIG_HOTPLUG_CPU=y + * but we need to work other dependencies like SMP_SUSPEND etc + * before this can be done without some confusion. + * if (CPU_HOTPLUG_ENABLED || num_processors > 8) + * - Ashok Raj <ashok.raj@intel.com> + */ + if (num_processors > 8) { switch (boot_cpu_data.x86_vendor) { case X86_VENDOR_INTEL: if (!APIC_XAPIC(ver)) { @@ -249,6 +229,13 @@ static void __init MP_bus_info (struct mpc_config_bus *m) mpc_oem_bus_info(m, str, translation_table[mpc_record]); + if (m->mpc_busid >= MAX_MP_BUSSES) { + printk(KERN_WARNING "MP table busid value (%d) for bustype %s " + " is too large, max. supported is %d\n", + m->mpc_busid, str, MAX_MP_BUSSES - 1); + return; + } + if (strncmp(str, BUSTYPE_ISA, sizeof(BUSTYPE_ISA)-1) == 0) { mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA; } else if (strncmp(str, BUSTYPE_EISA, sizeof(BUSTYPE_EISA)-1) == 0) { @@ -1143,7 +1130,17 @@ int mp_register_gsi (u32 gsi, int triggering, int polarity) */ int irq = gsi; if (gsi < MAX_GSI_NUM) { - if (gsi > 15) + /* + * Retain the VIA chipset work-around (gsi > 15), but + * avoid a problem where the 8254 timer (IRQ0) is setup + * via an override (so it's not on pin 0 of the ioapic), + * and at the same time, the pin 0 interrupt is a PCI + * type. The gsi > 15 test could cause these two pins + * to be shared as IRQ0, and they are not shareable. + * So test for this condition, and if necessary, avoid + * the pin collision. + */ + if (gsi > 15 || (gsi == 0 && !timer_uses_ioapic_pin_0)) gsi = pci_irq++; /* * Don't assign IRQ used by ACPI SCI diff --git a/arch/i386/kernel/msr.c b/arch/i386/kernel/msr.c index 1d0a55e68760..7a328230e540 100644 --- a/arch/i386/kernel/msr.c +++ b/arch/i386/kernel/msr.c @@ -251,7 +251,7 @@ static int msr_class_device_create(int i) return err; } -static int __devinit msr_class_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) +static int msr_class_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) { unsigned int cpu = (unsigned long)hcpu; diff --git a/arch/i386/kernel/nmi.c b/arch/i386/kernel/nmi.c index 9074818b9473..d43b498ec745 100644 --- a/arch/i386/kernel/nmi.c +++ b/arch/i386/kernel/nmi.c @@ -138,12 +138,12 @@ static int __init check_nmi_watchdog(void) if (nmi_watchdog == NMI_LOCAL_APIC) smp_call_function(nmi_cpu_busy, (void *)&endflag, 0, 0); - for_each_cpu(cpu) + for_each_possible_cpu(cpu) prev_nmi_count[cpu] = per_cpu(irq_stat, cpu).__nmi_count; local_irq_enable(); mdelay((10*1000)/nmi_hz); // wait 10 ticks - for_each_cpu(cpu) { + for_each_possible_cpu(cpu) { #ifdef CONFIG_SMP /* Check cpu_callin_map here because that is set after the timer is started. */ @@ -510,7 +510,7 @@ void touch_nmi_watchdog (void) * Just reset the alert counters, (other CPUs might be * spinning on locks we hold): */ - for_each_cpu(i) + for_each_possible_cpu(i) alert_counter[i] = 0; /* @@ -529,7 +529,8 @@ void nmi_watchdog_tick (struct pt_regs * regs) * always switch the stack NMI-atomically, it's safe to use * smp_processor_id(). */ - int sum, cpu = smp_processor_id(); + unsigned int sum; + int cpu = smp_processor_id(); sum = per_cpu(irq_stat, cpu).apic_timer_irqs; diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c index 24b3e745478b..6259afea46d1 100644 --- a/arch/i386/kernel/process.c +++ b/arch/i386/kernel/process.c @@ -781,7 +781,6 @@ unsigned long get_wchan(struct task_struct *p) } while (count++ < 16); return 0; } -EXPORT_SYMBOL(get_wchan); /* * sys_alloc_thread_area: get a yet unused TLS descriptor index. diff --git a/arch/i386/kernel/ptrace.c b/arch/i386/kernel/ptrace.c index 506462ef36a0..fd7eaf7866e0 100644 --- a/arch/i386/kernel/ptrace.c +++ b/arch/i386/kernel/ptrace.c @@ -671,7 +671,7 @@ int do_syscall_trace(struct pt_regs *regs, int entryexit) if (unlikely(current->audit_context)) { if (entryexit) - audit_syscall_exit(current, AUDITSC_RESULT(regs->eax), + audit_syscall_exit(AUDITSC_RESULT(regs->eax), regs->eax); /* Debug traps, when using PTRACE_SINGLESTEP, must be sent only * on the syscall exit path. Normally, when TIF_SYSCALL_AUDIT is @@ -720,14 +720,13 @@ int do_syscall_trace(struct pt_regs *regs, int entryexit) ret = is_sysemu; out: if (unlikely(current->audit_context) && !entryexit) - audit_syscall_entry(current, AUDIT_ARCH_I386, regs->orig_eax, + audit_syscall_entry(AUDIT_ARCH_I386, regs->orig_eax, regs->ebx, regs->ecx, regs->edx, regs->esi); if (ret == 0) return 0; regs->orig_eax = -1; /* force skip of syscall restarting */ if (unlikely(current->audit_context)) - audit_syscall_exit(current, AUDITSC_RESULT(regs->eax), - regs->eax); + audit_syscall_exit(AUDITSC_RESULT(regs->eax), regs->eax); return 1; } diff --git a/arch/i386/kernel/reboot_fixups.c b/arch/i386/kernel/reboot_fixups.c index 10e21a4773dd..99aab41a05b0 100644 --- a/arch/i386/kernel/reboot_fixups.c +++ b/arch/i386/kernel/reboot_fixups.c @@ -51,7 +51,5 @@ void mach_reboot_fixups(void) cur->reboot_fixup(dev); } - - printk(KERN_WARNING "No reboot fixup found for your hardware\n"); } diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c index 6917daa159ab..dd6b0e3386ce 100644 --- a/arch/i386/kernel/setup.c +++ b/arch/i386/kernel/setup.c @@ -34,6 +34,7 @@ #include <linux/initrd.h> #include <linux/bootmem.h> #include <linux/seq_file.h> +#include <linux/platform_device.h> #include <linux/console.h> #include <linux/mca.h> #include <linux/root_dev.h> @@ -46,6 +47,7 @@ #include <linux/kexec.h> #include <linux/crash_dump.h> #include <linux/dmi.h> +#include <linux/pfn.h> #include <video/edid.h> @@ -961,6 +963,38 @@ efi_memory_present_wrapper(unsigned long start, unsigned long end, void *arg) return 0; } + /* + * This function checks if the entire range <start,end> is mapped with type. + * + * Note: this function only works correct if the e820 table is sorted and + * not-overlapping, which is the case + */ +int __init +e820_all_mapped(unsigned long s, unsigned long e, unsigned type) +{ + u64 start = s; + u64 end = e; + int i; + for (i = 0; i < e820.nr_map; i++) { + struct e820entry *ei = &e820.map[i]; + if (type && ei->type != type) + continue; + /* is the region (part) in overlap with the current region ?*/ + if (ei->addr >= end || ei->addr + ei->size <= start) + continue; + /* if the region is at the beginning of <start,end> we move + * start to the end of the region since it's ok until there + */ + if (ei->addr <= start) + start = ei->addr + ei->size; + /* if start is now at or beyond end, we're done, full + * coverage */ + if (start >= end) + return 1; /* we're done */ + } + return 0; +} + /* * Find the highest page frame number we have available */ @@ -1286,6 +1320,8 @@ legacy_init_iomem_resources(struct resource *code_resource, struct resource *dat probe_roms(); for (i = 0; i < e820.nr_map; i++) { struct resource *res; + if (e820.map[i].addr + e820.map[i].size > 0x100000000ULL) + continue; res = kzalloc(sizeof(struct resource), GFP_ATOMIC); switch (e820.map[i].type) { case E820_RAM: res->name = "System RAM"; break; @@ -1315,8 +1351,8 @@ legacy_init_iomem_resources(struct resource *code_resource, struct resource *dat /* * Request address space for all standard resources * - * This is called just before pcibios_assign_resources(), which is also - * an fs_initcall, but is linked in later (in arch/i386/pci/i386.c). + * This is called just before pcibios_init(), which is also a + * subsys_initcall, but is linked in later (in arch/i386/pci/common.c). */ static int __init request_standard_resources(void) { @@ -1337,7 +1373,7 @@ static int __init request_standard_resources(void) return 0; } -fs_initcall(request_standard_resources); +subsys_initcall(request_standard_resources); static void __init register_memory(void) { @@ -1511,15 +1547,18 @@ void __init setup_arch(char **cmdline_p) if (efi_enabled) efi_map_memmap(); -#ifdef CONFIG_X86_IO_APIC - check_acpi_pci(); /* Checks more than just ACPI actually */ -#endif - #ifdef CONFIG_ACPI /* * Parse the ACPI tables for possible boot-time SMP configuration. */ acpi_boot_table_init(); +#endif + +#ifdef CONFIG_X86_IO_APIC + check_acpi_pci(); /* Checks more than just ACPI actually */ +#endif + +#ifdef CONFIG_ACPI acpi_boot_init(); #if defined(CONFIG_SMP) && defined(CONFIG_X86_PC) @@ -1546,6 +1585,23 @@ void __init setup_arch(char **cmdline_p) #endif } +static __init int add_pcspkr(void) +{ + struct platform_device *pd; + int ret; + + pd = platform_device_alloc("pcspkr", -1); + if (!pd) + return -ENOMEM; + + ret = platform_device_add(pd); + if (ret) + platform_device_put(pd); + + return ret; +} +device_initcall(add_pcspkr); + #include "setup_arch_post.h" /* * Local Variables: diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c index 82371d83bfa9..825b2b4ca721 100644 --- a/arch/i386/kernel/smpboot.c +++ b/arch/i386/kernel/smpboot.c @@ -72,6 +72,9 @@ int phys_proc_id[NR_CPUS] __read_mostly = {[0 ... NR_CPUS-1] = BAD_APICID}; /* Core ID of each logical CPU */ int cpu_core_id[NR_CPUS] __read_mostly = {[0 ... NR_CPUS-1] = BAD_APICID}; +/* Last level cache ID of each logical CPU */ +int cpu_llc_id[NR_CPUS] __cpuinitdata = {[0 ... NR_CPUS-1] = BAD_APICID}; + /* representing HT siblings of each logical CPU */ cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly; EXPORT_SYMBOL(cpu_sibling_map); @@ -310,7 +313,9 @@ static void __init synchronize_tsc_bp (void) if (tsc_values[i] < avg) realdelta = -realdelta; - printk(KERN_INFO "CPU#%d had %ld usecs TSC skew, fixed it up.\n", i, realdelta); + if (realdelta > 0) + printk(KERN_INFO "CPU#%d had %ld usecs TSC " + "skew, fixed it up.\n", i, realdelta); } sum += delta; @@ -440,6 +445,18 @@ static void __devinit smp_callin(void) static int cpucount; +/* maps the cpu to the sched domain representing multi-core */ +cpumask_t cpu_coregroup_map(int cpu) +{ + struct cpuinfo_x86 *c = cpu_data + cpu; + /* + * For perf, we return last level cache shared map. + * TBD: when power saving sched policy is added, we will return + * cpu_core_map when power saving policy is enabled + */ + return c->llc_shared_map; +} + /* representing cpus for which sibling maps can be computed */ static cpumask_t cpu_sibling_setup_map; @@ -459,12 +476,16 @@ set_cpu_sibling_map(int cpu) cpu_set(cpu, cpu_sibling_map[i]); cpu_set(i, cpu_core_map[cpu]); cpu_set(cpu, cpu_core_map[i]); + cpu_set(i, c[cpu].llc_shared_map); + cpu_set(cpu, c[i].llc_shared_map); } } } else { cpu_set(cpu, cpu_sibling_map[cpu]); } + cpu_set(cpu, c[cpu].llc_shared_map); + if (current_cpu_data.x86_max_cores == 1) { cpu_core_map[cpu] = cpu_sibling_map[cpu]; c[cpu].booted_cores = 1; @@ -472,6 +493,11 @@ set_cpu_sibling_map(int cpu) } for_each_cpu_mask(i, cpu_sibling_setup_map) { + if (cpu_llc_id[cpu] != BAD_APICID && + cpu_llc_id[cpu] == cpu_llc_id[i]) { + cpu_set(i, c[cpu].llc_shared_map); + cpu_set(cpu, c[i].llc_shared_map); + } if (phys_proc_id[cpu] == phys_proc_id[i]) { cpu_set(i, cpu_core_map[cpu]); cpu_set(cpu, cpu_core_map[i]); diff --git a/arch/i386/kernel/syscall_table.S b/arch/i386/kernel/syscall_table.S index ac687d00a1ce..af56987f69b0 100644 --- a/arch/i386/kernel/syscall_table.S +++ b/arch/i386/kernel/syscall_table.S @@ -310,3 +310,9 @@ ENTRY(sys_call_table) .long sys_pselect6 .long sys_ppoll .long sys_unshare /* 310 */ + .long sys_set_robust_list + .long sys_get_robust_list + .long sys_splice + .long sys_sync_file_range + .long sys_tee /* 315 */ + .long sys_vmsplice diff --git a/arch/i386/kernel/timers/timer_pm.c b/arch/i386/kernel/timers/timer_pm.c index 264edaaac315..144e94a04933 100644 --- a/arch/i386/kernel/timers/timer_pm.c +++ b/arch/i386/kernel/timers/timer_pm.c @@ -15,6 +15,7 @@ #include <linux/module.h> #include <linux/device.h> #include <linux/init.h> +#include <linux/pci.h> #include <asm/types.h> #include <asm/timer.h> #include <asm/smp.h> @@ -45,24 +46,31 @@ static seqlock_t monotonic_lock = SEQLOCK_UNLOCKED; #define ACPI_PM_MASK 0xFFFFFF /* limit it to 24 bits */ +static int pmtmr_need_workaround __read_mostly = 1; + /*helper function to safely read acpi pm timesource*/ static inline u32 read_pmtmr(void) { - u32 v1=0,v2=0,v3=0; - /* It has been reported that because of various broken - * chipsets (ICH4, PIIX4 and PIIX4E) where the ACPI PM time - * source is not latched, so you must read it multiple - * times to insure a safe value is read. - */ - do { - v1 = inl(pmtmr_ioport); - v2 = inl(pmtmr_ioport); - v3 = inl(pmtmr_ioport); - } while ((v1 > v2 && v1 < v3) || (v2 > v3 && v2 < v1) - || (v3 > v1 && v3 < v2)); - - /* mask the output to 24 bits */ - return v2 & ACPI_PM_MASK; + if (pmtmr_need_workaround) { + u32 v1, v2, v3; + + /* It has been reported that because of various broken + * chipsets (ICH4, PIIX4 and PIIX4E) where the ACPI PM time + * source is not latched, so you must read it multiple + * times to insure a safe value is read. + */ + do { + v1 = inl(pmtmr_ioport); + v2 = inl(pmtmr_ioport); + v3 = inl(pmtmr_ioport); + } while ((v1 > v2 && v1 < v3) || (v2 > v3 && v2 < v1) + || (v3 > v1 && v3 < v2)); + + /* mask the output to 24 bits */ + return v2 & ACPI_PM_MASK; + } + + return inl(pmtmr_ioport) & ACPI_PM_MASK; } @@ -263,6 +271,72 @@ struct init_timer_opts __initdata timer_pmtmr_init = { .opts = &timer_pmtmr, }; +#ifdef CONFIG_PCI +/* + * PIIX4 Errata: + * + * The power management timer may return improper results when read. + * Although the timer value settles properly after incrementing, + * while incrementing there is a 3 ns window every 69.8 ns where the + * timer value is indeterminate (a 4.2% chance that the data will be + * incorrect when read). As a result, the ACPI free running count up + * timer specification is violated due to erroneous reads. + */ +static int __init pmtmr_bug_check(void) +{ + static struct pci_device_id gray_list[] __initdata = { + /* these chipsets may have bug. */ + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_INTEL_82801DB_0) }, + { }, + }; + struct pci_dev *dev; + int pmtmr_has_bug = 0; + u8 rev; + + if (cur_timer != &timer_pmtmr || !pmtmr_need_workaround) + return 0; + + dev = pci_get_device(PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_INTEL_82371AB_3, NULL); + if (dev) { + pci_read_config_byte(dev, PCI_REVISION_ID, &rev); + /* the bug has been fixed in PIIX4M */ + if (rev < 3) { + printk(KERN_WARNING "* Found PM-Timer Bug on this " + "chipset. Due to workarounds for a bug,\n" + "* this time source is slow. Consider trying " + "other time sources (clock=)\n"); + pmtmr_has_bug = 1; + } + pci_dev_put(dev); + } + + if (pci_dev_present(gray_list)) { + printk(KERN_WARNING "* This chipset may have PM-Timer Bug. Due" + " to workarounds for a bug,\n" + "* this time source is slow. If you are sure your timer" + " does not have\n" + "* this bug, please use \"pmtmr_good\" to disable the " + "workaround\n"); + pmtmr_has_bug = 1; + } + + if (!pmtmr_has_bug) + pmtmr_need_workaround = 0; + + return 0; +} +device_initcall(pmtmr_bug_check); +#endif + +static int __init pmtr_good_setup(char *__str) +{ + pmtmr_need_workaround = 0; + return 1; +} +__setup("pmtmr_good", pmtr_good_setup); + MODULE_LICENSE("GPL"); MODULE_AUTHOR("Dominik Brodowski <linux@brodo.de>"); MODULE_DESCRIPTION("Power Management Timer (PMTMR) as primary timing source for x86"); diff --git a/arch/i386/kernel/timers/timer_tsc.c b/arch/i386/kernel/timers/timer_tsc.c index 5e41ee29c8cf..f1187ddb0d0f 100644 --- a/arch/i386/kernel/timers/timer_tsc.c +++ b/arch/i386/kernel/timers/timer_tsc.c @@ -279,7 +279,7 @@ time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, { struct cpufreq_freqs *freq = data; - if (val != CPUFREQ_RESUMECHANGE) + if (val != CPUFREQ_RESUMECHANGE && val != CPUFREQ_SUSPENDCHANGE) write_seqlock_irq(&xtime_lock); if (!ref_freq) { if (!freq->old){ @@ -312,7 +312,7 @@ time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, } end: - if (val != CPUFREQ_RESUMECHANGE) + if (val != CPUFREQ_RESUMECHANGE && val != CPUFREQ_SUSPENDCHANGE) write_sequnlock_irq(&xtime_lock); return 0; diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c index 4624f8ca2459..0e498369f35e 100644 --- a/arch/i386/kernel/traps.c +++ b/arch/i386/kernel/traps.c @@ -92,22 +92,21 @@ asmlinkage void spurious_interrupt_bug(void); asmlinkage void machine_check(void); static int kstack_depth_to_print = 24; -struct notifier_block *i386die_chain; -static DEFINE_SPINLOCK(die_notifier_lock); +ATOMIC_NOTIFIER_HEAD(i386die_chain); int register_die_notifier(struct notifier_block *nb) { - int err = 0; - unsigned long flags; - vmalloc_sync_all(); - spin_lock_irqsave(&die_notifier_lock, flags); - err = notifier_chain_register(&i386die_chain, nb); - spin_unlock_irqrestore(&die_notifier_lock, flags); - return err; + return atomic_notifier_chain_register(&i386die_chain, nb); } EXPORT_SYMBOL(register_die_notifier); +int unregister_die_notifier(struct notifier_block *nb) +{ + return atomic_notifier_chain_unregister(&i386die_chain, nb); +} +EXPORT_SYMBOL(unregister_die_notifier); + static inline int valid_stack_ptr(struct thread_info *tinfo, void *p) { return p > (void *)tinfo && @@ -131,9 +130,8 @@ static inline int print_addr_and_symbol(unsigned long addr, char *log_lvl, print_symbol("%s", addr); printed = (printed + 1) % CONFIG_STACK_BACKTRACE_COLS; - if (printed) - printk(" "); + printk(" "); else printk("\n"); @@ -213,7 +211,6 @@ static void show_stack_log_lvl(struct task_struct *task, unsigned long *esp, } stack = esp; - printk(log_lvl); for(i = 0; i < kstack_depth_to_print; i++) { if (kstack_end(stack)) break; @@ -366,6 +363,9 @@ void die(const char * str, struct pt_regs * regs, long err) if (++die.lock_owner_depth < 3) { int nl = 0; + unsigned long esp; + unsigned short ss; + handle_BUG(regs); printk(KERN_EMERG "%s: %04lx [#%d]\n", str, err & 0xffff, ++die_counter); #ifdef CONFIG_PREEMPT @@ -388,8 +388,19 @@ void die(const char * str, struct pt_regs * regs, long err) printk("\n"); if (notify_die(DIE_OOPS, str, regs, err, current->thread.trap_no, SIGSEGV) != - NOTIFY_STOP) + NOTIFY_STOP) { show_registers(regs); + /* Executive summary in case the oops scrolled away */ + esp = (unsigned long) (®s->esp); + savesegment(ss, ss); + if (user_mode(regs)) { + esp = regs->esp; + ss = regs->xss & 0xffff; + } + printk(KERN_EMERG "EIP: [<%08lx>] ", regs->eip); + print_symbol("%s", regs->eip); + printk(" SS:ESP %04x:%08lx\n", ss, esp); + } else regs = NULL; } else @@ -1194,6 +1205,6 @@ void __init trap_init(void) static int __init kstack_setup(char *s) { kstack_depth_to_print = simple_strtoul(s, NULL, 0); - return 0; + return 1; } __setup("kstack=", kstack_setup); diff --git a/arch/i386/kernel/vm86.c b/arch/i386/kernel/vm86.c index aee14fafd13d..00e0118e717c 100644 --- a/arch/i386/kernel/vm86.c +++ b/arch/i386/kernel/vm86.c @@ -312,7 +312,7 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk /*call audit_syscall_exit since we do not exit via the normal paths */ if (unlikely(current->audit_context)) - audit_syscall_exit(current, AUDITSC_RESULT(eax), eax); + audit_syscall_exit(AUDITSC_RESULT(eax), eax); __asm__ __volatile__( "movl %0,%%esp\n\t" diff --git a/arch/i386/kernel/vsyscall-sigreturn.S b/arch/i386/kernel/vsyscall-sigreturn.S index fadb5bc3c374..a92262f41659 100644 --- a/arch/i386/kernel/vsyscall-sigreturn.S +++ b/arch/i386/kernel/vsyscall-sigreturn.S @@ -44,7 +44,7 @@ __kernel_rt_sigreturn: .LSTARTCIEDLSI1: .long 0 /* CIE ID */ .byte 1 /* Version number */ - .string "zR" /* NUL-terminated augmentation string */ + .string "zRS" /* NUL-terminated augmentation string */ .uleb128 1 /* Code alignment factor */ .sleb128 -4 /* Data alignment factor */ .byte 8 /* Return address register column */ diff --git a/arch/i386/mach-generic/probe.c b/arch/i386/mach-generic/probe.c index cea5b3ce4b57..d55fa7b187ab 100644 --- a/arch/i386/mach-generic/probe.c +++ b/arch/i386/mach-generic/probe.c @@ -93,9 +93,11 @@ int __init mps_oem_check(struct mp_config_table *mpc, char *oem, char *productid int i; for (i = 0; apic_probe[i]; ++i) { if (apic_probe[i]->mps_oem_check(mpc,oem,productid)) { - genapic = apic_probe[i]; - printk(KERN_INFO "Switched to APIC driver `%s'.\n", - genapic->name); + if (!cmdline_apic) { + genapic = apic_probe[i]; + printk(KERN_INFO "Switched to APIC driver `%s'.\n", + genapic->name); + } return 1; } } @@ -107,9 +109,11 @@ int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id) int i; for (i = 0; apic_probe[i]; ++i) { if (apic_probe[i]->acpi_madt_oem_check(oem_id, oem_table_id)) { - genapic = apic_probe[i]; - printk(KERN_INFO "Switched to APIC driver `%s'.\n", - genapic->name); + if (!cmdline_apic) { + genapic = apic_probe[i]; + printk(KERN_INFO "Switched to APIC driver `%s'.\n", + genapic->name); + } return 1; } } diff --git a/arch/i386/mach-voyager/voyager_cat.c b/arch/i386/mach-voyager/voyager_cat.c index 23967fe658d3..10d21df14531 100644 --- a/arch/i386/mach-voyager/voyager_cat.c +++ b/arch/i386/mach-voyager/voyager_cat.c @@ -106,15 +106,20 @@ voyager_module_t *voyager_cat_list; /* the I/O port assignments for the VIC and QIC */ static struct resource vic_res = { - "Voyager Interrupt Controller", 0xFC00, 0xFC6F }; + .name = "Voyager Interrupt Controller", + .start = 0xFC00, + .end = 0xFC6F +}; static struct resource qic_res = { - "Quad Interrupt Controller", 0xFC70, 0xFCFF }; + .name = "Quad Interrupt Controller", + .start = 0xFC70, + .end = 0xFCFF +}; /* This function is used to pack a data bit stream inside a message. * It writes num_bits of the data buffer in msg starting at start_bit. * Note: This function assumes that any unused bit in the data stream * is set to zero so that the ors will work correctly */ -#define BITS_PER_BYTE 8 static void cat_pack(__u8 *msg, const __u16 start_bit, __u8 *data, const __u16 num_bits) { diff --git a/arch/i386/mach-voyager/voyager_smp.c b/arch/i386/mach-voyager/voyager_smp.c index 8165626a5c30..70e560a1b79a 100644 --- a/arch/i386/mach-voyager/voyager_smp.c +++ b/arch/i386/mach-voyager/voyager_smp.c @@ -1700,7 +1700,7 @@ after_handle_vic_irq(unsigned int irq) printk("VOYAGER SMP: CPU%d lost interrupt %d\n", cpu, irq); - for_each_cpu(real_cpu, mask) { + for_each_possible_cpu(real_cpu, mask) { outb(VIC_CPU_MASQUERADE_ENABLE | real_cpu, VIC_PROCESSOR_ID); diff --git a/arch/i386/mm/discontig.c b/arch/i386/mm/discontig.c index c4af9638dbfa..fe6eb901326e 100644 --- a/arch/i386/mm/discontig.c +++ b/arch/i386/mm/discontig.c @@ -31,6 +31,7 @@ #include <linux/nodemask.h> #include <linux/module.h> #include <linux/kexec.h> +#include <linux/pfn.h> #include <asm/e820.h> #include <asm/setup.h> @@ -352,17 +353,6 @@ void __init zone_sizes_init(void) { int nid; - /* - * Insert nodes into pgdat_list backward so they appear in order. - * Clobber node 0's links and NULL out pgdat_list before starting. - */ - pgdat_list = NULL; - for (nid = MAX_NUMNODES - 1; nid >= 0; nid--) { - if (!node_online(nid)) - continue; - NODE_DATA(nid)->pgdat_next = pgdat_list; - pgdat_list = NODE_DATA(nid); - } for_each_online_node(nid) { unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0}; diff --git a/arch/i386/mm/init.c b/arch/i386/mm/init.c index 9f66ac582a8b..3df1371d4520 100644 --- a/arch/i386/mm/init.c +++ b/arch/i386/mm/init.c @@ -651,6 +651,7 @@ void __init mem_init(void) * Specifically, in the case of x86, we will always add * memory to the highmem for now. */ +#ifdef CONFIG_MEMORY_HOTPLUG #ifndef CONFIG_NEED_MULTIPLE_NODES int add_memory(u64 start, u64 size) { @@ -667,6 +668,7 @@ int remove_memory(u64 start, u64 size) return -EINVAL; } #endif +#endif kmem_cache_t *pgd_cache; kmem_cache_t *pmd_cache; diff --git a/arch/i386/mm/pgtable.c b/arch/i386/mm/pgtable.c index 9db3242103be..2889567e21a1 100644 --- a/arch/i386/mm/pgtable.c +++ b/arch/i386/mm/pgtable.c @@ -36,7 +36,7 @@ void show_mem(void) printk(KERN_INFO "Mem-info:\n"); show_free_areas(); printk(KERN_INFO "Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10)); - for_each_pgdat(pgdat) { + for_each_online_pgdat(pgdat) { pgdat_resize_lock(pgdat, &flags); for (i = 0; i < pgdat->node_spanned_pages; ++i) { page = pgdat_page_nr(pgdat, i); diff --git a/arch/i386/oprofile/nmi_int.c b/arch/i386/oprofile/nmi_int.c index 1accce50c2c7..ec0fd3cfa774 100644 --- a/arch/i386/oprofile/nmi_int.c +++ b/arch/i386/oprofile/nmi_int.c @@ -122,7 +122,7 @@ static void nmi_save_registers(void * dummy) static void free_msrs(void) { int i; - for_each_cpu(i) { + for_each_possible_cpu(i) { kfree(cpu_msrs[i].counters); cpu_msrs[i].counters = NULL; kfree(cpu_msrs[i].controls); @@ -332,10 +332,11 @@ static int __init ppro_init(char ** cpu_type) { __u8 cpu_model = boot_cpu_data.x86_model; - if (cpu_model > 0xd) + if (cpu_model == 14) + *cpu_type = "i386/core"; + else if (cpu_model > 0xd) return 0; - - if (cpu_model == 9) { + else if (cpu_model == 9) { *cpu_type = "i386/p6_mobile"; } else if (cpu_model > 5) { *cpu_type = "i386/piii"; diff --git a/arch/i386/pci/direct.c b/arch/i386/pci/direct.c index 99012b93bd12..5d81fb510375 100644 --- a/arch/i386/pci/direct.c +++ b/arch/i386/pci/direct.c @@ -4,6 +4,7 @@ #include <linux/pci.h> #include <linux/init.h> +#include <linux/dmi.h> #include "pci.h" /* @@ -18,8 +19,10 @@ int pci_conf1_read(unsigned int seg, unsigned int bus, { unsigned long flags; - if (!value || (bus > 255) || (devfn > 255) || (reg > 255)) + if ((bus > 255) || (devfn > 255) || (reg > 255)) { + *value = -1; return -EINVAL; + } spin_lock_irqsave(&pci_config_lock, flags); @@ -91,8 +94,10 @@ static int pci_conf2_read(unsigned int seg, unsigned int bus, unsigned long flags; int dev, fn; - if (!value || (bus > 255) || (devfn > 255) || (reg > 255)) + if ((bus > 255) || (devfn > 255) || (reg > 255)) { + *value = -1; return -EINVAL; + } dev = PCI_SLOT(devfn); fn = PCI_FUNC(devfn); @@ -188,6 +193,10 @@ static int __init pci_sanity_check(struct pci_raw_ops *o) if (pci_probe & PCI_NO_CHECKS) return 1; + /* Assume Type 1 works for newer systems. + This handles machines that don't have anything on PCI Bus 0. */ + if (dmi_get_year(DMI_BIOS_DATE) >= 2001) + return 1; for (devfn = 0; devfn < 0x100; devfn++) { if (o->read(0, 0, devfn, PCI_CLASS_DEVICE, 2, &x)) diff --git a/arch/i386/pci/irq.c b/arch/i386/pci/irq.c index 3ca59cad05f3..06dab00aaadc 100644 --- a/arch/i386/pci/irq.c +++ b/arch/i386/pci/irq.c @@ -588,7 +588,9 @@ static __init int via_router_probe(struct irq_router *r, case PCI_DEVICE_ID_VIA_82C596: case PCI_DEVICE_ID_VIA_82C686: case PCI_DEVICE_ID_VIA_8231: + case PCI_DEVICE_ID_VIA_8233A: case PCI_DEVICE_ID_VIA_8235: + case PCI_DEVICE_ID_VIA_8237: /* FIXME: add new ones for 8233/5 */ r->name = "VIA"; r->get = pirq_via_get; diff --git a/arch/i386/pci/mmconfig.c b/arch/i386/pci/mmconfig.c index 613789071f30..6b1ea0c9a570 100644 --- a/arch/i386/pci/mmconfig.c +++ b/arch/i386/pci/mmconfig.c @@ -12,14 +12,20 @@ #include <linux/pci.h> #include <linux/init.h> #include <linux/acpi.h> +#include <asm/e820.h> #include "pci.h" +#define MMCONFIG_APER_SIZE (256*1024*1024) + +/* Assume systems with more busses have correct MCFG */ +#define MAX_CHECK_BUS 16 + #define mmcfg_virt_addr ((void __iomem *) fix_to_virt(FIX_PCIE_MCFG)) /* The base address of the last MMCONFIG device accessed */ static u32 mmcfg_last_accessed_device; -static DECLARE_BITMAP(fallback_slots, 32); +static DECLARE_BITMAP(fallback_slots, MAX_CHECK_BUS*32); /* * Functions for accessing PCI configuration space with MMCONFIG accesses @@ -29,8 +35,8 @@ static u32 get_base_addr(unsigned int seg, int bus, unsigned devfn) int cfg_num = -1; struct acpi_table_mcfg_config *cfg; - if (seg == 0 && bus == 0 && - test_bit(PCI_SLOT(devfn), fallback_slots)) + if (seg == 0 && bus < MAX_CHECK_BUS && + test_bit(PCI_SLOT(devfn) + 32*bus, fallback_slots)) return 0; while (1) { @@ -74,8 +80,10 @@ static int pci_mmcfg_read(unsigned int seg, unsigned int bus, unsigned long flags; u32 base; - if (!value || (bus > 255) || (devfn > 255) || (reg > 4095)) + if ((bus > 255) || (devfn > 255) || (reg > 4095)) { + *value = -1; return -EINVAL; + } base = get_base_addr(seg, bus, devfn); if (!base) @@ -146,29 +154,34 @@ static struct pci_raw_ops pci_mmcfg = { Normally this can be expressed in the MCFG by not listing them and assigning suitable _SEGs, but this isn't implemented in some BIOS. Instead try to discover all devices on bus 0 that are unreachable using MM - and fallback for them. - We only do this for bus 0/seg 0 */ + and fallback for them. */ static __init void unreachable_devices(void) { - int i; + int i, k; unsigned long flags; - for (i = 0; i < 32; i++) { - u32 val1; - u32 addr; - - pci_conf1_read(0, 0, PCI_DEVFN(i, 0), 0, 4, &val1); - if (val1 == 0xffffffff) - continue; - - /* Locking probably not needed, but safer */ - spin_lock_irqsave(&pci_config_lock, flags); - addr = get_base_addr(0, 0, PCI_DEVFN(i, 0)); - if (addr != 0) - pci_exp_set_dev_base(addr, 0, PCI_DEVFN(i, 0)); - if (addr == 0 || readl((u32 __iomem *)mmcfg_virt_addr) != val1) - set_bit(i, fallback_slots); - spin_unlock_irqrestore(&pci_config_lock, flags); + for (k = 0; k < MAX_CHECK_BUS; k++) { + for (i = 0; i < 32; i++) { + u32 val1; + u32 addr; + + pci_conf1_read(0, k, PCI_DEVFN(i, 0), 0, 4, &val1); + if (val1 == 0xffffffff) + continue; + + /* Locking probably not needed, but safer */ + spin_lock_irqsave(&pci_config_lock, flags); + addr = get_base_addr(0, k, PCI_DEVFN(i, 0)); + if (addr != 0) + pci_exp_set_dev_base(addr, k, PCI_DEVFN(i, 0)); + if (addr == 0 || + readl((u32 __iomem *)mmcfg_virt_addr) != val1) { + set_bit(i, fallback_slots); + printk(KERN_NOTICE + "PCI: No mmconfig possible on %x:%x\n", k, i); + } + spin_unlock_irqrestore(&pci_config_lock, flags); + } } } @@ -183,6 +196,14 @@ void __init pci_mmcfg_init(void) (pci_mmcfg_config[0].base_address == 0)) return; + if (!e820_all_mapped(pci_mmcfg_config[0].base_address, + pci_mmcfg_config[0].base_address + MMCONFIG_APER_SIZE, + E820_RESERVED)) { + printk(KERN_ERR "PCI: BIOS Bug: MCFG area is not E820-reserved\n"); + printk(KERN_ERR "PCI: Not using MMCONFIG.\n"); + return; + } + printk(KERN_INFO "PCI: Using MMCONFIG\n"); raw_pci_ops = &pci_mmcfg; pci_probe = (pci_probe & ~PCI_PROBE_MASK) | PCI_PROBE_MMCONF; diff --git a/arch/i386/power/cpu.c b/arch/i386/power/cpu.c index 50a0bef8c85f..79b2370c7fac 100644 --- a/arch/i386/power/cpu.c +++ b/arch/i386/power/cpu.c @@ -92,7 +92,7 @@ void __restore_processor_state(struct saved_context *ctxt) write_cr4(ctxt->cr4); write_cr3(ctxt->cr3); write_cr2(ctxt->cr2); - write_cr2(ctxt->cr0); + write_cr0(ctxt->cr0); /* * now restore the descriptor tables to their proper values |