diff options
Diffstat (limited to 'arch/i386')
53 files changed, 1039 insertions, 709 deletions
diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig index ea70359b02d0..0dfee812811a 100644 --- a/arch/i386/Kconfig +++ b/arch/i386/Kconfig @@ -49,6 +49,11 @@ config GENERIC_IOMAP bool default y +config GENERIC_BUG + bool + default y + depends on BUG + config GENERIC_HWEIGHT bool default y @@ -185,6 +190,7 @@ endchoice config PARAVIRT bool "Paravirtualization support (EXPERIMENTAL)" depends on EXPERIMENTAL + depends on !(X86_VISWS || X86_VOYAGER) help Paravirtualization is a way of running multiple instances of Linux on the same machine, under a hypervisor. This option @@ -771,6 +777,47 @@ config CRASH_DUMP PHYSICAL_START. For more details see Documentation/kdump/kdump.txt +config PHYSICAL_START + hex "Physical address where the kernel is loaded" if (EMBEDDED || CRASH_DUMP) + default "0x100000" + help + This gives the physical address where the kernel is loaded. + + If kernel is a not relocatable (CONFIG_RELOCATABLE=n) then + bzImage will decompress itself to above physical address and + run from there. Otherwise, bzImage will run from the address where + it has been loaded by the boot loader and will ignore above physical + address. + + In normal kdump cases one does not have to set/change this option + as now bzImage can be compiled as a completely relocatable image + (CONFIG_RELOCATABLE=y) and be used to load and run from a different + address. This option is mainly useful for the folks who don't want + to use a bzImage for capturing the crash dump and want to use a + vmlinux instead. vmlinux is not relocatable hence a kernel needs + to be specifically compiled to run from a specific memory area + (normally a reserved region) and this option comes handy. + + So if you are using bzImage for capturing the crash dump, leave + the value here unchanged to 0x100000 and set CONFIG_RELOCATABLE=y. + Otherwise if you plan to use vmlinux for capturing the crash dump + change this value to start of the reserved region (Typically 16MB + 0x1000000). In other words, it can be set based on the "X" value as + specified in the "crashkernel=YM@XM" command line boot parameter + passed to the panic-ed kernel. Typically this parameter is set as + crashkernel=64M@16M. Please take a look at + Documentation/kdump/kdump.txt for more details about crash dumps. + + Usage of bzImage for capturing the crash dump is recommended as + one does not have to build two kernels. Same kernel can be used + as production kernel and capture kernel. Above option should have + gone away after relocatable bzImage support is introduced. But it + is present because there are users out there who continue to use + vmlinux for dump capture. This option should go away down the + line. + + Don't change this unless you know what you are doing. + config RELOCATABLE bool "Build a relocatable kernel(EXPERIMENTAL)" depends on EXPERIMENTAL diff --git a/arch/i386/Kconfig.cpu b/arch/i386/Kconfig.cpu index 821fd269ca58..2aecfba4ac4f 100644 --- a/arch/i386/Kconfig.cpu +++ b/arch/i386/Kconfig.cpu @@ -248,6 +248,14 @@ config RWSEM_XCHGADD_ALGORITHM depends on !M386 default y +config ARCH_HAS_ILOG2_U32 + bool + default n + +config ARCH_HAS_ILOG2_U64 + bool + default n + config GENERIC_CALIBRATE_DELAY bool default y diff --git a/arch/i386/boot/compressed/.gitignore b/arch/i386/boot/compressed/.gitignore new file mode 100644 index 000000000000..be0ed065249b --- /dev/null +++ b/arch/i386/boot/compressed/.gitignore @@ -0,0 +1 @@ +relocs diff --git a/arch/i386/boot/compressed/head.S b/arch/i386/boot/compressed/head.S index f395a4bb38bb..3517a32aaf41 100644 --- a/arch/i386/boot/compressed/head.S +++ b/arch/i386/boot/compressed/head.S @@ -28,7 +28,7 @@ #include <asm/page.h> #include <asm/boot.h> -.section ".text.head" +.section ".text.head","ax",@progbits .globl startup_32 startup_32: diff --git a/arch/i386/boot/compressed/relocs.c b/arch/i386/boot/compressed/relocs.c index 468da89153c4..881951ca03e1 100644 --- a/arch/i386/boot/compressed/relocs.c +++ b/arch/i386/boot/compressed/relocs.c @@ -43,6 +43,8 @@ static int is_safe_abs_reloc(const char* sym_name) /* Match found */ return 1; } + if (strncmp(sym_name, "__crc_", 6) == 0) + return 1; return 0; } diff --git a/arch/i386/defconfig b/arch/i386/defconfig index 65891f11aced..5d80edfc61b7 100644 --- a/arch/i386/defconfig +++ b/arch/i386/defconfig @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.19-git7 -# Wed Dec 6 23:50:49 2006 +# Linux kernel version: 2.6.20-rc3 +# Fri Jan 5 11:54:46 2007 # CONFIG_X86_32=y CONFIG_GENERIC_TIME=y @@ -12,6 +12,7 @@ CONFIG_X86=y CONFIG_MMU=y CONFIG_GENERIC_ISA_DMA=y CONFIG_GENERIC_IOMAP=y +CONFIG_GENERIC_BUG=y CONFIG_GENERIC_HWEIGHT=y CONFIG_ARCH_MAY_HAVE_PC_FDC=y CONFIG_DMI=y @@ -141,6 +142,8 @@ CONFIG_X86_CMPXCHG=y CONFIG_X86_XADD=y CONFIG_X86_L1_CACHE_SHIFT=7 CONFIG_RWSEM_XCHGADD_ALGORITHM=y +# CONFIG_ARCH_HAS_ILOG2_U32 is not set +# CONFIG_ARCH_HAS_ILOG2_U64 is not set CONFIG_GENERIC_CALIBRATE_DELAY=y CONFIG_X86_WP_WORKS_OK=y CONFIG_X86_INVLPG=y @@ -203,6 +206,7 @@ CONFIG_MTRR=y CONFIG_SECCOMP=y # CONFIG_HZ_100 is not set CONFIG_HZ_250=y +# CONFIG_HZ_300 is not set # CONFIG_HZ_1000 is not set CONFIG_HZ=250 # CONFIG_KEXEC is not set @@ -563,6 +567,7 @@ CONFIG_IDEDMA_AUTO=y # # CONFIG_RAID_ATTRS is not set CONFIG_SCSI=y +# CONFIG_SCSI_TGT is not set CONFIG_SCSI_NETLINK=y # CONFIG_SCSI_PROC_FS is not set @@ -583,6 +588,7 @@ CONFIG_CHR_DEV_SG=y # CONFIG_SCSI_MULTI_LUN is not set # CONFIG_SCSI_CONSTANTS is not set # CONFIG_SCSI_LOGGING is not set +# CONFIG_SCSI_SCAN_ASYNC is not set # # SCSI Transports @@ -642,6 +648,7 @@ CONFIG_AIC79XX_DEBUG_MASK=0 # CONFIG_SCSI_DC390T is not set # CONFIG_SCSI_NSP32 is not set # CONFIG_SCSI_DEBUG is not set +# CONFIG_SCSI_SRP is not set # # Serial ATA (prod) and Parallel ATA (experimental) drivers @@ -1082,10 +1089,7 @@ CONFIG_SOUND=y # Open Sound System # CONFIG_SOUND_PRIME=y -CONFIG_OSS_OBSOLETE_DRIVER=y # CONFIG_SOUND_BT878 is not set -# CONFIG_SOUND_EMU10K1 is not set -# CONFIG_SOUND_FUSION is not set # CONFIG_SOUND_ES1371 is not set CONFIG_SOUND_ICH=y # CONFIG_SOUND_TRIDENT is not set @@ -1095,6 +1099,11 @@ CONFIG_SOUND_ICH=y # CONFIG_SOUND_OSS is not set # +# HID Devices +# +CONFIG_HID=y + +# # USB support # CONFIG_USB_ARCH_HAS_HCD=y @@ -1158,7 +1167,6 @@ CONFIG_USB_STORAGE=y # USB Input Devices # CONFIG_USB_HID=y -CONFIG_USB_HIDINPUT=y # CONFIG_USB_HIDINPUT_POWERBOOK is not set # CONFIG_HID_FF is not set # CONFIG_USB_HIDDEV is not set @@ -1279,6 +1287,11 @@ CONFIG_USB_MON=y # # +# Virtualization +# +# CONFIG_KVM is not set + +# # File systems # CONFIG_EXT2_FS=y @@ -1444,6 +1457,11 @@ CONFIG_NLS_ISO8859_15=y CONFIG_NLS_UTF8=y # +# Distributed Lock Manager +# +# CONFIG_DLM is not set + +# # Instrumentation Support # CONFIG_PROFILING=y @@ -1458,6 +1476,8 @@ CONFIG_TRACE_IRQFLAGS_SUPPORT=y # CONFIG_ENABLE_MUST_CHECK is not set CONFIG_MAGIC_SYSRQ=y CONFIG_UNUSED_SYMBOLS=y +# CONFIG_DEBUG_FS is not set +# CONFIG_HEADERS_CHECK is not set CONFIG_DEBUG_KERNEL=y CONFIG_LOG_BUF_SHIFT=18 CONFIG_DETECT_SOFTLOCKUP=y @@ -1476,14 +1496,10 @@ CONFIG_DETECT_SOFTLOCKUP=y # CONFIG_DEBUG_HIGHMEM is not set CONFIG_DEBUG_BUGVERBOSE=y # CONFIG_DEBUG_INFO is not set -# CONFIG_DEBUG_FS is not set # CONFIG_DEBUG_VM is not set # CONFIG_DEBUG_LIST is not set # CONFIG_FRAME_POINTER is not set -CONFIG_UNWIND_INFO=y -CONFIG_STACK_UNWIND=y # CONFIG_FORCED_INLINING is not set -# CONFIG_HEADERS_CHECK is not set # CONFIG_RCU_TORTURE_TEST is not set # CONFIG_LKDTM is not set CONFIG_EARLY_PRINTK=y @@ -1509,12 +1525,14 @@ CONFIG_DOUBLEFAULT=y # # Library routines # +CONFIG_BITREVERSE=y # CONFIG_CRC_CCITT is not set # CONFIG_CRC16 is not set CONFIG_CRC32=y # CONFIG_LIBCRC32C is not set CONFIG_ZLIB_INFLATE=y CONFIG_PLIST=y +CONFIG_IOMAP_COPY=y CONFIG_GENERIC_HARDIRQS=y CONFIG_GENERIC_IRQ_PROBE=y CONFIG_GENERIC_PENDING_IRQ=y diff --git a/arch/i386/kernel/acpi/boot.c b/arch/i386/kernel/acpi/boot.c index c8f96cff07c6..cbcb2c27f48b 100644 --- a/arch/i386/kernel/acpi/boot.c +++ b/arch/i386/kernel/acpi/boot.c @@ -333,7 +333,7 @@ acpi_parse_ioapic(acpi_table_entry_header * header, const unsigned long end) /* * Parse Interrupt Source Override for the ACPI SCI */ -static void acpi_sci_ioapic_setup(u32 gsi, u16 polarity, u16 trigger) +static void __init acpi_sci_ioapic_setup(u32 gsi, u16 polarity, u16 trigger) { if (trigger == 0) /* compatible SCI trigger is level */ trigger = 3; @@ -1327,3 +1327,25 @@ static int __init setup_acpi_sci(char *s) return 0; } early_param("acpi_sci", setup_acpi_sci); + +int __acpi_acquire_global_lock(unsigned int *lock) +{ + unsigned int old, new, val; + do { + old = *lock; + new = (((old & ~0x3) + 2) + ((old >> 1) & 0x1)); + val = cmpxchg(lock, old, new); + } while (unlikely (val != old)); + return (new < 3) ? -1 : 0; +} + +int __acpi_release_global_lock(unsigned int *lock) +{ + unsigned int old, new, val; + do { + old = *lock; + new = old & ~0x3; + val = cmpxchg(lock, old, new); + } while (unlikely (val != old)); + return old & 0x1; +} diff --git a/arch/i386/kernel/acpi/cstate.c b/arch/i386/kernel/acpi/cstate.c index 12e937c1ce4b..2d39f55d29a8 100644 --- a/arch/i386/kernel/acpi/cstate.c +++ b/arch/i386/kernel/acpi/cstate.c @@ -47,13 +47,13 @@ EXPORT_SYMBOL(acpi_processor_power_init_bm_check); /* The code below handles cstate entry with monitor-mwait pair on Intel*/ -struct cstate_entry_s { +struct cstate_entry { struct { unsigned int eax; unsigned int ecx; } states[ACPI_PROCESSOR_MAX_POWER]; }; -static struct cstate_entry_s *cpu_cstate_entry; /* per CPU ptr */ +static struct cstate_entry *cpu_cstate_entry; /* per CPU ptr */ static short mwait_supported[ACPI_PROCESSOR_MAX_POWER]; @@ -71,7 +71,7 @@ static short mwait_supported[ACPI_PROCESSOR_MAX_POWER]; int acpi_processor_ffh_cstate_probe(unsigned int cpu, struct acpi_processor_cx *cx, struct acpi_power_register *reg) { - struct cstate_entry_s *percpu_entry; + struct cstate_entry *percpu_entry; struct cpuinfo_x86 *c = cpu_data + cpu; cpumask_t saved_mask; @@ -136,7 +136,7 @@ EXPORT_SYMBOL_GPL(acpi_processor_ffh_cstate_probe); void acpi_processor_ffh_cstate_enter(struct acpi_processor_cx *cx) { unsigned int cpu = smp_processor_id(); - struct cstate_entry_s *percpu_entry; + struct cstate_entry *percpu_entry; percpu_entry = per_cpu_ptr(cpu_cstate_entry, cpu); mwait_idle_with_hints(percpu_entry->states[cx->index].eax, @@ -150,7 +150,7 @@ static int __init ffh_cstate_init(void) if (c->x86_vendor != X86_VENDOR_INTEL) return -1; - cpu_cstate_entry = alloc_percpu(struct cstate_entry_s); + cpu_cstate_entry = alloc_percpu(struct cstate_entry); return 0; } diff --git a/arch/i386/kernel/apm.c b/arch/i386/kernel/apm.c index a97847da9ed5..199016927541 100644 --- a/arch/i386/kernel/apm.c +++ b/arch/i386/kernel/apm.c @@ -785,7 +785,11 @@ static int apm_do_idle(void) polling = !!(current_thread_info()->status & TS_POLLING); if (polling) { current_thread_info()->status &= ~TS_POLLING; - smp_mb__after_clear_bit(); + /* + * TS_POLLING-cleared state must be visible before we + * test NEED_RESCHED: + */ + smp_mb(); } if (!need_resched()) { idled = 1; @@ -1604,7 +1608,7 @@ static int do_open(struct inode * inode, struct file * filp) { struct apm_user * as; - as = (struct apm_user *)kmalloc(sizeof(*as), GFP_KERNEL); + as = kmalloc(sizeof(*as), GFP_KERNEL); if (as == NULL) { printk(KERN_ERR "apm: cannot allocate struct of size %d bytes\n", sizeof(*as)); diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c index 1b34c56f8123..8a8bbdaaf38a 100644 --- a/arch/i386/kernel/cpu/common.c +++ b/arch/i386/kernel/cpu/common.c @@ -54,7 +54,7 @@ static struct cpu_dev __cpuinitdata default_cpu = { .c_init = default_init, .c_vendor = "Unknown", }; -static struct cpu_dev * this_cpu = &default_cpu; +static struct cpu_dev * this_cpu __cpuinitdata = &default_cpu; static int __init cachesize_setup(char *str) { @@ -710,11 +710,8 @@ __cpuinit int init_gdt(int cpu, struct task_struct *idle) return 1; } -/* Common CPU init for both boot and secondary CPUs */ -static void __cpuinit _cpu_init(int cpu, struct task_struct *curr) +void __cpuinit cpu_set_gdt(int cpu) { - struct tss_struct * t = &per_cpu(init_tss, cpu); - struct thread_struct *thread = &curr->thread; struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu); /* Reinit these anyway, even if they've already been done (on @@ -722,6 +719,13 @@ static void __cpuinit _cpu_init(int cpu, struct task_struct *curr) the real ones). */ load_gdt(cpu_gdt_descr); set_kernel_gs(); +} + +/* Common CPU init for both boot and secondary CPUs */ +static void __cpuinit _cpu_init(int cpu, struct task_struct *curr) +{ + struct tss_struct * t = &per_cpu(init_tss, cpu); + struct thread_struct *thread = &curr->thread; if (cpu_test_and_set(cpu, cpu_initialized)) { printk(KERN_WARNING "CPU#%d already initialized!\n", cpu); @@ -807,6 +811,7 @@ void __cpuinit cpu_init(void) local_irq_enable(); } + cpu_set_gdt(cpu); _cpu_init(cpu, curr); } diff --git a/arch/i386/kernel/cpu/cpufreq/Kconfig b/arch/i386/kernel/cpu/cpufreq/Kconfig index ccc1edff5c97..5299c5bf4454 100644 --- a/arch/i386/kernel/cpu/cpufreq/Kconfig +++ b/arch/i386/kernel/cpu/cpufreq/Kconfig @@ -17,6 +17,7 @@ config X86_ACPI_CPUFREQ help This driver adds a CPUFreq driver which utilizes the ACPI Processor Performance States. + This driver also supports Intel Enhanced Speedstep. For details, take a look at <file:Documentation/cpu-freq/>. @@ -121,11 +122,14 @@ config X86_SPEEDSTEP_CENTRINO If in doubt, say N. config X86_SPEEDSTEP_CENTRINO_ACPI - bool "Use ACPI tables to decode valid frequency/voltage pairs" + bool "Use ACPI tables to decode valid frequency/voltage (deprecated)" depends on X86_SPEEDSTEP_CENTRINO && ACPI_PROCESSOR depends on !(X86_SPEEDSTEP_CENTRINO = y && ACPI_PROCESSOR = m) default y help + This is deprecated and this functionality is now merged into + acpi_cpufreq (X86_ACPI_CPUFREQ). Use that driver instead of + speedstep_centrino. Use primarily the information provided in the BIOS ACPI tables to determine valid CPU frequency and voltage pairings. It is required for the driver to work on non-Banias CPUs. diff --git a/arch/i386/kernel/cpu/cpufreq/Makefile b/arch/i386/kernel/cpu/cpufreq/Makefile index 2e894f1c8910..8de3abe322a9 100644 --- a/arch/i386/kernel/cpu/cpufreq/Makefile +++ b/arch/i386/kernel/cpu/cpufreq/Makefile @@ -7,9 +7,9 @@ obj-$(CONFIG_SC520_CPUFREQ) += sc520_freq.o obj-$(CONFIG_X86_LONGRUN) += longrun.o obj-$(CONFIG_X86_GX_SUSPMOD) += gx-suspmod.o obj-$(CONFIG_X86_SPEEDSTEP_ICH) += speedstep-ich.o -obj-$(CONFIG_X86_SPEEDSTEP_CENTRINO) += speedstep-centrino.o obj-$(CONFIG_X86_SPEEDSTEP_LIB) += speedstep-lib.o obj-$(CONFIG_X86_SPEEDSTEP_SMI) += speedstep-smi.o obj-$(CONFIG_X86_ACPI_CPUFREQ) += acpi-cpufreq.o +obj-$(CONFIG_X86_SPEEDSTEP_CENTRINO) += speedstep-centrino.o obj-$(CONFIG_X86_P4_CLOCKMOD) += p4-clockmod.o obj-$(CONFIG_X86_CPUFREQ_NFORCE2) += cpufreq-nforce2.o diff --git a/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c index 57c880bf0bd6..10baa3501ed3 100644 --- a/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c @@ -1,9 +1,10 @@ /* - * acpi-cpufreq.c - ACPI Processor P-States Driver ($Revision: 1.3 $) + * acpi-cpufreq.c - ACPI Processor P-States Driver ($Revision: 1.4 $) * * Copyright (C) 2001, 2002 Andy Grover <andrew.grover@intel.com> * Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com> * Copyright (C) 2002 - 2004 Dominik Brodowski <linux@brodo.de> + * Copyright (C) 2006 Denis Sadykov <denis.m.sadykov@intel.com> * * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * @@ -27,202 +28,370 @@ #include <linux/kernel.h> #include <linux/module.h> #include <linux/init.h> +#include <linux/smp.h> +#include <linux/sched.h> #include <linux/cpufreq.h> -#include <linux/proc_fs.h> -#include <linux/seq_file.h> #include <linux/compiler.h> -#include <linux/sched.h> /* current */ #include <linux/dmi.h> -#include <asm/io.h> -#include <asm/delay.h> -#include <asm/uaccess.h> #include <linux/acpi.h> #include <acpi/processor.h> +#include <asm/io.h> +#include <asm/msr.h> +#include <asm/processor.h> +#include <asm/cpufeature.h> +#include <asm/delay.h> +#include <asm/uaccess.h> + #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "acpi-cpufreq", msg) MODULE_AUTHOR("Paul Diefenbaugh, Dominik Brodowski"); MODULE_DESCRIPTION("ACPI Processor P-States Driver"); MODULE_LICENSE("GPL"); +enum { + UNDEFINED_CAPABLE = 0, + SYSTEM_INTEL_MSR_CAPABLE, + SYSTEM_IO_CAPABLE, +}; + +#define INTEL_MSR_RANGE (0xffff) +#define CPUID_6_ECX_APERFMPERF_CAPABILITY (0x1) -struct cpufreq_acpi_io { - struct acpi_processor_performance *acpi_data; - struct cpufreq_frequency_table *freq_table; - unsigned int resume; +struct acpi_cpufreq_data { + struct acpi_processor_performance *acpi_data; + struct cpufreq_frequency_table *freq_table; + unsigned int max_freq; + unsigned int resume; + unsigned int cpu_feature; }; -static struct cpufreq_acpi_io *acpi_io_data[NR_CPUS]; -static struct acpi_processor_performance *acpi_perf_data[NR_CPUS]; +static struct acpi_cpufreq_data *drv_data[NR_CPUS]; +static struct acpi_processor_performance *acpi_perf_data[NR_CPUS]; static struct cpufreq_driver acpi_cpufreq_driver; static unsigned int acpi_pstate_strict; -static int -acpi_processor_write_port( - u16 port, - u8 bit_width, - u32 value) +static int check_est_cpu(unsigned int cpuid) { - if (bit_width <= 8) { - outb(value, port); - } else if (bit_width <= 16) { - outw(value, port); - } else if (bit_width <= 32) { - outl(value, port); - } else { - return -ENODEV; + struct cpuinfo_x86 *cpu = &cpu_data[cpuid]; + + if (cpu->x86_vendor != X86_VENDOR_INTEL || + !cpu_has(cpu, X86_FEATURE_EST)) + return 0; + + return 1; +} + +static unsigned extract_io(u32 value, struct acpi_cpufreq_data *data) +{ + struct acpi_processor_performance *perf; + int i; + + perf = data->acpi_data; + + for (i=0; i<perf->state_count; i++) { + if (value == perf->states[i].status) + return data->freq_table[i].frequency; } return 0; } -static int -acpi_processor_read_port( - u16 port, - u8 bit_width, - u32 *ret) +static unsigned extract_msr(u32 msr, struct acpi_cpufreq_data *data) { - *ret = 0; - if (bit_width <= 8) { - *ret = inb(port); - } else if (bit_width <= 16) { - *ret = inw(port); - } else if (bit_width <= 32) { - *ret = inl(port); - } else { - return -ENODEV; + int i; + struct acpi_processor_performance *perf; + + msr &= INTEL_MSR_RANGE; + perf = data->acpi_data; + + for (i=0; data->freq_table[i].frequency != CPUFREQ_TABLE_END; i++) { + if (msr == perf->states[data->freq_table[i].index].status) + return data->freq_table[i].frequency; } - return 0; + return data->freq_table[0].frequency; } -static int -acpi_processor_set_performance ( - struct cpufreq_acpi_io *data, - unsigned int cpu, - int state) +static unsigned extract_freq(u32 val, struct acpi_cpufreq_data *data) { - u16 port = 0; - u8 bit_width = 0; - int i = 0; - int ret = 0; - u32 value = 0; - int retval; - struct acpi_processor_performance *perf; - - dprintk("acpi_processor_set_performance\n"); - - retval = 0; - perf = data->acpi_data; - if (state == perf->state) { - if (unlikely(data->resume)) { - dprintk("Called after resume, resetting to P%d\n", state); - data->resume = 0; - } else { - dprintk("Already at target state (P%d)\n", state); - return (retval); - } + switch (data->cpu_feature) { + case SYSTEM_INTEL_MSR_CAPABLE: + return extract_msr(val, data); + case SYSTEM_IO_CAPABLE: + return extract_io(val, data); + default: + return 0; } +} + +struct msr_addr { + u32 reg; +}; - dprintk("Transitioning from P%d to P%d\n", perf->state, state); +struct io_addr { + u16 port; + u8 bit_width; +}; - /* - * First we write the target state's 'control' value to the - * control_register. - */ +typedef union { + struct msr_addr msr; + struct io_addr io; +} drv_addr_union; - port = perf->control_register.address; - bit_width = perf->control_register.bit_width; - value = (u32) perf->states[state].control; +struct drv_cmd { + unsigned int type; + cpumask_t mask; + drv_addr_union addr; + u32 val; +}; - dprintk("Writing 0x%08x to port 0x%04x\n", value, port); +static void do_drv_read(struct drv_cmd *cmd) +{ + u32 h; + + switch (cmd->type) { + case SYSTEM_INTEL_MSR_CAPABLE: + rdmsr(cmd->addr.msr.reg, cmd->val, h); + break; + case SYSTEM_IO_CAPABLE: + acpi_os_read_port((acpi_io_address)cmd->addr.io.port, + &cmd->val, + (u32)cmd->addr.io.bit_width); + break; + default: + break; + } +} - ret = acpi_processor_write_port(port, bit_width, value); - if (ret) { - dprintk("Invalid port width 0x%04x\n", bit_width); - return (ret); +static void do_drv_write(struct drv_cmd *cmd) +{ + u32 h = 0; + + switch (cmd->type) { + case SYSTEM_INTEL_MSR_CAPABLE: + wrmsr(cmd->addr.msr.reg, cmd->val, h); + break; + case SYSTEM_IO_CAPABLE: + acpi_os_write_port((acpi_io_address)cmd->addr.io.port, + cmd->val, + (u32)cmd->addr.io.bit_width); + break; + default: + break; } +} +static void drv_read(struct drv_cmd *cmd) +{ + cpumask_t saved_mask = current->cpus_allowed; + cmd->val = 0; + + set_cpus_allowed(current, cmd->mask); + do_drv_read(cmd); + set_cpus_allowed(current, saved_mask); +} + +static void drv_write(struct drv_cmd *cmd) +{ + cpumask_t saved_mask = current->cpus_allowed; + unsigned int i; + + for_each_cpu_mask(i, cmd->mask) { + set_cpus_allowed(current, cpumask_of_cpu(i)); + do_drv_write(cmd); + } + + set_cpus_allowed(current, saved_mask); + return; +} + +static u32 get_cur_val(cpumask_t mask) +{ + struct acpi_processor_performance *perf; + struct drv_cmd cmd; + + if (unlikely(cpus_empty(mask))) + return 0; + + switch (drv_data[first_cpu(mask)]->cpu_feature) { + case SYSTEM_INTEL_MSR_CAPABLE: + cmd.type = SYSTEM_INTEL_MSR_CAPABLE; + cmd.addr.msr.reg = MSR_IA32_PERF_STATUS; + break; + case SYSTEM_IO_CAPABLE: + cmd.type = SYSTEM_IO_CAPABLE; + perf = drv_data[first_cpu(mask)]->acpi_data; + cmd.addr.io.port = perf->control_register.address; + cmd.addr.io.bit_width = perf->control_register.bit_width; + break; + default: + return 0; + } + + cmd.mask = mask; + + drv_read(&cmd); + + dprintk("get_cur_val = %u\n", cmd.val); + + return cmd.val; +} + +/* + * Return the measured active (C0) frequency on this CPU since last call + * to this function. + * Input: cpu number + * Return: Average CPU frequency in terms of max frequency (zero on error) + * + * We use IA32_MPERF and IA32_APERF MSRs to get the measured performance + * over a period of time, while CPU is in C0 state. + * IA32_MPERF counts at the rate of max advertised frequency + * IA32_APERF counts at the rate of actual CPU frequency + * Only IA32_APERF/IA32_MPERF ratio is architecturally defined and + * no meaning should be associated with absolute values of these MSRs. + */ +static unsigned int get_measured_perf(unsigned int cpu) +{ + union { + struct { + u32 lo; + u32 hi; + } split; + u64 whole; + } aperf_cur, mperf_cur; + + cpumask_t saved_mask; + unsigned int perf_percent; + unsigned int retval; + + saved_mask = current->cpus_allowed; + set_cpus_allowed(current, cpumask_of_cpu(cpu)); + if (get_cpu() != cpu) { + /* We were not able to run on requested processor */ + put_cpu(); + return 0; + } + + rdmsr(MSR_IA32_APERF, aperf_cur.split.lo, aperf_cur.split.hi); + rdmsr(MSR_IA32_MPERF, mperf_cur.split.lo, mperf_cur.split.hi); + + wrmsr(MSR_IA32_APERF, 0,0); + wrmsr(MSR_IA32_MPERF, 0,0); + +#ifdef __i386__ /* - * Assume the write went through when acpi_pstate_strict is not used. - * As read status_register is an expensive operation and there - * are no specific error cases where an IO port write will fail. + * We dont want to do 64 bit divide with 32 bit kernel + * Get an approximate value. Return failure in case we cannot get + * an approximate value. */ - if (acpi_pstate_strict) { - /* Then we read the 'status_register' and compare the value - * with the target state's 'status' to make sure the - * transition was successful. - * Note that we'll poll for up to 1ms (100 cycles of 10us) - * before giving up. - */ - - port = perf->status_register.address; - bit_width = perf->status_register.bit_width; - - dprintk("Looking for 0x%08x from port 0x%04x\n", - (u32) perf->states[state].status, port); - - for (i = 0; i < 100; i++) { - ret = acpi_processor_read_port(port, bit_width, &value); - if (ret) { - dprintk("Invalid port width 0x%04x\n", bit_width); - return (ret); - } - if (value == (u32) perf->states[state].status) - break; - udelay(10); - } - } else { - value = (u32) perf->states[state].status; + if (unlikely(aperf_cur.split.hi || mperf_cur.split.hi)) { + int shift_count; + u32 h; + + h = max_t(u32, aperf_cur.split.hi, mperf_cur.split.hi); + shift_count = fls(h); + + aperf_cur.whole >>= shift_count; + mperf_cur.whole >>= shift_count; + } + + if (((unsigned long)(-1) / 100) < aperf_cur.split.lo) { + int shift_count = 7; + aperf_cur.split.lo >>= shift_count; + mperf_cur.split.lo >>= shift_count; + } + + if (aperf_cur.split.lo && mperf_cur.split.lo) + perf_percent = (aperf_cur.split.lo * 100) / mperf_cur.split.lo; + else + perf_percent = 0; + +#else + if (unlikely(((unsigned long)(-1) / 100) < aperf_cur.whole)) { + int shift_count = 7; + aperf_cur.whole >>= shift_count; + mperf_cur.whole >>= shift_count; } - if (unlikely(value != (u32) perf->states[state].status)) { - printk(KERN_WARNING "acpi-cpufreq: Transition failed\n"); - retval = -ENODEV; - return (retval); + if (aperf_cur.whole && mperf_cur.whole) + perf_percent = (aperf_cur.whole * 100) / mperf_cur.whole; + else + perf_percent = 0; + +#endif + + retval = drv_data[cpu]->max_freq * perf_percent / 100; + + put_cpu(); + set_cpus_allowed(current, saved_mask); + + dprintk("cpu %d: performance percent %d\n", cpu, perf_percent); + return retval; +} + +static unsigned int get_cur_freq_on_cpu(unsigned int cpu) +{ + struct acpi_cpufreq_data *data = drv_data[cpu]; + unsigned int freq; + + dprintk("get_cur_freq_on_cpu (%d)\n", cpu); + + if (unlikely(data == NULL || + data->acpi_data == NULL || data->freq_table == NULL)) { + return 0; } - dprintk("Transition successful after %d microseconds\n", i * 10); + freq = extract_freq(get_cur_val(cpumask_of_cpu(cpu)), data); + dprintk("cur freq = %u\n", freq); - perf->state = state; - return (retval); + return freq; } +static unsigned int check_freqs(cpumask_t mask, unsigned int freq, + struct acpi_cpufreq_data *data) +{ + unsigned int cur_freq; + unsigned int i; + + for (i=0; i<100; i++) { + cur_freq = extract_freq(get_cur_val(mask), data); + if (cur_freq == freq) + return 1; + udelay(10); + } + return 0; +} -static int -acpi_cpufreq_target ( - struct cpufreq_policy *policy, - unsigned int target_freq, - unsigned int relation) +static int acpi_cpufreq_target(struct cpufreq_policy *policy, + unsigned int target_freq, unsigned int relation) { - struct cpufreq_acpi_io *data = acpi_io_data[policy->cpu]; + struct acpi_cpufreq_data *data = drv_data[policy->cpu]; struct acpi_processor_performance *perf; struct cpufreq_freqs freqs; cpumask_t online_policy_cpus; - cpumask_t saved_mask; - cpumask_t set_mask; - cpumask_t covered_cpus; - unsigned int cur_state = 0; - unsigned int next_state = 0; - unsigned int result = 0; - unsigned int j; - unsigned int tmp; + struct drv_cmd cmd; + unsigned int msr; + unsigned int next_state = 0; /* Index into freq_table */ + unsigned int next_perf_state = 0; /* Index into perf table */ + unsigned int i; + int result = 0; - dprintk("acpi_cpufreq_setpolicy\n"); + dprintk("acpi_cpufreq_target %d (%d)\n", target_freq, policy->cpu); - result = cpufreq_frequency_table_target(policy, - data->freq_table, - target_freq, - relation, - &next_state); - if (unlikely(result)) - return (result); + if (unlikely(data == NULL || + data->acpi_data == NULL || data->freq_table == NULL)) { + return -ENODEV; + } perf = data->acpi_data; - cur_state = perf->state; - freqs.old = data->freq_table[cur_state].frequency; - freqs.new = data->freq_table[next_state].frequency; + result = cpufreq_frequency_table_target(policy, + data->freq_table, + target_freq, + relation, &next_state); + if (unlikely(result)) + return -ENODEV; #ifdef CONFIG_HOTPLUG_CPU /* cpufreq holds the hotplug lock, so we are safe from here on */ @@ -231,106 +400,85 @@ acpi_cpufreq_target ( online_policy_cpus = policy->cpus; #endif - for_each_cpu_mask(j, online_policy_cpus) { - freqs.cpu = j; - cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); + next_perf_state = data->freq_table[next_state].index; + if (perf->state == next_perf_state) { + if (unlikely(data->resume)) { + dprintk("Called after resume, resetting to P%d\n", + next_perf_state); + data->resume = 0; + } else { + dprintk("Already at target state (P%d)\n", + next_perf_state); + return 0; + } } - /* - * We need to call driver->target() on all or any CPU in - * policy->cpus, depending on policy->shared_type. - */ - saved_mask = current->cpus_allowed; - cpus_clear(covered_cpus); - for_each_cpu_mask(j, online_policy_cpus) { - /* - * Support for SMP systems. - * Make sure we are running on CPU that wants to change freq - */ - cpus_clear(set_mask); - if (policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) - cpus_or(set_mask, set_mask, online_policy_cpus); - else - cpu_set(j, set_mask); - - set_cpus_allowed(current, set_mask); - if (unlikely(!cpu_isset(smp_processor_id(), set_mask))) { - dprintk("couldn't limit to CPUs in this domain\n"); - result = -EAGAIN; - break; - } + switch (data->cpu_feature) { + case SYSTEM_INTEL_MSR_CAPABLE: + cmd.type = SYSTEM_INTEL_MSR_CAPABLE; + cmd.addr.msr.reg = MSR_IA32_PERF_CTL; + msr = + (u32) perf->states[next_perf_state]. + control & INTEL_MSR_RANGE; + cmd.val = get_cur_val(online_policy_cpus); + cmd.val = (cmd.val & ~INTEL_MSR_RANGE) | msr; + break; + case SYSTEM_IO_CAPABLE: + cmd.type = SYSTEM_IO_CAPABLE; + cmd.addr.io.port = perf->control_register.address; + cmd.addr.io.bit_width = perf->control_register.bit_width; + cmd.val = (u32) perf->states[next_perf_state].control; + break; + default: + return -ENODEV; + } - result = acpi_processor_set_performance (data, j, next_state); - if (result) { - result = -EAGAIN; - break; - } + cpus_clear(cmd.mask); - if (policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) - break; - - cpu_set(j, covered_cpus); - } + if (policy->shared_type != CPUFREQ_SHARED_TYPE_ANY) + cmd.mask = online_policy_cpus; + else + cpu_set(policy->cpu, cmd.mask); - for_each_cpu_mask(j, online_policy_cpus) { - freqs.cpu = j; - cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); + freqs.old = perf->states[perf->state].core_frequency * 1000; + freqs.new = data->freq_table[next_state].frequency; + for_each_cpu_mask(i, cmd.mask) { + freqs.cpu = i; + cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); } - if (unlikely(result)) { - /* - * We have failed halfway through the frequency change. - * We have sent callbacks to online_policy_cpus and - * acpi_processor_set_performance() has been called on - * coverd_cpus. Best effort undo.. - */ - - if (!cpus_empty(covered_cpus)) { - for_each_cpu_mask(j, covered_cpus) { - policy->cpu = j; - acpi_processor_set_performance (data, - j, - cur_state); - } - } + drv_write(&cmd); - tmp = freqs.new; - freqs.new = freqs.old; - freqs.old = tmp; - for_each_cpu_mask(j, online_policy_cpus) { - freqs.cpu = j; - cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); - cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); + if (acpi_pstate_strict) { + if (!check_freqs(cmd.mask, freqs.new, data)) { + dprintk("acpi_cpufreq_target failed (%d)\n", + policy->cpu); + return -EAGAIN; } } - set_cpus_allowed(current, saved_mask); - return (result); -} + for_each_cpu_mask(i, cmd.mask) { + freqs.cpu = i; + cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); + } + perf->state = next_perf_state; + return result; +} -static int -acpi_cpufreq_verify ( - struct cpufreq_policy *policy) +static int acpi_cpufreq_verify(struct cpufreq_policy *policy) { - unsigned int result = 0; - struct cpufreq_acpi_io *data = acpi_io_data[policy->cpu]; + struct acpi_cpufreq_data *data = drv_data[policy->cpu]; dprintk("acpi_cpufreq_verify\n"); - result = cpufreq_frequency_table_verify(policy, - data->freq_table); - - return (result); + return cpufreq_frequency_table_verify(policy, data->freq_table); } - static unsigned long -acpi_cpufreq_guess_freq ( - struct cpufreq_acpi_io *data, - unsigned int cpu) +acpi_cpufreq_guess_freq(struct acpi_cpufreq_data *data, unsigned int cpu) { - struct acpi_processor_performance *perf = data->acpi_data; + struct acpi_processor_performance *perf = data->acpi_data; if (cpu_khz) { /* search the closest match to cpu_khz */ @@ -338,16 +486,16 @@ acpi_cpufreq_guess_freq ( unsigned long freq; unsigned long freqn = perf->states[0].core_frequency * 1000; - for (i = 0; i < (perf->state_count - 1); i++) { + for (i=0; i<(perf->state_count-1); i++) { freq = freqn; freqn = perf->states[i+1].core_frequency * 1000; if ((2 * cpu_khz) > (freqn + freq)) { perf->state = i; - return (freq); + return freq; } } - perf->state = perf->state_count - 1; - return (freqn); + perf->state = perf->state_count-1; + return freqn; } else { /* assume CPU is at P0... */ perf->state = 0; @@ -355,7 +503,6 @@ acpi_cpufreq_guess_freq ( } } - /* * acpi_cpufreq_early_init - initialize ACPI P-States library * @@ -364,30 +511,34 @@ acpi_cpufreq_guess_freq ( * do _PDC and _PSD and find out the processor dependency for the * actual init that will happen later... */ -static int acpi_cpufreq_early_init_acpi(void) +static int acpi_cpufreq_early_init(void) { - struct acpi_processor_performance *data; - unsigned int i, j; + struct acpi_processor_performance *data; + cpumask_t covered; + unsigned int i, j; dprintk("acpi_cpufreq_early_init\n"); for_each_possible_cpu(i) { - data = kzalloc(sizeof(struct acpi_processor_performance), - GFP_KERNEL); + data = kzalloc(sizeof(struct acpi_processor_performance), + GFP_KERNEL); if (!data) { - for_each_possible_cpu(j) { + for_each_cpu_mask(j, covered) { kfree(acpi_perf_data[j]); acpi_perf_data[j] = NULL; } - return (-ENOMEM); + return -ENOMEM; } acpi_perf_data[i] = data; + cpu_set(i, covered); } /* Do initialization in ACPI core */ - return acpi_processor_preregister_performance(acpi_perf_data); + acpi_processor_preregister_performance(acpi_perf_data); + return 0; } +#ifdef CONFIG_SMP /* * Some BIOSes do SW_ANY coordination internally, either set it up in hw * or do it in BIOS firmware and won't inform about it to OS. If not @@ -414,39 +565,42 @@ static struct dmi_system_id sw_any_bug_dmi_table[] = { }, { } }; +#endif -static int -acpi_cpufreq_cpu_init ( - struct cpufreq_policy *policy) +static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) { - unsigned int i; - unsigned int cpu = policy->cpu; - struct cpufreq_acpi_io *data; - unsigned int result = 0; + unsigned int i; + unsigned int valid_states = 0; + unsigned int cpu = policy->cpu; + struct acpi_cpufreq_data *data; + unsigned int result = 0; struct cpuinfo_x86 *c = &cpu_data[policy->cpu]; - struct acpi_processor_performance *perf; + struct acpi_processor_performance *perf; dprintk("acpi_cpufreq_cpu_init\n"); if (!acpi_perf_data[cpu]) - return (-ENODEV); + return -ENODEV; - data = kzalloc(sizeof(struct cpufreq_acpi_io), GFP_KERNEL); + data = kzalloc(sizeof(struct acpi_cpufreq_data), GFP_KERNEL); if (!data) - return (-ENOMEM); + return -ENOMEM; data->acpi_data = acpi_perf_data[cpu]; - acpi_io_data[cpu] = data; + drv_data[cpu] = data; - result = acpi_processor_register_performance(data->acpi_data, cpu); + if (cpu_has(c, X86_FEATURE_CONSTANT_TSC)) + acpi_cpufreq_driver.flags |= CPUFREQ_CONST_LOOPS; + result = acpi_processor_register_performance(data->acpi_data, cpu); if (result) goto err_free; perf = data->acpi_data; policy->shared_type = perf->shared_type; + /* - * Will let policy->cpus know about dependency only when software + * Will let policy->cpus know about dependency only when software * coordination is required. */ if (policy->shared_type == CPUFREQ_SHARED_TYPE_ALL || @@ -462,10 +616,6 @@ acpi_cpufreq_cpu_init ( } #endif - if (cpu_has(c, X86_FEATURE_CONSTANT_TSC)) { - acpi_cpufreq_driver.flags |= CPUFREQ_CONST_LOOPS; - } - /* capability check */ if (perf->state_count <= 1) { dprintk("No P-States\n"); @@ -473,17 +623,33 @@ acpi_cpufreq_cpu_init ( goto err_unreg; } - if ((perf->control_register.space_id != ACPI_ADR_SPACE_SYSTEM_IO) || - (perf->status_register.space_id != ACPI_ADR_SPACE_SYSTEM_IO)) { - dprintk("Unsupported address space [%d, %d]\n", - (u32) (perf->control_register.space_id), - (u32) (perf->status_register.space_id)); + if (perf->control_register.space_id != perf->status_register.space_id) { result = -ENODEV; goto err_unreg; } - /* alloc freq_table */ - data->freq_table = kmalloc(sizeof(struct cpufreq_frequency_table) * (perf->state_count + 1), GFP_KERNEL); + switch (perf->control_register.space_id) { + case ACPI_ADR_SPACE_SYSTEM_IO: + dprintk("SYSTEM IO addr space\n"); + data->cpu_feature = SYSTEM_IO_CAPABLE; + break; + case ACPI_ADR_SPACE_FIXED_HARDWARE: + dprintk("HARDWARE addr space\n"); + if (!check_est_cpu(cpu)) { + result = -ENODEV; + goto err_unreg; + } + data->cpu_feature = SYSTEM_INTEL_MSR_CAPABLE; + break; + default: + dprintk("Unknown addr space %d\n", + (u32) (perf->control_register.space_id)); + result = -ENODEV; + goto err_unreg; + } + + data->freq_table = kmalloc(sizeof(struct cpufreq_frequency_table) * + (perf->state_count+1), GFP_KERNEL); if (!data->freq_table) { result = -ENOMEM; goto err_unreg; @@ -492,129 +658,141 @@ acpi_cpufreq_cpu_init ( /* detect transition latency */ policy->cpuinfo.transition_latency = 0; for (i=0; i<perf->state_count; i++) { - if ((perf->states[i].transition_latency * 1000) > policy->cpuinfo.transition_latency) - policy->cpuinfo.transition_latency = perf->states[i].transition_latency * 1000; + if ((perf->states[i].transition_latency * 1000) > + policy->cpuinfo.transition_latency) + policy->cpuinfo.transition_latency = + perf->states[i].transition_latency * 1000; } policy->governor = CPUFREQ_DEFAULT_GOVERNOR; - /* The current speed is unknown and not detectable by ACPI... */ - policy->cur = acpi_cpufreq_guess_freq(data, policy->cpu); - + data->max_freq = perf->states[0].core_frequency * 1000; /* table init */ - for (i=0; i<=perf->state_count; i++) - { - data->freq_table[i].index = i; - if (i<perf->state_count) - data->freq_table[i].frequency = perf->states[i].core_frequency * 1000; - else - data->freq_table[i].frequency = CPUFREQ_TABLE_END; + for (i=0; i<perf->state_count; i++) { + if (i>0 && perf->states[i].core_frequency == + perf->states[i-1].core_frequency) + continue; + + data->freq_table[valid_states].index = i; + data->freq_table[valid_states].frequency = + perf->states[i].core_frequency * 1000; + valid_states++; } + data->freq_table[valid_states].frequency = CPUFREQ_TABLE_END; + perf->state = 0; result = cpufreq_frequency_table_cpuinfo(policy, data->freq_table); - if (result) { + if (result) goto err_freqfree; + + switch (perf->control_register.space_id) { + case ACPI_ADR_SPACE_SYSTEM_IO: + /* Current speed is unknown and not detectable by IO port */ + policy->cur = acpi_cpufreq_guess_freq(data, policy->cpu); + break; + case ACPI_ADR_SPACE_FIXED_HARDWARE: + acpi_cpufreq_driver.get = get_cur_freq_on_cpu; + policy->cur = get_cur_freq_on_cpu(cpu); + break; + default: + break; } /* notify BIOS that we exist */ acpi_processor_notify_smm(THIS_MODULE); - printk(KERN_INFO "acpi-cpufreq: CPU%u - ACPI performance management activated.\n", - cpu); + /* Check for APERF/MPERF support in hardware */ + if (c->x86_vendor == X86_VENDOR_INTEL && c->cpuid_level >= 6) { + unsigned int ecx; + ecx = cpuid_ecx(6); + if (ecx & CPUID_6_ECX_APERFMPERF_CAPABILITY) + acpi_cpufreq_driver.getavg = get_measured_perf; + } + + dprintk("CPU%u - ACPI performance management activated.\n", cpu); for (i = 0; i < perf->state_count; i++) dprintk(" %cP%d: %d MHz, %d mW, %d uS\n", - (i == perf->state?'*':' '), i, + (i == perf->state ? '*' : ' '), i, (u32) perf->states[i].core_frequency, (u32) perf->states[i].power, (u32) perf->states[i].transition_latency); cpufreq_frequency_table_get_attr(data->freq_table, policy->cpu); - + /* * the first call to ->target() should result in us actually * writing something to the appropriate registers. */ data->resume = 1; - - return (result); - err_freqfree: + return result; + +err_freqfree: kfree(data->freq_table); - err_unreg: +err_unreg: acpi_processor_unregister_performance(perf, cpu); - err_free: +err_free: kfree(data); - acpi_io_data[cpu] = NULL; + drv_data[cpu] = NULL; - return (result); + return result; } - -static int -acpi_cpufreq_cpu_exit ( - struct cpufreq_policy *policy) +static int acpi_cpufreq_cpu_exit(struct cpufreq_policy *policy) { - struct cpufreq_acpi_io *data = acpi_io_data[policy->cpu]; - + struct acpi_cpufreq_data *data = drv_data[policy->cpu]; dprintk("acpi_cpufreq_cpu_exit\n"); if (data) { cpufreq_frequency_table_put_attr(policy->cpu); - acpi_io_data[policy->cpu] = NULL; - acpi_processor_unregister_performance(data->acpi_data, policy->cpu); + drv_data[policy->cpu] = NULL; + acpi_processor_unregister_performance(data->acpi_data, + policy->cpu); kfree(data); } - return (0); + return 0; } -static int -acpi_cpufreq_resume ( - struct cpufreq_policy *policy) +static int acpi_cpufreq_resume(struct cpufreq_policy *policy) { - struct cpufreq_acpi_io *data = acpi_io_data[policy->cpu]; - + struct acpi_cpufreq_data *data = drv_data[policy->cpu]; dprintk("acpi_cpufreq_resume\n"); data->resume = 1; - return (0); + return 0; } - -static struct freq_attr* acpi_cpufreq_attr[] = { +static struct freq_attr *acpi_cpufreq_attr[] = { &cpufreq_freq_attr_scaling_available_freqs, NULL, }; static struct cpufreq_driver acpi_cpufreq_driver = { - .verify = acpi_cpufreq_verify, - .target = acpi_cpufreq_target, - .init = acpi_cpufreq_cpu_init, - .exit = acpi_cpufreq_cpu_exit, - .resume = acpi_cpufreq_resume, - .name = "acpi-cpufreq", - .owner = THIS_MODULE, - .attr = acpi_cpufreq_attr, + .verify = acpi_cpufreq_verify, + .target = acpi_cpufreq_target, + .init = acpi_cpufreq_cpu_init, + .exit = acpi_cpufreq_cpu_exit, + .resume = acpi_cpufreq_resume, + .name = "acpi-cpufreq", + .owner = THIS_MODULE, + .attr = acpi_cpufreq_attr, }; - -static int __init -acpi_cpufreq_init (void) +static int __init acpi_cpufreq_init(void) { dprintk("acpi_cpufreq_init\n"); - acpi_cpufreq_early_init_acpi(); + acpi_cpufreq_early_init(); return cpufreq_register_driver(&acpi_cpufreq_driver); } - -static void __exit -acpi_cpufreq_exit (void) +static void __exit acpi_cpufreq_exit(void) { - unsigned int i; + unsigned int i; dprintk("acpi_cpufreq_exit\n"); cpufreq_unregister_driver(&acpi_cpufreq_driver); @@ -627,7 +805,9 @@ acpi_cpufreq_exit (void) } module_param(acpi_pstate_strict, uint, 0644); -MODULE_PARM_DESC(acpi_pstate_strict, "value 0 or non-zero. non-zero -> strict ACPI checks are performed during frequency changes."); +MODULE_PARM_DESC(acpi_pstate_strict, + "value 0 or non-zero. non-zero -> strict ACPI checks are " + "performed during frequency changes."); late_initcall(acpi_cpufreq_init); module_exit(acpi_cpufreq_exit); diff --git a/arch/i386/kernel/cpu/cpufreq/gx-suspmod.c b/arch/i386/kernel/cpu/cpufreq/gx-suspmod.c index 92afa3bc84f1..6667e9cceb9f 100644 --- a/arch/i386/kernel/cpu/cpufreq/gx-suspmod.c +++ b/arch/i386/kernel/cpu/cpufreq/gx-suspmod.c @@ -447,7 +447,6 @@ static int __init cpufreq_gx_init(void) int ret; struct gxfreq_params *params; struct pci_dev *gx_pci; - u32 class_rev; /* Test if we have the right hardware */ if ((gx_pci = gx_detect_chipset()) == NULL) @@ -472,8 +471,7 @@ static int __init cpufreq_gx_init(void) pci_read_config_byte(params->cs55x0, PCI_PMER2, &(params->pci_pmer2)); pci_read_config_byte(params->cs55x0, PCI_MODON, &(params->on_duration)); pci_read_config_byte(params->cs55x0, PCI_MODOFF, &(params->off_duration)); - pci_read_config_dword(params->cs55x0, PCI_CLASS_REVISION, &class_rev); - params->pci_rev = class_rev && 0xff; + pci_read_config_byte(params->cs55x0, PCI_REVISION_ID, ¶ms->pci_rev); if ((ret = cpufreq_register_driver(&gx_suspmod_driver))) { kfree(params); diff --git a/arch/i386/kernel/cpu/cpufreq/longhaul.c b/arch/i386/kernel/cpu/cpufreq/longhaul.c index 7233abe5d695..e940e00b96c9 100644 --- a/arch/i386/kernel/cpu/cpufreq/longhaul.c +++ b/arch/i386/kernel/cpu/cpufreq/longhaul.c @@ -52,6 +52,10 @@ #define CPU_EZRA_T 4 #define CPU_NEHEMIAH 5 +/* Flags */ +#define USE_ACPI_C3 (1 << 1) +#define USE_NORTHBRIDGE (1 << 2) + static int cpu_model; static unsigned int numscales=16; static unsigned int fsb; @@ -68,7 +72,7 @@ static unsigned int minmult, maxmult; static int can_scale_voltage; static struct acpi_processor *pr = NULL; static struct acpi_processor_cx *cx = NULL; -static int port22_en; +static u8 longhaul_flags; /* Module parameters */ static int scale_voltage; @@ -80,7 +84,6 @@ static int ignore_latency; /* Clock ratios multiplied by 10 */ static int clock_ratio[32]; static int eblcr_table[32]; -static unsigned int highest_speed, lowest_speed; /* kHz */ static int longhaul_version; static struct cpufreq_frequency_table *longhaul_table; @@ -178,7 +181,7 @@ static void do_powersaver(int cx_address, unsigned int clock_ratio_index) safe_halt(); /* Change frequency on next halt or sleep */ wrmsrl(MSR_VIA_LONGHAUL, longhaul.val); - if (port22_en) { + if (!cx_address) { ACPI_FLUSH_CPU_CACHE(); /* Invoke C1 */ halt(); @@ -189,7 +192,6 @@ static void do_powersaver(int cx_address, unsigned int clock_ratio_index) /* Dummy op - must do something useless after P_LVL3 read */ t = inl(acpi_fadt.xpm_tmr_blk.address); } - /* Disable bus ratio bit */ local_irq_disable(); longhaul.bits.RevisionKey = longhaul.bits.RevisionID; @@ -243,15 +245,14 @@ static void longhaul_setstate(unsigned int clock_ratio_index) outb(0xFF,0xA1); /* Overkill */ outb(0xFE,0x21); /* TMR0 only */ - if (pr->flags.bm_control) { + if (longhaul_flags & USE_NORTHBRIDGE) { + /* Disable AGP and PCI arbiters */ + outb(3, 0x22); + } else if ((pr != NULL) && pr->flags.bm_control) { /* Disable bus master arbitration */ acpi_set_register(ACPI_BITREG_ARB_DISABLE, 1, ACPI_MTX_DO_NOT_LOCK); - } else if (port22_en) { - /* Disable AGP and PCI arbiters */ - outb(3, 0x22); } - switch (longhaul_version) { /* @@ -278,22 +279,25 @@ static void longhaul_setstate(unsigned int clock_ratio_index) * to work in practice. */ case TYPE_POWERSAVER: - /* Don't allow wakeup */ - acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 0, - ACPI_MTX_DO_NOT_LOCK); - do_powersaver(cx->address, clock_ratio_index); + if (longhaul_flags & USE_ACPI_C3) { + /* Don't allow wakeup */ + acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 0, + ACPI_MTX_DO_NOT_LOCK); + do_powersaver(cx->address, clock_ratio_index); + } else { + do_powersaver(0, clock_ratio_index); + } break; } - if (pr->flags.bm_control) { + if (longhaul_flags & USE_NORTHBRIDGE) { + /* Enable arbiters */ + outb(0, 0x22); + } else if ((pr != NULL) && pr->flags.bm_control) { /* Enable bus master arbitration */ acpi_set_register(ACPI_BITREG_ARB_DISABLE, 0, ACPI_MTX_DO_NOT_LOCK); - } else if (port22_en) { - /* Enable arbiters */ - outb(0, 0x22); } - outb(pic2_mask,0xA1); /* restore mask */ outb(pic1_mask,0x21); @@ -314,12 +318,12 @@ static void longhaul_setstate(unsigned int clock_ratio_index) #define ROUNDING 0xf -static int _guess(int guess) +static int _guess(int guess, int mult) { int target; - target = ((maxmult/10)*guess); - if (maxmult%10 != 0) + target = ((mult/10)*guess); + if (mult%10 != 0) target += (guess/2); target += ROUNDING/2; target &= ~ROUNDING; @@ -327,17 +331,17 @@ static int _guess(int guess) } -static int guess_fsb(void) +static int guess_fsb(int mult) { int speed = (cpu_khz/1000); int i; - int speeds[3] = { 66, 100, 133 }; + int speeds[] = { 66, 100, 133, 200 }; speed += ROUNDING/2; speed &= ~ROUNDING; - for (i=0; i<3; i++) { - if (_guess(speeds[i]) == speed) + for (i=0; i<4; i++) { + if (_guess(speeds[i], mult) == speed) return speeds[i]; } return 0; @@ -354,9 +358,7 @@ static int __init longhaul_get_ranges(void) 130, 150, 160, 140, -1, 155, -1, 145 }; unsigned int j, k = 0; union msr_longhaul longhaul; - unsigned long lo, hi; - unsigned int eblcr_fsb_table_v1[] = { 66, 133, 100, -1 }; - unsigned int eblcr_fsb_table_v2[] = { 133, 100, -1, 66 }; + int mult = 0; switch (longhaul_version) { case TYPE_LONGHAUL_V1: @@ -364,30 +366,18 @@ static int __init longhaul_get_ranges(void) /* Ugh, Longhaul v1 didn't have the min/max MSRs. Assume min=3.0x & max = whatever we booted at. */ minmult = 30; - maxmult = longhaul_get_cpu_mult(); - rdmsr (MSR_IA32_EBL_CR_POWERON, lo, hi); - invalue = (lo & (1<<18|1<<19)) >>18; - if (cpu_model==CPU_SAMUEL || cpu_model==CPU_SAMUEL2) - fsb = eblcr_fsb_table_v1[invalue]; - else - fsb = guess_fsb(); + maxmult = mult = longhaul_get_cpu_mult(); break; case TYPE_POWERSAVER: /* Ezra-T */ if (cpu_model==CPU_EZRA_T) { + minmult = 30; rdmsrl (MSR_VIA_LONGHAUL, longhaul.val); invalue = longhaul.bits.MaxMHzBR; if (longhaul.bits.MaxMHzBR4) invalue += 16; - maxmult=ezra_t_multipliers[invalue]; - - invalue = longhaul.bits.MinMHzBR; - if (longhaul.bits.MinMHzBR4 == 1) - minmult = 30; - else - minmult = ezra_t_multipliers[invalue]; - fsb = eblcr_fsb_table_v2[longhaul.bits.MaxMHzFSB]; + maxmult = mult = ezra_t_multipliers[invalue]; break; } @@ -407,21 +397,16 @@ static int __init longhaul_get_ranges(void) * But it works, so we don't grumble. */ minmult=40; - maxmult=longhaul_get_cpu_mult(); - - /* Starting with the 1.2GHz parts, theres a 200MHz bus. */ - if ((cpu_khz/1000) > 1200) - fsb = 200; - else - fsb = eblcr_fsb_table_v2[longhaul.bits.MaxMHzFSB]; + maxmult = mult = longhaul_get_cpu_mult(); break; } } + fsb = guess_fsb(mult); dprintk ("MinMult:%d.%dx MaxMult:%d.%dx\n", minmult/10, minmult%10, maxmult/10, maxmult%10); - if (fsb == -1) { + if (fsb == 0) { printk (KERN_INFO PFX "Invalid (reserved) FSB!\n"); return -EINVAL; } @@ -583,6 +568,10 @@ static int enable_arbiter_disable(void) if (dev == NULL) { reg = 0x76; dev = pci_find_device(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_862X_0, NULL); + /* Find CN400 V-Link host bridge */ + if (dev == NULL) + dev = pci_find_device(PCI_VENDOR_ID_VIA, 0x7259, NULL); + } if (dev != NULL) { /* Enable access to port 0x22 */ @@ -687,27 +676,32 @@ static int __init longhaul_cpu_init(struct cpufreq_policy *policy) /* Find ACPI data for processor */ acpi_walk_namespace(ACPI_TYPE_PROCESSOR, ACPI_ROOT_OBJECT, ACPI_UINT32_MAX, &longhaul_walk_callback, NULL, (void *)&pr); - if (pr == NULL) - goto err_acpi; - if (longhaul_version == TYPE_POWERSAVER) { - /* Check ACPI support for C3 state */ + /* Check ACPI support for C3 state */ + if ((pr != NULL) && (longhaul_version == TYPE_POWERSAVER)) { cx = &pr->power.states[ACPI_STATE_C3]; if (cx->address > 0 && (cx->latency <= 1000 || ignore_latency != 0) ) { + longhaul_flags |= USE_ACPI_C3; goto print_support_type; } } + /* Check if northbridge is friendly */ + if (enable_arbiter_disable()) { + longhaul_flags |= USE_NORTHBRIDGE; + goto print_support_type; + } + + /* No ACPI C3 or we can't use it */ /* Check ACPI support for bus master arbiter disable */ - if (!pr->flags.bm_control) { - if (enable_arbiter_disable()) { - port22_en = 1; - } else { - goto err_acpi; - } + if ((pr == NULL) || !(pr->flags.bm_control)) { + printk(KERN_ERR PFX + "No ACPI support. Unsupported northbridge.\n"); + return -ENODEV; } + print_support_type: - if (!port22_en) { + if (!(longhaul_flags & USE_NORTHBRIDGE)) { printk (KERN_INFO PFX "Using ACPI support.\n"); } else { printk (KERN_INFO PFX "Using northbridge support.\n"); @@ -732,10 +726,6 @@ print_support_type: cpufreq_frequency_table_get_attr(longhaul_table, policy->cpu); return 0; - -err_acpi: - printk(KERN_ERR PFX "No ACPI support. No VT8601 or VT8623 northbridge. Aborting.\n"); - return -ENODEV; } static int __devexit longhaul_cpu_exit(struct cpufreq_policy *policy) @@ -770,8 +760,8 @@ static int __init longhaul_init(void) #ifdef CONFIG_SMP if (num_online_cpus() > 1) { - return -ENODEV; printk(KERN_ERR PFX "More than 1 CPU detected, longhaul disabled.\n"); + return -ENODEV; } #endif #ifdef CONFIG_X86_IO_APIC @@ -783,8 +773,10 @@ static int __init longhaul_init(void) switch (c->x86_model) { case 6 ... 9: return cpufreq_register_driver(&longhaul_driver); + case 10: + printk(KERN_ERR PFX "Use acpi-cpufreq driver for VIA C7\n"); default: - printk (KERN_INFO PFX "Unknown VIA CPU. Contact davej@codemonkey.org.uk\n"); + ;; } return -ENODEV; diff --git a/arch/i386/kernel/cpu/cpufreq/p4-clockmod.c b/arch/i386/kernel/cpu/cpufreq/p4-clockmod.c index 304d2eaa4a1b..4786fedca6eb 100644 --- a/arch/i386/kernel/cpu/cpufreq/p4-clockmod.c +++ b/arch/i386/kernel/cpu/cpufreq/p4-clockmod.c @@ -51,7 +51,6 @@ enum { static int has_N44_O17_errata[NR_CPUS]; -static int has_N60_errata[NR_CPUS]; static unsigned int stock_freq; static struct cpufreq_driver p4clockmod_driver; static unsigned int cpufreq_p4_get(unsigned int cpu); @@ -163,29 +162,27 @@ static int cpufreq_p4_verify(struct cpufreq_policy *policy) static unsigned int cpufreq_p4_get_frequency(struct cpuinfo_x86 *c) { - if ((c->x86 == 0x06) && (c->x86_model == 0x09)) { - /* Pentium M (Banias) */ - printk(KERN_WARNING PFX "Warning: Pentium M detected. " - "The speedstep_centrino module offers voltage scaling" - " in addition of frequency scaling. You should use " - "that instead of p4-clockmod, if possible.\n"); - return speedstep_get_processor_frequency(SPEEDSTEP_PROCESSOR_PM); - } - - if ((c->x86 == 0x06) && (c->x86_model == 0x0D)) { - /* Pentium M (Dothan) */ - printk(KERN_WARNING PFX "Warning: Pentium M detected. " - "The speedstep_centrino module offers voltage scaling" - " in addition of frequency scaling. You should use " - "that instead of p4-clockmod, if possible.\n"); - /* on P-4s, the TSC runs with constant frequency independent whether - * throttling is active or not. */ - p4clockmod_driver.flags |= CPUFREQ_CONST_LOOPS; - return speedstep_get_processor_frequency(SPEEDSTEP_PROCESSOR_PM); + if (c->x86 == 0x06) { + if (cpu_has(c, X86_FEATURE_EST)) + printk(KERN_WARNING PFX "Warning: EST-capable CPU detected. " + "The acpi-cpufreq module offers voltage scaling" + " in addition of frequency scaling. You should use " + "that instead of p4-clockmod, if possible.\n"); + switch (c->x86_model) { + case 0x0E: /* Core */ + case 0x0F: /* Core Duo */ + p4clockmod_driver.flags |= CPUFREQ_CONST_LOOPS; + return speedstep_get_processor_frequency(SPEEDSTEP_PROCESSOR_PCORE); + case 0x0D: /* Pentium M (Dothan) */ + p4clockmod_driver.flags |= CPUFREQ_CONST_LOOPS; + /* fall through */ + case 0x09: /* Pentium M (Banias) */ + return speedstep_get_processor_frequency(SPEEDSTEP_PROCESSOR_PM); + } } if (c->x86 != 0xF) { - printk(KERN_WARNING PFX "Unknown p4-clockmod-capable CPU. Please send an e-mail to <linux@brodo.de>\n"); + printk(KERN_WARNING PFX "Unknown p4-clockmod-capable CPU. Please send an e-mail to <cpufreq@lists.linux.org.uk>\n"); return 0; } @@ -226,12 +223,6 @@ static int cpufreq_p4_cpu_init(struct cpufreq_policy *policy) case 0x0f12: has_N44_O17_errata[policy->cpu] = 1; dprintk("has errata -- disabling low frequencies\n"); - break; - - case 0x0f29: - has_N60_errata[policy->cpu] = 1; - dprintk("has errata -- disabling frequencies lower than 2ghz\n"); - break; } /* get max frequency */ @@ -243,8 +234,6 @@ static int cpufreq_p4_cpu_init(struct cpufreq_policy *policy) for (i=1; (p4clockmod_table[i].frequency != CPUFREQ_TABLE_END); i++) { if ((i<2) && (has_N44_O17_errata[policy->cpu])) p4clockmod_table[i].frequency = CPUFREQ_ENTRY_INVALID; - else if (has_N60_errata[policy->cpu] && ((stock_freq * i)/8) < 2000000) - p4clockmod_table[i].frequency = CPUFREQ_ENTRY_INVALID; else p4clockmod_table[i].frequency = (stock_freq * i)/8; } diff --git a/arch/i386/kernel/cpu/cpufreq/sc520_freq.c b/arch/i386/kernel/cpu/cpufreq/sc520_freq.c index ef457d50f4ac..b8fb4b521c62 100644 --- a/arch/i386/kernel/cpu/cpufreq/sc520_freq.c +++ b/arch/i386/kernel/cpu/cpufreq/sc520_freq.c @@ -153,6 +153,7 @@ static struct cpufreq_driver sc520_freq_driver = { static int __init sc520_freq_init(void) { struct cpuinfo_x86 *c = cpu_data; + int err; /* Test if we have the right hardware */ if(c->x86_vendor != X86_VENDOR_AMD || @@ -166,7 +167,11 @@ static int __init sc520_freq_init(void) return -ENOMEM; } - return cpufreq_register_driver(&sc520_freq_driver); + err = cpufreq_register_driver(&sc520_freq_driver); + if (err) + iounmap(cpuctl); + + return err; } diff --git a/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c index e8993baf3d14..f43b987f952b 100644 --- a/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c +++ b/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c @@ -36,6 +36,7 @@ #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "speedstep-centrino", msg) +#define INTEL_MSR_RANGE (0xffff) struct cpu_id { @@ -379,6 +380,7 @@ static int centrino_cpu_early_init_acpi(void) } +#ifdef CONFIG_SMP /* * Some BIOSes do SW_ANY coordination internally, either set it up in hw * or do it in BIOS firmware and won't inform about it to OS. If not @@ -392,7 +394,6 @@ static int sw_any_bug_found(struct dmi_system_id *d) return 0; } - static struct dmi_system_id sw_any_bug_dmi_table[] = { { .callback = sw_any_bug_found, @@ -405,7 +406,7 @@ static struct dmi_system_id sw_any_bug_dmi_table[] = { }, { } }; - +#endif /* * centrino_cpu_init_acpi - register with ACPI P-States library @@ -463,8 +464,9 @@ static int centrino_cpu_init_acpi(struct cpufreq_policy *policy) } for (i=0; i<p->state_count; i++) { - if (p->states[i].control != p->states[i].status) { - dprintk("Different control (%llu) and status values (%llu)\n", + if ((p->states[i].control & INTEL_MSR_RANGE) != + (p->states[i].status & INTEL_MSR_RANGE)) { + dprintk("Different MSR bits in control (%llu) and status (%llu)\n", p->states[i].control, p->states[i].status); result = -EINVAL; goto err_unreg; @@ -500,7 +502,7 @@ static int centrino_cpu_init_acpi(struct cpufreq_policy *policy) } for (i=0; i<p->state_count; i++) { - centrino_model[cpu]->op_points[i].index = p->states[i].control; + centrino_model[cpu]->op_points[i].index = p->states[i].control & INTEL_MSR_RANGE; centrino_model[cpu]->op_points[i].frequency = p->states[i].core_frequency * 1000; dprintk("adding state %i with frequency %u and control value %04x\n", i, centrino_model[cpu]->op_points[i].frequency, centrino_model[cpu]->op_points[i].index); @@ -531,6 +533,9 @@ static int centrino_cpu_init_acpi(struct cpufreq_policy *policy) /* notify BIOS that we exist */ acpi_processor_notify_smm(THIS_MODULE); + printk("speedstep-centrino with X86_SPEEDSTEP_CENTRINO_ACPI " + "config is deprecated.\n " + "Use X86_ACPI_CPUFREQ (acpi-cpufreq) instead.\n" ); return 0; diff --git a/arch/i386/kernel/cpu/cpufreq/speedstep-lib.c b/arch/i386/kernel/cpu/cpufreq/speedstep-lib.c index 4f46cac155c4..d59277c00911 100644 --- a/arch/i386/kernel/cpu/cpufreq/speedstep-lib.c +++ b/arch/i386/kernel/cpu/cpufreq/speedstep-lib.c @@ -123,6 +123,36 @@ static unsigned int pentiumM_get_frequency(void) return (msr_tmp * 100 * 1000); } +static unsigned int pentium_core_get_frequency(void) +{ + u32 fsb = 0; + u32 msr_lo, msr_tmp; + + rdmsr(MSR_FSB_FREQ, msr_lo, msr_tmp); + /* see table B-2 of 25366920.pdf */ + switch (msr_lo & 0x07) { + case 5: + fsb = 100000; + break; + case 1: + fsb = 133333; + break; + case 3: + fsb = 166667; + break; + default: + printk(KERN_ERR "PCORE - MSR_FSB_FREQ undefined value"); + } + + rdmsr(MSR_IA32_EBL_CR_POWERON, msr_lo, msr_tmp); + dprintk("PCORE - MSR_IA32_EBL_CR_POWERON: 0x%x 0x%x\n", msr_lo, msr_tmp); + + msr_tmp = (msr_lo >> 22) & 0x1f; + dprintk("bits 22-26 are 0x%x, speed is %u\n", msr_tmp, (msr_tmp * fsb)); + + return (msr_tmp * fsb); +} + static unsigned int pentium4_get_frequency(void) { @@ -174,6 +204,8 @@ static unsigned int pentium4_get_frequency(void) unsigned int speedstep_get_processor_frequency(unsigned int processor) { switch (processor) { + case SPEEDSTEP_PROCESSOR_PCORE: + return pentium_core_get_frequency(); case SPEEDSTEP_PROCESSOR_PM: return pentiumM_get_frequency(); case SPEEDSTEP_PROCESSOR_P4D: diff --git a/arch/i386/kernel/cpu/cpufreq/speedstep-lib.h b/arch/i386/kernel/cpu/cpufreq/speedstep-lib.h index b735429c50b4..b11bcc608cac 100644 --- a/arch/i386/kernel/cpu/cpufreq/speedstep-lib.h +++ b/arch/i386/kernel/cpu/cpufreq/speedstep-lib.h @@ -22,6 +22,7 @@ * the speedstep_get_processor_frequency() call. */ #define SPEEDSTEP_PROCESSOR_PM 0xFFFFFF03 /* Pentium M */ #define SPEEDSTEP_PROCESSOR_P4D 0xFFFFFF04 /* desktop P4 */ +#define SPEEDSTEP_PROCESSOR_PCORE 0xFFFFFF05 /* Core */ /* speedstep states -- only two of them */ diff --git a/arch/i386/kernel/cpu/cpufreq/speedstep-smi.c b/arch/i386/kernel/cpu/cpufreq/speedstep-smi.c index c28333d53646..ff0d89806114 100644 --- a/arch/i386/kernel/cpu/cpufreq/speedstep-smi.c +++ b/arch/i386/kernel/cpu/cpufreq/speedstep-smi.c @@ -360,9 +360,6 @@ static int __init speedstep_init(void) case SPEEDSTEP_PROCESSOR_PIII_C: case SPEEDSTEP_PROCESSOR_PIII_C_EARLY: break; - case SPEEDSTEP_PROCESSOR_P4M: - printk(KERN_INFO "speedstep-smi: you're trying to use this cpufreq driver on a Pentium 4-based CPU. Most likely it will not work.\n"); - break; default: speedstep_processor = 0; } diff --git a/arch/i386/kernel/cpuid.c b/arch/i386/kernel/cpuid.c index db6dd20c3589..51130b39cd2e 100644 --- a/arch/i386/kernel/cpuid.c +++ b/arch/i386/kernel/cpuid.c @@ -116,7 +116,7 @@ static ssize_t cpuid_read(struct file *file, char __user *buf, char __user *tmp = buf; u32 data[4]; u32 reg = *ppos; - int cpu = iminor(file->f_dentry->d_inode); + int cpu = iminor(file->f_path.dentry->d_inode); if (count % 16) return -EINVAL; /* Invalid chunk size */ @@ -134,7 +134,7 @@ static ssize_t cpuid_read(struct file *file, char __user *buf, static int cpuid_open(struct inode *inode, struct file *file) { - unsigned int cpu = iminor(file->f_dentry->d_inode); + unsigned int cpu = iminor(file->f_path.dentry->d_inode); struct cpuinfo_x86 *c = &(cpu_data)[cpu]; if (cpu >= NR_CPUS || !cpu_online(cpu)) diff --git a/arch/i386/kernel/e820.c b/arch/i386/kernel/e820.c index 2f7d0a92fd7c..f391abcf7da9 100644 --- a/arch/i386/kernel/e820.c +++ b/arch/i386/kernel/e820.c @@ -668,7 +668,7 @@ void __init register_bootmem_low_pages(unsigned long max_low_pfn) } } -void __init register_memory(void) +void __init e820_register_memory(void) { unsigned long gapstart, gapsize, round; unsigned long long last; diff --git a/arch/i386/kernel/efi.c b/arch/i386/kernel/efi.c index b92c7f0a358a..8f9c624ace6f 100644 --- a/arch/i386/kernel/efi.c +++ b/arch/i386/kernel/efi.c @@ -473,6 +473,70 @@ static inline void __init check_range_for_systab(efi_memory_desc_t *md) } /* + * Wrap all the virtual calls in a way that forces the parameters on the stack. + */ + +#define efi_call_virt(f, args...) \ + ((efi_##f##_t __attribute__((regparm(0)))*)efi.systab->runtime->f)(args) + +static efi_status_t virt_efi_get_time(efi_time_t *tm, efi_time_cap_t *tc) +{ + return efi_call_virt(get_time, tm, tc); +} + +static efi_status_t virt_efi_set_time (efi_time_t *tm) +{ + return efi_call_virt(set_time, tm); +} + +static efi_status_t virt_efi_get_wakeup_time (efi_bool_t *enabled, + efi_bool_t *pending, + efi_time_t *tm) +{ + return efi_call_virt(get_wakeup_time, enabled, pending, tm); +} + +static efi_status_t virt_efi_set_wakeup_time (efi_bool_t enabled, + efi_time_t *tm) +{ + return efi_call_virt(set_wakeup_time, enabled, tm); +} + +static efi_status_t virt_efi_get_variable (efi_char16_t *name, + efi_guid_t *vendor, u32 *attr, + unsigned long *data_size, void *data) +{ + return efi_call_virt(get_variable, name, vendor, attr, data_size, data); +} + +static efi_status_t virt_efi_get_next_variable (unsigned long *name_size, + efi_char16_t *name, + efi_guid_t *vendor) +{ + return efi_call_virt(get_next_variable, name_size, name, vendor); +} + +static efi_status_t virt_efi_set_variable (efi_char16_t *name, + efi_guid_t *vendor, + unsigned long attr, + unsigned long data_size, void *data) +{ + return efi_call_virt(set_variable, name, vendor, attr, data_size, data); +} + +static efi_status_t virt_efi_get_next_high_mono_count (u32 *count) +{ + return efi_call_virt(get_next_high_mono_count, count); +} + +static void virt_efi_reset_system (int reset_type, efi_status_t status, + unsigned long data_size, + efi_char16_t *data) +{ + efi_call_virt(reset_system, reset_type, status, data_size, data); +} + +/* * This function will switch the EFI runtime services to virtual mode. * Essentially, look through the EFI memmap and map every region that * has the runtime attribute bit set in its memory descriptor and update @@ -525,22 +589,15 @@ void __init efi_enter_virtual_mode(void) * pointers in the runtime service table to the new virtual addresses. */ - efi.get_time = (efi_get_time_t *) efi.systab->runtime->get_time; - efi.set_time = (efi_set_time_t *) efi.systab->runtime->set_time; - efi.get_wakeup_time = (efi_get_wakeup_time_t *) - efi.systab->runtime->get_wakeup_time; - efi.set_wakeup_time = (efi_set_wakeup_time_t *) - efi.systab->runtime->set_wakeup_time; - efi.get_variable = (efi_get_variable_t *) - efi.systab->runtime->get_variable; - efi.get_next_variable = (efi_get_next_variable_t *) - efi.systab->runtime->get_next_variable; - efi.set_variable = (efi_set_variable_t *) - efi.systab->runtime->set_variable; - efi.get_next_high_mono_count = (efi_get_next_high_mono_count_t *) - efi.systab->runtime->get_next_high_mono_count; - efi.reset_system = (efi_reset_system_t *) - efi.systab->runtime->reset_system; + efi.get_time = virt_efi_get_time; + efi.set_time = virt_efi_set_time; + efi.get_wakeup_time = virt_efi_get_wakeup_time; + efi.set_wakeup_time = virt_efi_set_wakeup_time; + efi.get_variable = virt_efi_get_variable; + efi.get_next_variable = virt_efi_get_next_variable; + efi.set_variable = virt_efi_set_variable; + efi.get_next_high_mono_count = virt_efi_get_next_high_mono_count; + efi.reset_system = virt_efi_reset_system; } void __init diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S index de34b7fed3c1..5e47683fc63a 100644 --- a/arch/i386/kernel/entry.S +++ b/arch/i386/kernel/entry.S @@ -302,12 +302,16 @@ sysenter_past_esp: pushl $(__USER_CS) CFI_ADJUST_CFA_OFFSET 4 /*CFI_REL_OFFSET cs, 0*/ +#ifndef CONFIG_COMPAT_VDSO /* * Push current_thread_info()->sysenter_return to the stack. * A tiny bit of offset fixup is necessary - 4*4 means the 4 words * pushed above; +8 corresponds to copy_thread's esp0 setting. */ pushl (TI_sysenter_return-THREAD_SIZE+8+4*4)(%esp) +#else + pushl $SYSENTER_RETURN +#endif CFI_ADJUST_CFA_OFFSET 4 CFI_REL_OFFSET eip, 0 @@ -979,38 +983,6 @@ ENTRY(spurious_interrupt_bug) jmp error_code CFI_ENDPROC -#ifdef CONFIG_STACK_UNWIND -ENTRY(arch_unwind_init_running) - CFI_STARTPROC - movl 4(%esp), %edx - movl (%esp), %ecx - leal 4(%esp), %eax - movl %ebx, PT_EBX(%edx) - xorl %ebx, %ebx - movl %ebx, PT_ECX(%edx) - movl %ebx, PT_EDX(%edx) - movl %esi, PT_ESI(%edx) - movl %edi, PT_EDI(%edx) - movl %ebp, PT_EBP(%edx) - movl %ebx, PT_EAX(%edx) - movl $__USER_DS, PT_DS(%edx) - movl $__USER_DS, PT_ES(%edx) - movl $0, PT_GS(%edx) - movl %ebx, PT_ORIG_EAX(%edx) - movl %ecx, PT_EIP(%edx) - movl 12(%esp), %ecx - movl $__KERNEL_CS, PT_CS(%edx) - movl %ebx, PT_EFLAGS(%edx) - movl %eax, PT_OLDESP(%edx) - movl 8(%esp), %eax - movl %ecx, 8(%esp) - movl PT_EBX(%edx), %ebx - movl $__KERNEL_DS, PT_OLDSS(%edx) - jmpl *%eax - CFI_ENDPROC -ENDPROC(arch_unwind_init_running) -#endif - ENTRY(kernel_thread_helper) pushl $0 # fake return address for unwinder CFI_STARTPROC diff --git a/arch/i386/kernel/io_apic.c b/arch/i386/kernel/io_apic.c index e21dcde0790e..6a3875f81a0a 100644 --- a/arch/i386/kernel/io_apic.c +++ b/arch/i386/kernel/io_apic.c @@ -1227,26 +1227,32 @@ static u8 irq_vector[NR_IRQ_VECTORS] __read_mostly = { FIRST_DEVICE_VECTOR , 0 } static int __assign_irq_vector(int irq) { - static int current_vector = FIRST_DEVICE_VECTOR, offset = 0; - int vector; + static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0; + int vector, offset, i; BUG_ON((unsigned)irq >= NR_IRQ_VECTORS); if (irq_vector[irq] > 0) return irq_vector[irq]; - current_vector += 8; - if (current_vector == SYSCALL_VECTOR) - current_vector += 8; - - if (current_vector >= FIRST_SYSTEM_VECTOR) { - offset++; - if (!(offset % 8)) - return -ENOSPC; - current_vector = FIRST_DEVICE_VECTOR + offset; - } - vector = current_vector; + offset = current_offset; +next: + vector += 8; + if (vector >= FIRST_SYSTEM_VECTOR) { + offset = (offset + 1) % 8; + vector = FIRST_DEVICE_VECTOR + offset; + } + if (vector == current_vector) + return -ENOSPC; + if (vector == SYSCALL_VECTOR) + goto next; + for (i = 0; i < NR_IRQ_VECTORS; i++) + if (irq_vector[i] == vector) + goto next; + + current_vector = vector; + current_offset = offset; irq_vector[irq] = vector; return vector; @@ -2485,7 +2491,7 @@ device_initcall(ioapic_init_sysfs); int create_irq(void) { /* Allocate an unused irq */ - int irq, new, vector; + int irq, new, vector = 0; unsigned long flags; irq = -ENOSPC; diff --git a/arch/i386/kernel/microcode.c b/arch/i386/kernel/microcode.c index 972346604f9d..c8fa13721bcb 100644 --- a/arch/i386/kernel/microcode.c +++ b/arch/i386/kernel/microcode.c @@ -1,7 +1,7 @@ /* * Intel CPU Microcode Update Driver for Linux * - * Copyright (C) 2000-2004 Tigran Aivazian + * Copyright (C) 2000-2006 Tigran Aivazian <tigran@aivazian.fsnet.co.uk> * 2006 Shaohua Li <shaohua.li@intel.com> * * This driver allows to upgrade microcode on Intel processors @@ -92,7 +92,7 @@ #include <asm/processor.h> MODULE_DESCRIPTION("Intel CPU (IA-32) Microcode Update Driver"); -MODULE_AUTHOR("Tigran Aivazian <tigran@veritas.com>"); +MODULE_AUTHOR("Tigran Aivazian <tigran@aivazian.fsnet.co.uk>"); MODULE_LICENSE("GPL"); #define MICROCODE_VERSION "1.14a" @@ -722,7 +722,7 @@ mc_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu) return NOTIFY_OK; } -static struct notifier_block mc_cpu_notifier = { +static struct notifier_block __cpuinitdata mc_cpu_notifier = { .notifier_call = mc_cpu_callback, }; @@ -752,7 +752,7 @@ static int __init microcode_init (void) register_hotcpu_notifier(&mc_cpu_notifier); printk(KERN_INFO - "IA-32 Microcode Update Driver: v" MICROCODE_VERSION " <tigran@veritas.com>\n"); + "IA-32 Microcode Update Driver: v" MICROCODE_VERSION " <tigran@aivazian.fsnet.co.uk>\n"); return 0; } diff --git a/arch/i386/kernel/module.c b/arch/i386/kernel/module.c index d7d9c8b23f72..3db0a5442eb1 100644 --- a/arch/i386/kernel/module.c +++ b/arch/i386/kernel/module.c @@ -21,6 +21,7 @@ #include <linux/fs.h> #include <linux/string.h> #include <linux/kernel.h> +#include <linux/bug.h> #if 0 #define DEBUGP printk @@ -141,10 +142,11 @@ int module_finalize(const Elf_Ehdr *hdr, apply_paravirt(pseg, pseg + para->sh_size); } - return 0; + return module_bug_finalize(hdr, sechdrs, me); } void module_arch_cleanup(struct module *mod) { alternatives_smp_module_del(mod); + module_bug_cleanup(mod); } diff --git a/arch/i386/kernel/mpparse.c b/arch/i386/kernel/mpparse.c index 2ce67228dff8..49bff3596bff 100644 --- a/arch/i386/kernel/mpparse.c +++ b/arch/i386/kernel/mpparse.c @@ -36,7 +36,7 @@ /* Have we found an MP table */ int smp_found_config; -unsigned int __initdata maxcpus = NR_CPUS; +unsigned int __cpuinitdata maxcpus = NR_CPUS; /* * Various Linux-internal data structures created from the @@ -102,9 +102,9 @@ static int __init mpf_checksum(unsigned char *mp, int len) */ static int mpc_record; -static struct mpc_config_translation *translation_table[MAX_MPC_ENTRY] __initdata; +static struct mpc_config_translation *translation_table[MAX_MPC_ENTRY] __cpuinitdata; -static void __devinit MP_processor_info (struct mpc_config_processor *m) +static void __cpuinit MP_processor_info (struct mpc_config_processor *m) { int ver, apicid; physid_mask_t phys_cpu; @@ -822,7 +822,7 @@ void __init mp_register_lapic_address(u64 address) Dprintk("Boot CPU = %d\n", boot_cpu_physical_apicid); } -void __devinit mp_register_lapic (u8 id, u8 enabled) +void __cpuinit mp_register_lapic (u8 id, u8 enabled) { struct mpc_config_processor processor; int boot_cpu = 0; diff --git a/arch/i386/kernel/msr.c b/arch/i386/kernel/msr.c index 1d1a56cae340..4a472a17d1c6 100644 --- a/arch/i386/kernel/msr.c +++ b/arch/i386/kernel/msr.c @@ -172,7 +172,7 @@ static ssize_t msr_read(struct file *file, char __user * buf, u32 __user *tmp = (u32 __user *) buf; u32 data[2]; u32 reg = *ppos; - int cpu = iminor(file->f_dentry->d_inode); + int cpu = iminor(file->f_path.dentry->d_inode); int err; if (count % 8) @@ -196,7 +196,7 @@ static ssize_t msr_write(struct file *file, const char __user *buf, const u32 __user *tmp = (const u32 __user *)buf; u32 data[2]; u32 reg = *ppos; - int cpu = iminor(file->f_dentry->d_inode); + int cpu = iminor(file->f_path.dentry->d_inode); int err; if (count % 8) @@ -216,7 +216,7 @@ static ssize_t msr_write(struct file *file, const char __user *buf, static int msr_open(struct inode *inode, struct file *file) { - unsigned int cpu = iminor(file->f_dentry->d_inode); + unsigned int cpu = iminor(file->f_path.dentry->d_inode); struct cpuinfo_x86 *c = &(cpu_data)[cpu]; if (cpu >= NR_CPUS || !cpu_online(cpu)) diff --git a/arch/i386/kernel/nmi.c b/arch/i386/kernel/nmi.c index f5bc7e1be801..1a6f8bb8881c 100644 --- a/arch/i386/kernel/nmi.c +++ b/arch/i386/kernel/nmi.c @@ -195,6 +195,8 @@ static __cpuinit inline int nmi_known_cpu(void) return 0; } +static int endflag __initdata = 0; + #ifdef CONFIG_SMP /* The performance counters used by NMI_LOCAL_APIC don't trigger when * the CPU is idle. To make sure the NMI watchdog really ticks on all @@ -202,7 +204,6 @@ static __cpuinit inline int nmi_known_cpu(void) */ static __init void nmi_cpu_busy(void *data) { - volatile int *endflag = data; local_irq_enable_in_hardirq(); /* Intentionally don't use cpu_relax here. This is to make sure that the performance counter really ticks, @@ -210,14 +211,13 @@ static __init void nmi_cpu_busy(void *data) pause instruction. On a real HT machine this is fine because all other CPUs are busy with "useless" delay loops and don't care if they get somewhat less cycles. */ - while (*endflag == 0) - barrier(); + while (endflag == 0) + mb(); } #endif static int __init check_nmi_watchdog(void) { - volatile int endflag = 0; unsigned int *prev_nmi_count; int cpu; @@ -310,13 +310,7 @@ static int __init setup_nmi_watchdog(char *str) if ((nmi >= NMI_INVALID) || (nmi < NMI_NONE)) return 0; - /* - * If any other x86 CPU has a local APIC, then - * please test the NMI stuff there and send me the - * missing bits. Right now Intel P6/P4 and AMD K7 only. - */ - if ((nmi == NMI_LOCAL_APIC) && (nmi_known_cpu() == 0)) - return 0; /* no lapic support */ + nmi_watchdog = nmi; return 1; } diff --git a/arch/i386/kernel/paravirt.c b/arch/i386/kernel/paravirt.c index 3dceab5828f1..e55fd05da0f5 100644 --- a/arch/i386/kernel/paravirt.c +++ b/arch/i386/kernel/paravirt.c @@ -566,4 +566,11 @@ struct paravirt_ops paravirt_ops = { .irq_enable_sysexit = native_irq_enable_sysexit, .iret = native_iret, }; -EXPORT_SYMBOL(paravirt_ops); + +/* + * NOTE: CONFIG_PARAVIRT is experimental and the paravirt_ops + * semantics are subject to change. Hence we only do this + * internal-only export of this, until it gets sorted out and + * all lowlevel CPU ops used by modules are separately exported. + */ +EXPORT_SYMBOL_GPL(paravirt_ops); diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c index 99308510a17c..c641056233a6 100644 --- a/arch/i386/kernel/process.c +++ b/arch/i386/kernel/process.c @@ -102,7 +102,12 @@ void default_idle(void) { if (!hlt_counter && boot_cpu_data.hlt_works_ok) { current_thread_info()->status &= ~TS_POLLING; - smp_mb__after_clear_bit(); + /* + * TS_POLLING-cleared state must be visible before we + * test NEED_RESCHED: + */ + smp_mb(); + local_irq_disable(); if (!need_resched()) safe_halt(); /* enables interrupts racelessly */ diff --git a/arch/i386/kernel/ptrace.c b/arch/i386/kernel/ptrace.c index f3f94ac5736a..af8aabe85800 100644 --- a/arch/i386/kernel/ptrace.c +++ b/arch/i386/kernel/ptrace.c @@ -45,7 +45,7 @@ /* * Offset of eflags on child stack.. */ -#define EFL_OFFSET ((EFL-2)*4-sizeof(struct pt_regs)) +#define EFL_OFFSET offsetof(struct pt_regs, eflags) static inline struct pt_regs *get_child_regs(struct task_struct *task) { @@ -54,24 +54,24 @@ static inline struct pt_regs *get_child_regs(struct task_struct *task) } /* - * this routine will get a word off of the processes privileged stack. - * the offset is how far from the base addr as stored in the TSS. - * this routine assumes that all the privileged stacks are in our + * This routine will get a word off of the processes privileged stack. + * the offset is bytes into the pt_regs structure on the stack. + * This routine assumes that all the privileged stacks are in our * data space. */ static inline int get_stack_long(struct task_struct *task, int offset) { unsigned char *stack; - stack = (unsigned char *)task->thread.esp0; + stack = (unsigned char *)task->thread.esp0 - sizeof(struct pt_regs); stack += offset; return (*((int *)stack)); } /* - * this routine will put a word on the processes privileged stack. - * the offset is how far from the base addr as stored in the TSS. - * this routine assumes that all the privileged stacks are in our + * This routine will put a word on the processes privileged stack. + * the offset is bytes into the pt_regs structure on the stack. + * This routine assumes that all the privileged stacks are in our * data space. */ static inline int put_stack_long(struct task_struct *task, int offset, @@ -79,7 +79,7 @@ static inline int put_stack_long(struct task_struct *task, int offset, { unsigned char * stack; - stack = (unsigned char *) task->thread.esp0; + stack = (unsigned char *)task->thread.esp0 - sizeof(struct pt_regs); stack += offset; *(unsigned long *) stack = data; return 0; @@ -114,7 +114,7 @@ static int putreg(struct task_struct *child, } if (regno > ES*4) regno -= 1*4; - put_stack_long(child, regno - sizeof(struct pt_regs), value); + put_stack_long(child, regno, value); return 0; } @@ -137,7 +137,6 @@ static unsigned long getreg(struct task_struct *child, default: if (regno > ES*4) regno -= 1*4; - regno = regno - sizeof(struct pt_regs); retval &= get_stack_long(child, regno); } return retval; diff --git a/arch/i386/kernel/quirks.c b/arch/i386/kernel/quirks.c index a01320a7b636..34874c398b44 100644 --- a/arch/i386/kernel/quirks.c +++ b/arch/i386/kernel/quirks.c @@ -10,13 +10,38 @@ #if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_SMP) && defined(CONFIG_PCI) static void __devinit verify_quirk_intel_irqbalance(struct pci_dev *dev) { + u8 config, rev; + u32 word; + + /* BIOS may enable hardware IRQ balancing for + * E7520/E7320/E7525(revision ID 0x9 and below) + * based platforms. + * For those platforms, make sure that the genapic is set to 'flat' + */ + pci_read_config_byte(dev, PCI_CLASS_REVISION, &rev); + if (rev > 0x9) + return; + + /* enable access to config space*/ + pci_read_config_byte(dev, 0xf4, &config); + pci_write_config_byte(dev, 0xf4, config|0x2); + + /* read xTPR register */ + raw_pci_ops->read(0, 0, 0x40, 0x4c, 2, &word); + + if (!(word & (1 << 13))) { #ifdef CONFIG_X86_64 - if (genapic != &apic_flat) - panic("APIC mode must be flat on this system\n"); + if (genapic != &apic_flat) + panic("APIC mode must be flat on this system\n"); #elif defined(CONFIG_X86_GENERICARCH) - if (genapic != &apic_default) - panic("APIC mode must be default(flat) on this system. Use apic=default\n"); + if (genapic != &apic_default) + panic("APIC mode must be default(flat) on this system. Use apic=default\n"); #endif + } + + /* put back the original value for config space*/ + if (!(config & 0x2)) + pci_write_config_byte(dev, 0xf4, config); } void __init quirk_intel_irqbalance(void) diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c index 79df6e612dbd..4b31ad70c1ac 100644 --- a/arch/i386/kernel/setup.c +++ b/arch/i386/kernel/setup.c @@ -77,7 +77,7 @@ extern struct resource code_resource; extern struct resource data_resource; /* cpu data as detected by the assembly code in head.S */ -struct cpuinfo_x86 new_cpu_data __initdata = { 0, 0, 0, 0, -1, 1, 0, 0, -1 }; +struct cpuinfo_x86 new_cpu_data __cpuinitdata = { 0, 0, 0, 0, -1, 1, 0, 0, -1 }; /* common cpu data for all cpus */ struct cpuinfo_x86 boot_cpu_data __read_mostly = { 0, 0, 0, 0, -1, 1, 0, 0, -1 }; EXPORT_SYMBOL(boot_cpu_data); @@ -495,7 +495,7 @@ static void set_mca_bus(int x) { } #endif /* Overridden in paravirt.c if CONFIG_PARAVIRT */ -char * __attribute__((weak)) memory_setup(void) +char * __init __attribute__((weak)) memory_setup(void) { return machine_specific_memory_setup(); } @@ -639,7 +639,7 @@ void __init setup_arch(char **cmdline_p) get_smp_config(); #endif - register_memory(); + e820_register_memory(); #ifdef CONFIG_VT #if defined(CONFIG_VGA_CONSOLE) diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c index 4bf0e3c83b8b..8c6c8c52b95c 100644 --- a/arch/i386/kernel/smpboot.c +++ b/arch/i386/kernel/smpboot.c @@ -69,9 +69,7 @@ static int __devinitdata smp_b_stepping; /* Number of siblings per CPU package */ int smp_num_siblings = 1; -#ifdef CONFIG_X86_HT EXPORT_SYMBOL(smp_num_siblings); -#endif /* Last level cache ID of each logical CPU */ int cpu_llc_id[NR_CPUS] __cpuinitdata = {[0 ... NR_CPUS-1] = BAD_APICID}; @@ -161,7 +159,7 @@ void __init smp_alloc_memory(void) * a given CPU */ -static void __devinit smp_store_cpu_info(int id) +static void __cpuinit smp_store_cpu_info(int id) { struct cpuinfo_x86 *c = cpu_data + id; @@ -229,7 +227,7 @@ static struct { atomic_t count_start; atomic_t count_stop; unsigned long long values[NR_CPUS]; -} tsc __initdata = { +} tsc __cpuinitdata = { .start_flag = ATOMIC_INIT(0), .count_start = ATOMIC_INIT(0), .count_stop = ATOMIC_INIT(0), @@ -334,7 +332,7 @@ static void __init synchronize_tsc_bp(void) printk("passed.\n"); } -static void __init synchronize_tsc_ap(void) +static void __cpuinit synchronize_tsc_ap(void) { int i; @@ -366,7 +364,7 @@ extern void calibrate_delay(void); static atomic_t init_deasserted; -static void __devinit smp_callin(void) +static void __cpuinit smp_callin(void) { int cpuid, phys_id; unsigned long timeout; @@ -540,7 +538,7 @@ set_cpu_sibling_map(int cpu) /* * Activate a secondary processor. */ -static void __devinit start_secondary(void *unused) +static void __cpuinit start_secondary(void *unused) { /* * Don't put *anything* before secondary_cpu_init(), SMP @@ -598,6 +596,12 @@ static void __devinit start_secondary(void *unused) void __devinit initialize_secondary(void) { /* + * switch to the per CPU GDT we already set up + * in do_boot_cpu() + */ + cpu_set_gdt(current_thread_info()->cpu); + + /* * We don't actually need to load the full TSS, * basically just the stack pointer and the eip. */ @@ -933,7 +937,7 @@ static inline struct task_struct * alloc_idle_task(int cpu) #define alloc_idle_task(cpu) fork_idle(cpu) #endif -static int __devinit do_boot_cpu(int apicid, int cpu) +static int __cpuinit do_boot_cpu(int apicid, int cpu) /* * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad * (ie clustered apic addressing mode), this is a LOGICAL apic ID. @@ -974,9 +978,6 @@ static int __devinit do_boot_cpu(int apicid, int cpu) /* Stack for startup_32 can be just as for start_secondary onwards */ stack_start.esp = (void *) idle->thread.esp; - start_pda = cpu_pda(cpu); - cpu_gdt_descr = per_cpu(cpu_gdt_descr, cpu); - irq_ctx_init(cpu); x86_cpu_to_apicid[cpu] = apicid; @@ -1118,7 +1119,7 @@ static int __cpuinit __smp_prepare_cpu(int cpu) /* init low mem mapping */ clone_pgd_range(swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS, - KERNEL_PGD_PTRS); + min_t(unsigned long, KERNEL_PGD_PTRS, USER_PGD_PTRS)); flush_tlb_all(); schedule_work(&info.task); wait_for_completion(&done); @@ -1434,7 +1435,7 @@ void __cpu_die(unsigned int cpu) } #endif /* CONFIG_HOTPLUG_CPU */ -int __devinit __cpu_up(unsigned int cpu) +int __cpuinit __cpu_up(unsigned int cpu) { #ifdef CONFIG_HOTPLUG_CPU int ret=0; diff --git a/arch/i386/kernel/sysenter.c b/arch/i386/kernel/sysenter.c index 7de9117b5a3a..5da744204d10 100644 --- a/arch/i386/kernel/sysenter.c +++ b/arch/i386/kernel/sysenter.c @@ -79,11 +79,6 @@ int __init sysenter_setup(void) #ifdef CONFIG_COMPAT_VDSO __set_fixmap(FIX_VDSO, __pa(syscall_page), PAGE_READONLY); printk("Compat vDSO mapped to %08lx.\n", __fix_to_virt(FIX_VDSO)); -#else - /* - * In the non-compat case the ELF coredumping code needs the fixmap: - */ - __set_fixmap(FIX_VDSO, __pa(syscall_page), PAGE_KERNEL_RO); #endif if (!boot_cpu_has(X86_FEATURE_SEP)) { @@ -100,6 +95,7 @@ int __init sysenter_setup(void) return 0; } +#ifndef CONFIG_COMPAT_VDSO static struct page *syscall_nopage(struct vm_area_struct *vma, unsigned long adr, int *type) { @@ -146,6 +142,13 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int exstack) vma->vm_end = addr + PAGE_SIZE; /* MAYWRITE to allow gdb to COW and set breakpoints */ vma->vm_flags = VM_READ|VM_EXEC|VM_MAYREAD|VM_MAYEXEC|VM_MAYWRITE; + /* + * Make sure the vDSO gets into every core dump. + * Dumping its contents makes post-mortem fully interpretable later + * without matching up the same kernel and hardware config to see + * what PC values meant. + */ + vma->vm_flags |= VM_ALWAYSDUMP; vma->vm_flags |= mm->def_flags; vma->vm_page_prot = protection_map[vma->vm_flags & 7]; vma->vm_ops = &syscall_vm_ops; @@ -187,3 +190,4 @@ int in_gate_area_no_task(unsigned long addr) { return 0; } +#endif diff --git a/arch/i386/kernel/trampoline.S b/arch/i386/kernel/trampoline.S index fcce0e61b0e7..2f1814c5cfd7 100644 --- a/arch/i386/kernel/trampoline.S +++ b/arch/i386/kernel/trampoline.S @@ -38,6 +38,11 @@ .data +/* We can free up trampoline after bootup if cpu hotplug is not supported. */ +#ifndef CONFIG_HOTPLUG_CPU +.section ".init.data","aw",@progbits +#endif + .code16 ENTRY(trampoline_data) diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c index 68de48e498ca..0efad8aeb41a 100644 --- a/arch/i386/kernel/traps.c +++ b/arch/i386/kernel/traps.c @@ -30,6 +30,7 @@ #include <linux/unwind.h> #include <linux/uaccess.h> #include <linux/nmi.h> +#include <linux/bug.h> #ifdef CONFIG_EISA #include <linux/ioport.h> @@ -93,11 +94,6 @@ asmlinkage void spurious_interrupt_bug(void); asmlinkage void machine_check(void); int kstack_depth_to_print = 24; -#ifdef CONFIG_STACK_UNWIND -static int call_trace = 1; -#else -#define call_trace (-1) -#endif ATOMIC_NOTIFIER_HEAD(i386die_chain); int register_die_notifier(struct notifier_block *nb) @@ -151,33 +147,6 @@ static inline unsigned long print_context_stack(struct thread_info *tinfo, return ebp; } -struct ops_and_data { - struct stacktrace_ops *ops; - void *data; -}; - -static asmlinkage int -dump_trace_unwind(struct unwind_frame_info *info, void *data) -{ - struct ops_and_data *oad = (struct ops_and_data *)data; - int n = 0; - unsigned long sp = UNW_SP(info); - - if (arch_unw_user_mode(info)) - return -1; - while (unwind(info) == 0 && UNW_PC(info)) { - n++; - oad->ops->address(oad->data, UNW_PC(info)); - if (arch_unw_user_mode(info)) - break; - if ((sp & ~(PAGE_SIZE - 1)) == (UNW_SP(info) & ~(PAGE_SIZE - 1)) - && sp > UNW_SP(info)) - break; - sp = UNW_SP(info); - } - return n; -} - #define MSG(msg) ops->warning(data, msg) void dump_trace(struct task_struct *task, struct pt_regs *regs, @@ -189,41 +158,6 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, if (!task) task = current; - if (call_trace >= 0) { - int unw_ret = 0; - struct unwind_frame_info info; - struct ops_and_data oad = { .ops = ops, .data = data }; - - if (regs) { - if (unwind_init_frame_info(&info, task, regs) == 0) - unw_ret = dump_trace_unwind(&info, &oad); - } else if (task == current) - unw_ret = unwind_init_running(&info, dump_trace_unwind, - &oad); - else { - if (unwind_init_blocked(&info, task) == 0) - unw_ret = dump_trace_unwind(&info, &oad); - } - if (unw_ret > 0) { - if (call_trace == 1 && !arch_unw_user_mode(&info)) { - ops->warning_symbol(data, - "DWARF2 unwinder stuck at %s", - UNW_PC(&info)); - if (UNW_SP(&info) >= PAGE_OFFSET) { - MSG("Leftover inexact backtrace:"); - stack = (void *)UNW_SP(&info); - if (!stack) - return; - ebp = UNW_FP(&info); - } else - MSG("Full inexact backtrace again:"); - } else if (call_trace >= 1) - return; - else - MSG("Full inexact backtrace again:"); - } else - MSG("Inexact backtrace:"); - } if (!stack) { unsigned long dummy; stack = &dummy; @@ -420,43 +354,22 @@ void show_registers(struct pt_regs *regs) printk("\n"); } -static void handle_BUG(struct pt_regs *regs) +int is_valid_bugaddr(unsigned long eip) { - unsigned long eip = regs->eip; unsigned short ud2; if (eip < PAGE_OFFSET) - return; + return 0; if (probe_kernel_address((unsigned short *)eip, ud2)) - return; - if (ud2 != 0x0b0f) - return; - - printk(KERN_EMERG "------------[ cut here ]------------\n"); - -#ifdef CONFIG_DEBUG_BUGVERBOSE - do { - unsigned short line; - char *file; - char c; - - if (probe_kernel_address((unsigned short *)(eip + 2), line)) - break; - if (probe_kernel_address((char **)(eip + 4), file) || - (unsigned long)file < PAGE_OFFSET || - probe_kernel_address(file, c)) - file = "<bad filename>"; + return 0; - printk(KERN_EMERG "kernel BUG at %s:%d!\n", file, line); - return; - } while (0); -#endif - printk(KERN_EMERG "Kernel BUG at [verbose debug info unavailable]\n"); + return ud2 == 0x0b0f; } -/* This is gone through when something in the kernel - * has done something bad and is about to be terminated. -*/ +/* + * This is gone through when something in the kernel has done something bad and + * is about to be terminated. + */ void die(const char * str, struct pt_regs * regs, long err) { static struct { @@ -488,7 +401,8 @@ void die(const char * str, struct pt_regs * regs, long err) unsigned long esp; unsigned short ss; - handle_BUG(regs); + report_bug(regs->eip); + printk(KERN_EMERG "%s: %04lx [#%d]\n", str, err & 0xffff, ++die_counter); #ifdef CONFIG_PREEMPT printk(KERN_EMERG "PREEMPT "); @@ -1277,19 +1191,3 @@ static int __init kstack_setup(char *s) return 1; } __setup("kstack=", kstack_setup); - -#ifdef CONFIG_STACK_UNWIND -static int __init call_trace_setup(char *s) -{ - if (strcmp(s, "old") == 0) - call_trace = -1; - else if (strcmp(s, "both") == 0) - call_trace = 0; - else if (strcmp(s, "newfallback") == 0) - call_trace = 1; - else if (strcmp(s, "new") == 2) - call_trace = 2; - return 1; -} -__setup("call_trace=", call_trace_setup); -#endif diff --git a/arch/i386/kernel/tsc.c b/arch/i386/kernel/tsc.c index 1bbe45dca7a0..2cfc7b09b925 100644 --- a/arch/i386/kernel/tsc.c +++ b/arch/i386/kernel/tsc.c @@ -24,7 +24,7 @@ */ unsigned int tsc_khz; -int tsc_disable __cpuinitdata = 0; +int tsc_disable; #ifdef CONFIG_X86_TSC static int __init tsc_setup(char *str) diff --git a/arch/i386/kernel/vmlinux.lds.S b/arch/i386/kernel/vmlinux.lds.S index 56e6ad5cb045..a53c8b1854b5 100644 --- a/arch/i386/kernel/vmlinux.lds.S +++ b/arch/i386/kernel/vmlinux.lds.S @@ -26,6 +26,7 @@ OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386") OUTPUT_ARCH(i386) ENTRY(phys_startup_32) jiffies = jiffies_64; +_proxy_pda = 0; PHDRS { text PT_LOAD FLAGS(5); /* R_E */ @@ -57,6 +58,8 @@ SECTIONS RODATA + BUG_TABLE + . = ALIGN(4); .tracedata : AT(ADDR(.tracedata) - LOAD_OFFSET) { __tracedata_start = .; diff --git a/arch/i386/mach-default/setup.c b/arch/i386/mach-default/setup.c index c511705c386c..cc2f519b2f7f 100644 --- a/arch/i386/mach-default/setup.c +++ b/arch/i386/mach-default/setup.c @@ -102,7 +102,7 @@ void __init time_init_hook(void) * along the MCA bus. Use this to hook into that chain if you will need * it. **/ -void __init mca_nmi_hook(void) +void mca_nmi_hook(void) { /* If I recall correctly, there's a whole bunch of other things that * we can do to check for NMI problems, but that's all I know about diff --git a/arch/i386/mach-generic/bigsmp.c b/arch/i386/mach-generic/bigsmp.c index 33d9f93557ba..8a210fa915b5 100644 --- a/arch/i386/mach-generic/bigsmp.c +++ b/arch/i386/mach-generic/bigsmp.c @@ -45,7 +45,7 @@ static struct dmi_system_id __initdata bigsmp_dmi_table[] = { }; -static __init int probe_bigsmp(void) +static int probe_bigsmp(void) { if (def_to_bigsmp) dmi_bigsmp = 1; diff --git a/arch/i386/mach-generic/default.c b/arch/i386/mach-generic/default.c index 96c19821e47d..8685208d8512 100644 --- a/arch/i386/mach-generic/default.c +++ b/arch/i386/mach-generic/default.c @@ -18,7 +18,7 @@ #include <asm/mach-default/mach_mpparse.h> /* should be called last. */ -static __init int probe_default(void) +static int probe_default(void) { return 1; } diff --git a/arch/i386/mach-generic/es7000.c b/arch/i386/mach-generic/es7000.c index aa144d82334d..b8963a5a3b25 100644 --- a/arch/i386/mach-generic/es7000.c +++ b/arch/i386/mach-generic/es7000.c @@ -19,7 +19,7 @@ #include <asm/mach-es7000/mach_mpparse.h> #include <asm/mach-es7000/mach_wakecpu.h> -static __init int probe_es7000(void) +static int probe_es7000(void) { /* probed later in mptable/ACPI hooks */ return 0; diff --git a/arch/i386/mach-generic/summit.c b/arch/i386/mach-generic/summit.c index f7e5d66648dc..74883ccb8f73 100644 --- a/arch/i386/mach-generic/summit.c +++ b/arch/i386/mach-generic/summit.c @@ -18,7 +18,7 @@ #include <asm/mach-summit/mach_ipi.h> #include <asm/mach-summit/mach_mpparse.h> -static __init int probe_summit(void) +static int probe_summit(void) { /* probed later in mptable/ACPI hooks */ return 0; diff --git a/arch/i386/mach-visws/setup.c b/arch/i386/mach-visws/setup.c index 885c7cbfd478..233ee20907b9 100644 --- a/arch/i386/mach-visws/setup.c +++ b/arch/i386/mach-visws/setup.c @@ -6,6 +6,7 @@ #include <linux/smp.h> #include <linux/init.h> #include <linux/interrupt.h> +#include <linux/module.h> #include <asm/fixmap.h> #include <asm/arch_hooks.h> @@ -142,6 +143,8 @@ void __init time_init_hook(void) unsigned long sgivwfb_mem_phys; unsigned long sgivwfb_mem_size; +EXPORT_SYMBOL(sgivwfb_mem_phys); +EXPORT_SYMBOL(sgivwfb_mem_size); long long mem_size __initdata = 0; diff --git a/arch/i386/mach-voyager/voyager_smp.c b/arch/i386/mach-voyager/voyager_smp.c index 55428e656a3f..74aeedf277f4 100644 --- a/arch/i386/mach-voyager/voyager_smp.c +++ b/arch/i386/mach-voyager/voyager_smp.c @@ -773,6 +773,12 @@ initialize_secondary(void) #endif /* + * switch to the per CPU GDT we already set up + * in do_boot_cpu() + */ + cpu_set_gdt(current_thread_info()->cpu); + + /* * We don't actually need to load the full TSS, * basically just the stack pointer and the eip. */ diff --git a/arch/i386/mm/discontig.c b/arch/i386/mm/discontig.c index 103b76e56a94..e0c390d6ceb5 100644 --- a/arch/i386/mm/discontig.c +++ b/arch/i386/mm/discontig.c @@ -405,3 +405,31 @@ void __init set_highmem_pages_init(int bad_ppro) totalram_pages += totalhigh_pages; #endif } + +#ifdef CONFIG_MEMORY_HOTPLUG +int paddr_to_nid(u64 addr) +{ + int nid; + unsigned long pfn = PFN_DOWN(addr); + + for_each_node(nid) + if (node_start_pfn[nid] <= pfn && + pfn < node_end_pfn[nid]) + return nid; + + return -1; +} + +/* + * This function is used to ask node id BEFORE memmap and mem_section's + * initialization (pfn_to_nid() can't be used yet). + * If _PXM is not defined on ACPI's DSDT, node id must be found by this. + */ +int memory_add_physaddr_to_nid(u64 addr) +{ + int nid = paddr_to_nid(addr); + return (nid >= 0) ? nid : 0; +} + +EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid); +#endif diff --git a/arch/i386/mm/init.c b/arch/i386/mm/init.c index 84697dfc7348..c5c5ea700cc7 100644 --- a/arch/i386/mm/init.c +++ b/arch/i386/mm/init.c @@ -283,7 +283,7 @@ void __init add_one_highpage_init(struct page *page, int pfn, int bad_ppro) SetPageReserved(page); } -static int add_one_highpage_hotplug(struct page *page, unsigned long pfn) +static int __meminit add_one_highpage_hotplug(struct page *page, unsigned long pfn) { free_new_highpage(page); totalram_pages++; @@ -300,7 +300,7 @@ static int add_one_highpage_hotplug(struct page *page, unsigned long pfn) * has been added dynamically that would be * onlined here is in HIGHMEM */ -void online_page(struct page *page) +void __meminit online_page(struct page *page) { ClearPageReserved(page); add_one_highpage_hotplug(page, page_to_pfn(page)); @@ -673,16 +673,10 @@ void __init mem_init(void) #endif } -/* - * this is for the non-NUMA, single node SMP system case. - * Specifically, in the case of x86, we will always add - * memory to the highmem for now. - */ #ifdef CONFIG_MEMORY_HOTPLUG -#ifndef CONFIG_NEED_MULTIPLE_NODES int arch_add_memory(int nid, u64 start, u64 size) { - struct pglist_data *pgdata = &contig_page_data; + struct pglist_data *pgdata = NODE_DATA(nid); struct zone *zone = pgdata->node_zones + ZONE_HIGHMEM; unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; @@ -694,7 +688,7 @@ int remove_memory(u64 start, u64 size) { return -EINVAL; } -#endif +EXPORT_SYMBOL_GPL(remove_memory); #endif struct kmem_cache *pgd_cache; diff --git a/arch/i386/pci/fixup.c b/arch/i386/pci/fixup.c index cde1170b01a1..8053b17ab647 100644 --- a/arch/i386/pci/fixup.c +++ b/arch/i386/pci/fixup.c @@ -115,7 +115,7 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82371AB_3, pci #define VIA_8363_KL133_REVISION_ID 0x81 #define VIA_8363_KM133_REVISION_ID 0x84 -static void __devinit pci_fixup_via_northbridge_bug(struct pci_dev *d) +static void pci_fixup_via_northbridge_bug(struct pci_dev *d) { u8 v; u8 revision; @@ -151,6 +151,10 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8363_0, pci_fixup_ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8622, pci_fixup_via_northbridge_bug); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8361, pci_fixup_via_northbridge_bug); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8367_0, pci_fixup_via_northbridge_bug); +DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8363_0, pci_fixup_via_northbridge_bug); +DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8622, pci_fixup_via_northbridge_bug); +DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8361, pci_fixup_via_northbridge_bug); +DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8367_0, pci_fixup_via_northbridge_bug); /* * For some reasons Intel decided that certain parts of their @@ -181,7 +185,7 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_ANY_ID, pci_fixup_transparent_ * issue another HALT within 80 ns of the initial HALT, the failure condition * is avoided. */ -static void __init pci_fixup_nforce2(struct pci_dev *dev) +static void pci_fixup_nforce2(struct pci_dev *dev) { u32 val; @@ -204,6 +208,7 @@ static void __init pci_fixup_nforce2(struct pci_dev *dev) } } DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE2, pci_fixup_nforce2); +DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE2, pci_fixup_nforce2); /* Max PCI Express root ports */ #define MAX_PCIEROOT 6 @@ -419,7 +424,7 @@ DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_TI, 0x8032, * Prevent the BIOS trapping accesses to the Cyrix CS5530A video device * configuration space. */ -static void __devinit pci_early_fixup_cyrix_5530(struct pci_dev *dev) +static void pci_early_fixup_cyrix_5530(struct pci_dev *dev) { u8 r; /* clear 'F4 Video Configuration Trap' bit */ @@ -429,3 +434,5 @@ static void __devinit pci_early_fixup_cyrix_5530(struct pci_dev *dev) } DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5530_LEGACY, pci_early_fixup_cyrix_5530); +DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5530_LEGACY, + pci_early_fixup_cyrix_5530); diff --git a/arch/i386/pci/mmconfig.c b/arch/i386/pci/mmconfig.c index c6b6d9bbc453..e2616a266e13 100644 --- a/arch/i386/pci/mmconfig.c +++ b/arch/i386/pci/mmconfig.c @@ -26,6 +26,7 @@ /* The base address of the last MMCONFIG device accessed */ static u32 mmcfg_last_accessed_device; +static int mmcfg_last_accessed_cpu; static DECLARE_BITMAP(fallback_slots, MAX_CHECK_BUS*32); @@ -73,8 +74,11 @@ static u32 get_base_addr(unsigned int seg, int bus, unsigned devfn) static void pci_exp_set_dev_base(unsigned int base, int bus, int devfn) { u32 dev_base = base | (bus << 20) | (devfn << 12); - if (dev_base != mmcfg_last_accessed_device) { + int cpu = smp_processor_id(); + if (dev_base != mmcfg_last_accessed_device || + cpu != mmcfg_last_accessed_cpu) { mmcfg_last_accessed_device = dev_base; + mmcfg_last_accessed_cpu = cpu; set_fixmap_nocache(FIX_PCIE_MCFG, dev_base); } } |