From fc66c5210ec2539e800e87d7b3a985323c7be96e Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Sat, 19 Mar 2011 18:20:05 +0100 Subject: perf, x86: Fix Intel fixed counters base initialization The following patch solves the problems introduced by Robert's commit 41bf498 and reported by Arun Sharma. This commit gets rid of the base + index notation for reading and writing PMU msrs. The problem is that for fixed counters, the new calculation for the base did not take into account the fixed counter indexes, thus all fixed counters were read/written from fixed counter 0. Although all fixed counters share the same config MSR, they each have their own counter register. Without: $ task -e unhalted_core_cycles -e instructions_retired -e baclears noploop 1 noploop for 1 seconds 242202299 unhalted_core_cycles (0.00% scaling, ena=1000790892, run=1000790892) 2389685946 instructions_retired (0.00% scaling, ena=1000790892, run=1000790892) 49473 baclears (0.00% scaling, ena=1000790892, run=1000790892) With: $ task -e unhalted_core_cycles -e instructions_retired -e baclears noploop 1 noploop for 1 seconds 2392703238 unhalted_core_cycles (0.00% scaling, ena=1000840809, run=1000840809) 2389793744 instructions_retired (0.00% scaling, ena=1000840809, run=1000840809) 47863 baclears (0.00% scaling, ena=1000840809, run=1000840809) Signed-off-by: Stephane Eranian Cc: peterz@infradead.org Cc: ming.m.lin@intel.com Cc: robert.richter@amd.com Cc: asharma@fb.com Cc: perfmon2-devel@lists.sf.net LKML-Reference: <20110319172005.GB4978@quad> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index e8dbe179587f..ec46eea0c4ed 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -912,7 +912,7 @@ static inline void x86_assign_hw_event(struct perf_event *event, hwc->event_base = 0; } else if 
(hwc->idx >= X86_PMC_IDX_FIXED) { hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL; - hwc->event_base = MSR_ARCH_PERFMON_FIXED_CTR0; + hwc->event_base = MSR_ARCH_PERFMON_FIXED_CTR0 + (hwc->idx - X86_PMC_IDX_FIXED); } else { hwc->config_base = x86_pmu_config_addr(hwc->idx); hwc->event_base = x86_pmu_event_addr(hwc->idx); -- cgit v1.2.1 From cbb84c4cc1ad0ab8faaffd899ccc9b14a88c91be Mon Sep 17 00:00:00 2001 From: Rakib Mullick Date: Tue, 22 Mar 2011 15:24:54 +0600 Subject: x86, mpparse: Move check_slot into CONFIG_X86_IO_APIC context When CONFIG_X86_MPPARSE=y and CONFIG_X86_IO_APIC=n, then we get the following warning: arch/x86/kernel/mpparse.c:723: warning: 'check_slot' defined but not used So, put check_slot into CONFIG_X86_IO_APIC context. Its only called from CONFIG_X86_IO_APIC=y context. Signed-off-by: Rakib Mullick LKML-Reference: Signed-off-by: Ingo Molnar --- arch/x86/kernel/mpparse.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 6f789a887c06..5a532ce646bf 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -714,10 +714,6 @@ static void __init check_irq_src(struct mpc_intsrc *m, int *nr_m_spare) *nr_m_spare += 1; } } -#else /* CONFIG_X86_IO_APIC */ -static -inline void __init check_irq_src(struct mpc_intsrc *m, int *nr_m_spare) {} -#endif /* CONFIG_X86_IO_APIC */ static int check_slot(unsigned long mpc_new_phys, unsigned long mpc_new_length, int count) @@ -731,6 +727,10 @@ check_slot(unsigned long mpc_new_phys, unsigned long mpc_new_length, int count) return ret; } +#else /* CONFIG_X86_IO_APIC */ +static +inline void __init check_irq_src(struct mpc_intsrc *m, int *nr_m_spare) {} +#endif /* CONFIG_X86_IO_APIC */ static int __init replace_intsrc_all(struct mpc_table *mpc, unsigned long mpc_new_phys, -- cgit v1.2.1 From f3c6ea1b06c71b43f751b36bd99345369fe911af Mon Sep 17 00:00:00 2001 From: "Rafael J. 
Wysocki" Date: Wed, 23 Mar 2011 22:15:54 +0100 Subject: x86: Use syscore_ops instead of sysdev classes and sysdevs Some subsystems in the x86 tree need to carry out suspend/resume and shutdown operations with one CPU on-line and interrupts disabled and they define sysdev classes and sysdevs or sysdev drivers for this purpose. This leads to unnecessarily complicated code and excessive memory usage, so switch them to using struct syscore_ops objects for this purpose instead. Generally, there are three categories of subsystems that use sysdevs for implementing PM operations: (1) subsystems whose suspend/resume callbacks ignore their arguments entirely (the majority), (2) subsystems whose suspend/resume callbacks use their struct sys_device argument, but don't really need to do that, because they can be implemented differently in an arguably simpler way (io_apic.c), and (3) subsystems whose suspend/resume callbacks use their struct sys_device argument, but the value of that argument is always the same and could be ignored (microcode_core.c). In all of these cases the subsystems in question may be readily converted to using struct syscore_ops objects for power management and shutdown. Signed-off-by: Rafael J. 
Wysocki Reviewed-by: Thomas Gleixner Acked-by: Ingo Molnar --- arch/x86/kernel/amd_iommu_init.c | 26 +++-------- arch/x86/kernel/apic/apic.c | 33 ++++---------- arch/x86/kernel/apic/io_apic.c | 97 +++++++++++++++++++--------------------- arch/x86/kernel/cpu/mcheck/mce.c | 21 +++++---- arch/x86/kernel/cpu/mtrr/main.c | 10 ++--- arch/x86/kernel/i8237.c | 30 +++---------- arch/x86/kernel/i8259.c | 33 +++++--------- arch/x86/kernel/microcode_core.c | 34 ++++++-------- arch/x86/kernel/pci-gart_64.c | 32 +++---------- arch/x86/oprofile/nmi_int.c | 44 +++++------------- 10 files changed, 128 insertions(+), 232 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 6e11c8134158..246d727b65b7 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include #include #include @@ -1260,7 +1260,7 @@ static void disable_iommus(void) * disable suspend until real resume implemented */ -static int amd_iommu_resume(struct sys_device *dev) +static void amd_iommu_resume(void) { struct amd_iommu *iommu; @@ -1276,11 +1276,9 @@ static int amd_iommu_resume(struct sys_device *dev) */ amd_iommu_flush_all_devices(); amd_iommu_flush_all_domains(); - - return 0; } -static int amd_iommu_suspend(struct sys_device *dev, pm_message_t state) +static int amd_iommu_suspend(void) { /* disable IOMMUs to go out of the way for BIOS */ disable_iommus(); @@ -1288,17 +1286,11 @@ static int amd_iommu_suspend(struct sys_device *dev, pm_message_t state) return 0; } -static struct sysdev_class amd_iommu_sysdev_class = { - .name = "amd_iommu", +static struct syscore_ops amd_iommu_syscore_ops = { .suspend = amd_iommu_suspend, .resume = amd_iommu_resume, }; -static struct sys_device device_amd_iommu = { - .id = 0, - .cls = &amd_iommu_sysdev_class, -}; - /* * This is the core init function for AMD IOMMU hardware in the system. 
* This function is called from the generic x86 DMA layer initialization @@ -1415,14 +1407,6 @@ static int __init amd_iommu_init(void) goto free; } - ret = sysdev_class_register(&amd_iommu_sysdev_class); - if (ret) - goto free; - - ret = sysdev_register(&device_amd_iommu); - if (ret) - goto free; - ret = amd_iommu_init_devices(); if (ret) goto free; @@ -1441,6 +1425,8 @@ static int __init amd_iommu_init(void) amd_iommu_init_notifier(); + register_syscore_ops(&amd_iommu_syscore_ops); + if (iommu_pass_through) goto out; diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 966673f44141..fabf01eff771 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include #include #include @@ -2046,7 +2046,7 @@ static struct { unsigned int apic_thmr; } apic_pm_state; -static int lapic_suspend(struct sys_device *dev, pm_message_t state) +static int lapic_suspend(void) { unsigned long flags; int maxlvt; @@ -2084,23 +2084,21 @@ static int lapic_suspend(struct sys_device *dev, pm_message_t state) return 0; } -static int lapic_resume(struct sys_device *dev) +static void lapic_resume(void) { unsigned int l, h; unsigned long flags; - int maxlvt; - int ret = 0; + int maxlvt, ret; struct IO_APIC_route_entry **ioapic_entries = NULL; if (!apic_pm_state.active) - return 0; + return; local_irq_save(flags); if (intr_remapping_enabled) { ioapic_entries = alloc_ioapic_entries(); if (!ioapic_entries) { WARN(1, "Alloc ioapic_entries in lapic resume failed."); - ret = -ENOMEM; goto restore; } @@ -2162,8 +2160,6 @@ static int lapic_resume(struct sys_device *dev) } restore: local_irq_restore(flags); - - return ret; } /* @@ -2171,17 +2167,11 @@ restore: * are needed on every CPU up until machine_halt/restart/poweroff. 
*/ -static struct sysdev_class lapic_sysclass = { - .name = "lapic", +static struct syscore_ops lapic_syscore_ops = { .resume = lapic_resume, .suspend = lapic_suspend, }; -static struct sys_device device_lapic = { - .id = 0, - .cls = &lapic_sysclass, -}; - static void __cpuinit apic_pm_activate(void) { apic_pm_state.active = 1; @@ -2189,16 +2179,11 @@ static void __cpuinit apic_pm_activate(void) static int __init init_lapic_sysfs(void) { - int error; - - if (!cpu_has_apic) - return 0; /* XXX: remove suspend/resume procs if !apic_pm_state.active? */ + if (cpu_has_apic) + register_syscore_ops(&lapic_syscore_ops); - error = sysdev_class_register(&lapic_sysclass); - if (!error) - error = sysdev_register(&device_lapic); - return error; + return 0; } /* local apic needs to resume before other devices access its registers. */ diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 180ca240e03c..68df09bba92e 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -30,7 +30,7 @@ #include #include #include -#include +#include #include #include #include @@ -2918,89 +2918,84 @@ static int __init io_apic_bug_finalize(void) late_initcall(io_apic_bug_finalize); -struct sysfs_ioapic_data { - struct sys_device dev; - struct IO_APIC_route_entry entry[0]; -}; -static struct sysfs_ioapic_data * mp_ioapic_data[MAX_IO_APICS]; +static struct IO_APIC_route_entry *ioapic_saved_data[MAX_IO_APICS]; -static int ioapic_suspend(struct sys_device *dev, pm_message_t state) +static void suspend_ioapic(int ioapic_id) { - struct IO_APIC_route_entry *entry; - struct sysfs_ioapic_data *data; + struct IO_APIC_route_entry *saved_data = ioapic_saved_data[ioapic_id]; int i; - data = container_of(dev, struct sysfs_ioapic_data, dev); - entry = data->entry; - for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ ) - *entry = ioapic_read_entry(dev->id, i); + if (!saved_data) + return; + + for (i = 0; i < nr_ioapic_registers[ioapic_id]; i++) + 
saved_data[i] = ioapic_read_entry(ioapic_id, i); +} + +static int ioapic_suspend(void) +{ + int ioapic_id; + + for (ioapic_id = 0; ioapic_id < nr_ioapics; ioapic_id++) + suspend_ioapic(ioapic_id); return 0; } -static int ioapic_resume(struct sys_device *dev) +static void resume_ioapic(int ioapic_id) { - struct IO_APIC_route_entry *entry; - struct sysfs_ioapic_data *data; + struct IO_APIC_route_entry *saved_data = ioapic_saved_data[ioapic_id]; unsigned long flags; union IO_APIC_reg_00 reg_00; int i; - data = container_of(dev, struct sysfs_ioapic_data, dev); - entry = data->entry; + if (!saved_data) + return; raw_spin_lock_irqsave(&ioapic_lock, flags); - reg_00.raw = io_apic_read(dev->id, 0); - if (reg_00.bits.ID != mp_ioapics[dev->id].apicid) { - reg_00.bits.ID = mp_ioapics[dev->id].apicid; - io_apic_write(dev->id, 0, reg_00.raw); + reg_00.raw = io_apic_read(ioapic_id, 0); + if (reg_00.bits.ID != mp_ioapics[ioapic_id].apicid) { + reg_00.bits.ID = mp_ioapics[ioapic_id].apicid; + io_apic_write(ioapic_id, 0, reg_00.raw); } raw_spin_unlock_irqrestore(&ioapic_lock, flags); - for (i = 0; i < nr_ioapic_registers[dev->id]; i++) - ioapic_write_entry(dev->id, i, entry[i]); + for (i = 0; i < nr_ioapic_registers[ioapic_id]; i++) + ioapic_write_entry(ioapic_id, i, saved_data[i]); +} - return 0; +static void ioapic_resume(void) +{ + int ioapic_id; + + for (ioapic_id = nr_ioapics - 1; ioapic_id >= 0; ioapic_id--) + resume_ioapic(ioapic_id); } -static struct sysdev_class ioapic_sysdev_class = { - .name = "ioapic", +static struct syscore_ops ioapic_syscore_ops = { .suspend = ioapic_suspend, .resume = ioapic_resume, }; -static int __init ioapic_init_sysfs(void) +static int __init ioapic_init_ops(void) { - struct sys_device * dev; - int i, size, error; + int i; - error = sysdev_class_register(&ioapic_sysdev_class); - if (error) - return error; + for (i = 0; i < nr_ioapics; i++) { + unsigned int size; - for (i = 0; i < nr_ioapics; i++ ) { - size = sizeof(struct sys_device) + 
nr_ioapic_registers[i] + size = nr_ioapic_registers[i] * sizeof(struct IO_APIC_route_entry); - mp_ioapic_data[i] = kzalloc(size, GFP_KERNEL); - if (!mp_ioapic_data[i]) { - printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i); - continue; - } - dev = &mp_ioapic_data[i]->dev; - dev->id = i; - dev->cls = &ioapic_sysdev_class; - error = sysdev_register(dev); - if (error) { - kfree(mp_ioapic_data[i]); - mp_ioapic_data[i] = NULL; - printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i); - continue; - } + ioapic_saved_data[i] = kzalloc(size, GFP_KERNEL); + if (!ioapic_saved_data[i]) + pr_err("IOAPIC %d: suspend/resume impossible!\n", i); } + register_syscore_ops(&ioapic_syscore_ops); + return 0; } -device_initcall(ioapic_init_sysfs); +device_initcall(ioapic_init_ops); /* * Dynamic irq allocate and deallocation diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index ab1122998dba..5a05ef63eb4a 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -1749,14 +1750,14 @@ static int mce_disable_error_reporting(void) return 0; } -static int mce_suspend(struct sys_device *dev, pm_message_t state) +static int mce_suspend(void) { return mce_disable_error_reporting(); } -static int mce_shutdown(struct sys_device *dev) +static void mce_shutdown(void) { - return mce_disable_error_reporting(); + mce_disable_error_reporting(); } /* @@ -1764,14 +1765,18 @@ static int mce_shutdown(struct sys_device *dev) * Only one CPU is active at this time, the others get re-added later using * CPU hotplug: */ -static int mce_resume(struct sys_device *dev) +static void mce_resume(void) { __mcheck_cpu_init_generic(); __mcheck_cpu_init_vendor(__this_cpu_ptr(&cpu_info)); - - return 0; } +static struct syscore_ops mce_syscore_ops = { + .suspend = mce_suspend, + .shutdown = mce_shutdown, + .resume = mce_resume, +}; + static void mce_cpu_restart(void *data) { 
del_timer_sync(&__get_cpu_var(mce_timer)); @@ -1808,9 +1813,6 @@ static void mce_enable_ce(void *all) } static struct sysdev_class mce_sysclass = { - .suspend = mce_suspend, - .shutdown = mce_shutdown, - .resume = mce_resume, .name = "machinecheck", }; @@ -2139,6 +2141,7 @@ static __init int mcheck_init_device(void) return err; } + register_syscore_ops(&mce_syscore_ops); register_hotcpu_notifier(&mce_cpu_notifier); misc_register(&mce_log_device); diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index bebabec5b448..307dfbbf4a8e 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -45,6 +45,7 @@ #include #include #include +#include #include #include @@ -630,7 +631,7 @@ struct mtrr_value { static struct mtrr_value mtrr_value[MTRR_MAX_VAR_RANGES]; -static int mtrr_save(struct sys_device *sysdev, pm_message_t state) +static int mtrr_save(void) { int i; @@ -642,7 +643,7 @@ static int mtrr_save(struct sys_device *sysdev, pm_message_t state) return 0; } -static int mtrr_restore(struct sys_device *sysdev) +static void mtrr_restore(void) { int i; @@ -653,12 +654,11 @@ static int mtrr_restore(struct sys_device *sysdev) mtrr_value[i].ltype); } } - return 0; } -static struct sysdev_driver mtrr_sysdev_driver = { +static struct syscore_ops mtrr_syscore_ops = { .suspend = mtrr_save, .resume = mtrr_restore, }; @@ -839,7 +839,7 @@ static int __init mtrr_init_finialize(void) * TBD: is there any system with such CPU which supports * suspend/resume? If no, we should remove the code. */ - sysdev_driver_register(&cpu_sysdev_class, &mtrr_sysdev_driver); + register_syscore_ops(&mtrr_syscore_ops); return 0; } diff --git a/arch/x86/kernel/i8237.c b/arch/x86/kernel/i8237.c index b42ca694dc68..8eeaa81de066 100644 --- a/arch/x86/kernel/i8237.c +++ b/arch/x86/kernel/i8237.c @@ -10,7 +10,7 @@ */ #include -#include +#include #include @@ -21,7 +21,7 @@ * in asm/dma.h. 
*/ -static int i8237A_resume(struct sys_device *dev) +static void i8237A_resume(void) { unsigned long flags; int i; @@ -41,31 +41,15 @@ static int i8237A_resume(struct sys_device *dev) enable_dma(4); release_dma_lock(flags); - - return 0; } -static int i8237A_suspend(struct sys_device *dev, pm_message_t state) -{ - return 0; -} - -static struct sysdev_class i8237_sysdev_class = { - .name = "i8237", - .suspend = i8237A_suspend, +static struct syscore_ops i8237_syscore_ops = { .resume = i8237A_resume, }; -static struct sys_device device_i8237A = { - .id = 0, - .cls = &i8237_sysdev_class, -}; - -static int __init i8237A_init_sysfs(void) +static int __init i8237A_init_ops(void) { - int error = sysdev_class_register(&i8237_sysdev_class); - if (!error) - error = sysdev_register(&device_i8237A); - return error; + register_syscore_ops(&i8237_syscore_ops); + return 0; } -device_initcall(i8237A_init_sysfs); +device_initcall(i8237A_init_ops); diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c index d9ca749c123b..65b8f5c2eebf 100644 --- a/arch/x86/kernel/i8259.c +++ b/arch/x86/kernel/i8259.c @@ -8,7 +8,7 @@ #include #include #include -#include +#include #include #include #include @@ -245,20 +245,19 @@ static void save_ELCR(char *trigger) trigger[1] = inb(0x4d1) & 0xDE; } -static int i8259A_resume(struct sys_device *dev) +static void i8259A_resume(void) { init_8259A(i8259A_auto_eoi); restore_ELCR(irq_trigger); - return 0; } -static int i8259A_suspend(struct sys_device *dev, pm_message_t state) +static int i8259A_suspend(void) { save_ELCR(irq_trigger); return 0; } -static int i8259A_shutdown(struct sys_device *dev) +static void i8259A_shutdown(void) { /* Put the i8259A into a quiescent state that * the kernel initialization code can get it @@ -266,21 +265,14 @@ static int i8259A_shutdown(struct sys_device *dev) */ outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */ outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-1 */ - return 0; } -static struct sysdev_class 
i8259_sysdev_class = { - .name = "i8259", +static struct syscore_ops i8259_syscore_ops = { .suspend = i8259A_suspend, .resume = i8259A_resume, .shutdown = i8259A_shutdown, }; -static struct sys_device device_i8259A = { - .id = 0, - .cls = &i8259_sysdev_class, -}; - static void mask_8259A(void) { unsigned long flags; @@ -399,17 +391,12 @@ struct legacy_pic default_legacy_pic = { struct legacy_pic *legacy_pic = &default_legacy_pic; -static int __init i8259A_init_sysfs(void) +static int __init i8259A_init_ops(void) { - int error; - - if (legacy_pic != &default_legacy_pic) - return 0; + if (legacy_pic == &default_legacy_pic) + register_syscore_ops(&i8259_syscore_ops); - error = sysdev_class_register(&i8259_sysdev_class); - if (!error) - error = sysdev_register(&device_i8259A); - return error; + return 0; } -device_initcall(i8259A_init_sysfs); +device_initcall(i8259A_init_ops); diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c index 87af68e0e1e1..5ed0ab549eb8 100644 --- a/arch/x86/kernel/microcode_core.c +++ b/arch/x86/kernel/microcode_core.c @@ -82,6 +82,7 @@ #include #include #include +#include #include #include @@ -438,33 +439,25 @@ static int mc_sysdev_remove(struct sys_device *sys_dev) return 0; } -static int mc_sysdev_resume(struct sys_device *dev) +static struct sysdev_driver mc_sysdev_driver = { + .add = mc_sysdev_add, + .remove = mc_sysdev_remove, +}; + +/** + * mc_bp_resume - Update boot CPU microcode during resume. + */ +static void mc_bp_resume(void) { - int cpu = dev->id; + int cpu = smp_processor_id(); struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - if (!cpu_online(cpu)) - return 0; - - /* - * All non-bootup cpus are still disabled, - * so only CPU 0 will apply ucode here. - * - * Moreover, there can be no concurrent - * updates from any other places at this point. 
- */ - WARN_ON(cpu != 0); - if (uci->valid && uci->mc) microcode_ops->apply_microcode(cpu); - - return 0; } -static struct sysdev_driver mc_sysdev_driver = { - .add = mc_sysdev_add, - .remove = mc_sysdev_remove, - .resume = mc_sysdev_resume, +static struct syscore_ops mc_syscore_ops = { + .resume = mc_bp_resume, }; static __cpuinit int @@ -542,6 +535,7 @@ static int __init microcode_init(void) if (error) return error; + register_syscore_ops(&mc_syscore_ops); register_hotcpu_notifier(&mc_cpu_notifier); pr_info("Microcode Update Driver: v" MICROCODE_VERSION diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index c01ffa5b9b87..82ada01625b9 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -27,7 +27,7 @@ #include #include #include -#include +#include #include #include #include @@ -589,7 +589,7 @@ void set_up_gart_resume(u32 aper_order, u32 aper_alloc) aperture_alloc = aper_alloc; } -static void gart_fixup_northbridges(struct sys_device *dev) +static void gart_fixup_northbridges(void) { int i; @@ -613,33 +613,20 @@ static void gart_fixup_northbridges(struct sys_device *dev) } } -static int gart_resume(struct sys_device *dev) +static void gart_resume(void) { pr_info("PCI-DMA: Resuming GART IOMMU\n"); - gart_fixup_northbridges(dev); + gart_fixup_northbridges(); enable_gart_translations(); - - return 0; } -static int gart_suspend(struct sys_device *dev, pm_message_t state) -{ - return 0; -} - -static struct sysdev_class gart_sysdev_class = { - .name = "gart", - .suspend = gart_suspend, +static struct syscore_ops gart_syscore_ops = { .resume = gart_resume, }; -static struct sys_device device_gart = { - .cls = &gart_sysdev_class, -}; - /* * Private Northbridge GATT initialization in case we cannot use the * AGP driver for some reason. 
@@ -650,7 +637,7 @@ static __init int init_amd_gatt(struct agp_kern_info *info) unsigned aper_base, new_aper_base; struct pci_dev *dev; void *gatt; - int i, error; + int i; pr_info("PCI-DMA: Disabling AGP.\n"); @@ -685,12 +672,7 @@ static __init int init_amd_gatt(struct agp_kern_info *info) agp_gatt_table = gatt; - error = sysdev_class_register(&gart_sysdev_class); - if (!error) - error = sysdev_register(&device_gart); - if (error) - panic("Could not register gart_sysdev -- " - "would corrupt data on next suspend"); + register_syscore_ops(&gart_syscore_ops); flush_gart(); diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index e2b7b0c06cdf..8dace181c88e 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include #include #include @@ -536,7 +536,7 @@ static void nmi_shutdown(void) #ifdef CONFIG_PM -static int nmi_suspend(struct sys_device *dev, pm_message_t state) +static int nmi_suspend(void) { /* Only one CPU left, just stop that one */ if (nmi_enabled == 1) @@ -544,49 +544,31 @@ static int nmi_suspend(struct sys_device *dev, pm_message_t state) return 0; } -static int nmi_resume(struct sys_device *dev) +static void nmi_resume(void) { if (nmi_enabled == 1) nmi_cpu_start(NULL); - return 0; } -static struct sysdev_class oprofile_sysclass = { - .name = "oprofile", +static struct syscore_ops oprofile_syscore_ops = { .resume = nmi_resume, .suspend = nmi_suspend, }; -static struct sys_device device_oprofile = { - .id = 0, - .cls = &oprofile_sysclass, -}; - -static int __init init_sysfs(void) +static void __init init_suspend_resume(void) { - int error; - - error = sysdev_class_register(&oprofile_sysclass); - if (error) - return error; - - error = sysdev_register(&device_oprofile); - if (error) - sysdev_class_unregister(&oprofile_sysclass); - - return error; + register_syscore_ops(&oprofile_syscore_ops); } -static void exit_sysfs(void) +static void 
exit_suspend_resume(void) { - sysdev_unregister(&device_oprofile); - sysdev_class_unregister(&oprofile_sysclass); + unregister_syscore_ops(&oprofile_syscore_ops); } #else -static inline int init_sysfs(void) { return 0; } -static inline void exit_sysfs(void) { } +static inline void init_suspend_resume(void) { } +static inline void exit_suspend_resume(void) { } #endif /* CONFIG_PM */ @@ -789,9 +771,7 @@ int __init op_nmi_init(struct oprofile_operations *ops) mux_init(ops); - ret = init_sysfs(); - if (ret) - return ret; + init_suspend_resume(); printk(KERN_INFO "oprofile: using NMI interrupt.\n"); return 0; @@ -799,5 +779,5 @@ int __init op_nmi_init(struct oprofile_operations *ops) void op_nmi_exit(void) { - exit_sysfs(); + exit_suspend_resume(); } -- cgit v1.2.1 From d47d81c0e9abdc3c88653fabff5beae82c949b09 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 23 Mar 2011 22:16:41 +0100 Subject: Introduce ARCH_NO_SYSDEV_OPS config option (v2) Introduce Kconfig option allowing architectures where sysdev operations used during system suspend, resume and shutdown have been completely replaced with struct syscore_ops operations to avoid building sysdev code that will never be used. Make callbacks in struct sys_device and struct sysdev_driver depend on ARCH_NO_SYSDEV_OPS to allow us to verify if all of the references have been actually removed from the code the given architecture depends on. Make x86 select ARCH_NO_SYSDEV_OPS. Signed-off-by: Rafael J. 
Wysocki --- arch/x86/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index d57ddd7573cc..b1cd5a96a511 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -71,6 +71,7 @@ config X86 select GENERIC_IRQ_SHOW select IRQ_FORCED_THREADING select USE_GENERIC_SMP_HELPERS if SMP + select ARCH_NO_SYSDEV_OPS config INSTRUCTION_DECODER def_bool (KPROBES || PERF_EVENTS) -- cgit v1.2.1 From 71f9e59800e5ad4e6b683348424c9fe54306cd43 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 24 Mar 2011 11:42:30 +0900 Subject: x86, dumpstack: Use %pB format specifier for stack trace Improve noreturn function entries in call traces: Before: Call Trace: [] panic+0x8c/0x18d [] deep01+0x0/0x38 [test_panic] <--- bad [] proc_file_write+0x73/0x8d [] proc_reg_write+0x8d/0xac [] vfs_write+0xa1/0xc5 [] sys_write+0x45/0x6c [] system_call_fastpath+0x16/0x1b After: Call Trace: [] panic+0x8c/0x18d [] panic_write+0x20/0x20 [test_panic] <--- good [] proc_file_write+0x73/0x8d [] proc_reg_write+0x8d/0xac [] vfs_write+0xa1/0xc5 [] sys_write+0x45/0x6c [] system_call_fastpath+0x16/0x1b Signed-off-by: Namhyung Kim Acked-by: Frederic Weisbecker Cc: Steven Rostedt Cc: Linus Torvalds Cc: Andrew Morton LKML-Reference: <1300934550-21394-2-git-send-email-namhyung@gmail.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/dumpstack.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index 999e2793590b..24d0479025f9 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -27,7 +27,7 @@ static int die_counter; void printk_address(unsigned long address, int reliable) { - printk(" [<%p>] %s%pS\n", (void *) address, + printk(" [<%p>] %s%pB\n", (void *) address, reliable ? "" : "? 
", (void *) address); } -- cgit v1.2.1 From 242214f9c1eeaae40eca11e3b4d37bfce960a7cd Mon Sep 17 00:00:00 2001 From: Don Zickus Date: Thu, 24 Mar 2011 23:36:25 +0300 Subject: perf, x86: P4 PMU - Read proper MSR register to catch unflagged overflows The read of a proper MSR register was missed and instead of the counter, the configuration register was tested (it has ARCH_P4_UNFLAGGED_BIT always cleared) leading to unknown NMI hitting the system. As a result, the user may obtain "Dazed and confused, but trying to continue" message. Fix it by reading a proper MSR register. When an NMI happens on a P4, the perf nmi handler checks the configuration register to see if the overflow bit is set or not before taking appropriate action. Unfortunately, various P4 machines had a broken overflow bit, so a backup mechanism was implemented. This mechanism checked to see if the counter rolled over or not. A previous commit that implemented this backup mechanism was broken. Instead of reading the counter register, it used the configuration register to determine if the counter rolled over or not. Reading that bit would give incorrect results. This would lead to 'Dazed and confused' messages for the end user when using the perf tool (or if the nmi watchdog is running). The fix is to read the counter register before determining if the counter rolled over or not. 
Signed-off-by: Don Zickus Signed-off-by: Cyrill Gorcunov Cc: Lin Ming LKML-Reference: <4D8BAB49.3080701@openvz.org> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_p4.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c index 3769ac822f96..d3d7b59841e5 100644 --- a/arch/x86/kernel/cpu/perf_event_p4.c +++ b/arch/x86/kernel/cpu/perf_event_p4.c @@ -777,6 +777,7 @@ static inline int p4_pmu_clear_cccr_ovf(struct hw_perf_event *hwc) * the counter has reached zero value and continued counting before * real NMI signal was received: */ + rdmsrl(hwc->event_base, v); if (!(v & ARCH_P4_UNFLAGGED_BIT)) return 1; -- cgit v1.2.1 From 00a30b254b88d2d4f5af00835a9b7f70326def9b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 24 Mar 2011 22:53:10 +0100 Subject: x86: DT: Fix return condition in irq_create_of_mapping() The xlate() function returns 0 or a negative error code. Returning the error code blindly will be seen as a huge irq number by the calling function because irq_create_of_mapping() returns an unsigned value. Return 0 (NO_IRQ) as required. 
Signed-off-by: Thomas Gleixner Cc: Sebastian Andrzej Siewior --- arch/x86/kernel/devicetree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c index 7a8cebc9ff29..9c91badb6ca9 100644 --- a/arch/x86/kernel/devicetree.c +++ b/arch/x86/kernel/devicetree.c @@ -65,7 +65,7 @@ unsigned int irq_create_of_mapping(struct device_node *controller, return 0; ret = ih->xlate(ih, intspec, intsize, &virq, &type); if (ret) - return ret; + return 0; if (type == IRQ_TYPE_NONE) return virq; /* set the mask if it is different from current */ -- cgit v1.2.1 From 07611dbda5ccbd9a628f29686d62bafdd007db7b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 24 Mar 2011 21:41:57 +0100 Subject: x86: DT: Cleanup namespace and call irq_set_irq_type() unconditional That call escaped the name space cleanup. Fix it up. We really want to call there. The chip might have changed since the irq was setup initially. So let the core code and the chip decide what to do. The status is just an unreliable snapshot. 
Signed-off-by: Thomas Gleixner Cc: Sebastian Andrzej Siewior --- arch/x86/kernel/devicetree.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c index 9c91badb6ca9..706a9fb46a58 100644 --- a/arch/x86/kernel/devicetree.c +++ b/arch/x86/kernel/devicetree.c @@ -68,9 +68,7 @@ unsigned int irq_create_of_mapping(struct device_node *controller, return 0; if (type == IRQ_TYPE_NONE) return virq; - /* set the mask if it is different from current */ - if (type == (irq_to_desc(virq)->status & IRQF_TRIGGER_MASK)) - set_irq_type(virq, type); + irq_set_irq_type(virq, type); return virq; } EXPORT_SYMBOL_GPL(irq_create_of_mapping); -- cgit v1.2.1 From 45daae575e08bbf7405c5a3633e956fa364d1b4f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 25 Mar 2011 10:24:23 +0100 Subject: perf, x86: Complain louder about BIOSen corrupting CPU/PMU state and continue Eric Dumazet reported that hardware PMU events do not work on his system, due to the BIOS corrupting PMU state: Performance Events: PEBS fmt0+, Core2 events, Broken BIOS detected, using software events only. [Firmware Bug]: the BIOS has corrupted hw-PMU resources (MSR 186 is 43003c) Linus suggested that we continue in the face of such BIOS-induced CPU state corruption: http://lkml.org/lkml/2011/3/24/608 Such BIOSes will have to be fixed - Linux developers rely on a working and fully capable PMU and the BIOS interfering with the CPU's PMU state is simply not acceptable. So this patch changes perf to continue when it detects such BIOS interaction, some hardware events may be unreliable due to the BIOS writing and re-writing them - there's not much the kernel can do about that but to detect the corruption and report it. 
Reported-and-tested-by: Eric Dumazet Suggested-by: Linus Torvalds Acked-by: Peter Zijlstra Cc: Thomas Gleixner Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker Cc: Mike Galbraith Cc: Steven Rostedt LKML-Reference: Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index ec46eea0c4ed..eb00677ee2ae 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -500,12 +500,17 @@ static bool check_hw_exists(void) return true; bios_fail: - printk(KERN_CONT "Broken BIOS detected, using software events only.\n"); + /* + * We still allow the PMU driver to operate: + */ + printk(KERN_CONT "Broken BIOS detected, complain to your hardware vendor.\n"); printk(KERN_ERR FW_BUG "the BIOS has corrupted hw-PMU resources (MSR %x is %Lx)\n", reg, val); - return false; + + return true; msr_fail: printk(KERN_CONT "Broken PMU hardware detected, using software events only.\n"); + return false; } -- cgit v1.2.1 From 21431c2900a0b669080b5bfaae2a7d9d9c026e9b Mon Sep 17 00:00:00 2001 From: Jason Wessel Date: Mon, 15 Mar 2010 07:28:00 -0500 Subject: kgdb,x86_64: fix compile warning found with sparse Fix sparse warning: arch/x86/kernel/kgdb.c:123:9: warning: switch with no cases Reported-by: Namhyung Kim Signed-off-by: Jason Wessel --- arch/x86/kernel/kgdb.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index a4130005028a..3c2fb0f25abd 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -121,8 +121,8 @@ char *dbg_get_reg(int regno, void *mem, struct pt_regs *regs) memcpy(mem, (void *)regs + dbg_reg_def[regno].offset, dbg_reg_def[regno].size); - switch (regno) { #ifdef CONFIG_X86_32 + switch (regno) { case GDB_SS: if (!user_mode_vm(regs)) *(unsigned long *)mem = __KERNEL_DS; @@ 
-135,8 +135,8 @@ char *dbg_get_reg(int regno, void *mem, struct pt_regs *regs) case GDB_FS: *(unsigned long *)mem = 0xFFFF; break; -#endif } +#endif return dbg_reg_def[regno].name; } -- cgit v1.2.1 From fa1df691688f34cbcd5bf77bd084bbe47e9d6bfe Mon Sep 17 00:00:00 2001 From: Andres Salomon Date: Mon, 21 Mar 2011 19:19:35 -0700 Subject: mfd: Add mfd_clone_cell(), convert cs5535-mfd/olpc-xo1 to it Replace mfd_shared_platform_driver_register with mfd_clone_cell. The former was called by an mfd client, and registered both a platform driver and device. The latter is called by an mfd driver, and registers only a platform device. The downside of this is that mfd drivers need to be modified whenever new clients are added that share a cell; the upside is that it fits Linux's driver model better. It's also simpler. This also converts cs5535-mfd/olpc-xo1 from the old API. cs5535-mfd now creates the olpc-xo1-{acpi,pms} devices, while olpc-xo1 binds to them via platform drivers. Signed-off-by: Andres Salomon Signed-off-by: Samuel Ortiz --- arch/x86/platform/olpc/olpc-xo1.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/platform/olpc/olpc-xo1.c b/arch/x86/platform/olpc/olpc-xo1.c index 99513642a0e6..386e3a159cca 100644 --- a/arch/x86/platform/olpc/olpc-xo1.c +++ b/arch/x86/platform/olpc/olpc-xo1.c @@ -121,22 +121,21 @@ static int __init olpc_xo1_init(void) { int r; - r = mfd_shared_platform_driver_register(&cs5535_pms_drv, "cs5535-pms"); + r = platform_driver_register(&cs5535_pms_drv); if (r) return r; - r = mfd_shared_platform_driver_register(&cs5535_acpi_drv, - "cs5535-acpi"); + r = platform_driver_register(&cs5535_acpi_drv); if (r) - mfd_shared_platform_driver_unregister(&cs5535_pms_drv); + platform_driver_unregister(&cs5535_pms_drv); return r; } static void __exit olpc_xo1_exit(void) { - mfd_shared_platform_driver_unregister(&cs5535_acpi_drv); - mfd_shared_platform_driver_unregister(&cs5535_pms_drv); + 
 platform_driver_unregister(&cs5535_acpi_drv); + platform_driver_unregister(&cs5535_pms_drv); } MODULE_AUTHOR("Daniel Drake "); -- cgit v1.2.1 From adfa4bd4a8bfc53ca7370c57be240d35c2ec28e2 Mon Sep 17 00:00:00 2001 From: Daniel Drake Date: Tue, 22 Mar 2011 13:50:39 -0700 Subject: mfd: OLPC: Clean up names to match what OLPC actually uses The cs5535-pms cell doesn't actually need to be cloned, so we can drop that and simply have the olpc-xo1.c driver use "cs5535-pms" directly. Also, rename the cs5535-acpi clones to what we actually use for the (currently out-of-tree) SCI driver. In the process, that fixes a subtle bug in olpc-xo1.c which broke powerdown on XO-1s. olpc-xo1-ac-acpi was a typo, not something that actually existed. Signed-off-by: Daniel Drake Signed-off-by: Andres Salomon Signed-off-by: Samuel Ortiz --- arch/x86/platform/olpc/olpc-xo1.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/platform/olpc/olpc-xo1.c b/arch/x86/platform/olpc/olpc-xo1.c index 386e3a159cca..ab81fb271760 100644 --- a/arch/x86/platform/olpc/olpc-xo1.c +++ b/arch/x86/platform/olpc/olpc-xo1.c @@ -72,9 +72,9 @@ static int __devinit olpc_xo1_probe(struct platform_device *pdev) dev_err(&pdev->dev, "can't fetch device resource info\n"); return -EIO; } - if (strcmp(pdev->name, "olpc-xo1-pms") == 0) + if (strcmp(pdev->name, "cs5535-pms") == 0) pms_base = res->start; - else if (strcmp(pdev->name, "olpc-xo1-ac-acpi") == 0) + else if (strcmp(pdev->name, "olpc-xo1-pm-acpi") == 0) acpi_base = res->start; /* If we have both addresses, we can override the poweroff hook */ @@ -90,9 +90,9 @@ static int __devexit olpc_xo1_remove(struct platform_device *pdev) { mfd_cell_disable(pdev); - if (strcmp(pdev->name, "olpc-xo1-pms") == 0) + if (strcmp(pdev->name, "cs5535-pms") == 0) pms_base = 0; - else if (strcmp(pdev->name, "olpc-xo1-acpi") == 0) + else if (strcmp(pdev->name, "olpc-xo1-pm-acpi") == 0) acpi_base = 0; pm_power_off = NULL; @@ 
-101,7 +101,7 @@ static int __devexit olpc_xo1_remove(struct platform_device *pdev) static struct platform_driver cs5535_pms_drv = { .driver = { - .name = "olpc-xo1-pms", + .name = "cs5535-pms", .owner = THIS_MODULE, }, .probe = olpc_xo1_probe, @@ -110,7 +110,7 @@ static struct platform_driver cs5535_pms_drv = { static struct platform_driver cs5535_acpi_drv = { .driver = { - .name = "olpc-xo1-acpi", + .name = "olpc-xo1-pm-acpi", .owner = THIS_MODULE, }, .probe = olpc_xo1_probe, -- cgit v1.2.1 From d7c3f8cee81f4548de0513403b74131aee655576 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Sat, 26 Mar 2011 20:57:18 -0500 Subject: percpu: Omit segment prefix in the UP case for cmpxchg_double Omit the segment prefix in the UP case. GS is not used then and we will generate segfaults if cmpxchg16b is used otherwise. Signed-off-by: Christoph Lameter Signed-off-by: Linus Torvalds --- arch/x86/include/asm/percpu.h | 10 ++++++---- arch/x86/lib/cmpxchg16b_emu.S | 14 ++++++++++---- 2 files changed, 16 insertions(+), 8 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index a09e1f052d84..d475b4398d8b 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -45,7 +45,7 @@ #include #ifdef CONFIG_SMP -#define __percpu_arg(x) "%%"__stringify(__percpu_seg)":%P" #x +#define __percpu_prefix "%%"__stringify(__percpu_seg)":" #define __my_cpu_offset percpu_read(this_cpu_off) /* @@ -62,9 +62,11 @@ (typeof(*(ptr)) __kernel __force *)tcp_ptr__; \ }) #else -#define __percpu_arg(x) "%P" #x +#define __percpu_prefix "" #endif +#define __percpu_arg(x) __percpu_prefix "%P" #x + /* * Initialized pointers to per-cpu variables needed for the boot * processor need to use these macros to get the proper address @@ -516,11 +518,11 @@ do { \ typeof(o2) __n2 = n2; \ typeof(o2) __dummy; \ alternative_io("call this_cpu_cmpxchg16b_emu\n\t" P6_NOP4, \ - "cmpxchg16b %%gs:(%%rsi)\n\tsetz %0\n\t", \ + "cmpxchg16b " 
__percpu_prefix "(%%rsi)\n\tsetz %0\n\t", \ X86_FEATURE_CX16, \ ASM_OUTPUT2("=a"(__ret), "=d"(__dummy)), \ "S" (&pcp1), "b"(__n1), "c"(__n2), \ - "a"(__o1), "d"(__o2)); \ + "a"(__o1), "d"(__o2) : "memory"); \ __ret; \ }) diff --git a/arch/x86/lib/cmpxchg16b_emu.S b/arch/x86/lib/cmpxchg16b_emu.S index 3e8b08a6de2b..1e572c507d06 100644 --- a/arch/x86/lib/cmpxchg16b_emu.S +++ b/arch/x86/lib/cmpxchg16b_emu.S @@ -10,6 +10,12 @@ #include #include +#ifdef CONFIG_SMP +#define SEG_PREFIX %gs: +#else +#define SEG_PREFIX +#endif + .text /* @@ -37,13 +43,13 @@ this_cpu_cmpxchg16b_emu: pushf cli - cmpq %gs:(%rsi), %rax + cmpq SEG_PREFIX(%rsi), %rax jne not_same - cmpq %gs:8(%rsi), %rdx + cmpq SEG_PREFIX 8(%rsi), %rdx jne not_same - movq %rbx, %gs:(%rsi) - movq %rcx, %gs:8(%rsi) + movq %rbx, SEG_PREFIX(%rsi) + movq %rcx, SEG_PREFIX 8(%rsi) popf mov $1, %al -- cgit v1.2.1 From 5f55924deaa62d6df687c131fb92aebe071ec787 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 28 Mar 2011 18:06:58 +0200 Subject: percpu: Avoid extra NOP in percpu_cmpxchg16b_double percpu_cmpxchg16b_double() uses alternative_io() and looks like : e8 .. .. .. .. call this_cpu_cmpxchg16b_emu X bytes NOPX or, once patched (if cpu supports native instruction) on SMP build : 65 48 0f c7 0e cmpxchg16b %gs:(%rsi) 0f 94 c0 sete %al on !SMP build : 48 0f c7 0e cmpxchg16b (%rsi) 0f 94 c0 sete %al Therefore, NOPX should be : P6_NOP3 on SMP P6_NOP2 on !SMP Signed-off-by: Eric Dumazet Acked-by: Christoph Lameter Cc: Ingo Molnar Cc: Pekka Enberg Signed-off-by: Tejun Heo --- arch/x86/include/asm/percpu.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index d475b4398d8b..d68fca61ad91 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -509,6 +509,11 @@ do { \ * it in software. The address used in the cmpxchg16 instruction must be * aligned to a 16 byte boundary. 
*/ +#ifdef CONFIG_SMP +#define CMPXCHG16B_EMU_CALL "call this_cpu_cmpxchg16b_emu\n\t" P6_NOP3 +#else +#define CMPXCHG16B_EMU_CALL "call this_cpu_cmpxchg16b_emu\n\t" P6_NOP2 +#endif #define percpu_cmpxchg16b_double(pcp1, o1, o2, n1, n2) \ ({ \ char __ret; \ @@ -517,7 +522,7 @@ do { \ typeof(o2) __o2 = o2; \ typeof(o2) __n2 = n2; \ typeof(o2) __dummy; \ - alternative_io("call this_cpu_cmpxchg16b_emu\n\t" P6_NOP4, \ + alternative_io(CMPXCHG16B_EMU_CALL, \ "cmpxchg16b " __percpu_prefix "(%%rsi)\n\tsetz %0\n\t", \ X86_FEATURE_CX16, \ ASM_OUTPUT2("=a"(__ret), "=d"(__dummy)), \ -- cgit v1.2.1