diff options
author | Ingo Molnar <mingo@elte.hu> | 2009-04-08 17:02:50 +0200 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2009-04-08 17:02:57 +0200 |
commit | ff96e612cba32510e263e17b213235fe5746397e (patch) | |
tree | a8df57d76b10e0901a4fb76cd2987eb9826a560a /arch/x86/kernel | |
parent | cd84a42f315e50edd454c27a3da3951ccd3d735a (diff) | |
parent | 577c9c456f0e1371cbade38eaf91ae8e8a308555 (diff) | |
download | blackbird-op-linux-ff96e612cba32510e263e17b213235fe5746397e.tar.gz blackbird-op-linux-ff96e612cba32510e263e17b213235fe5746397e.zip |
Merge commit 'v2.6.30-rc1' into core/urgent
Merge reason: need latest upstream to queue up dependent fix
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch/x86/kernel')
43 files changed, 874 insertions, 528 deletions
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index c611ad64137f..145cce75cda7 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -66,7 +66,8 @@ obj-$(CONFIG_X86_MPPARSE) += mpparse.o obj-y += apic/ obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups_32.o obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o -obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o +obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o +obj-$(CONFIG_FTRACE_SYSCALLS) += ftrace.o obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index a18eb7ce2236..723989d7f802 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -230,6 +230,35 @@ static void __cpuinit acpi_register_lapic(int id, u8 enabled) } static int __init +acpi_parse_x2apic(struct acpi_subtable_header *header, const unsigned long end) +{ + struct acpi_madt_local_x2apic *processor = NULL; + + processor = (struct acpi_madt_local_x2apic *)header; + + if (BAD_MADT_ENTRY(processor, end)) + return -EINVAL; + + acpi_table_print_madt_entry(header); + +#ifdef CONFIG_X86_X2APIC + /* + * We need to register disabled CPU as well to permit + * counting disabled CPUs. This allows us to size + * cpus_possible_map more accurately, to permit + * to not preallocating memory for all NR_CPUS + * when we use CPU hotplug. 
+ */ + acpi_register_lapic(processor->local_apic_id, /* APIC ID */ + processor->lapic_flags & ACPI_MADT_ENABLED); +#else + printk(KERN_WARNING PREFIX "x2apic entry ignored\n"); +#endif + + return 0; +} + +static int __init acpi_parse_lapic(struct acpi_subtable_header * header, const unsigned long end) { struct acpi_madt_local_apic *processor = NULL; @@ -289,6 +318,25 @@ acpi_parse_lapic_addr_ovr(struct acpi_subtable_header * header, } static int __init +acpi_parse_x2apic_nmi(struct acpi_subtable_header *header, + const unsigned long end) +{ + struct acpi_madt_local_x2apic_nmi *x2apic_nmi = NULL; + + x2apic_nmi = (struct acpi_madt_local_x2apic_nmi *)header; + + if (BAD_MADT_ENTRY(x2apic_nmi, end)) + return -EINVAL; + + acpi_table_print_madt_entry(header); + + if (x2apic_nmi->lint != 1) + printk(KERN_WARNING PREFIX "NMI not connected to LINT 1!\n"); + + return 0; +} + +static int __init acpi_parse_lapic_nmi(struct acpi_subtable_header * header, const unsigned long end) { struct acpi_madt_local_apic_nmi *lapic_nmi = NULL; @@ -793,6 +841,7 @@ static int __init early_acpi_parse_madt_lapic_addr_ovr(void) static int __init acpi_parse_madt_lapic_entries(void) { int count; + int x2count = 0; if (!cpu_has_apic) return -ENODEV; @@ -816,22 +865,28 @@ static int __init acpi_parse_madt_lapic_entries(void) count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_SAPIC, acpi_parse_sapic, MAX_APICS); - if (!count) + if (!count) { + x2count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_X2APIC, + acpi_parse_x2apic, MAX_APICS); count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC, acpi_parse_lapic, MAX_APICS); - if (!count) { + } + if (!count && !x2count) { printk(KERN_ERR PREFIX "No LAPIC entries present\n"); /* TBD: Cleanup to allow fallback to MPS */ return -ENODEV; - } else if (count < 0) { + } else if (count < 0 || x2count < 0) { printk(KERN_ERR PREFIX "Error parsing LAPIC entry\n"); /* TBD: Cleanup to allow fallback to MPS */ return count; } + x2count = + 
acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_X2APIC_NMI, + acpi_parse_x2apic_nmi, 0); count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC_NMI, acpi_parse_lapic_nmi, 0); - if (count < 0) { + if (count < 0 || x2count < 0) { printk(KERN_ERR PREFIX "Error parsing LAPIC NMI entry\n"); /* TBD: Cleanup to allow fallback to MPS */ return count; @@ -1470,7 +1525,7 @@ static int __init dmi_ignore_irq0_timer_override(const struct dmi_system_id *d) /* * If your system is blacklisted here, but you find that acpi=force - * works for you, please contact acpi-devel@sourceforge.net + * works for you, please contact linux-acpi@vger.kernel.org */ static struct dmi_system_id __initdata acpi_dmi_table[] = { /* diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 4c80f1557433..f57658702571 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -5,6 +5,7 @@ #include <linux/kprobes.h> #include <linux/mm.h> #include <linux/vmalloc.h> +#include <linux/memory.h> #include <asm/alternative.h> #include <asm/sections.h> #include <asm/pgtable.h> @@ -12,7 +13,9 @@ #include <asm/nmi.h> #include <asm/vsyscall.h> #include <asm/cacheflush.h> +#include <asm/tlbflush.h> #include <asm/io.h> +#include <asm/fixmap.h> #define MAX_PATCH_LEN (255-1) @@ -226,6 +229,7 @@ static void alternatives_smp_lock(u8 **start, u8 **end, u8 *text, u8 *text_end) { u8 **ptr; + mutex_lock(&text_mutex); for (ptr = start; ptr < end; ptr++) { if (*ptr < text) continue; @@ -234,6 +238,7 @@ static void alternatives_smp_lock(u8 **start, u8 **end, u8 *text, u8 *text_end) /* turn DS segment override prefix into lock prefix */ text_poke(*ptr, ((unsigned char []){0xf0}), 1); }; + mutex_unlock(&text_mutex); } static void alternatives_smp_unlock(u8 **start, u8 **end, u8 *text, u8 *text_end) @@ -243,6 +248,7 @@ static void alternatives_smp_unlock(u8 **start, u8 **end, u8 *text, u8 *text_end if (noreplace_smp) return; + mutex_lock(&text_mutex); for (ptr = start; ptr < end; ptr++) { 
if (*ptr < text) continue; @@ -251,6 +257,7 @@ static void alternatives_smp_unlock(u8 **start, u8 **end, u8 *text, u8 *text_end /* turn lock prefix into DS segment override prefix */ text_poke(*ptr, ((unsigned char []){0x3E}), 1); }; + mutex_unlock(&text_mutex); } struct smp_alt_module { @@ -500,15 +507,16 @@ void *text_poke_early(void *addr, const void *opcode, size_t len) * It means the size must be writable atomically and the address must be aligned * in a way that permits an atomic write. It also makes sure we fit on a single * page. + * + * Note: Must be called under text_mutex. */ void *__kprobes text_poke(void *addr, const void *opcode, size_t len) { + unsigned long flags; char *vaddr; - int nr_pages = 2; struct page *pages[2]; int i; - might_sleep(); if (!core_kernel_text((unsigned long)addr)) { pages[0] = vmalloc_to_page(addr); pages[1] = vmalloc_to_page(addr + PAGE_SIZE); @@ -518,18 +526,21 @@ void *__kprobes text_poke(void *addr, const void *opcode, size_t len) pages[1] = virt_to_page(addr + PAGE_SIZE); } BUG_ON(!pages[0]); - if (!pages[1]) - nr_pages = 1; - vaddr = vmap(pages, nr_pages, VM_MAP, PAGE_KERNEL); - BUG_ON(!vaddr); - local_irq_disable(); + local_irq_save(flags); + set_fixmap(FIX_TEXT_POKE0, page_to_phys(pages[0])); + if (pages[1]) + set_fixmap(FIX_TEXT_POKE1, page_to_phys(pages[1])); + vaddr = (char *)fix_to_virt(FIX_TEXT_POKE0); memcpy(&vaddr[(unsigned long)addr & ~PAGE_MASK], opcode, len); - local_irq_enable(); - vunmap(vaddr); + clear_fixmap(FIX_TEXT_POKE0); + if (pages[1]) + clear_fixmap(FIX_TEXT_POKE1); + local_flush_tlb(); sync_core(); /* Could also do a CLFLUSH here to speed up CPU recovery; but that causes hangs on some VIA CPUs. 
*/ for (i = 0; i < len; i++) BUG_ON(((char *)addr)[i] != ((char *)opcode)[i]); + local_irq_restore(flags); return addr; } diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index c5962fe3796f..a97db99dad52 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -1928,6 +1928,12 @@ static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom, return paddr; } +static int amd_iommu_domain_has_cap(struct iommu_domain *domain, + unsigned long cap) +{ + return 0; +} + static struct iommu_ops amd_iommu_ops = { .domain_init = amd_iommu_domain_init, .domain_destroy = amd_iommu_domain_destroy, @@ -1936,5 +1942,6 @@ static struct iommu_ops amd_iommu_ops = { .map = amd_iommu_map_range, .unmap = amd_iommu_unmap_range, .iova_to_phys = amd_iommu_iova_to_phys, + .domain_has_cap = amd_iommu_domain_has_cap, }; diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 85eb8e100818..098ec84b8c00 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -1304,6 +1304,7 @@ void __init enable_IR_x2apic(void) #ifdef CONFIG_INTR_REMAP int ret; unsigned long flags; + struct IO_APIC_route_entry **ioapic_entries = NULL; if (!cpu_has_x2apic) return; @@ -1334,17 +1335,23 @@ void __init enable_IR_x2apic(void) return; } - ret = save_IO_APIC_setup(); + ioapic_entries = alloc_ioapic_entries(); + if (!ioapic_entries) { + pr_info("Allocate ioapic_entries failed: %d\n", ret); + goto end; + } + + ret = save_IO_APIC_setup(ioapic_entries); if (ret) { pr_info("Saving IO-APIC state failed: %d\n", ret); goto end; } local_irq_save(flags); - mask_IO_APIC_setup(); + mask_IO_APIC_setup(ioapic_entries); mask_8259A(); - ret = enable_intr_remapping(1); + ret = enable_intr_remapping(EIM_32BIT_APIC_ID); if (ret && x2apic_preenabled) { local_irq_restore(flags); @@ -1364,9 +1371,9 @@ end_restore: /* * IR enabling failed */ - restore_IO_APIC_setup(); + restore_IO_APIC_setup(ioapic_entries); else - 
reinit_intr_remapped_IO_APIC(x2apic_preenabled); + reinit_intr_remapped_IO_APIC(x2apic_preenabled, ioapic_entries); unmask_8259A(); local_irq_restore(flags); @@ -1379,6 +1386,8 @@ end: pr_info("Enabled Interrupt-remapping\n"); } else pr_err("Failed to enable Interrupt-remapping and x2apic\n"); + if (ioapic_entries) + free_ioapic_entries(ioapic_entries); #else if (!cpu_has_x2apic) return; @@ -1954,6 +1963,10 @@ static int lapic_suspend(struct sys_device *dev, pm_message_t state) local_irq_save(flags); disable_local_APIC(); +#ifdef CONFIG_INTR_REMAP + if (intr_remapping_enabled) + disable_intr_remapping(); +#endif local_irq_restore(flags); return 0; } @@ -1964,15 +1977,41 @@ static int lapic_resume(struct sys_device *dev) unsigned long flags; int maxlvt; +#ifdef CONFIG_INTR_REMAP + int ret; + struct IO_APIC_route_entry **ioapic_entries = NULL; + if (!apic_pm_state.active) return 0; - maxlvt = lapic_get_maxlvt(); - local_irq_save(flags); + if (x2apic) { + ioapic_entries = alloc_ioapic_entries(); + if (!ioapic_entries) { + WARN(1, "Alloc ioapic_entries in lapic resume failed."); + return -ENOMEM; + } + + ret = save_IO_APIC_setup(ioapic_entries); + if (ret) { + WARN(1, "Saving IO-APIC state failed: %d\n", ret); + free_ioapic_entries(ioapic_entries); + return ret; + } + + mask_IO_APIC_setup(ioapic_entries); + mask_8259A(); + enable_x2apic(); + } +#else + if (!apic_pm_state.active) + return 0; + local_irq_save(flags); if (x2apic) enable_x2apic(); +#endif + else { /* * Make sure the APICBASE points to the right address @@ -1986,6 +2025,7 @@ static int lapic_resume(struct sys_device *dev) wrmsr(MSR_IA32_APICBASE, l, h); } + maxlvt = lapic_get_maxlvt(); apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED); apic_write(APIC_ID, apic_pm_state.apic_id); apic_write(APIC_DFR, apic_pm_state.apic_dfr); @@ -2009,8 +2049,20 @@ static int lapic_resume(struct sys_device *dev) apic_write(APIC_ESR, 0); apic_read(APIC_ESR); +#ifdef CONFIG_INTR_REMAP + if (intr_remapping_enabled) + 
reenable_intr_remapping(EIM_32BIT_APIC_ID); + + if (x2apic) { + unmask_8259A(); + restore_IO_APIC_setup(ioapic_entries); + free_ioapic_entries(ioapic_entries); + } +#endif + local_irq_restore(flags); + return 0; } @@ -2048,7 +2100,9 @@ static int __init init_lapic_sysfs(void) error = sysdev_register(&device_lapic); return error; } -device_initcall(init_lapic_sysfs); + +/* local apic needs to resume before other devices access its registers. */ +core_initcall(init_lapic_sysfs); #else /* CONFIG_PM */ diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c index d806ecaa948f..676cdac385c0 100644 --- a/arch/x86/kernel/apic/bigsmp_32.c +++ b/arch/x86/kernel/apic/bigsmp_32.c @@ -26,12 +26,12 @@ static int bigsmp_apic_id_registered(void) return 1; } -static const cpumask_t *bigsmp_target_cpus(void) +static const struct cpumask *bigsmp_target_cpus(void) { #ifdef CONFIG_SMP - return &cpu_online_map; + return cpu_online_mask; #else - return &cpumask_of_cpu(0); + return cpumask_of(0); #endif } @@ -118,9 +118,9 @@ static int bigsmp_check_phys_apicid_present(int boot_cpu_physical_apicid) } /* As we are using single CPU as destination, pick only one CPU here */ -static unsigned int bigsmp_cpu_mask_to_apicid(const cpumask_t *cpumask) +static unsigned int bigsmp_cpu_mask_to_apicid(const struct cpumask *cpumask) { - return bigsmp_cpu_to_logical_apicid(first_cpu(*cpumask)); + return bigsmp_cpu_to_logical_apicid(cpumask_first(cpumask)); } static unsigned int bigsmp_cpu_mask_to_apicid_and(const struct cpumask *cpumask, @@ -188,10 +188,10 @@ static const struct dmi_system_id bigsmp_dmi_table[] = { { } /* NULL entry stops DMI scanning */ }; -static void bigsmp_vector_allocation_domain(int cpu, cpumask_t *retmask) +static void bigsmp_vector_allocation_domain(int cpu, struct cpumask *retmask) { - cpus_clear(*retmask); - cpu_set(cpu, *retmask); + cpumask_clear(retmask); + cpumask_set_cpu(cpu, retmask); } static int probe_bigsmp(void) diff --git 
a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c index 19588f2770ee..1c11b819f245 100644 --- a/arch/x86/kernel/apic/es7000_32.c +++ b/arch/x86/kernel/apic/es7000_32.c @@ -410,7 +410,7 @@ static void es7000_enable_apic_mode(void) WARN(1, "Command failed, status = %x\n", mip_status); } -static void es7000_vector_allocation_domain(int cpu, cpumask_t *retmask) +static void es7000_vector_allocation_domain(int cpu, struct cpumask *retmask) { /* Careful. Some cpus do not strictly honor the set of cpus * specified in the interrupt destination when using lowest @@ -420,7 +420,8 @@ static void es7000_vector_allocation_domain(int cpu, cpumask_t *retmask) * deliver interrupts to the wrong hyperthread when only one * hyperthread was specified in the interrupt desitination. */ - *retmask = (cpumask_t){ { [0] = APIC_ALL_CPUS, } }; + cpumask_clear(retmask); + cpumask_bits(retmask)[0] = APIC_ALL_CPUS; } @@ -455,14 +456,14 @@ static int es7000_apic_id_registered(void) return 1; } -static const cpumask_t *target_cpus_cluster(void) +static const struct cpumask *target_cpus_cluster(void) { - return &CPU_MASK_ALL; + return cpu_all_mask; } -static const cpumask_t *es7000_target_cpus(void) +static const struct cpumask *es7000_target_cpus(void) { - return &cpumask_of_cpu(smp_processor_id()); + return cpumask_of(smp_processor_id()); } static unsigned long @@ -517,7 +518,7 @@ static void es7000_setup_apic_routing(void) "Enabling APIC mode: %s. Using %d I/O APICs, target cpus %lx\n", (apic_version[apic] == 0x14) ? 
"Physical Cluster" : "Logical Cluster", - nr_ioapics, cpus_addr(*es7000_target_cpus())[0]); + nr_ioapics, cpumask_bits(es7000_target_cpus())[0]); } static int es7000_apicid_to_node(int logical_apicid) @@ -572,7 +573,7 @@ static int es7000_check_phys_apicid_present(int cpu_physical_apicid) return 1; } -static unsigned int es7000_cpu_mask_to_apicid(const cpumask_t *cpumask) +static unsigned int es7000_cpu_mask_to_apicid(const struct cpumask *cpumask) { unsigned int round = 0; int cpu, uninitialized_var(apicid); diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 1bb5c6cee3eb..767fe7e46d68 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -851,63 +851,74 @@ __setup("pirq=", ioapic_pirq_setup); #endif /* CONFIG_X86_32 */ #ifdef CONFIG_INTR_REMAP -/* I/O APIC RTE contents at the OS boot up */ -static struct IO_APIC_route_entry *early_ioapic_entries[MAX_IO_APICS]; +struct IO_APIC_route_entry **alloc_ioapic_entries(void) +{ + int apic; + struct IO_APIC_route_entry **ioapic_entries; + + ioapic_entries = kzalloc(sizeof(*ioapic_entries) * nr_ioapics, + GFP_ATOMIC); + if (!ioapic_entries) + return 0; + + for (apic = 0; apic < nr_ioapics; apic++) { + ioapic_entries[apic] = + kzalloc(sizeof(struct IO_APIC_route_entry) * + nr_ioapic_registers[apic], GFP_ATOMIC); + if (!ioapic_entries[apic]) + goto nomem; + } + + return ioapic_entries; + +nomem: + while (--apic >= 0) + kfree(ioapic_entries[apic]); + kfree(ioapic_entries); + + return 0; +} /* * Saves all the IO-APIC RTE's */ -int save_IO_APIC_setup(void) +int save_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries) { - union IO_APIC_reg_01 reg_01; - unsigned long flags; int apic, pin; - /* - * The number of IO-APIC IRQ registers (== #pins): - */ - for (apic = 0; apic < nr_ioapics; apic++) { - spin_lock_irqsave(&ioapic_lock, flags); - reg_01.raw = io_apic_read(apic, 1); - spin_unlock_irqrestore(&ioapic_lock, flags); - nr_ioapic_registers[apic] = 
reg_01.bits.entries+1; - } + if (!ioapic_entries) + return -ENOMEM; for (apic = 0; apic < nr_ioapics; apic++) { - early_ioapic_entries[apic] = - kzalloc(sizeof(struct IO_APIC_route_entry) * - nr_ioapic_registers[apic], GFP_KERNEL); - if (!early_ioapic_entries[apic]) - goto nomem; - } + if (!ioapic_entries[apic]) + return -ENOMEM; - for (apic = 0; apic < nr_ioapics; apic++) for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) - early_ioapic_entries[apic][pin] = + ioapic_entries[apic][pin] = ioapic_read_entry(apic, pin); + } return 0; - -nomem: - while (apic >= 0) - kfree(early_ioapic_entries[apic--]); - memset(early_ioapic_entries, 0, - ARRAY_SIZE(early_ioapic_entries)); - - return -ENOMEM; } -void mask_IO_APIC_setup(void) +/* + * Mask all IO APIC entries. + */ +void mask_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries) { int apic, pin; + if (!ioapic_entries) + return; + for (apic = 0; apic < nr_ioapics; apic++) { - if (!early_ioapic_entries[apic]) + if (!ioapic_entries[apic]) break; + for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { struct IO_APIC_route_entry entry; - entry = early_ioapic_entries[apic][pin]; + entry = ioapic_entries[apic][pin]; if (!entry.mask) { entry.mask = 1; ioapic_write_entry(apic, pin, entry); @@ -916,22 +927,30 @@ void mask_IO_APIC_setup(void) } } -void restore_IO_APIC_setup(void) +/* + * Restore IO APIC entries which was saved in ioapic_entries. 
+ */ +int restore_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries) { int apic, pin; + if (!ioapic_entries) + return -ENOMEM; + for (apic = 0; apic < nr_ioapics; apic++) { - if (!early_ioapic_entries[apic]) - break; + if (!ioapic_entries[apic]) + return -ENOMEM; + for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) ioapic_write_entry(apic, pin, - early_ioapic_entries[apic][pin]); - kfree(early_ioapic_entries[apic]); - early_ioapic_entries[apic] = NULL; + ioapic_entries[apic][pin]); } + return 0; } -void reinit_intr_remapped_IO_APIC(int intr_remapping) +void reinit_intr_remapped_IO_APIC(int intr_remapping, + struct IO_APIC_route_entry **ioapic_entries) + { /* * for now plain restore of previous settings. @@ -940,7 +959,17 @@ void reinit_intr_remapped_IO_APIC(int intr_remapping) * table entries. for now, do a plain restore, and wait for * the setup_IO_APIC_irqs() to do proper initialization. */ - restore_IO_APIC_setup(); + restore_IO_APIC_setup(ioapic_entries); +} + +void free_ioapic_entries(struct IO_APIC_route_entry **ioapic_entries) +{ + int apic; + + for (apic = 0; apic < nr_ioapics; apic++) + kfree(ioapic_entries[apic]); + + kfree(ioapic_entries); } #endif @@ -2495,7 +2524,7 @@ static void irq_complete_move(struct irq_desc **descp) static inline void irq_complete_move(struct irq_desc **descp) {} #endif -#ifdef CONFIG_INTR_REMAP +#ifdef CONFIG_X86_X2APIC static void __eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg) { int apic, pin; @@ -2540,7 +2569,6 @@ static void ack_x2apic_edge(unsigned int irq) { ack_x2APIC_irq(); } - #endif static void ack_apic_edge(unsigned int irq) @@ -2651,6 +2679,26 @@ static void ack_apic_level(unsigned int irq) #endif } +#ifdef CONFIG_INTR_REMAP +static void ir_ack_apic_edge(unsigned int irq) +{ +#ifdef CONFIG_X86_X2APIC + if (x2apic_enabled()) + return ack_x2apic_edge(irq); +#endif + return ack_apic_edge(irq); +} + +static void ir_ack_apic_level(unsigned int irq) +{ +#ifdef CONFIG_X86_X2APIC + if (x2apic_enabled()) + 
return ack_x2apic_level(irq); +#endif + return ack_apic_level(irq); +} +#endif /* CONFIG_INTR_REMAP */ + static struct irq_chip ioapic_chip __read_mostly = { .name = "IO-APIC", .startup = startup_ioapic_irq, @@ -2670,8 +2718,8 @@ static struct irq_chip ir_ioapic_chip __read_mostly = { .mask = mask_IO_APIC_irq, .unmask = unmask_IO_APIC_irq, #ifdef CONFIG_INTR_REMAP - .ack = ack_x2apic_edge, - .eoi = ack_x2apic_level, + .ack = ir_ack_apic_edge, + .eoi = ir_ack_apic_level, #ifdef CONFIG_SMP .set_affinity = set_ir_ioapic_affinity_irq, #endif @@ -3397,7 +3445,7 @@ static struct irq_chip msi_ir_chip = { .unmask = unmask_msi_irq, .mask = mask_msi_irq, #ifdef CONFIG_INTR_REMAP - .ack = ack_x2apic_edge, + .ack = ir_ack_apic_edge, #ifdef CONFIG_SMP .set_affinity = ir_set_msi_irq_affinity, #endif diff --git a/arch/x86/kernel/apic/nmi.c b/arch/x86/kernel/apic/nmi.c index bdfad80c3cf1..d6bd62407152 100644 --- a/arch/x86/kernel/apic/nmi.c +++ b/arch/x86/kernel/apic/nmi.c @@ -39,7 +39,7 @@ int unknown_nmi_panic; int nmi_watchdog_enabled; -static cpumask_t backtrace_mask = CPU_MASK_NONE; +static cpumask_var_t backtrace_mask; /* nmi_active: * >0: the lapic NMI watchdog is active, but can be disabled @@ -138,6 +138,7 @@ int __init check_nmi_watchdog(void) if (!prev_nmi_count) goto error; + alloc_cpumask_var(&backtrace_mask, GFP_KERNEL); printk(KERN_INFO "Testing NMI watchdog ... 
"); #ifdef CONFIG_SMP @@ -413,14 +414,14 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason) touched = 1; } - if (cpu_isset(cpu, backtrace_mask)) { + if (cpumask_test_cpu(cpu, backtrace_mask)) { static DEFINE_SPINLOCK(lock); /* Serialise the printks */ spin_lock(&lock); printk(KERN_WARNING "NMI backtrace for cpu %d\n", cpu); dump_stack(); spin_unlock(&lock); - cpu_clear(cpu, backtrace_mask); + cpumask_clear_cpu(cpu, backtrace_mask); } /* Could check oops_in_progress here too, but it's safer not to */ @@ -554,10 +555,10 @@ void __trigger_all_cpu_backtrace(void) { int i; - backtrace_mask = cpu_online_map; + cpumask_copy(backtrace_mask, cpu_online_mask); /* Wait for up to 10 seconds for all CPUs to do the backtrace */ for (i = 0; i < 10 * 1000; i++) { - if (cpus_empty(backtrace_mask)) + if (cpumask_empty(backtrace_mask)) break; mdelay(1); } diff --git a/arch/x86/kernel/apic/numaq_32.c b/arch/x86/kernel/apic/numaq_32.c index ba2fc6465534..533e59c6fc82 100644 --- a/arch/x86/kernel/apic/numaq_32.c +++ b/arch/x86/kernel/apic/numaq_32.c @@ -334,9 +334,9 @@ static inline void numaq_smp_callin_clear_local_apic(void) clear_local_APIC(); } -static inline const cpumask_t *numaq_target_cpus(void) +static inline const struct cpumask *numaq_target_cpus(void) { - return &CPU_MASK_ALL; + return cpu_all_mask; } static inline unsigned long @@ -427,7 +427,7 @@ static inline int numaq_check_phys_apicid_present(int boot_cpu_physical_apicid) * We use physical apicids here, not logical, so just return the default * physical broadcast to stop people from breaking us */ -static inline unsigned int numaq_cpu_mask_to_apicid(const cpumask_t *cpumask) +static unsigned int numaq_cpu_mask_to_apicid(const struct cpumask *cpumask) { return 0x0F; } @@ -462,7 +462,7 @@ static int probe_numaq(void) return found_numaq; } -static void numaq_vector_allocation_domain(int cpu, cpumask_t *retmask) +static void numaq_vector_allocation_domain(int cpu, struct cpumask *retmask) { /* Careful. 
Some cpus do not strictly honor the set of cpus * specified in the interrupt destination when using lowest @@ -472,7 +472,8 @@ static void numaq_vector_allocation_domain(int cpu, cpumask_t *retmask) * deliver interrupts to the wrong hyperthread when only one * hyperthread was specified in the interrupt desitination. */ - *retmask = (cpumask_t){ { [0] = APIC_ALL_CPUS, } }; + cpumask_clear(retmask); + cpumask_bits(retmask)[0] = APIC_ALL_CPUS; } static void numaq_setup_portio_remap(void) diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c index 141c99a1c264..01eda2ac65e4 100644 --- a/arch/x86/kernel/apic/probe_32.c +++ b/arch/x86/kernel/apic/probe_32.c @@ -83,7 +83,8 @@ static void default_vector_allocation_domain(int cpu, struct cpumask *retmask) * deliver interrupts to the wrong hyperthread when only one * hyperthread was specified in the interrupt desitination. */ - *retmask = (cpumask_t) { { [0] = APIC_ALL_CPUS } }; + cpumask_clear(retmask); + cpumask_bits(retmask)[0] = APIC_ALL_CPUS; } /* should be called last. 
*/ diff --git a/arch/x86/kernel/apic/summit_32.c b/arch/x86/kernel/apic/summit_32.c index aac52fa873ff..9cfe1f415d81 100644 --- a/arch/x86/kernel/apic/summit_32.c +++ b/arch/x86/kernel/apic/summit_32.c @@ -53,23 +53,19 @@ static unsigned summit_get_apic_id(unsigned long x) return (x >> 24) & 0xFF; } -static inline void summit_send_IPI_mask(const cpumask_t *mask, int vector) +static inline void summit_send_IPI_mask(const struct cpumask *mask, int vector) { default_send_IPI_mask_sequence_logical(mask, vector); } static void summit_send_IPI_allbutself(int vector) { - cpumask_t mask = cpu_online_map; - cpu_clear(smp_processor_id(), mask); - - if (!cpus_empty(mask)) - summit_send_IPI_mask(&mask, vector); + default_send_IPI_mask_allbutself_logical(cpu_online_mask, vector); } static void summit_send_IPI_all(int vector) { - summit_send_IPI_mask(&cpu_online_map, vector); + summit_send_IPI_mask(cpu_online_mask, vector); } #include <asm/tsc.h> @@ -186,13 +182,13 @@ static inline int is_WPEG(struct rio_detail *rio){ #define SUMMIT_APIC_DFR_VALUE (APIC_DFR_CLUSTER) -static const cpumask_t *summit_target_cpus(void) +static const struct cpumask *summit_target_cpus(void) { /* CPU_MASK_ALL (0xff) has undefined behaviour with * dest_LowestPrio mode logical clustered apic interrupt routing * Just start on cpu 0. 
IRQ balancing will spread load */ - return &cpumask_of_cpu(0); + return cpumask_of(0); } static unsigned long summit_check_apicid_used(physid_mask_t bitmap, int apicid) @@ -289,7 +285,7 @@ static int summit_check_phys_apicid_present(int boot_cpu_physical_apicid) return 1; } -static unsigned int summit_cpu_mask_to_apicid(const cpumask_t *cpumask) +static unsigned int summit_cpu_mask_to_apicid(const struct cpumask *cpumask) { unsigned int round = 0; int cpu, apicid = 0; @@ -346,7 +342,7 @@ static int probe_summit(void) return 0; } -static void summit_vector_allocation_domain(int cpu, cpumask_t *retmask) +static void summit_vector_allocation_domain(int cpu, struct cpumask *retmask) { /* Careful. Some cpus do not strictly honor the set of cpus * specified in the interrupt destination when using lowest @@ -356,7 +352,8 @@ static void summit_vector_allocation_domain(int cpu, cpumask_t *retmask) * deliver interrupts to the wrong hyperthread when only one * hyperthread was specified in the interrupt desitination. 
*/ - *retmask = (cpumask_t){ { [0] = APIC_ALL_CPUS, } }; + cpumask_clear(retmask); + cpumask_bits(retmask)[0] = APIC_ALL_CPUS; } #ifdef CONFIG_X86_SUMMIT_NUMA diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 1bd6da1f8fad..1248318436e8 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -118,17 +118,12 @@ static int uv_wakeup_secondary(int phys_apicid, unsigned long start_rip) static void uv_send_IPI_one(int cpu, int vector) { - unsigned long val, apicid; + unsigned long apicid; int pnode; apicid = per_cpu(x86_cpu_to_apicid, cpu); pnode = uv_apicid_to_pnode(apicid); - - val = (1UL << UVH_IPI_INT_SEND_SHFT) | - (apicid << UVH_IPI_INT_APIC_ID_SHFT) | - (vector << UVH_IPI_INT_VECTOR_SHFT); - - uv_write_global_mmr64(pnode, UVH_IPI_INT, val); + uv_hub_send_ipi(pnode, apicid, vector); } static void uv_send_IPI_mask(const struct cpumask *mask, int vector) diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index ac7783a67432..49e0939bac42 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c @@ -466,7 +466,7 @@ static const lookup_t error_table[] = { * @err: APM BIOS return code * * Write a meaningful log entry to the kernel log in the event of - * an APM error. + * an APM error. Note that this also handles (negative) kernel errors. 
*/ static void apm_error(char *str, int err) @@ -478,43 +478,14 @@ static void apm_error(char *str, int err) break; if (i < ERROR_COUNT) printk(KERN_NOTICE "apm: %s: %s\n", str, error_table[i].msg); + else if (err < 0) + printk(KERN_NOTICE "apm: %s: linux error code %i\n", str, err); else printk(KERN_NOTICE "apm: %s: unknown error code %#2.2x\n", str, err); } /* - * Lock APM functionality to physical CPU 0 - */ - -#ifdef CONFIG_SMP - -static cpumask_t apm_save_cpus(void) -{ - cpumask_t x = current->cpus_allowed; - /* Some bioses don't like being called from CPU != 0 */ - set_cpus_allowed(current, cpumask_of_cpu(0)); - BUG_ON(smp_processor_id() != 0); - return x; -} - -static inline void apm_restore_cpus(cpumask_t mask) -{ - set_cpus_allowed(current, mask); -} - -#else - -/* - * No CPU lockdown needed on a uniprocessor - */ - -#define apm_save_cpus() (current->cpus_allowed) -#define apm_restore_cpus(x) (void)(x) - -#endif - -/* * These are the actual BIOS calls. Depending on APM_ZERO_SEGS and * apm_info.allow_ints, we are being really paranoid here! Not only * are interrupts disabled, but all the segment registers (except SS) @@ -568,16 +539,23 @@ static inline void apm_irq_restore(unsigned long flags) # define APM_DO_RESTORE_SEGS #endif +struct apm_bios_call { + u32 func; + /* In and out */ + u32 ebx; + u32 ecx; + /* Out only */ + u32 eax; + u32 edx; + u32 esi; + + /* Error: -ENOMEM, or bits 8-15 of eax */ + int err; +}; + /** - * apm_bios_call - Make an APM BIOS 32bit call - * @func: APM function to execute - * @ebx_in: EBX register for call entry - * @ecx_in: ECX register for call entry - * @eax: EAX register return - * @ebx: EBX register return - * @ecx: ECX register return - * @edx: EDX register return - * @esi: ESI register return + * __apm_bios_call - Make an APM BIOS 32bit call + * @_call: pointer to struct apm_bios_call. * * Make an APM call using the 32bit protected mode interface. 
The * caller is responsible for knowing if APM BIOS is configured and @@ -586,80 +564,142 @@ static inline void apm_irq_restore(unsigned long flags) * flag is loaded into AL. If there is an error, then the error * code is returned in AH (bits 8-15 of eax) and this function * returns non-zero. + * + * Note: this makes the call on the current CPU. */ - -static u8 apm_bios_call(u32 func, u32 ebx_in, u32 ecx_in, - u32 *eax, u32 *ebx, u32 *ecx, u32 *edx, u32 *esi) +static long __apm_bios_call(void *_call) { APM_DECL_SEGS unsigned long flags; - cpumask_t cpus; int cpu; struct desc_struct save_desc_40; struct desc_struct *gdt; - - cpus = apm_save_cpus(); + struct apm_bios_call *call = _call; cpu = get_cpu(); + BUG_ON(cpu != 0); gdt = get_cpu_gdt_table(cpu); save_desc_40 = gdt[0x40 / 8]; gdt[0x40 / 8] = bad_bios_desc; apm_irq_save(flags); APM_DO_SAVE_SEGS; - apm_bios_call_asm(func, ebx_in, ecx_in, eax, ebx, ecx, edx, esi); + apm_bios_call_asm(call->func, call->ebx, call->ecx, + &call->eax, &call->ebx, &call->ecx, &call->edx, + &call->esi); APM_DO_RESTORE_SEGS; apm_irq_restore(flags); gdt[0x40 / 8] = save_desc_40; put_cpu(); - apm_restore_cpus(cpus); - return *eax & 0xff; + return call->eax & 0xff; +} + +/* Run __apm_bios_call or __apm_bios_call_simple on CPU 0 */ +static int on_cpu0(long (*fn)(void *), struct apm_bios_call *call) +{ + int ret; + + /* Don't bother with work_on_cpu in the common case, so we don't + * have to worry about OOM or overhead. 
*/ + if (get_cpu() == 0) { + ret = fn(call); + put_cpu(); + } else { + put_cpu(); + ret = work_on_cpu(0, fn, call); + } + + /* work_on_cpu can fail with -ENOMEM */ + if (ret < 0) + call->err = ret; + else + call->err = (call->eax >> 8) & 0xff; + + return ret; } /** - * apm_bios_call_simple - make a simple APM BIOS 32bit call - * @func: APM function to invoke - * @ebx_in: EBX register value for BIOS call - * @ecx_in: ECX register value for BIOS call - * @eax: EAX register on return from the BIOS call + * apm_bios_call - Make an APM BIOS 32bit call (on CPU 0) + * @call: the apm_bios_call registers. + * + * If there is an error, it is returned in @call.err. + */ +static int apm_bios_call(struct apm_bios_call *call) +{ + return on_cpu0(__apm_bios_call, call); +} + +/** + * __apm_bios_call_simple - Make an APM BIOS 32bit call (on CPU 0) + * @_call: pointer to struct apm_bios_call. * * Make a BIOS call that returns one value only, or just status. * If there is an error, then the error code is returned in AH - * (bits 8-15 of eax) and this function returns non-zero. This is - * used for simpler BIOS operations. This call may hold interrupts - * off for a long time on some laptops. + * (bits 8-15 of eax) and this function returns non-zero (it can + * also return -ENOMEM). This is used for simpler BIOS operations. + * This call may hold interrupts off for a long time on some laptops. + * + * Note: this makes the call on the current CPU. 
*/ - -static u8 apm_bios_call_simple(u32 func, u32 ebx_in, u32 ecx_in, u32 *eax) +static long __apm_bios_call_simple(void *_call) { u8 error; APM_DECL_SEGS unsigned long flags; - cpumask_t cpus; int cpu; struct desc_struct save_desc_40; struct desc_struct *gdt; - - cpus = apm_save_cpus(); + struct apm_bios_call *call = _call; cpu = get_cpu(); + BUG_ON(cpu != 0); gdt = get_cpu_gdt_table(cpu); save_desc_40 = gdt[0x40 / 8]; gdt[0x40 / 8] = bad_bios_desc; apm_irq_save(flags); APM_DO_SAVE_SEGS; - error = apm_bios_call_simple_asm(func, ebx_in, ecx_in, eax); + error = apm_bios_call_simple_asm(call->func, call->ebx, call->ecx, + &call->eax); APM_DO_RESTORE_SEGS; apm_irq_restore(flags); gdt[0x40 / 8] = save_desc_40; put_cpu(); - apm_restore_cpus(cpus); return error; } /** + * apm_bios_call_simple - make a simple APM BIOS 32bit call + * @func: APM function to invoke + * @ebx_in: EBX register value for BIOS call + * @ecx_in: ECX register value for BIOS call + * @eax: EAX register on return from the BIOS call + * @err: bits + * + * Make a BIOS call that returns one value only, or just status. + * If there is an error, then the error code is returned in @err + * and this function returns non-zero. This is used for simpler + * BIOS operations. This call may hold interrupts off for a long + * time on some laptops. 
+ */ +static int apm_bios_call_simple(u32 func, u32 ebx_in, u32 ecx_in, u32 *eax, + int *err) +{ + struct apm_bios_call call; + int ret; + + call.func = func; + call.ebx = ebx_in; + call.ecx = ecx_in; + + ret = on_cpu0(__apm_bios_call_simple, &call); + *eax = call.eax; + *err = call.err; + return ret; +} + +/** * apm_driver_version - APM driver version * @val: loaded with the APM version on return * @@ -678,9 +718,10 @@ static u8 apm_bios_call_simple(u32 func, u32 ebx_in, u32 ecx_in, u32 *eax) static int apm_driver_version(u_short *val) { u32 eax; + int err; - if (apm_bios_call_simple(APM_FUNC_VERSION, 0, *val, &eax)) - return (eax >> 8) & 0xff; + if (apm_bios_call_simple(APM_FUNC_VERSION, 0, *val, &eax, &err)) + return err; *val = eax; return APM_SUCCESS; } @@ -701,22 +742,21 @@ static int apm_driver_version(u_short *val) * that APM 1.2 is in use. If no messges are pending the value 0x80 * is returned (No power management events pending). */ - static int apm_get_event(apm_event_t *event, apm_eventinfo_t *info) { - u32 eax; - u32 ebx; - u32 ecx; - u32 dummy; + struct apm_bios_call call; - if (apm_bios_call(APM_FUNC_GET_EVENT, 0, 0, &eax, &ebx, &ecx, - &dummy, &dummy)) - return (eax >> 8) & 0xff; - *event = ebx; + call.func = APM_FUNC_GET_EVENT; + call.ebx = call.ecx = 0; + + if (apm_bios_call(&call)) + return call.err; + + *event = call.ebx; if (apm_info.connection_version < 0x0102) *info = ~0; /* indicate info not valid */ else - *info = ecx; + *info = call.ecx; return APM_SUCCESS; } @@ -737,9 +777,10 @@ static int apm_get_event(apm_event_t *event, apm_eventinfo_t *info) static int set_power_state(u_short what, u_short state) { u32 eax; + int err; - if (apm_bios_call_simple(APM_FUNC_SET_STATE, what, state, &eax)) - return (eax >> 8) & 0xff; + if (apm_bios_call_simple(APM_FUNC_SET_STATE, what, state, &eax, &err)) + return err; return APM_SUCCESS; } @@ -770,6 +811,7 @@ static int apm_do_idle(void) u8 ret = 0; int idled = 0; int polling; + int err; polling = 
!!(current_thread_info()->status & TS_POLLING); if (polling) { @@ -782,7 +824,7 @@ static int apm_do_idle(void) } if (!need_resched()) { idled = 1; - ret = apm_bios_call_simple(APM_FUNC_IDLE, 0, 0, &eax); + ret = apm_bios_call_simple(APM_FUNC_IDLE, 0, 0, &eax, &err); } if (polling) current_thread_info()->status |= TS_POLLING; @@ -797,8 +839,7 @@ static int apm_do_idle(void) * Only report the failure the first 5 times. */ if (++t < 5) { - printk(KERN_DEBUG "apm_do_idle failed (%d)\n", - (eax >> 8) & 0xff); + printk(KERN_DEBUG "apm_do_idle failed (%d)\n", err); t = jiffies; } return -1; @@ -816,9 +857,10 @@ static int apm_do_idle(void) static void apm_do_busy(void) { u32 dummy; + int err; if (clock_slowed || ALWAYS_CALL_BUSY) { - (void)apm_bios_call_simple(APM_FUNC_BUSY, 0, 0, &dummy); + (void)apm_bios_call_simple(APM_FUNC_BUSY, 0, 0, &dummy, &err); clock_slowed = 0; } } @@ -937,7 +979,7 @@ static void apm_power_off(void) /* Some bioses don't like being called from CPU != 0 */ if (apm_info.realmode_power_off) { - (void)apm_save_cpus(); + set_cpus_allowed_ptr(current, cpumask_of(0)); machine_real_restart(po_bios_call, sizeof(po_bios_call)); } else { (void)set_system_power_state(APM_STATE_OFF); @@ -956,12 +998,13 @@ static void apm_power_off(void) static int apm_enable_power_management(int enable) { u32 eax; + int err; if ((enable == 0) && (apm_info.bios.flags & APM_BIOS_DISENGAGED)) return APM_NOT_ENGAGED; if (apm_bios_call_simple(APM_FUNC_ENABLE_PM, APM_DEVICE_BALL, - enable, &eax)) - return (eax >> 8) & 0xff; + enable, &eax, &err)) + return err; if (enable) apm_info.bios.flags &= ~APM_BIOS_DISABLED; else @@ -986,24 +1029,23 @@ static int apm_enable_power_management(int enable) static int apm_get_power_status(u_short *status, u_short *bat, u_short *life) { - u32 eax; - u32 ebx; - u32 ecx; - u32 edx; - u32 dummy; + struct apm_bios_call call; + + call.func = APM_FUNC_GET_STATUS; + call.ebx = APM_DEVICE_ALL; + call.ecx = 0; if (apm_info.get_power_status_broken) return 
APM_32_UNSUPPORTED; - if (apm_bios_call(APM_FUNC_GET_STATUS, APM_DEVICE_ALL, 0, - &eax, &ebx, &ecx, &edx, &dummy)) - return (eax >> 8) & 0xff; - *status = ebx; - *bat = ecx; + if (apm_bios_call(&call)) + return call.err; + *status = call.ebx; + *bat = call.ecx; if (apm_info.get_power_status_swabinminutes) { - *life = swab16((u16)edx); + *life = swab16((u16)call.edx); *life |= 0x8000; } else - *life = edx; + *life = call.edx; return APM_SUCCESS; } @@ -1048,12 +1090,14 @@ static int apm_get_battery_status(u_short which, u_short *status, static int apm_engage_power_management(u_short device, int enable) { u32 eax; + int err; if ((enable == 0) && (device == APM_DEVICE_ALL) && (apm_info.bios.flags & APM_BIOS_DISABLED)) return APM_DISABLED; - if (apm_bios_call_simple(APM_FUNC_ENGAGE_PM, device, enable, &eax)) - return (eax >> 8) & 0xff; + if (apm_bios_call_simple(APM_FUNC_ENGAGE_PM, device, enable, + &eax, &err)) + return err; if (device == APM_DEVICE_ALL) { if (enable) apm_info.bios.flags &= ~APM_BIOS_DISENGAGED; @@ -1689,16 +1733,14 @@ static int apm(void *unused) char *power_stat; char *bat_stat; -#ifdef CONFIG_SMP /* 2002/08/01 - WT * This is to avoid random crashes at boot time during initialization * on SMP systems in case of "apm=power-off" mode. Seen on ASUS A7M266D. * Some bioses don't like being called from CPU != 0. * Method suggested by Ingo Molnar. 
*/ - set_cpus_allowed(current, cpumask_of_cpu(0)); + set_cpus_allowed_ptr(current, cpumask_of(0)); BUG_ON(smp_processor_id() != 0); -#endif if (apm_info.connection_version == 0) { apm_info.connection_version = apm_info.bios.version; diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index e2962cc1e27b..c4f667896c28 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -41,8 +41,6 @@ #include "cpu.h" -#ifdef CONFIG_X86_64 - /* all of these masks are initialized in setup_cpu_local_masks() */ cpumask_var_t cpu_initialized_mask; cpumask_var_t cpu_callout_mask; @@ -60,16 +58,6 @@ void __init setup_cpu_local_masks(void) alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask); } -#else /* CONFIG_X86_32 */ - -cpumask_t cpu_sibling_setup_map; -cpumask_t cpu_callout_map; -cpumask_t cpu_initialized; -cpumask_t cpu_callin_map; - -#endif /* CONFIG_X86_32 */ - - static const struct cpu_dev *this_cpu __cpuinitdata; DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { @@ -859,6 +847,7 @@ static void vgetcpu_set_mode(void) void __init identify_boot_cpu(void) { identify_cpu(&boot_cpu_data); + init_c1e_mask(); #ifdef CONFIG_X86_32 sysenter_setup(); enable_sep_cpu(); diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c index 23da96e57b17..19f6b9d27e83 100644 --- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c @@ -33,7 +33,7 @@ #include <linux/cpufreq.h> #include <linux/compiler.h> #include <linux/dmi.h> -#include <linux/ftrace.h> +#include <trace/power.h> #include <linux/acpi.h> #include <linux/io.h> @@ -72,6 +72,8 @@ struct acpi_cpufreq_data { static DEFINE_PER_CPU(struct acpi_cpufreq_data *, drv_data); +DEFINE_TRACE(power_mark); + /* acpi_perf_data is a pointer to percpu data. 
*/ static struct acpi_processor_performance *acpi_perf_data; @@ -680,6 +682,18 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) perf->states[i].transition_latency * 1000; } + /* Check for high latency (>20uS) from buggy BIOSes, like on T42 */ + if (perf->control_register.space_id == ACPI_ADR_SPACE_FIXED_HARDWARE && + policy->cpuinfo.transition_latency > 20 * 1000) { + static int print_once; + policy->cpuinfo.transition_latency = 20 * 1000; + if (!print_once) { + print_once = 1; + printk(KERN_INFO "Capping off P-state tranision latency" + " at 20 uS\n"); + } + } + data->max_freq = perf->states[0].core_frequency * 1000; /* table init */ for (i = 0; i < perf->state_count; i++) { diff --git a/arch/x86/kernel/cpu/cpufreq/longhaul.c b/arch/x86/kernel/cpu/cpufreq/longhaul.c index f1c51aea064d..0bd48e65a0ca 100644 --- a/arch/x86/kernel/cpu/cpufreq/longhaul.c +++ b/arch/x86/kernel/cpu/cpufreq/longhaul.c @@ -305,7 +305,7 @@ retry_loop: outb(3, 0x22); } else if ((pr != NULL) && pr->flags.bm_control) { /* Disable bus master arbitration */ - acpi_set_register(ACPI_BITREG_ARB_DISABLE, 1); + acpi_write_bit_register(ACPI_BITREG_ARB_DISABLE, 1); } switch (longhaul_version) { @@ -328,7 +328,7 @@ retry_loop: case TYPE_POWERSAVER: if (longhaul_flags & USE_ACPI_C3) { /* Don't allow wakeup */ - acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 0); + acpi_write_bit_register(ACPI_BITREG_BUS_MASTER_RLD, 0); do_powersaver(cx->address, mults_index, dir); } else { do_powersaver(0, mults_index, dir); @@ -341,7 +341,7 @@ retry_loop: outb(0, 0x22); } else if ((pr != NULL) && pr->flags.bm_control) { /* Enable bus master arbitration */ - acpi_set_register(ACPI_BITREG_ARB_DISABLE, 0); + acpi_write_bit_register(ACPI_BITREG_ARB_DISABLE, 0); } outb(pic2_mask, 0xA1); /* restore mask */ outb(pic1_mask, 0x21); diff --git a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c index 41ed94915f97..6ac55bd341ae 100644 --- 
a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c +++ b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c @@ -211,7 +211,7 @@ static int cpufreq_p4_cpu_init(struct cpufreq_policy *policy) unsigned int i; #ifdef CONFIG_SMP - cpumask_copy(policy->cpus, &per_cpu(cpu_sibling_map, policy->cpu)); + cpumask_copy(policy->cpus, cpu_sibling_mask(policy->cpu)); #endif /* Errata workaround */ diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index a15ac94e0b9b..4709ead2db52 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c @@ -54,7 +54,10 @@ static DEFINE_PER_CPU(struct powernow_k8_data *, powernow_data); static int cpu_family = CPU_OPTERON; #ifndef CONFIG_SMP -DEFINE_PER_CPU(cpumask_t, cpu_core_map); +static inline const struct cpumask *cpu_core_mask(int cpu) +{ + return cpumask_of(0); +} #endif /* Return a frequency in MHz, given an input fid */ @@ -699,7 +702,7 @@ static int fill_powernow_table(struct powernow_k8_data *data, dprintk("cfid 0x%x, cvid 0x%x\n", data->currfid, data->currvid); data->powernow_table = powernow_table; - if (first_cpu(per_cpu(cpu_core_map, data->cpu)) == data->cpu) + if (cpumask_first(cpu_core_mask(data->cpu)) == data->cpu) print_basics(data); for (j = 0; j < data->numps; j++) @@ -862,7 +865,7 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) /* fill in data */ data->numps = data->acpi_data.state_count; - if (first_cpu(per_cpu(cpu_core_map, data->cpu)) == data->cpu) + if (cpumask_first(cpu_core_mask(data->cpu)) == data->cpu) print_basics(data); powernow_k8_acpi_pst_values(data, 0); @@ -1300,7 +1303,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) if (cpu_family == CPU_HW_PSTATE) cpumask_copy(pol->cpus, cpumask_of(pol->cpu)); else - cpumask_copy(pol->cpus, &per_cpu(cpu_core_map, pol->cpu)); + cpumask_copy(pol->cpus, cpu_core_mask(pol->cpu)); data->available_cores = pol->cpus; if (cpu_family == CPU_HW_PSTATE) @@ -1365,7 
+1368,7 @@ static unsigned int powernowk8_get(unsigned int cpu) unsigned int khz = 0; unsigned int first; - first = first_cpu(per_cpu(cpu_core_map, cpu)); + first = cpumask_first(cpu_core_mask(cpu)); data = per_cpu(powernow_data, first); if (!data) diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c index 8bbb11adb315..016c1a4fa3fc 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c @@ -321,7 +321,7 @@ static int speedstep_cpu_init(struct cpufreq_policy *policy) /* only run on CPU to be set, or on its sibling */ #ifdef CONFIG_SMP - cpumask_copy(policy->cpus, &per_cpu(cpu_sibling_map, policy->cpu)); + cpumask_copy(policy->cpus, cpu_sibling_mask(policy->cpu)); #endif cpus_allowed = current->cpus_allowed; diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index c471eb1a389c..483eda96e102 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -159,7 +159,7 @@ struct _cpuid4_info_regs { unsigned long can_disable; }; -#ifdef CONFIG_PCI +#if defined(CONFIG_PCI) && defined(CONFIG_SYSFS) static struct pci_device_id k8_nb_id[] = { { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1103) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1203) }, @@ -324,15 +324,6 @@ __cpuinit cpuid4_cache_lookup_regs(int index, return 0; } -static int -__cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf) -{ - struct _cpuid4_info_regs *leaf_regs = - (struct _cpuid4_info_regs *)this_leaf; - - return cpuid4_cache_lookup_regs(index, leaf_regs); -} - static int __cpuinit find_num_cache_leaves(void) { unsigned int eax, ebx, ecx, edx; @@ -508,6 +499,8 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c) return l2; } +#ifdef CONFIG_SYSFS + /* pointer to _cpuid4_info array (for each cache leaf) */ static DEFINE_PER_CPU(struct _cpuid4_info *, cpuid4_info); #define CPUID4_INFO_IDX(x, y) (&((per_cpu(cpuid4_info, 
x))[y])) @@ -571,6 +564,15 @@ static void __cpuinit free_cache_attributes(unsigned int cpu) per_cpu(cpuid4_info, cpu) = NULL; } +static int +__cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf) +{ + struct _cpuid4_info_regs *leaf_regs = + (struct _cpuid4_info_regs *)this_leaf; + + return cpuid4_cache_lookup_regs(index, leaf_regs); +} + static void __cpuinit get_cpu_leaves(void *_retval) { int j, *retval = _retval, cpu = smp_processor_id(); @@ -612,8 +614,6 @@ static int __cpuinit detect_cache_attributes(unsigned int cpu) return retval; } -#ifdef CONFIG_SYSFS - #include <linux/kobject.h> #include <linux/sysfs.h> diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c index ca14604611ec..863f89568b1a 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_64.c @@ -990,7 +990,7 @@ static struct sysdev_attribute *mce_attributes[] = { NULL }; -static cpumask_t mce_device_initialized = CPU_MASK_NONE; +static cpumask_var_t mce_device_initialized; /* Per cpu sysdev init. All of the cpus still share the same ctl bank */ static __cpuinit int mce_create_device(unsigned int cpu) @@ -1021,7 +1021,7 @@ static __cpuinit int mce_create_device(unsigned int cpu) if (err) goto error2; } - cpu_set(cpu, mce_device_initialized); + cpumask_set_cpu(cpu, mce_device_initialized); return 0; error2: @@ -1043,7 +1043,7 @@ static __cpuinit void mce_remove_device(unsigned int cpu) { int i; - if (!cpu_isset(cpu, mce_device_initialized)) + if (!cpumask_test_cpu(cpu, mce_device_initialized)) return; for (i = 0; mce_attributes[i]; i++) @@ -1053,7 +1053,7 @@ static __cpuinit void mce_remove_device(unsigned int cpu) sysdev_remove_file(&per_cpu(device_mce, cpu), &bank_attrs[i]); sysdev_unregister(&per_cpu(device_mce,cpu)); - cpu_clear(cpu, mce_device_initialized); + cpumask_clear_cpu(cpu, mce_device_initialized); } /* Make sure there are no machine checks on offlined CPUs. 
*/ @@ -1162,6 +1162,8 @@ static __init int mce_init_device(void) if (!mce_available(&boot_cpu_data)) return -EIO; + alloc_cpumask_var(&mce_device_initialized, GFP_KERNEL); + err = mce_init_banks(); if (err) return err; diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c index 7d01be868870..56dde9c4bc96 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c @@ -485,7 +485,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) #ifdef CONFIG_SMP if (cpu_data(cpu).cpu_core_id && shared_bank[bank]) { /* symlink */ - i = cpumask_first(&per_cpu(cpu_core_map, cpu)); + i = cpumask_first(cpu_core_mask(cpu)); /* first core not up yet */ if (cpu_data(i).cpu_core_id) @@ -505,7 +505,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) if (err) goto out; - cpumask_copy(b->cpus, &per_cpu(cpu_core_map, cpu)); + cpumask_copy(b->cpus, cpu_core_mask(cpu)); per_cpu(threshold_banks, cpu)[bank] = b; goto out; } @@ -529,7 +529,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) #ifndef CONFIG_SMP cpumask_setall(b->cpus); #else - cpumask_copy(b->cpus, &per_cpu(cpu_core_map, cpu)); + cpumask_copy(b->cpus, cpu_core_mask(cpu)); #endif per_cpu(threshold_banks, cpu)[bank] = b; diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c index 57df3d383470..d6b72df89d69 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c @@ -249,7 +249,7 @@ void cmci_rediscover(int dying) for_each_online_cpu (cpu) { if (cpu == dying) continue; - if (set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu))) + if (set_cpus_allowed_ptr(current, cpumask_of(cpu))) continue; /* Recheck banks in case CPUs don't all have the same */ if (cmci_supported(&banks)) diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index 
37f28fc7cf95..0b776c09aff3 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -462,9 +462,6 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base, *base = base_hi << (32 - PAGE_SHIFT) | base_lo >> PAGE_SHIFT; *type = base_lo & 0xff; - printk(KERN_DEBUG " get_mtrr: cpu%d reg%02d base=%010lx size=%010lx %s\n", - cpu, reg, *base, *size, - mtrr_attrib_to_str(*type & 0xff)); out_put_cpu: put_cpu(); } diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c index d67e0e48bc2d..f93047fed791 100644 --- a/arch/x86/kernel/cpu/proc.c +++ b/arch/x86/kernel/cpu/proc.c @@ -14,7 +14,7 @@ static void show_cpuinfo_core(struct seq_file *m, struct cpuinfo_x86 *c, if (c->x86_max_cores * smp_num_siblings > 1) { seq_printf(m, "physical id\t: %d\n", c->phys_proc_id); seq_printf(m, "siblings\t: %d\n", - cpus_weight(per_cpu(cpu_core_map, cpu))); + cpumask_weight(cpu_sibling_mask(cpu))); seq_printf(m, "core id\t\t: %d\n", c->cpu_core_id); seq_printf(m, "cpu cores\t: %d\n", c->booted_cores); seq_printf(m, "apicid\t\t: %d\n", c->apicid); @@ -143,9 +143,9 @@ static int show_cpuinfo(struct seq_file *m, void *v) static void *c_start(struct seq_file *m, loff_t *pos) { if (*pos == 0) /* just in case, cpu 0 is not the first */ - *pos = first_cpu(cpu_online_map); + *pos = cpumask_first(cpu_online_mask); else - *pos = next_cpu_nr(*pos - 1, cpu_online_map); + *pos = cpumask_next(*pos - 1, cpu_online_mask); if ((*pos) < nr_cpu_ids) return &cpu_data(*pos); return NULL; diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index dd2130b0fb3e..95ea5fa7d444 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -15,6 +15,7 @@ #include <linux/bug.h> #include <linux/nmi.h> #include <linux/sysfs.h> +#include <linux/ftrace.h> #include <asm/stacktrace.h> @@ -196,6 +197,11 @@ unsigned __kprobes long oops_begin(void) int cpu; unsigned long flags; + /* notify the hw-branch tracer so it may disable tracing 
and + add the last trace to the trace buffer - + the earlier this happens, the more useful the trace. */ + trace_hw_branch_oops(); + oops_enter(); /* racy, but better than risking deadlock. */ diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 76f7141e0f91..70a10ca100f6 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -18,14 +18,26 @@ #include <linux/init.h> #include <linux/list.h> +#include <asm/cacheflush.h> #include <asm/ftrace.h> -#include <linux/ftrace.h> #include <asm/nops.h> #include <asm/nmi.h> #ifdef CONFIG_DYNAMIC_FTRACE +int ftrace_arch_code_modify_prepare(void) +{ + set_kernel_text_rw(); + return 0; +} + +int ftrace_arch_code_modify_post_process(void) +{ + set_kernel_text_ro(); + return 0; +} + union ftrace_code_union { char code[MCOUNT_INSN_SIZE]; struct { @@ -66,11 +78,11 @@ static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr) * * 1) Put the instruction pointer into the IP buffer * and the new code into the "code" buffer. - * 2) Set a flag that says we are modifying code - * 3) Wait for any running NMIs to finish. - * 4) Write the code - * 5) clear the flag. - * 6) Wait for any running NMIs to finish. + * 2) Wait for any running NMIs to finish and set a flag that says + * we are modifying code, it is done in an atomic operation. + * 3) Write the code + * 4) clear the flag. + * 5) Wait for any running NMIs to finish. * * If an NMI is executed, the first thing it does is to call * "ftrace_nmi_enter". This will check if the flag is set to write @@ -82,9 +94,9 @@ static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr) * are the same as what exists. 
*/ -static atomic_t in_nmi = ATOMIC_INIT(0); +#define MOD_CODE_WRITE_FLAG (1 << 31) /* set when NMI should do the write */ +static atomic_t nmi_running = ATOMIC_INIT(0); static int mod_code_status; /* holds return value of text write */ -static int mod_code_write; /* set when NMI should do the write */ static void *mod_code_ip; /* holds the IP to write to */ static void *mod_code_newcode; /* holds the text to write to the IP */ @@ -101,6 +113,20 @@ int ftrace_arch_read_dyn_info(char *buf, int size) return r; } +static void clear_mod_flag(void) +{ + int old = atomic_read(&nmi_running); + + for (;;) { + int new = old & ~MOD_CODE_WRITE_FLAG; + + if (old == new) + break; + + old = atomic_cmpxchg(&nmi_running, old, new); + } +} + static void ftrace_mod_code(void) { /* @@ -111,37 +137,52 @@ static void ftrace_mod_code(void) */ mod_code_status = probe_kernel_write(mod_code_ip, mod_code_newcode, MCOUNT_INSN_SIZE); + + /* if we fail, then kill any new writers */ + if (mod_code_status) + clear_mod_flag(); } void ftrace_nmi_enter(void) { - atomic_inc(&in_nmi); - /* Must have in_nmi seen before reading write flag */ - smp_mb(); - if (mod_code_write) { + if (atomic_inc_return(&nmi_running) & MOD_CODE_WRITE_FLAG) { + smp_rmb(); ftrace_mod_code(); atomic_inc(&nmi_update_count); } + /* Must have previous changes seen before executions */ + smp_mb(); } void ftrace_nmi_exit(void) { - /* Finish all executions before clearing in_nmi */ - smp_wmb(); - atomic_dec(&in_nmi); + /* Finish all executions before clearing nmi_running */ + smp_mb(); + atomic_dec(&nmi_running); +} + +static void wait_for_nmi_and_set_mod_flag(void) +{ + if (!atomic_cmpxchg(&nmi_running, 0, MOD_CODE_WRITE_FLAG)) + return; + + do { + cpu_relax(); + } while (atomic_cmpxchg(&nmi_running, 0, MOD_CODE_WRITE_FLAG)); + + nmi_wait_count++; } static void wait_for_nmi(void) { - int waited = 0; + if (!atomic_read(&nmi_running)) + return; - while (atomic_read(&in_nmi)) { - waited = 1; + do { cpu_relax(); - } + } while 
(atomic_read(&nmi_running)); - if (waited) - nmi_wait_count++; + nmi_wait_count++; } static int @@ -151,14 +192,9 @@ do_ftrace_mod_code(unsigned long ip, void *new_code) mod_code_newcode = new_code; /* The buffers need to be visible before we let NMIs write them */ - smp_wmb(); - - mod_code_write = 1; - - /* Make sure write bit is visible before we wait on NMIs */ smp_mb(); - wait_for_nmi(); + wait_for_nmi_and_set_mod_flag(); /* Make sure all running NMIs have finished before we write the code */ smp_mb(); @@ -166,13 +202,9 @@ do_ftrace_mod_code(unsigned long ip, void *new_code) ftrace_mod_code(); /* Make sure the write happens before clearing the bit */ - smp_wmb(); - - mod_code_write = 0; - - /* make sure NMIs see the cleared bit */ smp_mb(); + clear_mod_flag(); wait_for_nmi(); return mod_code_status; @@ -368,25 +400,6 @@ int ftrace_disable_ftrace_graph_caller(void) return ftrace_mod_jmp(ip, old_offset, new_offset); } -#else /* CONFIG_DYNAMIC_FTRACE */ - -/* - * These functions are picked from those used on - * this page for dynamic ftrace. They have been - * simplified to ignore all traces in NMI context. 
- */ -static atomic_t in_nmi; - -void ftrace_nmi_enter(void) -{ - atomic_inc(&in_nmi); -} - -void ftrace_nmi_exit(void) -{ - atomic_dec(&in_nmi); -} - #endif /* !CONFIG_DYNAMIC_FTRACE */ /* @@ -396,14 +409,13 @@ void ftrace_nmi_exit(void) void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr) { unsigned long old; - unsigned long long calltime; int faulted; struct ftrace_graph_ent trace; unsigned long return_hooker = (unsigned long) &return_to_handler; /* Nmi's are currently unsupported */ - if (unlikely(atomic_read(&in_nmi))) + if (unlikely(in_nmi())) return; if (unlikely(atomic_read(&current->tracing_graph_pause))) @@ -439,17 +451,7 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr) return; } - if (unlikely(!__kernel_text_address(old))) { - ftrace_graph_stop(); - *parent = old; - WARN_ON(1); - return; - } - - calltime = cpu_clock(raw_smp_processor_id()); - - if (ftrace_push_return_trace(old, calltime, - self_addr, &trace.depth) == -EBUSY) { + if (ftrace_push_return_trace(old, self_addr, &trace.depth) == -EBUSY) { *parent = old; return; } @@ -463,3 +465,66 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr) } } #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ + +#ifdef CONFIG_FTRACE_SYSCALLS + +extern unsigned long __start_syscalls_metadata[]; +extern unsigned long __stop_syscalls_metadata[]; +extern unsigned long *sys_call_table; + +static struct syscall_metadata **syscalls_metadata; + +static struct syscall_metadata *find_syscall_meta(unsigned long *syscall) +{ + struct syscall_metadata *start; + struct syscall_metadata *stop; + char str[KSYM_SYMBOL_LEN]; + + + start = (struct syscall_metadata *)__start_syscalls_metadata; + stop = (struct syscall_metadata *)__stop_syscalls_metadata; + kallsyms_lookup((unsigned long) syscall, NULL, NULL, NULL, str); + + for ( ; start < stop; start++) { + if (start->name && !strcmp(start->name, str)) + return start; + } + return NULL; +} + +struct syscall_metadata 
*syscall_nr_to_meta(int nr) +{ + if (!syscalls_metadata || nr >= FTRACE_SYSCALL_MAX || nr < 0) + return NULL; + + return syscalls_metadata[nr]; +} + +void arch_init_ftrace_syscalls(void) +{ + int i; + struct syscall_metadata *meta; + unsigned long **psys_syscall_table = &sys_call_table; + static atomic_t refs; + + if (atomic_inc_return(&refs) != 1) + goto end; + + syscalls_metadata = kzalloc(sizeof(*syscalls_metadata) * + FTRACE_SYSCALL_MAX, GFP_KERNEL); + if (!syscalls_metadata) { + WARN_ON(1); + return; + } + + for (i = 0; i < FTRACE_SYSCALL_MAX; i++) { + meta = find_syscall_meta(psys_syscall_table[i]); + syscalls_metadata[i] = meta; + } + return; + + /* Paranoid: avoid overflow */ +end: + atomic_dec(&refs); +} +#endif diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index 55b94614e348..7b5169d2b000 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c @@ -638,13 +638,13 @@ static void __used __kprobes kretprobe_trampoline_holder(void) #else " pushf\n" /* - * Skip cs, ip, orig_ax. + * Skip cs, ip, orig_ax and gs. * trampoline_handler() will plug in these values */ - " subl $12, %esp\n" + " subl $16, %esp\n" " pushl %fs\n" - " pushl %ds\n" " pushl %es\n" + " pushl %ds\n" " pushl %eax\n" " pushl %ebp\n" " pushl %edi\n" @@ -655,10 +655,10 @@ static void __used __kprobes kretprobe_trampoline_holder(void) " movl %esp, %eax\n" " call trampoline_handler\n" /* Move flags to cs */ - " movl 52(%esp), %edx\n" - " movl %edx, 48(%esp)\n" + " movl 56(%esp), %edx\n" + " movl %edx, 52(%esp)\n" /* Replace saved flags with true return address. 
*/ - " movl %eax, 52(%esp)\n" + " movl %eax, 56(%esp)\n" " popl %ebx\n" " popl %ecx\n" " popl %edx\n" @@ -666,8 +666,8 @@ static void __used __kprobes kretprobe_trampoline_holder(void) " popl %edi\n" " popl %ebp\n" " popl %eax\n" - /* Skip ip, orig_ax, es, ds, fs */ - " addl $20, %esp\n" + /* Skip ds, es, fs, gs, orig_ax and ip */ + " addl $24, %esp\n" " popf\n" #endif " ret\n"); @@ -691,6 +691,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs) regs->cs = __KERNEL_CS; #else regs->cs = __KERNEL_CS | get_kernel_rpl(); + regs->gs = 0; #endif regs->ip = trampoline_address; regs->orig_ax = ~0UL; diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c index c25fdb382292..453b5795a5c6 100644 --- a/arch/x86/kernel/microcode_amd.c +++ b/arch/x86/kernel/microcode_amd.c @@ -12,31 +12,30 @@ * * Licensed under the terms of the GNU General Public * License version 2. See file COPYING for details. -*/ - + */ +#include <linux/platform_device.h> #include <linux/capability.h> -#include <linux/kernel.h> -#include <linux/init.h> -#include <linux/sched.h> -#include <linux/cpumask.h> -#include <linux/module.h> -#include <linux/slab.h> -#include <linux/vmalloc.h> #include <linux/miscdevice.h> +#include <linux/firmware.h> #include <linux/spinlock.h> -#include <linux/mm.h> -#include <linux/fs.h> +#include <linux/cpumask.h> +#include <linux/pci_ids.h> +#include <linux/uaccess.h> +#include <linux/vmalloc.h> +#include <linux/kernel.h> +#include <linux/module.h> #include <linux/mutex.h> +#include <linux/sched.h> +#include <linux/init.h> +#include <linux/slab.h> #include <linux/cpu.h> -#include <linux/firmware.h> -#include <linux/platform_device.h> #include <linux/pci.h> -#include <linux/pci_ids.h> -#include <linux/uaccess.h> +#include <linux/fs.h> +#include <linux/mm.h> -#include <asm/msr.h> -#include <asm/processor.h> #include <asm/microcode.h> +#include <asm/processor.h> +#include <asm/msr.h> MODULE_DESCRIPTION("AMD Microcode Update Driver"); 
MODULE_AUTHOR("Peter Oruba"); @@ -72,8 +71,8 @@ struct microcode_header_amd { } __attribute__((packed)); struct microcode_amd { - struct microcode_header_amd hdr; - unsigned int mpb[0]; + struct microcode_header_amd hdr; + unsigned int mpb[0]; }; #define UCODE_MAX_SIZE 2048 @@ -184,8 +183,8 @@ static int get_ucode_data(void *to, const u8 *from, size_t n) return 0; } -static void *get_next_ucode(const u8 *buf, unsigned int size, - unsigned int *mc_size) +static void * +get_next_ucode(const u8 *buf, unsigned int size, unsigned int *mc_size) { unsigned int total_size; u8 section_hdr[UCODE_CONTAINER_SECTION_HDR]; @@ -223,7 +222,6 @@ static void *get_next_ucode(const u8 *buf, unsigned int size, return mc; } - static int install_equiv_cpu_table(const u8 *buf) { u8 *container_hdr[UCODE_CONTAINER_HEADER_SIZE]; @@ -372,4 +370,3 @@ struct microcode_ops * __init init_amd_microcode(void) { return &microcode_amd_ops; } - diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c index c9b721ba968c..a0f3851ef310 100644 --- a/arch/x86/kernel/microcode_core.c +++ b/arch/x86/kernel/microcode_core.c @@ -70,67 +70,78 @@ * Fix sigmatch() macro to handle old CPUs with pf == 0. * Thanks to Stuart Swales for pointing out this bug.
*/ +#include <linux/platform_device.h> #include <linux/capability.h> -#include <linux/kernel.h> -#include <linux/init.h> -#include <linux/sched.h> +#include <linux/miscdevice.h> +#include <linux/firmware.h> #include <linux/smp_lock.h> +#include <linux/spinlock.h> #include <linux/cpumask.h> -#include <linux/module.h> -#include <linux/slab.h> +#include <linux/uaccess.h> #include <linux/vmalloc.h> -#include <linux/miscdevice.h> -#include <linux/spinlock.h> -#include <linux/mm.h> -#include <linux/fs.h> +#include <linux/kernel.h> +#include <linux/module.h> #include <linux/mutex.h> +#include <linux/sched.h> +#include <linux/init.h> +#include <linux/slab.h> #include <linux/cpu.h> -#include <linux/firmware.h> -#include <linux/platform_device.h> +#include <linux/fs.h> +#include <linux/mm.h> -#include <asm/msr.h> -#include <asm/uaccess.h> -#include <asm/processor.h> #include <asm/microcode.h> +#include <asm/processor.h> +#include <asm/msr.h> MODULE_DESCRIPTION("Microcode Update Driver"); MODULE_AUTHOR("Tigran Aivazian <tigran@aivazian.fsnet.co.uk>"); MODULE_LICENSE("GPL"); -#define MICROCODE_VERSION "2.00" +#define MICROCODE_VERSION "2.00" -static struct microcode_ops *microcode_ops; +static struct microcode_ops *microcode_ops; /* no concurrent ->write()s are allowed on /dev/cpu/microcode */ static DEFINE_MUTEX(microcode_mutex); -struct ucode_cpu_info ucode_cpu_info[NR_CPUS]; +struct ucode_cpu_info ucode_cpu_info[NR_CPUS]; EXPORT_SYMBOL_GPL(ucode_cpu_info); #ifdef CONFIG_MICROCODE_OLD_INTERFACE +struct update_for_cpu { + const void __user *buf; + size_t size; +}; + +static long update_for_cpu(void *_ufc) +{ + struct update_for_cpu *ufc = _ufc; + int error; + + error = microcode_ops->request_microcode_user(smp_processor_id(), + ufc->buf, ufc->size); + if (error < 0) + return error; + if (!error) + microcode_ops->apply_microcode(smp_processor_id()); + return error; +} + static int do_microcode_update(const void __user *buf, size_t size) { - cpumask_t old; int error = 0; int 
cpu; - - old = current->cpus_allowed; + struct update_for_cpu ufc = { .buf = buf, .size = size }; for_each_online_cpu(cpu) { struct ucode_cpu_info *uci = ucode_cpu_info + cpu; if (!uci->valid) continue; - - set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); - error = microcode_ops->request_microcode_user(cpu, buf, size); + error = work_on_cpu(cpu, update_for_cpu, &ufc); if (error < 0) - goto out; - if (!error) - microcode_ops->apply_microcode(cpu); + break; } -out: - set_cpus_allowed_ptr(current, &old); return error; } @@ -198,18 +209,33 @@ static void microcode_dev_exit(void) MODULE_ALIAS_MISCDEV(MICROCODE_MINOR); #else -#define microcode_dev_init() 0 -#define microcode_dev_exit() do { } while (0) +#define microcode_dev_init() 0 +#define microcode_dev_exit() do { } while (0) #endif /* fake device for request_firmware */ -static struct platform_device *microcode_pdev; +static struct platform_device *microcode_pdev; + +static long reload_for_cpu(void *unused) +{ + struct ucode_cpu_info *uci = ucode_cpu_info + smp_processor_id(); + int err = 0; + + mutex_lock(&microcode_mutex); + if (uci->valid) { + err = microcode_ops->request_microcode_fw(smp_processor_id(), + &microcode_pdev->dev); + if (!err) + microcode_ops->apply_microcode(smp_processor_id()); + } + mutex_unlock(&microcode_mutex); + return err; +} static ssize_t reload_store(struct sys_device *dev, struct sysdev_attribute *attr, const char *buf, size_t sz) { - struct ucode_cpu_info *uci = ucode_cpu_info + dev->id; char *end; unsigned long val = simple_strtoul(buf, &end, 0); int err = 0; @@ -218,21 +244,9 @@ static ssize_t reload_store(struct sys_device *dev, if (end == buf) return -EINVAL; if (val == 1) { - cpumask_t old = current->cpus_allowed; - get_online_cpus(); - if (cpu_online(cpu)) { - set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); - mutex_lock(&microcode_mutex); - if (uci->valid) { - err = microcode_ops->request_microcode_fw(cpu, - &microcode_pdev->dev); - if (!err) - microcode_ops->apply_microcode(cpu); - } -
mutex_unlock(&microcode_mutex); - set_cpus_allowed_ptr(current, &old); - } + if (cpu_online(cpu)) + err = work_on_cpu(cpu, reload_for_cpu, NULL); put_online_cpus(); } if (err) @@ -268,8 +282,8 @@ static struct attribute *mc_default_attrs[] = { }; static struct attribute_group mc_attr_group = { - .attrs = mc_default_attrs, - .name = "microcode", + .attrs = mc_default_attrs, + .name = "microcode", }; static void __microcode_fini_cpu(int cpu) @@ -328,9 +342,9 @@ static int microcode_resume_cpu(int cpu) return 0; } -static void microcode_update_cpu(int cpu) +static long microcode_update_cpu(void *unused) { - struct ucode_cpu_info *uci = ucode_cpu_info + cpu; + struct ucode_cpu_info *uci = ucode_cpu_info + smp_processor_id(); int err = 0; /* @@ -338,30 +352,27 @@ static void microcode_update_cpu(int cpu) * otherwise just request a firmware: */ if (uci->valid) { - err = microcode_resume_cpu(cpu); - } else { - collect_cpu_info(cpu); + err = microcode_resume_cpu(smp_processor_id()); + } else { + collect_cpu_info(smp_processor_id()); if (uci->valid && system_state == SYSTEM_RUNNING) - err = microcode_ops->request_microcode_fw(cpu, + err = microcode_ops->request_microcode_fw( + smp_processor_id(), &microcode_pdev->dev); } if (!err) - microcode_ops->apply_microcode(cpu); + microcode_ops->apply_microcode(smp_processor_id()); + return err; } -static void microcode_init_cpu(int cpu) +static int microcode_init_cpu(int cpu) { - cpumask_t old = current->cpus_allowed; - - set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); - /* We should bind the task to the CPU */ - BUG_ON(raw_smp_processor_id() != cpu); - + int err; mutex_lock(&microcode_mutex); - microcode_update_cpu(cpu); + err = work_on_cpu(cpu, microcode_update_cpu, NULL); mutex_unlock(&microcode_mutex); - set_cpus_allowed_ptr(current, &old); + return err; } static int mc_sysdev_add(struct sys_device *sys_dev) @@ -379,8 +390,11 @@ static int mc_sysdev_add(struct sys_device *sys_dev) if (err) return err; - microcode_init_cpu(cpu); - return 0; +
err = microcode_init_cpu(cpu); + if (err) + sysfs_remove_group(&sys_dev->kobj, &mc_attr_group); + + return err; } static int mc_sysdev_remove(struct sys_device *sys_dev) @@ -404,14 +418,14 @@ static int mc_sysdev_resume(struct sys_device *dev) return 0; /* only CPU 0 will apply ucode here */ - microcode_update_cpu(0); + microcode_update_cpu(NULL); return 0; } static struct sysdev_driver mc_sysdev_driver = { - .add = mc_sysdev_add, - .remove = mc_sysdev_remove, - .resume = mc_sysdev_resume, + .add = mc_sysdev_add, + .remove = mc_sysdev_remove, + .resume = mc_sysdev_resume, }; static __cpuinit int @@ -424,7 +438,9 @@ mc_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu) switch (action) { case CPU_ONLINE: case CPU_ONLINE_FROZEN: - microcode_init_cpu(cpu); + if (microcode_init_cpu(cpu)) + printk(KERN_ERR "microcode: failed to init CPU%d\n", + cpu); case CPU_DOWN_FAILED: case CPU_DOWN_FAILED_FROZEN: pr_debug("microcode: CPU%d added\n", cpu); @@ -448,7 +464,7 @@ mc_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu) } static struct notifier_block __refdata mc_cpu_notifier = { - .notifier_call = mc_cpu_callback, + .notifier_call = mc_cpu_callback, }; static int __init microcode_init(void) diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c index 5e9f4fc51385..149b9ec7c1ab 100644 --- a/arch/x86/kernel/microcode_intel.c +++ b/arch/x86/kernel/microcode_intel.c @@ -70,28 +70,28 @@ * Fix sigmatch() macro to handle old CPUs with pf == 0. * Thanks to Stuart Swales for pointing out this bug. 
*/ +#include <linux/platform_device.h> #include <linux/capability.h> -#include <linux/kernel.h> -#include <linux/init.h> -#include <linux/sched.h> +#include <linux/miscdevice.h> +#include <linux/firmware.h> #include <linux/smp_lock.h> +#include <linux/spinlock.h> #include <linux/cpumask.h> -#include <linux/module.h> -#include <linux/slab.h> +#include <linux/uaccess.h> #include <linux/vmalloc.h> -#include <linux/miscdevice.h> -#include <linux/spinlock.h> -#include <linux/mm.h> -#include <linux/fs.h> +#include <linux/kernel.h> +#include <linux/module.h> #include <linux/mutex.h> +#include <linux/sched.h> +#include <linux/init.h> +#include <linux/slab.h> #include <linux/cpu.h> -#include <linux/firmware.h> -#include <linux/platform_device.h> -#include <linux/uaccess.h> +#include <linux/fs.h> +#include <linux/mm.h> -#include <asm/msr.h> -#include <asm/processor.h> #include <asm/microcode.h> +#include <asm/processor.h> +#include <asm/msr.h> MODULE_DESCRIPTION("Microcode Update Driver"); MODULE_AUTHOR("Tigran Aivazian <tigran@aivazian.fsnet.co.uk>"); @@ -129,12 +129,13 @@ struct extended_sigtable { struct extended_signature sigs[0]; }; -#define DEFAULT_UCODE_DATASIZE (2000) +#define DEFAULT_UCODE_DATASIZE (2000) #define MC_HEADER_SIZE (sizeof(struct microcode_header_intel)) #define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE) #define EXT_HEADER_SIZE (sizeof(struct extended_sigtable)) #define EXT_SIGNATURE_SIZE (sizeof(struct extended_signature)) #define DWSIZE (sizeof(u32)) + #define get_totalsize(mc) \ (((struct microcode_intel *)mc)->hdr.totalsize ? \ ((struct microcode_intel *)mc)->hdr.totalsize : \ @@ -197,30 +198,31 @@ static inline int update_match_cpu(struct cpu_signature *csig, int sig, int pf) } static inline int -update_match_revision(struct microcode_header_intel *mc_header, int rev) +update_match_revision(struct microcode_header_intel *mc_header, int rev) { return (mc_header->rev <= rev) ? 
0 : 1; } static int microcode_sanity_check(void *mc) { + unsigned long total_size, data_size, ext_table_size; struct microcode_header_intel *mc_header = mc; struct extended_sigtable *ext_header = NULL; - struct extended_signature *ext_sig; - unsigned long total_size, data_size, ext_table_size; int sum, orig_sum, ext_sigcount = 0, i; + struct extended_signature *ext_sig; total_size = get_totalsize(mc_header); data_size = get_datasize(mc_header); + if (data_size + MC_HEADER_SIZE > total_size) { printk(KERN_ERR "microcode: error! " - "Bad data size in microcode data file\n"); + "Bad data size in microcode data file\n"); return -EINVAL; } if (mc_header->ldrver != 1 || mc_header->hdrver != 1) { printk(KERN_ERR "microcode: error! " - "Unknown microcode update format\n"); + "Unknown microcode update format\n"); return -EINVAL; } ext_table_size = total_size - (MC_HEADER_SIZE + data_size); @@ -318,11 +320,15 @@ get_matching_microcode(struct cpu_signature *cpu_sig, void *mc, int rev) static void apply_microcode(int cpu) { + struct microcode_intel *mc_intel; + struct ucode_cpu_info *uci; unsigned long flags; unsigned int val[2]; - int cpu_num = raw_smp_processor_id(); - struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - struct microcode_intel *mc_intel = uci->mc; + int cpu_num; + + cpu_num = raw_smp_processor_id(); + uci = ucode_cpu_info + cpu; + mc_intel = uci->mc; /* We should bind the task to the CPU */ BUG_ON(cpu_num != cpu); @@ -348,15 +354,17 @@ static void apply_microcode(int cpu) spin_unlock_irqrestore(&microcode_update_lock, flags); if (val[1] != mc_intel->hdr.rev) { printk(KERN_ERR "microcode: CPU%d update from revision " - "0x%x to 0x%x failed\n", cpu_num, uci->cpu_sig.rev, val[1]); + "0x%x to 0x%x failed\n", + cpu_num, uci->cpu_sig.rev, val[1]); return; } printk(KERN_INFO "microcode: CPU%d updated from revision " - "0x%x to 0x%x, date = %04x-%02x-%02x \n", + "0x%x to 0x%x, date = %04x-%02x-%02x \n", cpu_num, uci->cpu_sig.rev, val[1], mc_intel->hdr.date & 0xffff,
mc_intel->hdr.date >> 24, (mc_intel->hdr.date >> 16) & 0xff); + uci->cpu_sig.rev = val[1]; } @@ -404,18 +412,23 @@ static int generic_load_microcode(int cpu, void *data, size_t size, leftover -= mc_size; } - if (new_mc) { - if (!leftover) { - if (uci->mc) - vfree(uci->mc); - uci->mc = (struct microcode_intel *)new_mc; - pr_debug("microcode: CPU%d found a matching microcode update with" - " version 0x%x (current=0x%x)\n", - cpu, new_rev, uci->cpu_sig.rev); - } else - vfree(new_mc); + if (!new_mc) + goto out; + + if (leftover) { + vfree(new_mc); + goto out; } + if (uci->mc) + vfree(uci->mc); + uci->mc = (struct microcode_intel *)new_mc; + + pr_debug("microcode: CPU%d found a matching microcode update with" + " version 0x%x (current=0x%x)\n", + cpu, new_rev, uci->cpu_sig.rev); + + out: return (int)leftover; } diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 90f5b9ef5def..745579bc8256 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -40,7 +40,7 @@ EXPORT_SYMBOL(bad_dma_address); to older i386. */ struct device x86_dma_fallback_dev = { .init_name = "fallback device", - .coherent_dma_mask = DMA_32BIT_MASK, + .coherent_dma_mask = DMA_BIT_MASK(32), .dma_mask = &x86_dma_fallback_dev.coherent_dma_mask, }; EXPORT_SYMBOL(x86_dma_fallback_dev); @@ -148,7 +148,7 @@ again: if (!is_buffer_dma_capable(dma_mask, addr, size)) { __free_pages(page, get_order(size)); - if (dma_mask < DMA_32BIT_MASK && !(flag & GFP_DMA)) { + if (dma_mask < DMA_BIT_MASK(32) && !(flag & GFP_DMA)) { flag = (flag & ~GFP_DMA32) | GFP_DMA; goto again; } @@ -243,7 +243,7 @@ int dma_supported(struct device *dev, u64 mask) /* Copied from i386. Doesn't make much sense, because it will only work for pci_alloc_coherent. The caller just has to use GFP_DMA in this case. */ - if (mask < DMA_24BIT_MASK) + if (mask < DMA_BIT_MASK(24)) return 0; /* Tell the device to use SAC when IOMMU force is on. 
This @@ -258,7 +258,7 @@ int dma_supported(struct device *dev, u64 mask) SAC for these. Assume all masks <= 40 bits are of this type. Normally this doesn't make any difference, but gives more gentle handling of IOMMU overflow. */ - if (iommu_sac_force && (mask >= DMA_40BIT_MASK)) { + if (iommu_sac_force && (mask >= DMA_BIT_MASK(40))) { dev_info(dev, "Force SAC with mask %Lx\n", mask); return 0; } diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c index c6d703b39326..71d412a09f30 100644 --- a/arch/x86/kernel/pci-nommu.c +++ b/arch/x86/kernel/pci-nommu.c @@ -15,7 +15,7 @@ static int check_addr(char *name, struct device *hwdev, dma_addr_t bus, size_t size) { if (hwdev && !is_buffer_dma_capable(*hwdev->dma_mask, bus, size)) { - if (*hwdev->dma_mask >= DMA_32BIT_MASK) + if (*hwdev->dma_mask >= DMA_BIT_MASK(32)) printk(KERN_ERR "nommu_%s: overflow %Lx+%zu of device mask %Lx\n", name, (long long)bus, size, diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 156f87582c6c..ca989158e847 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -8,7 +8,7 @@ #include <linux/module.h> #include <linux/pm.h> #include <linux/clockchips.h> -#include <linux/ftrace.h> +#include <trace/power.h> #include <asm/system.h> #include <asm/apic.h> #include <asm/idle.h> @@ -22,6 +22,9 @@ EXPORT_SYMBOL(idle_nomwait); struct kmem_cache *task_xstate_cachep; +DEFINE_TRACE(power_start); +DEFINE_TRACE(power_end); + int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) { *dst = *src; @@ -325,7 +328,7 @@ void stop_this_cpu(void *dummy) /* * Remove this CPU: */ - cpu_clear(smp_processor_id(), cpu_online_map); + set_cpu_online(smp_processor_id(), false); disable_local_APIC(); for (;;) { @@ -475,12 +478,13 @@ static int __cpuinit check_c1e_idle(const struct cpuinfo_x86 *c) return 1; } -static cpumask_t c1e_mask = CPU_MASK_NONE; +static cpumask_var_t c1e_mask; static int c1e_detected; void c1e_remove_cpu(int cpu) { - 
cpu_clear(cpu, c1e_mask); + if (c1e_mask != NULL) + cpumask_clear_cpu(cpu, c1e_mask); } /* @@ -509,8 +513,8 @@ static void c1e_idle(void) if (c1e_detected) { int cpu = smp_processor_id(); - if (!cpu_isset(cpu, c1e_mask)) { - cpu_set(cpu, c1e_mask); + if (!cpumask_test_cpu(cpu, c1e_mask)) { + cpumask_set_cpu(cpu, c1e_mask); /* * Force broadcast so ACPI can not interfere. Needs * to run with interrupts enabled as it uses @@ -562,6 +566,15 @@ void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c) pm_idle = default_idle; } +void __init init_c1e_mask(void) +{ + /* If we're using c1e_idle, we need to allocate c1e_mask. */ + if (pm_idle == c1e_idle) { + alloc_cpumask_var(&c1e_mask, GFP_KERNEL); + cpumask_clear(c1e_mask); + } +} + static int __init idle_setup(char *str) { if (!str) diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 14014d766cad..76f8f84043a2 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -245,7 +245,7 @@ void prepare_to_copy(struct task_struct *tsk) unlazy_fpu(tsk); } -int copy_thread(int nr, unsigned long clone_flags, unsigned long sp, +int copy_thread(unsigned long clone_flags, unsigned long sp, unsigned long unused, struct task_struct *p, struct pt_regs *regs) { diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index abb7e6a7f0c6..b751a41392b1 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -278,7 +278,7 @@ void prepare_to_copy(struct task_struct *tsk) unlazy_fpu(tsk); } -int copy_thread(int nr, unsigned long clone_flags, unsigned long sp, +int copy_thread(unsigned long clone_flags, unsigned long sp, unsigned long unused, struct task_struct *p, struct pt_regs *regs) { diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 19378715f415..fe9345c967de 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -21,6 +21,7 @@ #include <linux/audit.h> #include <linux/seccomp.h> #include <linux/signal.h> 
+#include <linux/ftrace.h> #include <asm/uaccess.h> #include <asm/pgtable.h> @@ -1415,6 +1416,9 @@ asmregparm long syscall_trace_enter(struct pt_regs *regs) tracehook_report_syscall_entry(regs)) ret = -1L; + if (unlikely(test_thread_flag(TIF_SYSCALL_FTRACE))) + ftrace_syscall_enter(regs); + if (unlikely(current->audit_context)) { if (IS_IA32) audit_syscall_entry(AUDIT_ARCH_I386, @@ -1438,6 +1442,9 @@ asmregparm void syscall_trace_leave(struct pt_regs *regs) if (unlikely(current->audit_context)) audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax); + if (unlikely(test_thread_flag(TIF_SYSCALL_FTRACE))) + ftrace_syscall_exit(regs); + if (test_thread_flag(TIF_SYSCALL_TRACE)) tracehook_report_syscall_exit(regs, 0); @@ -1455,6 +1462,6 @@ asmregparm void syscall_trace_leave(struct pt_regs *regs) * system call instruction. */ if (test_thread_flag(TIF_SINGLESTEP) && - tracehook_consider_fatal_signal(current, SIGTRAP, SIG_DFL)) + tracehook_consider_fatal_signal(current, SIGTRAP)) send_sigtrap(current, regs, 0, TRAP_BRKPT); } diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 400331b50a53..3a97a4cf1872 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -153,7 +153,6 @@ static struct page * __init pcpur_get_page(unsigned int cpu, int pageno) static ssize_t __init setup_pcpu_remap(size_t static_size) { static struct vm_struct vm; - pg_data_t *last; size_t ptrs_size, dyn_size; unsigned int cpu; ssize_t ret; @@ -162,22 +161,9 @@ static ssize_t __init setup_pcpu_remap(size_t static_size) * If large page isn't supported, there's no benefit in doing * this. Also, on non-NUMA, embedding is better. 
*/ - if (!cpu_has_pse || pcpu_need_numa()) + if (!cpu_has_pse || !pcpu_need_numa()) return -EINVAL; - last = NULL; - for_each_possible_cpu(cpu) { - int node = early_cpu_to_node(cpu); - - if (node_online(node) && NODE_DATA(node) && - last && last != NODE_DATA(node)) - goto proceed; - - last = NODE_DATA(node); - } - return -EINVAL; - -proceed: /* * Currently supports only single page. Supporting multiple * pages won't be too difficult if it ever becomes necessary. diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index dfcc74ab0ab6..14425166b8e3 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -221,7 +221,7 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size, if (!onsigstack) { /* This is the X/Open sanctioned signal stack switching. */ if (ka->sa.sa_flags & SA_ONSTACK) { - if (sas_ss_flags(sp) == 0) + if (current->sas_ss_size) sp = current->sas_ss_sp + current->sas_ss_size; } else { #ifdef CONFIG_X86_32 diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index ef7d10170c30..58d24ef917d8 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -101,11 +101,11 @@ EXPORT_SYMBOL(smp_num_siblings); DEFINE_PER_CPU(u16, cpu_llc_id) = BAD_APICID; /* representing HT siblings of each logical CPU */ -DEFINE_PER_CPU(cpumask_t, cpu_sibling_map); +DEFINE_PER_CPU(cpumask_var_t, cpu_sibling_map); EXPORT_PER_CPU_SYMBOL(cpu_sibling_map); /* representing HT and core siblings of each logical CPU */ -DEFINE_PER_CPU(cpumask_t, cpu_core_map); +DEFINE_PER_CPU(cpumask_var_t, cpu_core_map); EXPORT_PER_CPU_SYMBOL(cpu_core_map); /* Per CPU bogomips and other parameters */ @@ -115,11 +115,6 @@ EXPORT_PER_CPU_SYMBOL(cpu_info); atomic_t init_deasserted; #if defined(CONFIG_NUMA) && defined(CONFIG_X86_32) - -/* which logical CPUs are on which nodes */ -cpumask_t node_to_cpumask_map[MAX_NUMNODES] __read_mostly = - { [0 ... 
MAX_NUMNODES-1] = CPU_MASK_NONE }; -EXPORT_SYMBOL(node_to_cpumask_map); /* which node each logical CPU is on */ int cpu_to_node_map[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = 0 }; EXPORT_SYMBOL(cpu_to_node_map); @@ -128,7 +123,7 @@ EXPORT_SYMBOL(cpu_to_node_map); static void map_cpu_to_node(int cpu, int node) { printk(KERN_INFO "Mapping cpu %d to node %d\n", cpu, node); - cpumask_set_cpu(cpu, &node_to_cpumask_map[node]); + cpumask_set_cpu(cpu, node_to_cpumask_map[node]); cpu_to_node_map[cpu] = node; } @@ -139,7 +134,7 @@ static void unmap_cpu_to_node(int cpu) printk(KERN_INFO "Unmapping cpu %d from all nodes\n", cpu); for (node = 0; node < MAX_NUMNODES; node++) - cpumask_clear_cpu(cpu, &node_to_cpumask_map[node]); + cpumask_clear_cpu(cpu, node_to_cpumask_map[node]); cpu_to_node_map[cpu] = 0; } #else /* !(CONFIG_NUMA && CONFIG_X86_32) */ @@ -301,7 +296,7 @@ notrace static void __cpuinit start_secondary(void *unused) __flush_tlb_all(); #endif - /* This must be done before setting cpu_online_map */ + /* This must be done before setting cpu_online_mask */ set_cpu_sibling_map(raw_smp_processor_id()); wmb(); @@ -334,6 +329,23 @@ notrace static void __cpuinit start_secondary(void *unused) cpu_idle(); } +#ifdef CONFIG_CPUMASK_OFFSTACK +/* In this case, llc_shared_map is a pointer to a cpumask. */ +static inline void copy_cpuinfo_x86(struct cpuinfo_x86 *dst, + const struct cpuinfo_x86 *src) +{ + struct cpumask *llc = dst->llc_shared_map; + *dst = *src; + dst->llc_shared_map = llc; +} +#else +static inline void copy_cpuinfo_x86(struct cpuinfo_x86 *dst, + const struct cpuinfo_x86 *src) +{ + *dst = *src; +} +#endif /* CONFIG_CPUMASK_OFFSTACK */ + /* * The bootstrap kernel entry code has set these up. 
Save them for * a given CPU @@ -343,7 +355,7 @@ void __cpuinit smp_store_cpu_info(int id) { struct cpuinfo_x86 *c = &cpu_data(id); - *c = boot_cpu_data; + copy_cpuinfo_x86(c, &boot_cpu_data); c->cpu_index = id; if (id != 0) identify_secondary_cpu(c); @@ -367,15 +379,15 @@ void __cpuinit set_cpu_sibling_map(int cpu) cpumask_set_cpu(cpu, cpu_sibling_mask(i)); cpumask_set_cpu(i, cpu_core_mask(cpu)); cpumask_set_cpu(cpu, cpu_core_mask(i)); - cpumask_set_cpu(i, &c->llc_shared_map); - cpumask_set_cpu(cpu, &o->llc_shared_map); + cpumask_set_cpu(i, c->llc_shared_map); + cpumask_set_cpu(cpu, o->llc_shared_map); } } } else { cpumask_set_cpu(cpu, cpu_sibling_mask(cpu)); } - cpumask_set_cpu(cpu, &c->llc_shared_map); + cpumask_set_cpu(cpu, c->llc_shared_map); if (current_cpu_data.x86_max_cores == 1) { cpumask_copy(cpu_core_mask(cpu), cpu_sibling_mask(cpu)); @@ -386,8 +398,8 @@ void __cpuinit set_cpu_sibling_map(int cpu) for_each_cpu(i, cpu_sibling_setup_mask) { if (per_cpu(cpu_llc_id, cpu) != BAD_APICID && per_cpu(cpu_llc_id, cpu) == per_cpu(cpu_llc_id, i)) { - cpumask_set_cpu(i, &c->llc_shared_map); - cpumask_set_cpu(cpu, &cpu_data(i).llc_shared_map); + cpumask_set_cpu(i, c->llc_shared_map); + cpumask_set_cpu(cpu, cpu_data(i).llc_shared_map); } if (c->phys_proc_id == cpu_data(i).phys_proc_id) { cpumask_set_cpu(i, cpu_core_mask(cpu)); @@ -425,12 +437,7 @@ const struct cpumask *cpu_coregroup_mask(int cpu) if (sched_mc_power_savings || sched_smt_power_savings) return cpu_core_mask(cpu); else - return &c->llc_shared_map; -} - -cpumask_t cpu_coregroup_map(int cpu) -{ - return *cpu_coregroup_mask(cpu); + return c->llc_shared_map; } static void impress_friends(void) @@ -897,9 +904,8 @@ int __cpuinit native_cpu_up(unsigned int cpu) */ static __init void disable_smp(void) { - /* use the read/write pointers to the present and possible maps */ - cpumask_copy(&cpu_present_map, cpumask_of(0)); - cpumask_copy(&cpu_possible_map, cpumask_of(0)); + init_cpu_present(cpumask_of(0)); + 
init_cpu_possible(cpumask_of(0)); smpboot_clear_io_apic_irqs(); if (smp_found_config) @@ -1031,6 +1037,8 @@ static void __init smp_cpu_index_default(void) */ void __init native_smp_prepare_cpus(unsigned int max_cpus) { + unsigned int i; + preempt_disable(); smp_cpu_index_default(); current_cpu_data = boot_cpu_data; @@ -1044,6 +1052,14 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) boot_cpu_logical_apicid = logical_smp_processor_id(); #endif current_thread_info()->cpu = 0; /* needed? */ + for_each_possible_cpu(i) { + alloc_cpumask_var(&per_cpu(cpu_sibling_map, i), GFP_KERNEL); + alloc_cpumask_var(&per_cpu(cpu_core_map, i), GFP_KERNEL); + alloc_cpumask_var(&cpu_data(i).llc_shared_map, GFP_KERNEL); + cpumask_clear(per_cpu(cpu_core_map, i)); + cpumask_clear(per_cpu(cpu_sibling_map, i)); + cpumask_clear(cpu_data(i).llc_shared_map); + } set_cpu_sibling_map(0); enable_IR_x2apic(); @@ -1132,11 +1148,11 @@ early_param("possible_cpus", _setup_possible_cpus); /* - * cpu_possible_map should be static, it cannot change as cpu's + * cpu_possible_mask should be static, it cannot change as cpu's * are onlined, or offlined. The reason is per-cpu data-structures * are allocated by some modules at init time, and dont expect to * do this dynamically on cpu arrival/departure. - * cpu_present_map on the other hand can change dynamically. + * cpu_present_mask on the other hand can change dynamically. * In case when cpu_hotplug is not compiled, then we resort to current * behaviour, which is cpu_possible == cpu_present. 
* - Ashok Raj diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S index 3bdb64829b82..ff5c8736b491 100644 --- a/arch/x86/kernel/syscall_table_32.S +++ b/arch/x86/kernel/syscall_table_32.S @@ -332,3 +332,5 @@ ENTRY(sys_call_table) .long sys_dup3 /* 330 */ .long sys_pipe2 .long sys_inotify_init1 + .long sys_preadv + .long sys_pwritev diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c index 79c073247284..deb5ebb32c3b 100644 --- a/arch/x86/kernel/tlb_uv.c +++ b/arch/x86/kernel/tlb_uv.c @@ -275,6 +275,8 @@ const struct cpumask *uv_flush_send_and_wait(int cpu, int this_blade, return NULL; } +static DEFINE_PER_CPU(cpumask_var_t, uv_flush_tlb_mask); + /** * uv_flush_tlb_others - globally purge translation cache of a virtual * address or all TLB's @@ -304,8 +306,7 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask, struct mm_struct *mm, unsigned long va, unsigned int cpu) { - static DEFINE_PER_CPU(cpumask_t, flush_tlb_mask); - struct cpumask *flush_mask = &__get_cpu_var(flush_tlb_mask); + struct cpumask *flush_mask = __get_cpu_var(uv_flush_tlb_mask); int i; int bit; int blade; @@ -755,6 +756,10 @@ static int __init uv_bau_init(void) if (!is_uv_system()) return 0; + for_each_possible_cpu(cur_cpu) + alloc_cpumask_var_node(&per_cpu(uv_flush_tlb_mask, cur_cpu), + GFP_KERNEL, cpu_to_node(cur_cpu)); + uv_bau_retry_limit = 1; uv_nshift = uv_hub_info->n_val; uv_mmask = (1UL << uv_hub_info->n_val) - 1; |