From b615ebdac97c648a2ae7d23c5a0bbb3972adf928 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 7 Dec 2006 02:14:00 +0100 Subject: [PATCH] x86: shorten lines in unwinder to be <= 80 characters Andrew complained about > 80 character lines in the new unwinder. Fix that. Signed-off-by: Andi Kleen --- arch/x86_64/kernel/traps.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) (limited to 'arch/x86_64/kernel') diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c index 0d65b22f229c..d3f43c958ca1 100644 --- a/arch/x86_64/kernel/traps.c +++ b/arch/x86_64/kernel/traps.c @@ -235,6 +235,8 @@ static int dump_trace_unwind(struct unwind_frame_info *info, void *context) return n; } +#define MSG(txt) ops->warning(data, txt) + /* * x86-64 can have upto three kernel stacks: * process stack @@ -248,11 +250,12 @@ static inline int valid_stack_ptr(struct thread_info *tinfo, void *p) return p > t && p < t + THREAD_SIZE - 3; } -void dump_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long * stack, +void dump_trace(struct task_struct *tsk, struct pt_regs *regs, + unsigned long *stack, struct stacktrace_ops *ops, void *data) { const unsigned cpu = smp_processor_id(); - unsigned long *irqstack_end = (unsigned long *)cpu_pda(cpu)->irqstackptr; + unsigned long *irqstack_end = (unsigned long*)cpu_pda(cpu)->irqstackptr; unsigned used = 0; struct thread_info *tinfo; @@ -268,28 +271,30 @@ void dump_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long * s if (unwind_init_frame_info(&info, tsk, regs) == 0) unw_ret = dump_trace_unwind(&info, &oad); } else if (tsk == current) - unw_ret = unwind_init_running(&info, dump_trace_unwind, &oad); + unw_ret = unwind_init_running(&info, dump_trace_unwind, + &oad); else { if (unwind_init_blocked(&info, tsk) == 0) unw_ret = dump_trace_unwind(&info, &oad); } if (unw_ret > 0) { if (call_trace == 1 && !arch_unw_user_mode(&info)) { - ops->warning_symbol(data, "DWARF2 unwinder stuck at %s\n", + ops->warning_symbol(data, + "DWARF2 unwinder stuck at %s\n", UNW_PC(&info)); if ((long)UNW_SP(&info) < 0) { - ops->warning(data, "Leftover inexact backtrace:\n"); + MSG("Leftover inexact backtrace:"); stack = (unsigned long *)UNW_SP(&info); if (!stack) return; } else - ops->warning(data, "Full inexact backtrace again:\n"); + MSG("Full inexact backtrace again:\n"); } else if (call_trace >= 1) return; else - ops->warning(data, "Full inexact backtrace again:\n"); + MSG("Full inexact backtrace again:\n"); } else - ops->warning(data, "Inexact backtrace:\n"); + MSG("Inexact backtrace:\n"); } if (!stack) { unsigned long dummy; -- cgit v1.2.3 From dd315df1767cf56bd4fb8d730fdff4a3d7e15d84 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 7 Dec 2006 02:14:00 +0100 Subject: [PATCH] x86: Compress stack unwinder output The unwinder has some extra newlines, which eat up loads of screen space when it spews. (See https://bugzilla.redhat.com/bugzilla/attachment.cgi?id=137900 for a nasty example). warning_symbol-> and warning-> already printk a newline, so don't add one in the strings passed to them. [AK: redone for new code] Signed-off-by: Dave Jones Signed-off-by: Andi Kleen --- arch/i386/kernel/traps.c | 10 +++++----- arch/x86_64/kernel/traps.c | 8 ++++---- 2 files changed, 9 insertions(+), 9 deletions(-) (limited to 'arch/x86_64/kernel') diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c index fe81d89c50d3..396041a46e3d 100644 --- a/arch/i386/kernel/traps.c +++ b/arch/i386/kernel/traps.c @@ -202,22 +202,22 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, if (unw_ret > 0) { if (call_trace == 1 && !arch_unw_user_mode(&info)) { ops->warning_symbol(data, - "DWARF2 unwinder stuck at %s\n", + "DWARF2 unwinder stuck at %s", UNW_PC(&info)); if (UNW_SP(&info) >= PAGE_OFFSET) { - MSG("Leftover inexact backtrace:\n"); + MSG("Leftover inexact backtrace:"); stack = (void *)UNW_SP(&info); if (!stack) return; ebp = UNW_FP(&info); } else - MSG("Full inexact backtrace again:\n"); + MSG("Full inexact backtrace again:"); } else if (call_trace >= 1) return; else - MSG("Full inexact backtrace again:\n"); + MSG("Full inexact backtrace again:"); } else - MSG("Inexact backtrace:\n"); + MSG("Inexact backtrace:"); } if (!stack) { unsigned long dummy; diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c index d3f43c958ca1..eedd4e759c3a 100644 --- a/arch/x86_64/kernel/traps.c +++ b/arch/x86_64/kernel/traps.c @@ -280,7 +280,7 @@ void dump_trace(struct task_struct *tsk, struct pt_regs *regs, if (unw_ret > 0) { if (call_trace == 1 && !arch_unw_user_mode(&info)) { ops->warning_symbol(data, - "DWARF2 unwinder stuck at %s\n", + "DWARF2 unwinder stuck at %s", UNW_PC(&info)); if ((long)UNW_SP(&info) < 0) { MSG("Leftover inexact backtrace:"); @@ -288,13 +288,13 @@ void dump_trace(struct task_struct *tsk, struct pt_regs *regs, if (!stack) return; } else - MSG("Full inexact backtrace again:\n"); + MSG("Full inexact backtrace again:"); } else if (call_trace >= 1) return; else - MSG("Full inexact backtrace again:\n"); + MSG("Full inexact backtrace again:"); } else - MSG("Inexact backtrace:\n"); + MSG("Inexact backtrace:"); } if (!stack) { unsigned long dummy; -- cgit v1.2.3 From 36b2a8d5aff4cb3ee83d5e40447a8f073bcfe2fb Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Thu, 7 Dec 2006 02:14:01 +0100 Subject: [PATCH] x86-64: add X86_FEATURE_PEBS and detection Here is a patch (used by perfmon2) to detect the presence of the Precise Event Based Sampling (PEBS) feature for Intel 64-bit processors. The patch also adds the cpu_has_pebs macro. changelog: - adds X86_FEATURE_PEBS - adds cpu_has_pebs to test for X86_FEATURE_PEBS Signed-off-by: stephane eranian Signed-off-by: Andi Kleen --- arch/x86_64/kernel/setup.c | 7 +++++++ include/asm-x86_64/cpufeature.h | 2 ++ 2 files changed, 9 insertions(+) (limited to 'arch/x86_64/kernel') diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c index fc944b5e8f4a..619af2e2fa26 100644 --- a/arch/x86_64/kernel/setup.c +++ b/arch/x86_64/kernel/setup.c @@ -835,6 +835,13 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) set_bit(X86_FEATURE_ARCH_PERFMON, &c->x86_capability); } + if (cpu_has_ds) { + unsigned int l1, l2; + rdmsr(MSR_IA32_MISC_ENABLE, l1, l2); + if (!(l1 & (1<<12))) + set_bit(X86_FEATURE_PEBS, c->x86_capability); + } + n = c->extended_cpuid_level; if (n >= 0x80000008) { unsigned eax = cpuid_eax(0x80000008); diff --git a/include/asm-x86_64/cpufeature.h b/include/asm-x86_64/cpufeature.h index 65eb39e8f3cc..d280384807ef 100644 --- a/include/asm-x86_64/cpufeature.h +++ b/include/asm-x86_64/cpufeature.h @@ -68,6 +68,7 @@ #define X86_FEATURE_FXSAVE_LEAK (3*32+7) /* FIP/FOP/FDP leaks through FXSAVE */ #define X86_FEATURE_UP (3*32+8) /* SMP kernel running on UP */ #define X86_FEATURE_ARCH_PERFMON (3*32+9) /* Intel Architectural PerfMon */ +#define X86_FEATURE_PEBS (3*32+10) /* Precise-Event Based Sampling */ /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */ #define X86_FEATURE_XMM3 (4*32+ 0) /* Streaming SIMD Extensions-3 */ @@ -113,5 +114,6 @@ #define cpu_has_centaur_mcr 0 #define cpu_has_clflush boot_cpu_has(X86_FEATURE_CLFLSH) #define cpu_has_ds boot_cpu_has(X86_FEATURE_DS) +#define cpu_has_pebs boot_cpu_has(X86_FEATURE_PEBS) #endif /* __ASM_X8664_CPUFEATURE_H */ -- cgit v1.2.3 From e2764a1e306c986053a52b33748c33463cf888de Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 7 Dec 2006 02:14:01 +0100 Subject: [PATCH] x86-64: use BUILD_BUG_ON in FPU code Signed-off-by: Alexey Dobriyan Signed-off-by: Andi Kleen Cc: Andi Kleen Signed-off-by: Andrew Morton --- arch/x86_64/kernel/i387.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'arch/x86_64/kernel') diff --git a/arch/x86_64/kernel/i387.c b/arch/x86_64/kernel/i387.c index 3aa1e9bb781d..1d58c13bc6bc 100644 --- a/arch/x86_64/kernel/i387.c +++ b/arch/x86_64/kernel/i387.c @@ -82,11 +82,8 @@ int save_i387(struct _fpstate __user *buf) struct task_struct *tsk = current; int err = 0; - { - extern void bad_user_i387_struct(void); - if (sizeof(struct user_i387_struct) != sizeof(tsk->thread.i387.fxsave)) - bad_user_i387_struct(); - } + BUILD_BUG_ON(sizeof(struct user_i387_struct) != + sizeof(tsk->thread.i387.fxsave)); if ((unsigned long)buf % 16) printk("save_i387: bad fpstate %p\n",buf); -- cgit v1.2.3 From bb81a09e55eaf7e5f798468ab971469b6f66a259 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Thu, 7 Dec 2006 02:14:01 +0100 Subject: [PATCH] x86: all cpu backtrace When a spinlock lockup occurs, arrange for the NMI code to emit an all-cpu backtrace, so we get to see which CPU is holding the lock, and where. Cc: Andi Kleen Cc: Ingo Molnar Cc: Badari Pulavarty Signed-off-by: Andrew Morton Signed-off-by: Andi Kleen --- arch/i386/kernel/nmi.c | 26 ++++++++++++++++++++++++++ arch/x86_64/kernel/nmi.c | 29 ++++++++++++++++++++++++++++- include/asm-i386/nmi.h | 8 ++++++++ include/asm-x86_64/nmi.h | 3 +++ include/linux/nmi.h | 5 +++++ lib/spinlock_debug.c | 4 ++++ 6 files changed, 74 insertions(+), 1 deletion(-) (limited to 'arch/x86_64/kernel') diff --git a/arch/i386/kernel/nmi.c b/arch/i386/kernel/nmi.c index eaafe233a5da..171194ccb7bc 100644 --- a/arch/i386/kernel/nmi.c +++ b/arch/i386/kernel/nmi.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -42,6 +43,8 @@ int nmi_watchdog_enabled; static DEFINE_PER_CPU(unsigned long, perfctr_nmi_owner); static DEFINE_PER_CPU(unsigned long, evntsel_nmi_owner[3]); +static cpumask_t backtrace_mask = CPU_MASK_NONE; + /* this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and it's * offset from MSR_P4_BSU_ESCR0. It will be the max for all platforms (for now) */ @@ -907,6 +910,16 @@ __kprobes int nmi_watchdog_tick(struct pt_regs * regs, unsigned reason) touched = 1; } + if (cpu_isset(cpu, backtrace_mask)) { + static DEFINE_SPINLOCK(lock); /* Serialise the printks */ + + spin_lock(&lock); + printk("NMI backtrace for cpu %d\n", cpu); + dump_stack(); + spin_unlock(&lock); + cpu_clear(cpu, backtrace_mask); + } + sum = per_cpu(irq_stat, cpu).apic_timer_irqs; /* if the apic timer isn't firing, this cpu isn't doing much */ @@ -1033,6 +1046,19 @@ int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file, #endif +void __trigger_all_cpu_backtrace(void) +{ + int i; + + backtrace_mask = cpu_online_map; + /* Wait for up to 10 seconds for all CPUs to do the backtrace */ + for (i = 0; i < 10 * 1000; i++) { + if (cpus_empty(backtrace_mask)) + break; + mdelay(1); + } +} + EXPORT_SYMBOL(nmi_active); EXPORT_SYMBOL(nmi_watchdog); EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi); diff --git a/arch/x86_64/kernel/nmi.c b/arch/x86_64/kernel/nmi.c index 7af9cb3e2d99..27e95e7922c1 100644 --- a/arch/x86_64/kernel/nmi.c +++ b/arch/x86_64/kernel/nmi.c @@ -12,14 +12,15 @@ * Mikael Pettersson : PM converted to driver model. Disable/enable API. */ +#include #include #include #include #include #include -#include #include #include +#include #include #include @@ -41,6 +42,8 @@ int panic_on_unrecovered_nmi; static DEFINE_PER_CPU(unsigned, perfctr_nmi_owner); static DEFINE_PER_CPU(unsigned, evntsel_nmi_owner[2]); +static cpumask_t backtrace_mask = CPU_MASK_NONE; + /* this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and it's * offset from MSR_P4_BSU_ESCR0. It will be the max for all platforms (for now) */ @@ -782,6 +785,7 @@ int __kprobes nmi_watchdog_tick(struct pt_regs * regs, unsigned reason) { int sum; int touched = 0; + int cpu = smp_processor_id(); struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); u64 dummy; int rc=0; @@ -799,6 +803,16 @@ int __kprobes nmi_watchdog_tick(struct pt_regs * regs, unsigned reason) touched = 1; } + if (cpu_isset(cpu, backtrace_mask)) { + static DEFINE_SPINLOCK(lock); /* Serialise the printks */ + + spin_lock(&lock); + printk("NMI backtrace for cpu %d\n", cpu); + dump_stack(); + spin_unlock(&lock); + cpu_clear(cpu, backtrace_mask); + } + #ifdef CONFIG_X86_MCE /* Could check oops_in_progress here too, but it's safer not too */ @@ -931,6 +945,19 @@ int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file, #endif +void __trigger_all_cpu_backtrace(void) +{ + int i; + + backtrace_mask = cpu_online_map; + /* Wait for up to 10 seconds for all CPUs to do the backtrace */ + for (i = 0; i < 10 * 1000; i++) { + if (cpus_empty(backtrace_mask)) + break; + mdelay(1); + } +} + EXPORT_SYMBOL(nmi_active); EXPORT_SYMBOL(nmi_watchdog); EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi); diff --git a/include/asm-i386/nmi.h b/include/asm-i386/nmi.h index 269d315719ca..b04333ea6f31 100644 --- a/include/asm-i386/nmi.h +++ b/include/asm-i386/nmi.h @@ -5,6 +5,9 @@ #define ASM_NMI_H #include +#include + +#ifdef ARCH_HAS_NMI_WATCHDOG /** * do_nmi_callback @@ -42,4 +45,9 @@ extern int proc_nmi_enabled(struct ctl_table *, int , struct file *, void __user *, size_t *, loff_t *); extern int unknown_nmi_panic; +void __trigger_all_cpu_backtrace(void); +#define trigger_all_cpu_backtrace() __trigger_all_cpu_backtrace() + +#endif + #endif /* ASM_NMI_H */ diff --git a/include/asm-x86_64/nmi.h b/include/asm-x86_64/nmi.h index f367d4014b42..72375e7d32a8 100644 --- a/include/asm-x86_64/nmi.h +++ b/include/asm-x86_64/nmi.h @@ -77,4 +77,7 @@ extern int proc_nmi_enabled(struct ctl_table *, int , struct file *, extern int unknown_nmi_panic; +void __trigger_all_cpu_backtrace(void); +#define trigger_all_cpu_backtrace() __trigger_all_cpu_backtrace() + #endif /* ASM_NMI_H */ diff --git a/include/linux/nmi.h b/include/linux/nmi.h index e16904e28c3a..acb4ed130247 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -15,9 +15,14 @@ * disables interrupts for a long time. This call is stateless. */ #ifdef ARCH_HAS_NMI_WATCHDOG +#include extern void touch_nmi_watchdog(void); #else # define touch_nmi_watchdog() touch_softlockup_watchdog() #endif +#ifndef trigger_all_cpu_backtrace +#define trigger_all_cpu_backtrace() do { } while (0) +#endif + #endif diff --git a/lib/spinlock_debug.c b/lib/spinlock_debug.c index b6c4f898197c..479fd462eaa9 100644 --- a/lib/spinlock_debug.c +++ b/lib/spinlock_debug.c @@ -7,6 +7,7 @@ */ #include +#include #include #include #include @@ -117,6 +118,9 @@ static void __spin_lock_debug(spinlock_t *lock) raw_smp_processor_id(), current->comm, current->pid, lock); dump_stack(); +#ifdef CONFIG_SMP + trigger_all_cpu_backtrace(); +#endif } } } -- cgit v1.2.3 From 399287229c775a8962a852a761d65dc9475dec7c Mon Sep 17 00:00:00 2001 From: Aaron Durbin Date: Thu, 7 Dec 2006 02:14:01 +0100 Subject: [PATCH] x86-64: Insert Local and IO APIC(s) into resource map Insert the Local APIC and IO APIC(s) into the resource tree. It allows the APIC resources to be visible within /proc/iomem. The patch also takes into account IO APIC(s) mapped in the PCI space by deferring the insertion until after PCI has allocated its necessary resources. Signed-off-by: Aaron Durbin Signed-off-by: Andi Kleen Cc: Andi Kleen Cc: "Eric W. Biederman" Signed-off-by: Andrew Morton --- arch/x86_64/kernel/apic.c | 78 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 78 insertions(+) (limited to 'arch/x86_64/kernel') diff --git a/arch/x86_64/kernel/apic.c b/arch/x86_64/kernel/apic.c index 4d9d5ed942b2..5c468971e645 100644 --- a/arch/x86_64/kernel/apic.c +++ b/arch/x86_64/kernel/apic.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -45,6 +46,12 @@ int apic_calibrate_pmtmr __initdata; int disable_apic_timer __initdata; +static struct resource *ioapic_resources; +static struct resource lapic_resource = { + .name = "Local APIC", + .flags = IORESOURCE_MEM | IORESOURCE_BUSY, +}; + /* * cpu_mask that denotes the CPUs that needs timer interrupt coming in as * IPIs in place of local APIC timers @@ -585,6 +592,64 @@ static int __init detect_init_APIC (void) return 0; } +#ifdef CONFIG_X86_IO_APIC +static struct resource * __init ioapic_setup_resources(void) +{ +#define IOAPIC_RESOURCE_NAME_SIZE 11 + unsigned long n; + struct resource *res; + char *mem; + int i; + + if (nr_ioapics <= 0) + return NULL; + + n = IOAPIC_RESOURCE_NAME_SIZE + sizeof(struct resource); + n *= nr_ioapics; + + mem = alloc_bootmem(n); + res = (void *)mem; + + if (mem != NULL) { + memset(mem, 0, n); + mem += sizeof(struct resource) * nr_ioapics; + + for (i = 0; i < nr_ioapics; i++) { + res[i].name = mem; + res[i].flags = IORESOURCE_MEM | IORESOURCE_BUSY; + sprintf(mem, "IOAPIC %u", i); + mem += IOAPIC_RESOURCE_NAME_SIZE; + } + } + + ioapic_resources = res; + + return res; +} + +static int __init ioapic_insert_resources(void) +{ + int i; + struct resource *r = ioapic_resources; + + if (!r) { + printk("IO APIC resources could be not be allocated.\n"); + return -1; + } + + for (i = 0; i < nr_ioapics; i++) { + insert_resource(&iomem_resource, r); + r++; + } + + return 0; +} + +/* Insert the IO APIC resources after PCI initialization has occured to handle + * IO APICS that are mapped in on a BAR in PCI space. */ +late_initcall(ioapic_insert_resources); +#endif + void __init init_apic_mappings(void) { unsigned long apic_phys; @@ -604,6 +669,11 @@ void __init init_apic_mappings(void) apic_mapped = 1; apic_printk(APIC_VERBOSE,"mapped APIC to %16lx (%16lx)\n", APIC_BASE, apic_phys); + /* Put local APIC into the resource map. */ + lapic_resource.start = apic_phys; + lapic_resource.end = lapic_resource.start + PAGE_SIZE - 1; + insert_resource(&iomem_resource, &lapic_resource); + /* * Fetch the APIC ID of the BSP in case we have a * default configuration (or the MP table is broken). @@ -613,7 +683,9 @@ void __init init_apic_mappings(void) { unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0; int i; + struct resource *ioapic_res; + ioapic_res = ioapic_setup_resources(); for (i = 0; i < nr_ioapics; i++) { if (smp_found_config) { ioapic_phys = mp_ioapics[i].mpc_apicaddr; @@ -625,6 +697,12 @@ void __init init_apic_mappings(void) apic_printk(APIC_VERBOSE,"mapped IOAPIC to %016lx (%016lx)\n", __fix_to_virt(idx), ioapic_phys); idx++; + + if (ioapic_res != NULL) { + ioapic_res->start = ioapic_phys; + ioapic_res->end = ioapic_phys + (4 * 1024) - 1; + ioapic_res++; + } } } } -- cgit v1.2.3 From da68933e0a999fb13636653c710cca701b457ad2 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Thu, 7 Dec 2006 02:14:02 +0100 Subject: [PATCH] x86-64: dump_trace() atomicity fix Fix BUG: using smp_processor_id() in preemptible [00000001] code: in backtracer on preemptible debug kernels. Cc: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Andi Kleen --- arch/x86_64/kernel/traps.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'arch/x86_64/kernel') diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c index eedd4e759c3a..e37b4d77d5a8 100644 --- a/arch/x86_64/kernel/traps.c +++ b/arch/x86_64/kernel/traps.c @@ -254,7 +254,7 @@ void dump_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long *stack, struct stacktrace_ops *ops, void *data) { - const unsigned cpu = smp_processor_id(); + const unsigned cpu = get_cpu(); unsigned long *irqstack_end = (unsigned long*)cpu_pda(cpu)->irqstackptr; unsigned used = 0; struct thread_info *tinfo; @@ -286,11 +286,11 @@ void dump_trace(struct task_struct *tsk, struct pt_regs *regs, MSG("Leftover inexact backtrace:"); stack = (unsigned long *)UNW_SP(&info); if (!stack) - return; + goto out; } else MSG("Full inexact backtrace again:"); } else if (call_trace >= 1) - return; + goto out; else MSG("Full inexact backtrace again:"); } else @@ -385,6 +385,8 @@ void dump_trace(struct task_struct *tsk, struct pt_regs *regs, tinfo = current_thread_info(); HANDLE_STACK (valid_stack_ptr(tinfo, stack)); #undef HANDLE_STACK +out: + put_cpu(); } EXPORT_SYMBOL(dump_trace); -- cgit v1.2.3 From bcddc0155f351ab3f06c6ede6d91fd399ef9e18f Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Thu, 7 Dec 2006 02:14:02 +0100 Subject: [PATCH] x86-64: miscellaneous entry.S adjustments This patch: - makes ret_from_sys_call no longer global (all external users were previously switched to use int_ret_from_sys_call) - adjusts placement of a CFI_{REMEMBER,RESTORE}_STATE pair to better fit logic flow - eliminates an unnecessary pair of CFI_{REMEMBER,RESTORE}_STATE - glues together function- and unwinder-wise the previously separate system_call and int_ret_from_sys_call function fragments Signed-off-by: Jan Beulich Signed-off-by: Andi Kleen --- arch/x86_64/kernel/entry.S | 36 ++++++------------------------------ 1 file changed, 6 insertions(+), 30 deletions(-) (limited to 'arch/x86_64/kernel') diff --git a/arch/x86_64/kernel/entry.S b/arch/x86_64/kernel/entry.S index 7d401b00d822..601d332c4b79 100644 --- a/arch/x86_64/kernel/entry.S +++ b/arch/x86_64/kernel/entry.S @@ -230,7 +230,6 @@ ENTRY(system_call) CFI_REL_OFFSET rip,RIP-ARGOFFSET GET_THREAD_INFO(%rcx) testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%rcx) - CFI_REMEMBER_STATE jnz tracesys cmpq $__NR_syscall_max,%rax ja badsys @@ -241,7 +240,6 @@ ENTRY(system_call) * Syscall return path ending with SYSRET (fast path) * Has incomplete stack frame and undefined top of stack. */ - .globl ret_from_sys_call ret_from_sys_call: movl $_TIF_ALLWORK_MASK,%edi /* edi: flagmask */ @@ -251,8 +249,8 @@ sysret_check: TRACE_IRQS_OFF movl threadinfo_flags(%rcx),%edx andl %edi,%edx - CFI_REMEMBER_STATE jnz sysret_careful + CFI_REMEMBER_STATE /* * sysretq will re-enable interrupts: */ @@ -265,10 +263,10 @@ sysret_check: swapgs sysretq + CFI_RESTORE_STATE /* Handle reschedules */ /* edx: work, edi: workmask */ sysret_careful: - CFI_RESTORE_STATE bt $TIF_NEED_RESCHED,%edx jnc sysret_signal TRACE_IRQS_ON @@ -306,7 +304,6 @@ badsys: /* Do syscall tracing */ tracesys: - CFI_RESTORE_STATE SAVE_REST movq $-ENOSYS,RAX(%rsp) FIXUP_TOP_OF_STACK %rdi @@ -322,32 +319,13 @@ tracesys: call *sys_call_table(,%rax,8) 1: movq %rax,RAX-ARGOFFSET(%rsp) /* Use IRET because user could have changed frame */ - jmp int_ret_from_sys_call - CFI_ENDPROC -END(system_call) /* * Syscall return path ending with IRET. * Has correct top of stack, but partial stack frame. - */ -ENTRY(int_ret_from_sys_call) - CFI_STARTPROC simple - CFI_SIGNAL_FRAME - CFI_DEF_CFA rsp,SS+8-ARGOFFSET - /*CFI_REL_OFFSET ss,SS-ARGOFFSET*/ - CFI_REL_OFFSET rsp,RSP-ARGOFFSET - /*CFI_REL_OFFSET rflags,EFLAGS-ARGOFFSET*/ - /*CFI_REL_OFFSET cs,CS-ARGOFFSET*/ - CFI_REL_OFFSET rip,RIP-ARGOFFSET - CFI_REL_OFFSET rdx,RDX-ARGOFFSET - CFI_REL_OFFSET rcx,RCX-ARGOFFSET - CFI_REL_OFFSET rax,RAX-ARGOFFSET - CFI_REL_OFFSET rdi,RDI-ARGOFFSET - CFI_REL_OFFSET rsi,RSI-ARGOFFSET - CFI_REL_OFFSET r8,R8-ARGOFFSET - CFI_REL_OFFSET r9,R9-ARGOFFSET - CFI_REL_OFFSET r10,R10-ARGOFFSET - CFI_REL_OFFSET r11,R11-ARGOFFSET + */ + .globl int_ret_from_sys_call +int_ret_from_sys_call: cli TRACE_IRQS_OFF testl $3,CS-ARGOFFSET(%rsp) @@ -394,8 +372,6 @@ int_very_careful: popq %rdi CFI_ADJUST_CFA_OFFSET -8 andl $~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edi - cli - TRACE_IRQS_OFF jmp int_restore_rest int_signal: @@ -411,7 +387,7 @@ int_restore_rest: TRACE_IRQS_OFF jmp int_with_check CFI_ENDPROC -END(int_ret_from_sys_call) +END(system_call) /* * Certain special system calls that need to save a complete full stack frame. -- cgit v1.2.3 From 72690a21188586022a9e65cb6f1cc8845167555a Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 7 Dec 2006 02:14:03 +0100 Subject: [PATCH] x86: Don't use nested idle loops Currently the idle loop has two nested loops -- one high level in cpu_idle and in some low level idle functions another one. Looping in the low level idle functions breaks the idle notifiers because interrupts waking up sleep states need to execute exit_idle() which is only in cpu_idle(). So don't do that, only loop in cpu_idle(). This only removes code. In some cases e.g. poll_idle the idle loop is a little longer now because cpu_idle checks more things. I hope that isn't a problem ACPI idle doesn't change behaviour because it never looped anyways. Cc: len.brown@intel.com Cc: eranian@hpl.hp.com Signed-off-by: Andi Kleen --- arch/i386/kernel/process.c | 30 +++++++++--------------------- arch/x86_64/kernel/process.c | 30 +++++++++--------------------- 2 files changed, 18 insertions(+), 42 deletions(-) (limited to 'arch/x86_64/kernel') diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c index 8749b10d3805..8f42659ef9d2 100644 --- a/arch/i386/kernel/process.c +++ b/arch/i386/kernel/process.c @@ -100,22 +100,18 @@ EXPORT_SYMBOL(enable_hlt); */ void default_idle(void) { - local_irq_enable(); - if (!hlt_counter && boot_cpu_data.hlt_works_ok) { current_thread_info()->status &= ~TS_POLLING; smp_mb__after_clear_bit(); - while (!need_resched()) { - local_irq_disable(); - if (!need_resched()) - safe_halt(); - else - local_irq_enable(); - } + local_irq_disable(); + if (!need_resched()) + safe_halt(); /* enables interrupts racelessly */ + else + local_irq_enable(); current_thread_info()->status |= TS_POLLING; } else { - while (!need_resched()) - cpu_relax(); + /* loop is done by the caller */ + cpu_relax(); } } #ifdef CONFIG_APM_MODULE @@ -129,14 +125,7 @@ EXPORT_SYMBOL(default_idle); */ static void poll_idle (void) { - local_irq_enable(); - - asm volatile( - "2:" - "testl %0, %1;" - "rep; nop;" - "je 2b;" - : : "i"(_TIF_NEED_RESCHED), "m" (current_thread_info()->flags)); + cpu_relax(); } #ifdef CONFIG_HOTPLUG_CPU @@ -257,8 +246,7 @@ void mwait_idle_with_hints(unsigned long eax, unsigned long ecx) static void mwait_idle(void) { local_irq_enable(); - while (!need_resched()) - mwait_idle_with_hints(0, 0); + mwait_idle_with_hints(0, 0); } void __devinit select_idle_routine(const struct cpuinfo_x86 *c) diff --git a/arch/x86_64/kernel/process.c b/arch/x86_64/kernel/process.c index 7451a4c43c16..0b7b4caa4f74 100644 --- a/arch/x86_64/kernel/process.c +++ b/arch/x86_64/kernel/process.c @@ -108,17 +108,15 @@ void exit_idle(void) */ static void default_idle(void) { - local_irq_enable(); - current_thread_info()->status &= ~TS_POLLING; smp_mb__after_clear_bit(); - while (!need_resched()) { - local_irq_disable(); - if (!need_resched()) - safe_halt(); - else - local_irq_enable(); - } + local_irq_disable(); + if (!need_resched()) { + /* Enables interrupts one instruction before HLT. + x86 special cases this so there is no race. */ + safe_halt(); + } else + local_irq_enable(); current_thread_info()->status |= TS_POLLING; } @@ -129,16 +127,7 @@ static void default_idle(void) */ static void poll_idle (void) { - local_irq_enable(); - - asm volatile( - "2:" - "testl %0,%1;" - "rep; nop;" - "je 2b;" - : : - "i" (_TIF_NEED_RESCHED), - "m" (current_thread_info()->flags)); + cpu_relax(); } void cpu_idle_wait(void) @@ -257,8 +246,7 @@ void mwait_idle_with_hints(unsigned long eax, unsigned long ecx) static void mwait_idle(void) { local_irq_enable(); - while (!need_resched()) - mwait_idle_with_hints(0,0); + mwait_idle_with_hints(0,0); } void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c) -- cgit v1.2.3 From 7cd8b6861eb586aabe4c725cc0c259ce2e653695 Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Thu, 7 Dec 2006 02:14:03 +0100 Subject: [PATCH] x86: remove last two pci_find offenders in the core code Resending as I believe the discussion about them established they were correct. Signed-off-by: Alan Cox Signed-off-by: Andi Kleen Cc: Andi Kleen Signed-off-by: Andrew Morton --- arch/i386/pci/irq.c | 4 +++- arch/x86_64/kernel/pci-calgary.c | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'arch/x86_64/kernel') diff --git a/arch/i386/pci/irq.c b/arch/i386/pci/irq.c index e65551cd8216..f2cb942f8281 100644 --- a/arch/i386/pci/irq.c +++ b/arch/i386/pci/irq.c @@ -764,7 +764,7 @@ static void __init pirq_find_router(struct irq_router *r) DBG(KERN_DEBUG "PCI: Attempting to find IRQ router for %04x:%04x\n", rt->rtr_vendor, rt->rtr_device); - pirq_router_dev = pci_find_slot(rt->rtr_bus, rt->rtr_devfn); + pirq_router_dev = pci_get_bus_and_slot(rt->rtr_bus, rt->rtr_devfn); if (!pirq_router_dev) { DBG(KERN_DEBUG "PCI: Interrupt router not found at " "%02x:%02x\n", rt->rtr_bus, rt->rtr_devfn); @@ -784,6 +784,8 @@ static void __init pirq_find_router(struct irq_router *r) pirq_router_dev->vendor, pirq_router_dev->device, pci_name(pirq_router_dev)); + + /* The device remains referenced for the kernel lifetime */ } static struct irq_info *pirq_get_info(struct pci_dev *dev) diff --git a/arch/x86_64/kernel/pci-calgary.c b/arch/x86_64/kernel/pci-calgary.c index 37a770859e71..d2ea87a95268 100644 --- a/arch/x86_64/kernel/pci-calgary.c +++ b/arch/x86_64/kernel/pci-calgary.c @@ -921,7 +921,7 @@ static int __init calgary_init(void) error: do { - dev = pci_find_device_reverse(PCI_VENDOR_ID_IBM, + dev = pci_get_device_reverse(PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_IBM_CALGARY, dev); if (!dev) -- cgit v1.2.3 From 9c5f8be4625e73f17e28fea89399ed871a30e064 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 7 Dec 2006 02:14:03 +0100 Subject: [PATCH] x86: Mention PCI instead of RAM in NMI parity error message On modern systems RAM errors don't cause NMIs, but it's usually caused by PCI SERR. Mention PCI instead of RAM in the printk. Reported by r_hayashi@ctc-g.co.jp (Ryutaro Hayashi) Cc: r_hayashi@ctc-g.co.jp Signed-off-by: Andi Kleen --- arch/i386/kernel/traps.c | 3 +-- arch/x86_64/kernel/traps.c | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) (limited to 'arch/x86_64/kernel') diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c index 4a6fa2837df2..237f4884a1e1 100644 --- a/arch/i386/kernel/traps.c +++ b/arch/i386/kernel/traps.c @@ -708,8 +708,7 @@ mem_parity_error(unsigned char reason, struct pt_regs * regs) { printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x on " "CPU %d.\n", reason, smp_processor_id()); - printk(KERN_EMERG "You probably have a hardware problem with your RAM " - "chips\n"); + printk(KERN_EMERG "You have some hardware problem, likely on the PCI bus.\n"); if (panic_on_unrecovered_nmi) panic("NMI: Not continuing"); diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c index e37b4d77d5a8..70bfaab9822c 100644 --- a/arch/x86_64/kernel/traps.c +++ b/arch/x86_64/kernel/traps.c @@ -793,8 +793,7 @@ mem_parity_error(unsigned char reason, struct pt_regs * regs) { printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x.\n", reason); - printk(KERN_EMERG "You probably have a hardware problem with your " - "RAM chips\n"); + printk(KERN_EMERG "You have some hardware problem, likely on the PCI bus.\n"); if (panic_on_unrecovered_nmi) panic("NMI: Not continuing"); -- cgit v1.2.3 From 5df0287ecc4f53e68bbd188fa8258b555e6b734f Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 7 Dec 2006 02:14:05 +0100 Subject: [PATCH] x86-64: Extend clear_irq_vector Clear the irq releated entries in irq_vector, irq_domain and vector_irq instead of clearing irq_vector only. So when new irq is created, it could reuse that vector. (actually is the second loop scanning from FIRST_DEVICE_VECTOR+8). This could avoid the vectors are used up with enough module inserting and removing Cc: Eric W. Biedierman Cc: Muli Ben-Yehuda Signed-off-By: Yinghai Lu Signed-off-by: Andi Kleen --- arch/x86_64/kernel/io_apic.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) (limited to 'arch/x86_64/kernel') diff --git a/arch/x86_64/kernel/io_apic.c b/arch/x86_64/kernel/io_apic.c index c80081a6ba41..f71461b1f03d 100644 --- a/arch/x86_64/kernel/io_apic.c +++ b/arch/x86_64/kernel/io_apic.c @@ -750,6 +750,22 @@ static int assign_irq_vector(int irq, cpumask_t mask, cpumask_t *result) return vector; } +static void __clear_irq_vector(int irq) +{ + cpumask_t mask; + int cpu, vector; + + BUG_ON(!irq_vector[irq]); + + vector = irq_vector[irq]; + cpus_and(mask, irq_domain[irq], cpu_online_map); + for_each_cpu_mask(cpu, mask) + per_cpu(vector_irq, cpu)[vector] = -1; + + irq_vector[irq] = 0; + irq_domain[irq] = CPU_MASK_NONE; +} + void __setup_vector_irq(int cpu) { /* Initialize vector_irq on a new cpu */ @@ -1837,7 +1853,7 @@ void destroy_irq(unsigned int irq) dynamic_irq_cleanup(irq); spin_lock_irqsave(&vector_lock, flags); - irq_vector[irq] = 0; + __clear_irq_vector(irq); spin_unlock_irqrestore(&vector_lock, flags); } -- cgit v1.2.3 From ab2bf0c1c689905b628dca94d0acd9c50e152468 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 7 Dec 2006 02:14:06 +0100 Subject: [PATCH] x86-64: Use probe_kernel_address in arch/x86_64/* Instead of open coded __get_user Signed-off-by: Andi Kleen --- arch/x86_64/kernel/traps.c | 2 +- arch/x86_64/mm/fault.c | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'arch/x86_64/kernel') diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c index 70bfaab9822c..264db33476ab 100644 --- a/arch/x86_64/kernel/traps.c +++ b/arch/x86_64/kernel/traps.c @@ -30,9 +30,9 @@ #include #include #include +#include #include -#include #include #include #include diff --git a/arch/x86_64/mm/fault.c b/arch/x86_64/mm/fault.c index 3751b4788e28..a65fc6f1dcaf 100644 --- a/arch/x86_64/mm/fault.c +++ b/arch/x86_64/mm/fault.c @@ -23,9 +23,9 @@ #include #include #include +#include #include -#include #include #include #include @@ -96,7 +96,7 @@ void bust_spinlocks(int yes) static noinline int is_prefetch(struct pt_regs *regs, unsigned long addr, unsigned long error_code) { - unsigned char __user *instr; + unsigned char *instr; int scan_more = 1; int prefetch = 0; unsigned char *max_instr; @@ -116,7 +116,7 @@ static noinline int is_prefetch(struct pt_regs *regs, unsigned long addr, unsigned char instr_hi; unsigned char instr_lo; - if (__get_user(opcode, (char __user *)instr)) + if (probe_kernel_address(instr, opcode)) break; instr_hi = opcode & 0xf0; @@ -154,7 +154,7 @@ static noinline int is_prefetch(struct pt_regs *regs, unsigned long addr, case 0x00: /* Prefetch instruction is 0x0F0D or 0x0F18 */ scan_more = 0; - if (__get_user(opcode, (char __user *)instr)) + if (probe_kernel_address(instr, opcode)) break; prefetch = (instr_lo == 0xF) && (opcode == 0x0D || opcode == 0x18); @@ -170,7 +170,7 @@ static noinline int is_prefetch(struct pt_regs *regs, unsigned long addr, static int bad_address(void *p) { unsigned long dummy; - return __get_user(dummy, (unsigned long __user *)p); + return probe_kernel_address((unsigned long *)p, dummy); } void dump_pagetable(unsigned long address) -- cgit v1.2.3 From b026872601976f666bae77b609dc490d1834bf77 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 7 Dec 2006 02:14:06 +0100 Subject: [PATCH] x86-64: Try multiple timer variants in check_timer Instead of adding all kinds of more quirks try various timer routing variants in check_timer. In particular this tries to handle quirks from: - Nvidia NF2-4 reference BIOS: wrong timer override - Asus: Wrong timer override but no HPET table - ATI: require timer disabled in 8259 - Some boards: require timer enabled in 8259 We just try many of the the known variants in the hopefully right order in check_timer. Trying pin 0/2 on Nvidia suggested by Tim Hockin. TBD Experimental. Needs a lot of testing Signed-off-by: Andi Kleen --- Documentation/x86_64/boot-options.txt | 4 -- arch/x86_64/kernel/early-quirks.c | 5 -- arch/x86_64/kernel/io_apic.c | 124 ++++++++++++++-------------------- 3 files changed, 51 insertions(+), 82 deletions(-) (limited to 'arch/x86_64/kernel') diff --git a/Documentation/x86_64/boot-options.txt b/Documentation/x86_64/boot-options.txt index f3c57f43ba64..ab9b1c046c00 100644 --- a/Documentation/x86_64/boot-options.txt +++ b/Documentation/x86_64/boot-options.txt @@ -52,10 +52,6 @@ APICs apicmaintimer. Useful when your PIT timer is totally broken. - disable_8254_timer / enable_8254_timer - Enable interrupt 0 timer routing over the 8254 in addition to over - the IO-APIC. The kernel tries to set a sensible default. - Early Console syntax: earlyprintk=vga diff --git a/arch/x86_64/kernel/early-quirks.c b/arch/x86_64/kernel/early-quirks.c index 68273bff58cc..fb0c6da41b7e 100644 --- a/arch/x86_64/kernel/early-quirks.c +++ b/arch/x86_64/kernel/early-quirks.c @@ -69,11 +69,6 @@ static void nvidia_bugs(void) static void ati_bugs(void) { - if (timer_over_8254 == 1) { - timer_over_8254 = 0; - printk(KERN_INFO - "ATI board detected. Disabling timer routing over 8254.\n"); - } } struct chipset { diff --git a/arch/x86_64/kernel/io_apic.c b/arch/x86_64/kernel/io_apic.c index f71461b1f03d..88fcc4ebbf6e 100644 --- a/arch/x86_64/kernel/io_apic.c +++ b/arch/x86_64/kernel/io_apic.c @@ -55,10 +55,6 @@ int sis_apic_bug; /* not actually supported, dummy for compile */ static int no_timer_check; -static int disable_timer_pin_1 __initdata; - -int timer_over_8254 __initdata = 1; - /* Where if anywhere is the i8259 connect in external int mode */ static struct { int pin, apic; } ioapic_i8259 = { -1, -1 }; @@ -348,29 +344,6 @@ static int __init disable_ioapic_setup(char *str) } early_param("noapic", disable_ioapic_setup); -/* Actually the next is obsolete, but keep it for paranoid reasons -AK */ -static int __init disable_timer_pin_setup(char *arg) -{ - disable_timer_pin_1 = 1; - return 1; -} -__setup("disable_timer_pin_1", disable_timer_pin_setup); - -static int __init setup_disable_8254_timer(char *s) -{ - timer_over_8254 = -1; - return 1; -} -static int __init setup_enable_8254_timer(char *s) -{ - timer_over_8254 = 2; - return 1; -} - -__setup("disable_8254_timer", setup_disable_8254_timer); -__setup("enable_8254_timer", setup_enable_8254_timer); - - /* * Find the IRQ entry number of a certain pin. */ @@ -1579,10 +1552,33 @@ static inline void unlock_ExtINT_logic(void) * a wide range of boards and BIOS bugs. Fortunately only the timer IRQ * is so screwy. Thanks to Brian Perkins for testing/hacking this beast * fanatically on his truly buggy board. - * - * FIXME: really need to revamp this for modern platforms only. */ -static inline void check_timer(void) + +static int try_apic_pin(int apic, int pin, char *msg) +{ + apic_printk(APIC_VERBOSE, KERN_INFO + "..TIMER: trying IO-APIC=%d PIN=%d %s", + apic, pin, msg); + + /* + * Ok, does IRQ0 through the IOAPIC work? + */ + if (!no_timer_check && timer_irq_works()) { + nmi_watchdog_default(); + if (nmi_watchdog == NMI_IO_APIC) { + disable_8259A_irq(0); + setup_nmi(); + enable_8259A_irq(0); + } + return 1; + } + clear_IO_APIC_pin(apic, pin); + apic_printk(APIC_QUIET, KERN_ERR " .. failed\n"); + return 0; +} + +/* The function from hell */ +static void check_timer(void) { int apic1, pin1, apic2, pin2; int vector; @@ -1603,61 +1599,43 @@ static inline void check_timer(void) */ apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); init_8259A(1); - if (timer_over_8254 > 0) - enable_8259A_irq(0); pin1 = find_isa_irq_pin(0, mp_INT); apic1 = find_isa_irq_apic(0, mp_INT); pin2 = ioapic_i8259.pin; apic2 = ioapic_i8259.apic; - apic_printk(APIC_VERBOSE,KERN_INFO "..TIMER: vector=0x%02X apic1=%d pin1=%d apic2=%d pin2=%d\n", - vector, apic1, pin1, apic2, pin2); + /* Do this first, otherwise we get double interrupts on ATI boards */ + if ((pin1 != -1) && try_apic_pin(apic1, pin1,"with 8259 IRQ0 disabled")) + return; - if (pin1 != -1) { - /* - * Ok, does IRQ0 through the IOAPIC work? - */ - unmask_IO_APIC_irq(0); - if (!no_timer_check && timer_irq_works()) { - nmi_watchdog_default(); - if (nmi_watchdog == NMI_IO_APIC) { - disable_8259A_irq(0); - setup_nmi(); - enable_8259A_irq(0); - } - if (disable_timer_pin_1 > 0) - clear_IO_APIC_pin(0, pin1); - return; - } - clear_IO_APIC_pin(apic1, pin1); - apic_printk(APIC_QUIET,KERN_ERR "..MP-BIOS bug: 8254 timer not " - "connected to IO-APIC\n"); - } + /* Now try again with IRQ0 8259A enabled. + Assumes timer is on IO-APIC 0 ?!? */ + enable_8259A_irq(0); + unmask_IO_APIC_irq(0); + if (try_apic_pin(apic1, pin1, "with 8259 IRQ0 enabled")) + return; + disable_8259A_irq(0); - apic_printk(APIC_VERBOSE,KERN_INFO "...trying to set up timer (IRQ0) " - "through the 8259A ... "); + /* Always try pin0 and pin2 on APIC 0 to handle buggy timer overrides + on Nvidia boards */ + if (!(apic1 == 0 && pin1 == 0) && + try_apic_pin(0, 0, "fallback with 8259 IRQ0 disabled")) + return; + if (!(apic1 == 0 && pin1 == 2) && + try_apic_pin(0, 2, "fallback with 8259 IRQ0 disabled")) + return; + + /* Then try pure 8259A routing on the 8259 as reported by BIOS*/ + enable_8259A_irq(0); if (pin2 != -1) { - apic_printk(APIC_VERBOSE,"\n..... (found apic %d pin %d) ...", - apic2, pin2); - /* - * legacy devices should be connected to IO APIC #0 - */ setup_ExtINT_IRQ0_pin(apic2, pin2, vector); - if (timer_irq_works()) { - apic_printk(APIC_VERBOSE," works.\n"); - nmi_watchdog_default(); - if (nmi_watchdog == NMI_IO_APIC) { - setup_nmi(); - } + if (try_apic_pin(apic2,pin2,"8259A broadcast ExtINT from BIOS")) return; - } - /* - * Cleanup, just in case ... - */ - clear_IO_APIC_pin(apic2, pin2); } - apic_printk(APIC_VERBOSE," failed.\n"); + + /* Tried all possibilities to go through the IO-APIC. Now come the + really cheesy fallbacks. */ if (nmi_watchdog == NMI_IO_APIC) { printk(KERN_WARNING "timer doesn't work through the IO-APIC - disabling NMI Watchdog!\n"); -- cgit v1.2.3 From 58db85482743f5e3495d168c641c60ce1d3dfb06 Mon Sep 17 00:00:00 2001 From: Muli Ben-Yehuda Date: Thu, 7 Dec 2006 02:14:06 +0100 Subject: [PATCH] calgary: phb_shift can be int Signed-off-by: Muli Ben-Yehuda Signed-off-by: Jon Mason Signed-off-by: Andi Kleen --- arch/x86_64/kernel/pci-calgary.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86_64/kernel') diff --git a/arch/x86_64/kernel/pci-calgary.c b/arch/x86_64/kernel/pci-calgary.c index d2ea87a95268..f53b581dfd0b 100644 --- a/arch/x86_64/kernel/pci-calgary.c +++ b/arch/x86_64/kernel/pci-calgary.c @@ -740,7 +740,7 @@ static void __init calgary_increase_split_completion_timeout(void __iomem *bbar, { u64 val64; void __iomem *target; - unsigned long phb_shift = -1; + unsigned int phb_shift = ~0; /* silence gcc */ u64 mask; switch (busno_to_phbid(busnum)) { -- cgit v1.2.3 From b34e90b8f0f30151349134f87b5dc6ef75a5218c Mon Sep 17 00:00:00 2001 From: Laurent Vivier Date: Thu, 7 Dec 2006 02:14:06 +0100 Subject: [PATCH] Calgary: use BIOS supplied BBARs and topology information Find the BBAR register address of each Calgary using the "Extended BIOS Data Area" rather than calculating it ourselves. Also get the bus topology (what PHB each bus is on) from Calgary rather than calculating it ourselves. This patch fixes http://bugzilla.kernel.org/show_bug.cgi?id=7407. Signed-off-by: Laurent Vivier Signed-off-by: Muli Ben-Yehuda Signed-off-by: Jon Mason Signed-off-by: Andi Kleen --- arch/x86_64/kernel/pci-calgary.c | 161 ++++++++++++++++++++++++++++++--------- include/asm-x86_64/rio.h | 76 ++++++++++++++++++ 2 files changed, 201 insertions(+), 36 deletions(-) create mode 100644 include/asm-x86_64/rio.h (limited to 'arch/x86_64/kernel') diff --git a/arch/x86_64/kernel/pci-calgary.c b/arch/x86_64/kernel/pci-calgary.c index f53b581dfd0b..afc0a53505f1 100644 --- a/arch/x86_64/kernel/pci-calgary.c +++ b/arch/x86_64/kernel/pci-calgary.c @@ -41,6 +41,7 @@ #include #include #include +#include #define PCI_DEVICE_ID_IBM_CALGARY 0x02a1 #define PCI_VENDOR_DEVICE_ID_CALGARY \ @@ -115,14 +116,35 @@ static const unsigned long phb_offsets[] = { 0xB000 /* PHB3 */ }; +/* PHB debug registers */ + +static const unsigned long phb_debug_offsets[] = { + 0x4000 /* PHB 0 DEBUG */, + 0x5000 /* PHB 1 DEBUG */, + 0x6000 /* PHB 2 DEBUG */, + 0x7000 /* PHB 3 DEBUG */ +}; + +/* + * STUFF register for each debug PHB, + * byte 1 = start bus number, byte 2 = end bus number + */ + +#define PHB_DEBUG_STUFF_OFFSET 0x0020 + unsigned int specified_table_size = TCE_TABLE_SIZE_UNSPECIFIED; static int translate_empty_slots __read_mostly = 0; static int calgary_detected __read_mostly = 0; +static struct rio_table_hdr *rio_table_hdr __initdata; +static struct scal_detail *scal_devs[MAX_NUMNODES] __initdata; +static struct rio_detail *rio_devs[MAX_NUMNODES*4] __initdata; + struct calgary_bus_info { void *tce_space; unsigned char translation_disabled; signed char phbid; + void __iomem *bbar; }; static struct calgary_bus_info bus_info[MAX_PHB_BUS_NUM] = { { NULL, 0, 0 }, }; @@ -475,6 +497,11 @@ static struct dma_mapping_ops calgary_dma_ops = { .unmap_sg = calgary_unmap_sg, }; +static inline void __iomem * busno_to_bbar(unsigned char num) +{ + return bus_info[num].bbar; +} + static inline int busno_to_phbid(unsigned char num) { return bus_info[num].phbid; @@ -828,31 +855,9 @@ static void __init calgary_disable_translation(struct pci_dev *dev) del_timer_sync(&tbl->watchdog_timer); } -static inline unsigned int __init locate_register_space(struct pci_dev *dev) +static inline void __iomem * __init locate_register_space(struct pci_dev *dev) { - int rionodeid; - u32 address; - - /* - * Each Calgary has four busses. The first four busses (first Calgary) - * have RIO node ID 2, then the next four (second Calgary) have RIO - * node ID 3, the next four (third Calgary) have node ID 2 again, etc. - * We use a gross hack - relying on the dev->bus->number ordering, - * modulo 14 - to decide which Calgary a given bus is on. Busses 0, 1, - * 2 and 4 are on the first Calgary (id 2), 6, 8, a and c are on the - * second (id 3), and then it repeats modulo 14. - */ - rionodeid = (dev->bus->number % 14 > 4) ? 3 : 2; - /* - * register space address calculation as follows: - * FE0MB-8MB*OneBasedChassisNumber+1MB*(RioNodeId-ChassisBase) - * ChassisBase is always zero for x366/x260/x460 - * RioNodeId is 2 for first Calgary, 3 for second Calgary - */ - address = START_ADDRESS - - (0x800000 * (ONE_BASED_CHASSIS_NUM + dev->bus->number / 14)) + - (0x100000) * (rionodeid - CHASSIS_BASE); - return address; + return busno_to_bbar(dev->bus->number); } static void __init calgary_init_one_nontraslated(struct pci_dev *dev) @@ -864,15 +869,12 @@ static void __init calgary_init_one_nontraslated(struct pci_dev *dev) static int __init calgary_init_one(struct pci_dev *dev) { - u32 address; void __iomem *bbar; int ret; BUG_ON(dev->bus->number >= MAX_PHB_BUS_NUM); - address = locate_register_space(dev); - /* map entire 1MB of Calgary config space */ - bbar = ioremap_nocache(address, 1024 * 1024); + bbar = locate_register_space(dev); if (!bbar) { ret = -ENODATA; goto done; @@ -898,6 +900,35 @@ static int __init calgary_init(void) { int ret = -ENODEV; struct pci_dev *dev = NULL; + int rio, phb, bus; + void __iomem *bbar; + void __iomem *target; + u8 start_bus, end_bus; + u32 val; + + for (rio = 0; rio < rio_table_hdr->num_rio_dev; rio++) { + + if ( (rio_devs[rio]->type != COMPAT_CALGARY) && + (rio_devs[rio]->type != ALT_CALGARY) ) + continue; + + /* map entire 1MB of Calgary config space */ + bbar = ioremap_nocache(rio_devs[rio]->BBAR, 1024 * 1024); + + for (phb = 0; phb < PHBS_PER_CALGARY; phb++) { + + target = calgary_reg(bbar, phb_debug_offsets[phb] | + PHB_DEBUG_STUFF_OFFSET); + val = be32_to_cpu(readl(target)); + start_bus = (u8)((val & 0x00FF0000) >> 16); + end_bus = (u8)((val & 0x0000FF00) >> 8); + for (bus = start_bus; bus <= end_bus; bus++) { + bus_info[bus].bbar = bbar; + bus_info[bus].phbid = phb; + } + } + } + do { dev = pci_get_device(PCI_VENDOR_ID_IBM, @@ -962,13 +993,55 @@ static inline int __init determine_tce_table_size(u64 ram) return ret; } +static int __init build_detail_arrays(void) +{ + unsigned long ptr; + int i, scal_detail_size, rio_detail_size; + + if (rio_table_hdr->num_scal_dev > MAX_NUMNODES){ + printk(KERN_WARNING + "Calgary: MAX_NUMNODES too low! Defined as %d, " + "but system has %d nodes.\n", + MAX_NUMNODES, rio_table_hdr->num_scal_dev); + return -ENODEV; + } + + switch (rio_table_hdr->version){ + default: + printk(KERN_WARNING + "Calgary: Invalid Rio Grande Table Version: %d\n", + rio_table_hdr->version); + return -ENODEV; + case 2: + scal_detail_size = 11; + rio_detail_size = 13; + break; + case 3: + scal_detail_size = 12; + rio_detail_size = 15; + break; + } + + ptr = ((unsigned long)rio_table_hdr) + 3; + for (i = 0; i < rio_table_hdr->num_scal_dev; + i++, ptr += scal_detail_size) + scal_devs[i] = (struct scal_detail *)ptr; + + for (i = 0; i < rio_table_hdr->num_rio_dev; + i++, ptr += rio_detail_size) + rio_devs[i] = (struct rio_detail *)ptr; + + return 0; +} + void __init detect_calgary(void) { u32 val; int bus; void *tbl; int calgary_found = 0; - int phb = -1; + unsigned long ptr; + int offset; /* * if the user specified iommu=off or iommu=soft or we found @@ -980,6 +1053,29 @@ void __init detect_calgary(void) if (!early_pci_allowed()) return; + ptr = (unsigned long)phys_to_virt(get_bios_ebda()); + + rio_table_hdr = NULL; + offset = 0x180; + while (offset) { + /* The block id is stored in the 2nd word */ + if (*((unsigned short *)(ptr + offset + 2)) == 0x4752){ + /* set the pointer past the offset & block id */ + rio_table_hdr = (struct rio_table_hdr *)(ptr+offset+4); + break; + } + /* The next offset is stored in the 1st word. 0 means no more */ + offset = *((unsigned short *)(ptr + offset)); + } + if (!rio_table_hdr){ + printk(KERN_ERR "Calgary: Unable to locate " + "Rio Grande Table in EBDA - bailing!\n"); + return; + } + + if (build_detail_arrays()) + return; + specified_table_size = determine_tce_table_size(end_pfn * PAGE_SIZE); for (bus = 0; bus < MAX_PHB_BUS_NUM; bus++) { @@ -990,12 +1086,6 @@ void __init detect_calgary(void) if (read_pci_config(bus, 0, 0, 0) != PCI_VENDOR_DEVICE_ID_CALGARY) continue; - /* - * There are 4 PHBs per Calgary chip. Set phb to which phb (0-3) - * it is connected to releative to the clagary chip. - */ - phb = (phb + 1) % PHBS_PER_CALGARY; - if (info->translation_disabled) continue; @@ -1010,7 +1100,6 @@ void __init detect_calgary(void) if (!tbl) goto cleanup; info->tce_space = tbl; - info->phbid = phb; calgary_found = 1; break; } diff --git a/include/asm-x86_64/rio.h b/include/asm-x86_64/rio.h new file mode 100644 index 000000000000..1f315c39d76e --- /dev/null +++ b/include/asm-x86_64/rio.h @@ -0,0 +1,76 @@ +/* + * Derived from include/asm-i386/mach-summit/mach_mpparse.h + * and include/asm-i386/mach-default/bios_ebda.h + * + * Author: Laurent Vivier + * + */ + +#ifndef __ASM_RIO_H +#define __ASM_RIO_H + +#define RIO_TABLE_VERSION 3 + +struct rio_table_hdr { + u8 version; /* Version number of this data structure */ + u8 num_scal_dev; /* # of Scalability devices */ + u8 num_rio_dev; /* # of RIO I/O devices */ +} __attribute__((packed)); + +struct scal_detail { + u8 node_id; /* Scalability Node ID */ + u32 CBAR; /* Address of 1MB register space */ + u8 port0node; /* Node ID port connected to: 0xFF=None */ + u8 port0port; /* Port num port connected to: 0,1,2, or */ + /* 0xFF=None */ + u8 port1node; /* Node ID port connected to: 0xFF = None */ + u8 port1port; /* Port num port connected to: 0,1,2, or */ + /* 0xFF=None */ + u8 port2node; /* Node ID port connected to: 0xFF = None */ + u8 port2port; /* Port num port connected to: 0,1,2, or */ + /* 0xFF=None */ + u8 chassis_num; /* 1 based Chassis number (1 = boot node) */ +} __attribute__((packed)); + +struct rio_detail { + u8 node_id; /* RIO Node ID */ + u32 BBAR; /* Address of 1MB register space */ + u8 type; /* Type of device */ + u8 owner_id; /* Node ID of Hurricane that owns this */ + /* node */ + u8 port0node; /* Node ID port connected to: 0xFF=None */ + u8 port0port; /* Port num port connected to: 0,1,2, or */ + /* 0xFF=None */ + u8 port1node; /* Node ID port connected to: 0xFF=None */ + u8 port1port; /* Port num port connected to: 0,1,2, or */ + /* 0xFF=None */ + u8 first_slot; /* Lowest slot number below this Calgary */ + u8 status; /* Bit 0 = 1 : the XAPIC is used */ + /* = 0 : the XAPIC is not used, ie: */ + /* ints fwded to another XAPIC */ + /* Bits1:7 Reserved */ + u8 WP_index; /* instance index - lower ones have */ + /* lower slot numbers/PCI bus numbers */ + u8 chassis_num; /* 1 based Chassis number */ +} __attribute__((packed)); + +enum { + HURR_SCALABILTY = 0, /* Hurricane Scalability info */ + HURR_RIOIB = 2, /* Hurricane RIOIB info */ + COMPAT_CALGARY = 4, /* Compatibility Calgary */ + ALT_CALGARY = 5, /* Second Planar Calgary */ +}; + +/* + * there is a real-mode segmented pointer pointing to the + * 4K EBDA area at 0x40E. + */ + +static inline unsigned long get_bios_ebda(void) +{ + unsigned long address= *(unsigned short *)phys_to_virt(0x40Eul); + address <<= 4; + return address; +} + +#endif /* __ASM_RIO_H */ -- cgit v1.2.3 From eae93755540bae18aff46b8a0e621b5d65bd5380 Mon Sep 17 00:00:00 2001 From: Muli Ben-Yehuda Date: Thu, 7 Dec 2006 02:14:06 +0100 Subject: [PATCH] Calgary: check BBAR ioremap success when ioremapping This patch cleans up the previous "Use BIOS supplied BBAR information" patch. Mostly stylistic clenaups, but also check for ioremap failure when we ioremap the BBAR rather than when trying to use it. Signed-off-by: Muli Ben-Yehuda Signed-off-by: Jon Mason Signed-off-by: Andi Kleen Acked-by: Laurent Vivier --- arch/x86_64/kernel/pci-calgary.c | 85 +++++++++++++++++++++++----------------- include/asm-x86_64/rio.h | 8 ++-- 2 files changed, 52 insertions(+), 41 deletions(-) (limited to 'arch/x86_64/kernel') diff --git a/arch/x86_64/kernel/pci-calgary.c b/arch/x86_64/kernel/pci-calgary.c index afc0a53505f1..8a1e4f35bc3c 100644 --- a/arch/x86_64/kernel/pci-calgary.c +++ b/arch/x86_64/kernel/pci-calgary.c @@ -138,7 +138,7 @@ static int calgary_detected __read_mostly = 0; static struct rio_table_hdr *rio_table_hdr __initdata; static struct scal_detail *scal_devs[MAX_NUMNODES] __initdata; -static struct rio_detail *rio_devs[MAX_NUMNODES*4] __initdata; +static struct rio_detail *rio_devs[MAX_NUMNODES * 4] __initdata; struct calgary_bus_info { void *tce_space; @@ -855,11 +855,6 @@ static void __init calgary_disable_translation(struct pci_dev *dev) del_timer_sync(&tbl->watchdog_timer); } -static inline void __iomem * __init locate_register_space(struct pci_dev *dev) -{ - return busno_to_bbar(dev->bus->number); -} - static void __init calgary_init_one_nontraslated(struct pci_dev *dev) { pci_dev_get(dev); @@ -874,15 +869,10 @@ static int __init calgary_init_one(struct pci_dev *dev) BUG_ON(dev->bus->number >= MAX_PHB_BUS_NUM); - bbar = locate_register_space(dev); - if (!bbar) { - ret = -ENODATA; - goto done; - } - + bbar = busno_to_bbar(dev->bus->number); ret = calgary_setup_tar(dev, bbar); if (ret) - goto iounmap; + goto done; pci_dev_get(dev); dev->bus->self = dev; @@ -890,38 +880,39 @@ static int __init calgary_init_one(struct pci_dev *dev) return 0; -iounmap: - iounmap(bbar); done: return ret; } -static int __init calgary_init(void) +static int __init calgary_locate_bbars(void) { - int ret = -ENODEV; - struct pci_dev *dev = NULL; - int rio, phb, bus; + int ret; + int rioidx, phb, bus; void __iomem *bbar; void __iomem *target; + unsigned long offset; u8 start_bus, end_bus; u32 val; - for (rio = 0; rio < rio_table_hdr->num_rio_dev; rio++) { + ret = -ENODATA; + for (rioidx = 0; rioidx < rio_table_hdr->num_rio_dev; rioidx++) { + struct rio_detail *rio = rio_devs[rioidx]; - if ( (rio_devs[rio]->type != COMPAT_CALGARY) && - (rio_devs[rio]->type != ALT_CALGARY) ) + if ((rio->type != COMPAT_CALGARY) && (rio->type != ALT_CALGARY)) continue; /* map entire 1MB of Calgary config space */ - bbar = ioremap_nocache(rio_devs[rio]->BBAR, 1024 * 1024); + bbar = ioremap_nocache(rio->BBAR, 1024 * 1024); + if (!bbar) + goto error; for (phb = 0; phb < PHBS_PER_CALGARY; phb++) { + offset = phb_debug_offsets[phb] | PHB_DEBUG_STUFF_OFFSET; + target = calgary_reg(bbar, offset); - target = calgary_reg(bbar, phb_debug_offsets[phb] | - PHB_DEBUG_STUFF_OFFSET); val = be32_to_cpu(readl(target)); start_bus = (u8)((val & 0x00FF0000) >> 16); - end_bus = (u8)((val & 0x0000FF00) >> 8); + end_bus = (u8)((val & 0x0000FF00) >> 8); for (bus = start_bus; bus <= end_bus; bus++) { bus_info[bus].bbar = bbar; bus_info[bus].phbid = phb; @@ -929,6 +920,25 @@ static int __init calgary_init(void) } } + return 0; + +error: + /* scan bus_info and iounmap any bbars we previously ioremap'd */ + for (bus = 0; bus < ARRAY_SIZE(bus_info); bus++) + if (bus_info[bus].bbar) + iounmap(bus_info[bus].bbar); + + return ret; +} + +static int __init calgary_init(void) +{ + int ret; + struct pci_dev *dev = NULL; + + ret = calgary_locate_bbars(); + if (ret) + return ret; do { dev = pci_get_device(PCI_VENDOR_ID_IBM, @@ -1000,18 +1010,13 @@ static int __init build_detail_arrays(void) if (rio_table_hdr->num_scal_dev > MAX_NUMNODES){ printk(KERN_WARNING - "Calgary: MAX_NUMNODES too low! Defined as %d, " + "Calgary: MAX_NUMNODES too low! Defined as %d, " "but system has %d nodes.\n", MAX_NUMNODES, rio_table_hdr->num_scal_dev); return -ENODEV; } switch (rio_table_hdr->version){ - default: - printk(KERN_WARNING - "Calgary: Invalid Rio Grande Table Version: %d\n", - rio_table_hdr->version); - return -ENODEV; case 2: scal_detail_size = 11; rio_detail_size = 13; @@ -1020,6 +1025,11 @@ static int __init build_detail_arrays(void) scal_detail_size = 12; rio_detail_size = 15; break; + default: + printk(KERN_WARNING + "Calgary: Invalid Rio Grande Table Version: %d\n", + rio_table_hdr->version); + return -EPROTO; } ptr = ((unsigned long)rio_table_hdr) + 3; @@ -1042,6 +1052,7 @@ void __init detect_calgary(void) int calgary_found = 0; unsigned long ptr; int offset; + int ret; /* * if the user specified iommu=off or iommu=soft or we found @@ -1061,27 +1072,29 @@ void __init detect_calgary(void) /* The block id is stored in the 2nd word */ if (*((unsigned short *)(ptr + offset + 2)) == 0x4752){ /* set the pointer past the offset & block id */ - rio_table_hdr = (struct rio_table_hdr *)(ptr+offset+4); + rio_table_hdr = (struct rio_table_hdr *)(ptr + offset + 4); break; } /* The next offset is stored in the 1st word. 0 means no more */ offset = *((unsigned short *)(ptr + offset)); } - if (!rio_table_hdr){ + if (!rio_table_hdr) { printk(KERN_ERR "Calgary: Unable to locate " "Rio Grande Table in EBDA - bailing!\n"); return; } - if (build_detail_arrays()) + ret = build_detail_arrays(); + if (ret) { + printk(KERN_ERR "Calgary: build_detail_arrays ret %d\n", ret); return; + } specified_table_size = determine_tce_table_size(end_pfn * PAGE_SIZE); for (bus = 0; bus < MAX_PHB_BUS_NUM; bus++) { int dev; struct calgary_bus_info *info = &bus_info[bus]; - info->phbid = -1; if (read_pci_config(bus, 0, 0, 0) != PCI_VENDOR_DEVICE_ID_CALGARY) continue; diff --git a/include/asm-x86_64/rio.h b/include/asm-x86_64/rio.h index 1f315c39d76e..c7350f6d2015 100644 --- a/include/asm-x86_64/rio.h +++ b/include/asm-x86_64/rio.h @@ -3,7 +3,6 @@ * and include/asm-i386/mach-default/bios_ebda.h * * Author: Laurent Vivier - * */ #ifndef __ASM_RIO_H @@ -19,7 +18,7 @@ struct rio_table_hdr { struct scal_detail { u8 node_id; /* Scalability Node ID */ - u32 CBAR; /* Address of 1MB register space */ + u32 CBAR; /* Address of 1MB register space */ u8 port0node; /* Node ID port connected to: 0xFF=None */ u8 port0port; /* Port num port connected to: 0,1,2, or */ /* 0xFF=None */ @@ -34,7 +33,7 @@ struct scal_detail { struct rio_detail { u8 node_id; /* RIO Node ID */ - u32 BBAR; /* Address of 1MB register space */ + u32 BBAR; /* Address of 1MB register space */ u8 type; /* Type of device */ u8 owner_id; /* Node ID of Hurricane that owns this */ /* node */ @@ -65,10 +64,9 @@ enum { * there is a real-mode segmented pointer pointing to the * 4K EBDA area at 0x40E. */ - static inline unsigned long get_bios_ebda(void) { - unsigned long address= *(unsigned short *)phys_to_virt(0x40Eul); + unsigned long address = *(unsigned short *)phys_to_virt(0x40EUL); address <<= 4; return address; } -- cgit v1.2.3 From bff6547bb6a4e82c399d74e7fba78b12d2f162ed Mon Sep 17 00:00:00 2001 From: Muli Ben-Yehuda Date: Thu, 7 Dec 2006 02:14:07 +0100 Subject: [PATCH] Calgary: allow compiling Calgary in but not using it by default This patch makes it possible to compile Calgary in but not use it by default. In this mode, use 'iommu=calgary' to activate it. Signed-off-by: Muli Ben-Yehuda Signed-off-by: Jon Mason Signed-off-by: Andi Kleen --- Documentation/x86_64/boot-options.txt | 3 ++- arch/x86_64/Kconfig | 11 +++++++++++ arch/x86_64/kernel/pci-calgary.c | 9 +++++++++ arch/x86_64/kernel/pci-dma.c | 5 +++++ include/asm-x86_64/calgary.h | 2 ++ 5 files changed, 29 insertions(+), 1 deletion(-) (limited to 'arch/x86_64/kernel') diff --git a/Documentation/x86_64/boot-options.txt b/Documentation/x86_64/boot-options.txt index ab9b1c046c00..dbdcaf68e3ea 100644 --- a/Documentation/x86_64/boot-options.txt +++ b/Documentation/x86_64/boot-options.txt @@ -179,7 +179,7 @@ PCI IOMMU iommu=[size][,noagp][,off][,force][,noforce][,leak][,memaper[=order]][,merge] - [,forcesac][,fullflush][,nomerge][,noaperture] + [,forcesac][,fullflush][,nomerge][,noaperture][,calgary] size set size of iommu (in bytes) noagp don't initialize the AGP driver and use full aperture. off don't use the IOMMU @@ -200,6 +200,7 @@ IOMMU buffering. nodac Forbid DMA >4GB panic Always panic when IOMMU overflows + calgary Use the Calgary IOMMU if it is available swiotlb=pages[,force] diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig index 010d2265f1cf..5cb509dbffe4 100644 --- a/arch/x86_64/Kconfig +++ b/arch/x86_64/Kconfig @@ -455,6 +455,17 @@ config CALGARY_IOMMU Normally the kernel will make the right choice by itself. If unsure, say Y. +config CALGARY_IOMMU_ENABLED_BY_DEFAULT + bool "Should Calgary be enabled by default?" + default y + depends on CALGARY_IOMMU + help + Should Calgary be enabled by default? if you choose 'y', Calgary + will be used (if it exists). If you choose 'n', Calgary will not be + used even if it exists. If you choose 'n' and would like to use + Calgary anyway, pass 'iommu=calgary' on the kernel command line. + If unsure, say Y. + # need this always selected by IOMMU for the VIA workaround config SWIOTLB bool diff --git a/arch/x86_64/kernel/pci-calgary.c b/arch/x86_64/kernel/pci-calgary.c index 8a1e4f35bc3c..0ddf29dae7e0 100644 --- a/arch/x86_64/kernel/pci-calgary.c +++ b/arch/x86_64/kernel/pci-calgary.c @@ -43,6 +43,12 @@ #include #include +#ifdef CONFIG_CALGARY_IOMMU_ENABLED_BY_DEFAULT +int use_calgary __read_mostly = 1; +#else +int use_calgary __read_mostly = 0; +#endif /* CONFIG_CALGARY_DEFAULT_ENABLED */ + #define PCI_DEVICE_ID_IBM_CALGARY 0x02a1 #define PCI_VENDOR_DEVICE_ID_CALGARY \ (PCI_VENDOR_ID_IBM | PCI_DEVICE_ID_IBM_CALGARY << 16) @@ -1061,6 +1067,9 @@ void __init detect_calgary(void) if (swiotlb || no_iommu || iommu_detected) return; + if (!use_calgary) + return; + if (!early_pci_allowed()) return; diff --git a/arch/x86_64/kernel/pci-dma.c b/arch/x86_64/kernel/pci-dma.c index f8d857453f8a..683b7a5c1ab3 100644 --- a/arch/x86_64/kernel/pci-dma.c +++ b/arch/x86_64/kernel/pci-dma.c @@ -296,6 +296,11 @@ __init int iommu_setup(char *p) gart_parse_options(p); #endif +#ifdef CONFIG_CALGARY_IOMMU + if (!strncmp(p, "calgary", 7)) + use_calgary = 1; +#endif /* CONFIG_CALGARY_IOMMU */ + p += strcspn(p, ","); if (*p == ',') ++p; diff --git a/include/asm-x86_64/calgary.h b/include/asm-x86_64/calgary.h index 6b93f5a3a5c8..7ee900645719 100644 --- a/include/asm-x86_64/calgary.h +++ b/include/asm-x86_64/calgary.h @@ -51,6 +51,8 @@ struct iommu_table { #define TCE_TABLE_SIZE_4M 6 #define TCE_TABLE_SIZE_8M 7 +extern int use_calgary; + #ifdef CONFIG_CALGARY_IOMMU extern int calgary_iommu_init(void); extern void detect_calgary(void); -- cgit v1.2.3 From 516d2836434d279fef164bb1b964e83425d312d2 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 7 Dec 2006 02:14:07 +0100 Subject: [PATCH] x86-64: Fix race in IO-APIC routing entry setup. Interrupt could happen between setting the IO-APIC entry and setting its interrupt data. Pointed out by Linus. Signed-off-by: Andi Kleen --- arch/x86_64/kernel/io_apic.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'arch/x86_64/kernel') diff --git a/arch/x86_64/kernel/io_apic.c b/arch/x86_64/kernel/io_apic.c index 88fcc4ebbf6e..eaf0b708e67d 100644 --- a/arch/x86_64/kernel/io_apic.c +++ b/arch/x86_64/kernel/io_apic.c @@ -174,14 +174,20 @@ static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin) * the interrupt, and we need to make sure the entry is fully populated * before that happens. */ -static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) +static void +__ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) { - unsigned long flags; union entry_union eu; eu.entry = e; - spin_lock_irqsave(&ioapic_lock, flags); io_apic_write(apic, 0x11 + 2*pin, eu.w2); io_apic_write(apic, 0x10 + 2*pin, eu.w1); +} + +static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) +{ + unsigned long flags; + spin_lock_irqsave(&ioapic_lock, flags); + __ioapic_write_entry(apic, pin, e); spin_unlock_irqrestore(&ioapic_lock, flags); } -- cgit v1.2.3 From 103efcd9aac1de4da6a1477f2f3b9fcf35273a18 Mon Sep 17 00:00:00 2001 From: Ernie Petrides Date: Thu, 7 Dec 2006 02:14:09 +0100 Subject: [PATCH] x86-64: fix perms/range of vsyscall vma in /proc/*/maps The final line of /proc//maps on x86_64 for native 64-bit tasks shows an incorrect ending address and incorrect permissions. There is only a single page mapped in this vsyscall region, and it is accessible for both read and execute. The patch below fixes this. (Since 32-bit-compat tasks have a real vma with correct perms/range, no change is necessary for that scenario.) Before the patch, a "cat /proc/self/maps | tail -1" shows this: ffffffffff600000-ffffffffffe00000 ---p 00000000 [...] After the patch, this is the output: ffffffffff600000-ffffffffff601000 r-xp 00000000 [...] Signed-off-by: Ernie Petrides Signed-off-by: Andi Kleen --- arch/x86_64/kernel/vsyscall.c | 1 + arch/x86_64/mm/init.c | 7 ++++--- include/asm-x86_64/vsyscall.h | 1 + 3 files changed, 6 insertions(+), 3 deletions(-) (limited to 'arch/x86_64/kernel') diff --git a/arch/x86_64/kernel/vsyscall.c b/arch/x86_64/kernel/vsyscall.c index 92546c1526f1..c3de9a09cd9f 100644 --- a/arch/x86_64/kernel/vsyscall.c +++ b/arch/x86_64/kernel/vsyscall.c @@ -290,6 +290,7 @@ static void __init map_vsyscall(void) extern char __vsyscall_0; unsigned long physaddr_page0 = __pa_symbol(&__vsyscall_0); + /* Note that VSYSCALL_MAPPED_PAGES must agree with the code below. */ __set_fixmap(VSYSCALL_FIRST_PAGE, physaddr_page0, PAGE_KERNEL_VSYSCALL); } diff --git a/arch/x86_64/mm/init.c b/arch/x86_64/mm/init.c index 4c0c00ef3ca7..2968b90ef8ad 100644 --- a/arch/x86_64/mm/init.c +++ b/arch/x86_64/mm/init.c @@ -730,14 +730,15 @@ static __init int x8664_sysctl_init(void) __initcall(x8664_sysctl_init); #endif -/* A pseudo VMAs to allow ptrace access for the vsyscall page. This only +/* A pseudo VMA to allow ptrace access for the vsyscall page. This only covers the 64bit vsyscall page now. 32bit has a real VMA now and does not need special handling anymore. */ static struct vm_area_struct gate_vma = { .vm_start = VSYSCALL_START, - .vm_end = VSYSCALL_END, - .vm_page_prot = PAGE_READONLY + .vm_end = VSYSCALL_START + (VSYSCALL_MAPPED_PAGES << PAGE_SHIFT), + .vm_page_prot = PAGE_READONLY_EXEC, + .vm_flags = VM_READ | VM_EXEC }; struct vm_area_struct *get_gate_vma(struct task_struct *tsk) diff --git a/include/asm-x86_64/vsyscall.h b/include/asm-x86_64/vsyscall.h index 01d1c17e2849..05cb8dd200de 100644 --- a/include/asm-x86_64/vsyscall.h +++ b/include/asm-x86_64/vsyscall.h @@ -10,6 +10,7 @@ enum vsyscall_num { #define VSYSCALL_START (-10UL << 20) #define VSYSCALL_SIZE 1024 #define VSYSCALL_END (-2UL << 20) +#define VSYSCALL_MAPPED_PAGES 1 #define VSYSCALL_ADDR(vsyscall_nr) (VSYSCALL_START+VSYSCALL_SIZE*(vsyscall_nr)) #ifdef __KERNEL__ -- cgit v1.2.3 From 9a457324229db34d3bcb0b67360130c287289401 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 7 Dec 2006 02:14:09 +0100 Subject: [PATCH] x86-64: Rate limit no irq handler messages Signed-off-by: Andi Kleen --- arch/x86_64/kernel/irq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86_64/kernel') diff --git a/arch/x86_64/kernel/irq.c b/arch/x86_64/kernel/irq.c index e46c55856d40..0c06af6c13bc 100644 --- a/arch/x86_64/kernel/irq.c +++ b/arch/x86_64/kernel/irq.c @@ -120,7 +120,7 @@ asmlinkage unsigned int do_IRQ(struct pt_regs *regs) if (likely(irq < NR_IRQS)) generic_handle_irq(irq); - else + else if (printk_ratelimit()) printk(KERN_EMERG "%s: %d.%d No irq handler for vector\n", __func__, smp_processor_id(), vector); -- cgit v1.2.3 From 9899f826fc90beba4f78083f6230e06cbe1050c9 Mon Sep 17 00:00:00 2001 From: "Siddha, Suresh B" Date: Thu, 7 Dec 2006 02:14:10 +0100 Subject: [PATCH] x86-64: add genapic_force Add genapic_force. Used by the next Intel quirks patch. Signed-off-by: Suresh Siddha Signed-off-by: Andi Kleen Cc: Andi Kleen Cc: "Li, Shaohua" Signed-off-by: Andrew Morton --- arch/x86_64/kernel/genapic.c | 9 ++++++++- include/asm-x86_64/genapic.h | 2 +- 2 files changed, 9 insertions(+), 2 deletions(-) (limited to 'arch/x86_64/kernel') diff --git a/arch/x86_64/kernel/genapic.c b/arch/x86_64/kernel/genapic.c index 8e78a75d1866..b007433f96bb 100644 --- a/arch/x86_64/kernel/genapic.c +++ b/arch/x86_64/kernel/genapic.c @@ -33,7 +33,7 @@ extern struct genapic apic_flat; extern struct genapic apic_physflat; struct genapic *genapic = &apic_flat; - +struct genapic *genapic_force; /* * Check the APIC IDs in bios_cpu_apicid and choose the APIC mode. @@ -46,6 +46,13 @@ void __init clustered_apic_check(void) u8 cluster_cnt[NUM_APIC_CLUSTERS]; int max_apic = 0; + /* genapic selection can be forced because of certain quirks. + */ + if (genapic_force) { + genapic = genapic_force; + goto print; + } + #if defined(CONFIG_ACPI) /* * Some x86_64 machines use physical APIC mode regardless of how many diff --git a/include/asm-x86_64/genapic.h b/include/asm-x86_64/genapic.h index a0e9a4b93484..b80f4bb5f273 100644 --- a/include/asm-x86_64/genapic.h +++ b/include/asm-x86_64/genapic.h @@ -30,6 +30,6 @@ struct genapic { }; -extern struct genapic *genapic; +extern struct genapic *genapic, *genapic_force, apic_flat; #endif -- cgit v1.2.3 From b0d0a4ba45760b10ecee9035ed45b442c1a6cc84 Mon Sep 17 00:00:00 2001 From: "Siddha, Suresh B" Date: Thu, 7 Dec 2006 02:14:10 +0100 Subject: [PATCH] x86: fix the irqbalance quirk for E7320/E7520/E7525 Move the irqbalance quirks for E7320/E7520/E7525(Errata 23 in http://download.intel.com/design/chipsets/specupdt/30304203.pdf) to early quirks. And add a PCI quirk for these platforms to check(which happens very late during the boot) if the APIC routing is indeed set to default flat mode. This fixes the breakage(in x86_64) of this quirk due to cpu hotplug which selects physical mode instead of the logical flat(as needed for this errata workaround). Signed-off-by: Suresh Siddha Signed-off-by: Andi Kleen Cc: Andi Kleen Cc: "Li, Shaohua" Signed-off-by: Andrew Morton --- arch/i386/kernel/acpi/earlyquirk.c | 21 +++++++++++++++++ arch/i386/kernel/quirks.c | 46 +++++++++++++++++++++++++++++--------- arch/i386/kernel/smpboot.c | 7 ++++++ arch/x86_64/kernel/early-quirks.c | 13 +++++++++++ arch/x86_64/kernel/smpboot.c | 8 +++++++ include/asm-i386/genapic.h | 2 +- include/asm-i386/irq.h | 2 ++ include/asm-x86_64/proto.h | 1 + 8 files changed, 88 insertions(+), 12 deletions(-) (limited to 'arch/x86_64/kernel') diff --git a/arch/i386/kernel/acpi/earlyquirk.c b/arch/i386/kernel/acpi/earlyquirk.c index c9841692bb7c..4b60af7f91dd 100644 --- a/arch/i386/kernel/acpi/earlyquirk.c +++ b/arch/i386/kernel/acpi/earlyquirk.c @@ -10,6 +10,7 @@ #include #include #include +#include #ifdef CONFIG_ACPI @@ -49,6 +50,24 @@ static int __init check_bridge(int vendor, int device) return 0; } +static void check_intel(void) +{ + u16 vendor, device; + + vendor = read_pci_config_16(0, 0, 0, PCI_VENDOR_ID); + + if (vendor != PCI_VENDOR_ID_INTEL) + return; + + device = read_pci_config_16(0, 0, 0, PCI_DEVICE_ID); +#ifdef CONFIG_SMP + if (device == PCI_DEVICE_ID_INTEL_E7320_MCH || + device == PCI_DEVICE_ID_INTEL_E7520_MCH || + device == PCI_DEVICE_ID_INTEL_E7525_MCH) + quirk_intel_irqbalance(); +#endif +} + void __init check_acpi_pci(void) { int num, slot, func; @@ -60,6 +79,8 @@ void __init check_acpi_pci(void) if (!early_pci_allowed()) return; + check_intel(); + /* Poor man's PCI discovery */ for (num = 0; num < 32; num++) { for (slot = 0; slot < 32; slot++) { diff --git a/arch/i386/kernel/quirks.c b/arch/i386/kernel/quirks.c index 9f6ab1789bb0..a01320a7b636 100644 --- a/arch/i386/kernel/quirks.c +++ b/arch/i386/kernel/quirks.c @@ -3,10 +3,23 @@ */ #include #include +#include +#include +#include #if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_SMP) && defined(CONFIG_PCI) +static void __devinit verify_quirk_intel_irqbalance(struct pci_dev *dev) +{ +#ifdef CONFIG_X86_64 + if (genapic != &apic_flat) + panic("APIC mode must be flat on this system\n"); +#elif defined(CONFIG_X86_GENERICARCH) + if (genapic != &apic_default) + panic("APIC mode must be default(flat) on this system. Use apic=default\n"); +#endif +} -static void __devinit quirk_intel_irqbalance(struct pci_dev *dev) +void __init quirk_intel_irqbalance(void) { u8 config, rev; u32 word; @@ -16,18 +29,18 @@ static void __devinit quirk_intel_irqbalance(struct pci_dev *dev) * based platforms. * Disable SW irqbalance/affinity on those platforms. */ - pci_read_config_byte(dev, PCI_CLASS_REVISION, &rev); + rev = read_pci_config_byte(0, 0, 0, PCI_CLASS_REVISION); if (rev > 0x9) return; printk(KERN_INFO "Intel E7520/7320/7525 detected."); - /* enable access to config space*/ - pci_read_config_byte(dev, 0xf4, &config); - pci_write_config_byte(dev, 0xf4, config|0x2); + /* enable access to config space */ + config = read_pci_config_byte(0, 0, 0, 0xf4); + write_pci_config_byte(0, 0, 0, 0xf4, config|0x2); /* read xTPR register */ - raw_pci_ops->read(0, 0, 0x40, 0x4c, 2, &word); + word = read_pci_config_16(0, 0, 0x40, 0x4c); if (!(word & (1 << 13))) { printk(KERN_INFO "Disabling irq balancing and affinity\n"); @@ -37,14 +50,25 @@ static void __devinit quirk_intel_irqbalance(struct pci_dev *dev) noirqdebug_setup(""); #ifdef CONFIG_PROC_FS no_irq_affinity = 1; +#endif +#ifdef CONFIG_HOTPLUG_CPU + printk(KERN_INFO "Disabling cpu hotplug control\n"); + enable_cpu_hotplug = 0; +#endif +#ifdef CONFIG_X86_64 + /* force the genapic selection to flat mode so that + * interrupts can be redirected to more than one CPU. + */ + genapic_force = &apic_flat; #endif } - /* put back the original value for config space*/ + /* put back the original value for config space */ if (!(config & 0x2)) - pci_write_config_byte(dev, 0xf4, config); + write_pci_config_byte(0, 0, 0, 0xf4, config); } -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7320_MCH, quirk_intel_irqbalance); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7525_MCH, quirk_intel_irqbalance); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7520_MCH, quirk_intel_irqbalance); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7320_MCH, verify_quirk_intel_irqbalance); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7525_MCH, verify_quirk_intel_irqbalance); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7520_MCH, verify_quirk_intel_irqbalance); + #endif diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c index cd7de9c9654b..346f27f4c79f 100644 --- a/arch/i386/kernel/smpboot.c +++ b/arch/i386/kernel/smpboot.c @@ -58,6 +58,7 @@ #include #include #include +#include #include #include @@ -1482,6 +1483,12 @@ int __devinit __cpu_up(unsigned int cpu) cpu_set(cpu, smp_commenced_mask); while (!cpu_isset(cpu, cpu_online_map)) cpu_relax(); + +#ifdef CONFIG_X86_GENERICARCH + if (num_online_cpus() > 8 && genapic == &apic_default) + panic("Default flat APIC routing can't be used with > 8 cpus\n"); +#endif + return 0; } diff --git a/arch/x86_64/kernel/early-quirks.c b/arch/x86_64/kernel/early-quirks.c index fb0c6da41b7e..829698f6d049 100644 --- a/arch/x86_64/kernel/early-quirks.c +++ b/arch/x86_64/kernel/early-quirks.c @@ -71,6 +71,18 @@ static void ati_bugs(void) { } +static void intel_bugs(void) +{ + u16 device = read_pci_config_16(0, 0, 0, PCI_DEVICE_ID); + +#ifdef CONFIG_SMP + if (device == PCI_DEVICE_ID_INTEL_E7320_MCH || + device == PCI_DEVICE_ID_INTEL_E7520_MCH || + device == PCI_DEVICE_ID_INTEL_E7525_MCH) + quirk_intel_irqbalance(); +#endif +} + struct chipset { u16 vendor; void (*f)(void); @@ -80,6 +92,7 @@ static struct chipset early_qrk[] = { { PCI_VENDOR_ID_NVIDIA, nvidia_bugs }, { PCI_VENDOR_ID_VIA, via_bugs }, { PCI_VENDOR_ID_ATI, ati_bugs }, + { PCI_VENDOR_ID_INTEL, intel_bugs}, {} }; diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c index 62c2e747af58..4c161c208d5b 100644 --- a/arch/x86_64/kernel/smpboot.c +++ b/arch/x86_64/kernel/smpboot.c @@ -60,6 +60,7 @@ #include #include #include +#include /* Number of siblings per CPU package */ int smp_num_siblings = 1; @@ -1167,6 +1168,13 @@ int __cpuinit __cpu_up(unsigned int cpu) while (!cpu_isset(cpu, cpu_online_map)) cpu_relax(); + + if (num_online_cpus() > 8 && genapic == &apic_flat) { + printk(KERN_WARNING + "flat APIC routing can't be used with > 8 cpus\n"); + BUG(); + } + err = 0; return err; diff --git a/include/asm-i386/genapic.h b/include/asm-i386/genapic.h index 8ffbb0f07457..fd2be593b06e 100644 --- a/include/asm-i386/genapic.h +++ b/include/asm-i386/genapic.h @@ -122,6 +122,6 @@ struct genapic { APICFUNC(phys_pkg_id) \ } -extern struct genapic *genapic; +extern struct genapic *genapic, apic_default; #endif diff --git a/include/asm-i386/irq.h b/include/asm-i386/irq.h index 9e15ce0006eb..11761cdaae19 100644 --- a/include/asm-i386/irq.h +++ b/include/asm-i386/irq.h @@ -37,6 +37,8 @@ static __inline__ int irq_canonicalize(int irq) extern int irqbalance_disable(char *str); #endif +extern void quirk_intel_irqbalance(void); + #ifdef CONFIG_HOTPLUG_CPU extern void fixup_irqs(cpumask_t map); #endif diff --git a/include/asm-x86_64/proto.h b/include/asm-x86_64/proto.h index 21fa5afa232e..6d324b838972 100644 --- a/include/asm-x86_64/proto.h +++ b/include/asm-x86_64/proto.h @@ -87,6 +87,7 @@ extern void syscall32_cpu_init(void); extern void setup_node_bootmem(int nodeid, unsigned long start, unsigned long end); extern void early_quirks(void); +extern void quirk_intel_irqbalance(void); extern void check_efer(void); extern int unhandled_signal(struct task_struct *tsk, int sig); -- cgit v1.2.3 From ee58fad51a2a767cb2567706ace967705233d881 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Thu, 7 Dec 2006 02:14:11 +0100 Subject: [PATCH] x86-64: x86-64 add Intel BTS cpufeature bit and detection (take 2) Here is a small patch for x86-64 which adds a cpufeature flag and detection code for Intel's Branch Trace Store (BTS) feature. This feature can be found on Intel P4 and Core 2 processors among others. It can also be used by perfmon. changelog: - add CPU_FEATURE_BTS - add Branch Trace Store detection signed-off-by: stephane eranian Signed-off-by: Andi Kleen --- arch/x86_64/kernel/setup.c | 2 ++ include/asm-x86_64/cpufeature.h | 2 ++ 2 files changed, 4 insertions(+) (limited to 'arch/x86_64/kernel') diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c index 619af2e2fa26..a570c81c8316 100644 --- a/arch/x86_64/kernel/setup.c +++ b/arch/x86_64/kernel/setup.c @@ -838,6 +838,8 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) if (cpu_has_ds) { unsigned int l1, l2; rdmsr(MSR_IA32_MISC_ENABLE, l1, l2); + if (!(l1 & (1<<11))) + set_bit(X86_FEATURE_BTS, c->x86_capability); if (!(l1 & (1<<12))) set_bit(X86_FEATURE_PEBS, c->x86_capability); } diff --git a/include/asm-x86_64/cpufeature.h b/include/asm-x86_64/cpufeature.h index d280384807ef..0b3c686139f1 100644 --- a/include/asm-x86_64/cpufeature.h +++ b/include/asm-x86_64/cpufeature.h @@ -69,6 +69,7 @@ #define X86_FEATURE_UP (3*32+8) /* SMP kernel running on UP */ #define X86_FEATURE_ARCH_PERFMON (3*32+9) /* Intel Architectural PerfMon */ #define X86_FEATURE_PEBS (3*32+10) /* Precise-Event Based Sampling */ +#define X86_FEATURE_BTS (3*32+11) /* Branch Trace Store */ /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */ #define X86_FEATURE_XMM3 (4*32+ 0) /* Streaming SIMD Extensions-3 */ @@ -115,5 +116,6 @@ #define cpu_has_clflush boot_cpu_has(X86_FEATURE_CLFLSH) #define cpu_has_ds boot_cpu_has(X86_FEATURE_DS) #define cpu_has_pebs boot_cpu_has(X86_FEATURE_PEBS) +#define cpu_has_bts boot_cpu_has(X86_FEATURE_BTS) #endif /* __ASM_X8664_CPUFEATURE_H */ -- cgit v1.2.3 From f990fff427d68af3e4e1d16fe799c106abc0bf53 Mon Sep 17 00:00:00 2001 From: Karsten Wiese Date: Thu, 7 Dec 2006 02:14:11 +0100 Subject: [PATCH] x86: Regard MSRs in lapic_suspend()/lapic_resume() Read/Write APIC_LVTPC and APIC_LVTTHMR only, if get_maxlvt() returns certain values. This is done like everywhere else in i386/kernel/apic.c, so I guess its correct. Suspends/Resumes to disk fine and eleminates an smp_error_interrupt() here on a K8. AK: ported to x86-64 too Signed-off-by: Karsten Wiese Signed-off-by: Andi Kleen --- arch/i386/kernel/apic.c | 22 ++++++++++++++++++---- arch/x86_64/kernel/apic.c | 22 ++++++++++++++++++---- 2 files changed, 36 insertions(+), 8 deletions(-) (limited to 'arch/x86_64/kernel') diff --git a/arch/i386/kernel/apic.c b/arch/i386/kernel/apic.c index 2fd4b7d927c2..776d9be26af9 100644 --- a/arch/i386/kernel/apic.c +++ b/arch/i386/kernel/apic.c @@ -647,23 +647,30 @@ static struct { static int lapic_suspend(struct sys_device *dev, pm_message_t state) { unsigned long flags; + int maxlvt; if (!apic_pm_state.active) return 0; + maxlvt = get_maxlvt(); + apic_pm_state.apic_id = apic_read(APIC_ID); apic_pm_state.apic_taskpri = apic_read(APIC_TASKPRI); apic_pm_state.apic_ldr = apic_read(APIC_LDR); apic_pm_state.apic_dfr = apic_read(APIC_DFR); apic_pm_state.apic_spiv = apic_read(APIC_SPIV); apic_pm_state.apic_lvtt = apic_read(APIC_LVTT); - apic_pm_state.apic_lvtpc = apic_read(APIC_LVTPC); + if (maxlvt >= 4) + apic_pm_state.apic_lvtpc = apic_read(APIC_LVTPC); apic_pm_state.apic_lvt0 = apic_read(APIC_LVT0); apic_pm_state.apic_lvt1 = apic_read(APIC_LVT1); apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR); apic_pm_state.apic_tmict = apic_read(APIC_TMICT); apic_pm_state.apic_tdcr = apic_read(APIC_TDCR); - apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR); +#ifdef CONFIG_X86_MCE_P4THERMAL + if (maxlvt >= 5) + apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR); +#endif local_irq_save(flags); disable_local_APIC(); @@ -675,10 +682,13 @@ static int lapic_resume(struct sys_device *dev) { unsigned int l, h; unsigned long flags; + int maxlvt; if (!apic_pm_state.active) return 0; + maxlvt = get_maxlvt(); + local_irq_save(flags); /* @@ -700,8 +710,12 @@ static int lapic_resume(struct sys_device *dev) apic_write(APIC_SPIV, apic_pm_state.apic_spiv); apic_write(APIC_LVT0, apic_pm_state.apic_lvt0); apic_write(APIC_LVT1, apic_pm_state.apic_lvt1); - apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr); - apic_write(APIC_LVTPC, apic_pm_state.apic_lvtpc); +#ifdef CONFIG_X86_MCE_P4THERMAL + if (maxlvt >= 5) + apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr); +#endif + if (maxlvt >= 4) + apic_write(APIC_LVTPC, apic_pm_state.apic_lvtpc); apic_write(APIC_LVTT, apic_pm_state.apic_lvtt); apic_write(APIC_TDCR, apic_pm_state.apic_tdcr); apic_write(APIC_TMICT, apic_pm_state.apic_tmict); diff --git a/arch/x86_64/kernel/apic.c b/arch/x86_64/kernel/apic.c index 5c468971e645..f0b00d8731cb 100644 --- a/arch/x86_64/kernel/apic.c +++ b/arch/x86_64/kernel/apic.c @@ -459,23 +459,30 @@ static struct { static int lapic_suspend(struct sys_device *dev, pm_message_t state) { unsigned long flags; + int maxlvt; if (!apic_pm_state.active) return 0; + maxlvt = get_maxlvt(); + apic_pm_state.apic_id = apic_read(APIC_ID); apic_pm_state.apic_taskpri = apic_read(APIC_TASKPRI); apic_pm_state.apic_ldr = apic_read(APIC_LDR); apic_pm_state.apic_dfr = apic_read(APIC_DFR); apic_pm_state.apic_spiv = apic_read(APIC_SPIV); apic_pm_state.apic_lvtt = apic_read(APIC_LVTT); - apic_pm_state.apic_lvtpc = apic_read(APIC_LVTPC); + if (maxlvt >= 4) + apic_pm_state.apic_lvtpc = apic_read(APIC_LVTPC); apic_pm_state.apic_lvt0 = apic_read(APIC_LVT0); apic_pm_state.apic_lvt1 = apic_read(APIC_LVT1); apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR); apic_pm_state.apic_tmict = apic_read(APIC_TMICT); apic_pm_state.apic_tdcr = apic_read(APIC_TDCR); - apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR); +#ifdef CONFIG_X86_MCE_INTEL + if (maxlvt >= 5) + apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR); +#endif local_irq_save(flags); disable_local_APIC(); local_irq_restore(flags); @@ -486,10 +493,13 @@ static int lapic_resume(struct sys_device *dev) { unsigned int l, h; unsigned long flags; + int maxlvt; if (!apic_pm_state.active) return 0; + maxlvt = get_maxlvt(); + local_irq_save(flags); rdmsr(MSR_IA32_APICBASE, l, h); l &= ~MSR_IA32_APICBASE_BASE; @@ -503,8 +513,12 @@ static int lapic_resume(struct sys_device *dev) apic_write(APIC_SPIV, apic_pm_state.apic_spiv); apic_write(APIC_LVT0, apic_pm_state.apic_lvt0); apic_write(APIC_LVT1, apic_pm_state.apic_lvt1); - apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr); - apic_write(APIC_LVTPC, apic_pm_state.apic_lvtpc); +#ifdef CONFIG_X86_MCE_INTEL + if (maxlvt >= 5) + apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr); +#endif + if (maxlvt >= 4) + apic_write(APIC_LVTPC, apic_pm_state.apic_lvtpc); apic_write(APIC_LVTT, apic_pm_state.apic_lvtt); apic_write(APIC_TDCR, apic_pm_state.apic_tdcr); apic_write(APIC_TMICT, apic_pm_state.apic_tmict); -- cgit v1.2.3 From 86bd58bf4c383fde4e99b83ce917b091a072040d Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Thu, 7 Dec 2006 02:14:11 +0100 Subject: [PATCH] x86-64: Remove unused GET_APIC_VERSION call from clear_local_APIC Remove unused GET_APIC_VERSION call from clear_local_APIC() and __setup_APIC_LVTT(). Reported by D Binderman . Cc: Andi Kleen Cc: Ingo Molnar Signed-off-by: David Rientjes Signed-off-by: Andi Kleen --- arch/x86_64/kernel/apic.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'arch/x86_64/kernel') diff --git a/arch/x86_64/kernel/apic.c b/arch/x86_64/kernel/apic.c index f0b00d8731cb..124b2d27b4ac 100644 --- a/arch/x86_64/kernel/apic.c +++ b/arch/x86_64/kernel/apic.c @@ -140,7 +140,6 @@ void clear_local_APIC(void) apic_write(APIC_LVTERR, APIC_LVT_MASKED); if (maxlvt >= 4) apic_write(APIC_LVTPC, APIC_LVT_MASKED); - v = GET_APIC_VERSION(apic_read(APIC_LVR)); apic_write(APIC_ESR, 0); apic_read(APIC_ESR); } @@ -736,10 +735,9 @@ void __init init_apic_mappings(void) static void __setup_APIC_LVTT(unsigned int clocks) { - unsigned int lvtt_value, tmp_value, ver; + unsigned int lvtt_value, tmp_value; int cpu = smp_processor_id(); - ver = GET_APIC_VERSION(apic_read(APIC_LVR)); lvtt_value = APIC_LVT_TIMER_PERIODIC | LOCAL_TIMER_VECTOR; if (cpu_isset(cpu, timer_interrupt_broadcast_ipi_mask)) -- cgit v1.2.3 From 0741f4d207a644482d7a040f05cd264c98cf7ee8 Mon Sep 17 00:00:00 2001 From: Chuck Ebbert <76306.1226@compuserve.com> Date: Thu, 7 Dec 2006 02:14:11 +0100 Subject: [PATCH] x86: add sysctl for kstack_depth_to_print Add sysctl for kstack_depth_to_print. This lets users change the amount of raw stack data printed in dump_stack() without having to reboot. Signed-off-by: Chuck Ebbert <76306.1226@compuserve.com> Signed-off-by: Andi Kleen --- Documentation/sysctl/kernel.txt | 8 ++++++++ arch/i386/kernel/traps.c | 2 +- arch/x86_64/kernel/traps.c | 2 +- include/asm-x86_64/stacktrace.h | 2 ++ kernel/sysctl.c | 9 +++++++++ 5 files changed, 21 insertions(+), 2 deletions(-) (limited to 'arch/x86_64/kernel') diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt index 0bc7f1e3c9e6..5922e84d9133 100644 --- a/Documentation/sysctl/kernel.txt +++ b/Documentation/sysctl/kernel.txt @@ -27,6 +27,7 @@ show up in /proc/sys/kernel: - hotplug - java-appletviewer [ binfmt_java, obsolete ] - java-interpreter [ binfmt_java, obsolete ] +- kstack_depth_to_print [ X86 only ] - l2cr [ PPC only ] - modprobe ==> Documentation/kmod.txt - msgmax @@ -170,6 +171,13 @@ This flag controls the L2 cache of G3 processor boards. If ============================================================== +kstack_depth_to_print: (X86 only) + +Controls the number of words to print when dumping the raw +kernel stack. + +============================================================== + osrelease, ostype & version: # cat osrelease diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c index 7b2f9f022089..1d48a75fa338 100644 --- a/arch/i386/kernel/traps.c +++ b/arch/i386/kernel/traps.c @@ -91,7 +91,7 @@ asmlinkage void alignment_check(void); asmlinkage void spurious_interrupt_bug(void); asmlinkage void machine_check(void); -static int kstack_depth_to_print = 24; +int kstack_depth_to_print = 24; #ifdef CONFIG_STACK_UNWIND static int call_trace = 1; #else diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c index 264db33476ab..75ceccee178c 100644 --- a/arch/x86_64/kernel/traps.c +++ b/arch/x86_64/kernel/traps.c @@ -108,7 +108,7 @@ static inline void preempt_conditional_cli(struct pt_regs *regs) preempt_enable_no_resched(); } -static int kstack_depth_to_print = 12; +int kstack_depth_to_print = 12; #ifdef CONFIG_STACK_UNWIND static int call_trace = 1; #else diff --git a/include/asm-x86_64/stacktrace.h b/include/asm-x86_64/stacktrace.h index 5eb9799bef76..6f0b54594307 100644 --- a/include/asm-x86_64/stacktrace.h +++ b/include/asm-x86_64/stacktrace.h @@ -1,6 +1,8 @@ #ifndef _ASM_STACKTRACE_H #define _ASM_STACKTRACE_H 1 +extern int kstack_depth_to_print; + /* Generic stack tracer with callbacks */ struct stacktrace_ops { diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 09e569f4792b..6fc5e17086f4 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -54,6 +54,7 @@ extern int proc_nr_files(ctl_table *table, int write, struct file *filp, #ifdef CONFIG_X86 #include +#include #endif #if defined(CONFIG_SYSCTL) @@ -707,6 +708,14 @@ static ctl_table kern_table[] = { .mode = 0444, .proc_handler = &proc_dointvec, }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "kstack_depth_to_print", + .data = &kstack_depth_to_print, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, #endif #if defined(CONFIG_MMU) { -- cgit v1.2.3 From 3df0af0eb064a16bbdbe81b46bc72a4089f88d54 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 7 Dec 2006 02:14:12 +0100 Subject: [PATCH] x86_64: clear_bss before set_intr_gate with early_idt_handler idt_table is in the .bss section, so clear_bss need to called at first Signed-off-by: Yinghai Lu Signed-off-by: Andi Kleen --- arch/x86_64/kernel/head64.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'arch/x86_64/kernel') diff --git a/arch/x86_64/kernel/head64.c b/arch/x86_64/kernel/head64.c index 9561eb3c5b5c..cc230b93cd1c 100644 --- a/arch/x86_64/kernel/head64.c +++ b/arch/x86_64/kernel/head64.c @@ -57,10 +57,12 @@ void __init x86_64_start_kernel(char * real_mode_data) { int i; - for (i = 0; i < 256; i++) + /* clear bss before set_intr_gate with early_idt_handler */ + clear_bss(); + + for (i = 0; i < IDT_ENTRIES; i++) set_intr_gate(i, early_idt_handler); asm volatile("lidt %0" :: "m" (idt_descr)); - clear_bss(); early_printk("Kernel alive\n"); -- cgit v1.2.3 From 8fb6e5f5db860113e71ce7b854382ed40559395b Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 7 Dec 2006 02:14:12 +0100 Subject: [PATCH] x86_64: interrupt array size should be aligned to NR_VECTORS interrupt array is referred for idt vectors instead of NR_IRQS, so change size to NR_VECTORS - FIRST_EXTERNAL_VECTOR. Also change to static. Signed-off-by: Yinghai Lu Signed-off-by: Andi Kleen --- arch/x86_64/kernel/i8259.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86_64/kernel') diff --git a/arch/x86_64/kernel/i8259.c b/arch/x86_64/kernel/i8259.c index c4ef801b765b..d73c79e821f1 100644 --- a/arch/x86_64/kernel/i8259.c +++ b/arch/x86_64/kernel/i8259.c @@ -76,7 +76,8 @@ BUILD_16_IRQS(0xc) BUILD_16_IRQS(0xd) BUILD_16_IRQS(0xe) BUILD_16_IRQS(0xf) IRQ(x,8), IRQ(x,9), IRQ(x,a), IRQ(x,b), \ IRQ(x,c), IRQ(x,d), IRQ(x,e), IRQ(x,f) -void (*interrupt[NR_IRQS])(void) = { +/* for the irq vectors */ +static void (*interrupt[NR_VECTORS - FIRST_EXTERNAL_VECTOR])(void) = { IRQLIST_16(0x2), IRQLIST_16(0x3), IRQLIST_16(0x4), IRQLIST_16(0x5), IRQLIST_16(0x6), IRQLIST_16(0x7), IRQLIST_16(0x8), IRQLIST_16(0x9), IRQLIST_16(0xa), IRQLIST_16(0xb), -- cgit v1.2.3 From d4c45718b3c0d3045eab803cd15fabbed1ea5bcd Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 7 Dec 2006 02:14:12 +0100 Subject: [PATCH] x86-64: Fix kobject_init() WARN_ON on resume from disk Make mce_remove_device() clean up the kobject in per_cpu(device_mce, cpu) after it has been unregistered. Signed-off-by: Rafael J. Wysocki Signed-off-by: Andi Kleen --- arch/x86_64/kernel/mce.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86_64/kernel') diff --git a/arch/x86_64/kernel/mce.c b/arch/x86_64/kernel/mce.c index bbea88801d88..a7440cb9c14f 100644 --- a/arch/x86_64/kernel/mce.c +++ b/arch/x86_64/kernel/mce.c @@ -652,6 +652,7 @@ static void mce_remove_device(unsigned int cpu) sysdev_remove_file(&per_cpu(device_mce,cpu), &attr_tolerant); sysdev_remove_file(&per_cpu(device_mce,cpu), &attr_check_interval); sysdev_unregister(&per_cpu(device_mce,cpu)); + memset(&per_cpu(device_mce, cpu).kobj, 0, sizeof(struct kobject)); } /* Get notified when a cpu comes on/off. Be hotplug friendly. */ -- cgit v1.2.3 From 73ad8355d7db6a3cdcf313d4a4586a8f81b19c2f Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 7 Dec 2006 02:14:12 +0100 Subject: [PATCH] x86-64: remove unused acpi_found_madt in mparse. remove unused acpi_found_madt in mparse.c Signed-off-by: Yinghai Lu Signed-off-by: Andi Kleen --- arch/x86_64/kernel/mpparse.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/x86_64/kernel') diff --git a/arch/x86_64/kernel/mpparse.c b/arch/x86_64/kernel/mpparse.c index b147ab19fbd4..08072568847d 100644 --- a/arch/x86_64/kernel/mpparse.c +++ b/arch/x86_64/kernel/mpparse.c @@ -35,8 +35,6 @@ int smp_found_config; unsigned int __initdata maxcpus = NR_CPUS; -int acpi_found_madt; - /* * Various Linux-internal data structures created from the * MP-table. -- cgit v1.2.3 From 616779656989cb8c59177e35cb13e87028b1edc8 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 7 Dec 2006 02:14:12 +0100 Subject: [PATCH] x86-64: Synchronize RDTSC on single core AMD There is no guarantee that two RDTSCs in a row are monotonic, so don't assume it on single core AMD systems. This will make gettimeofday slower again Signed-off-by: Andi Kleen --- arch/x86_64/kernel/setup.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'arch/x86_64/kernel') diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c index a570c81c8316..05eaca41802b 100644 --- a/arch/x86_64/kernel/setup.c +++ b/arch/x86_64/kernel/setup.c @@ -732,11 +732,8 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) /* Fix cpuid4 emulation for more */ num_cache_leaves = 3; - /* When there is only one core no need to synchronize RDTSC */ - if (num_possible_cpus() == 1) - set_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability); - else - clear_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability); + /* RDTSC can be speculated around */ + clear_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability); } static void __cpuinit detect_ht(struct cpuinfo_x86 *c) -- cgit v1.2.3 From e496a0da7f8110054d0ad4039245b0f49f9540f5 Mon Sep 17 00:00:00 2001 From: Muli Ben-Yehuda Date: Thu, 7 Dec 2006 02:14:12 +0100 Subject: [PATCH] Calgary: remove unused variables Spotted by d binderman . Signed-off-by: Muli Ben-Yehuda Signed-off-by: Andi Kleen --- arch/x86_64/kernel/pci-calgary.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'arch/x86_64/kernel') diff --git a/arch/x86_64/kernel/pci-calgary.c b/arch/x86_64/kernel/pci-calgary.c index 0ddf29dae7e0..3215675ab128 100644 --- a/arch/x86_64/kernel/pci-calgary.c +++ b/arch/x86_64/kernel/pci-calgary.c @@ -653,14 +653,9 @@ static void __init calgary_reserve_peripheral_mem_2(struct pci_dev *dev) static void __init calgary_reserve_regions(struct pci_dev *dev) { unsigned int npages; - void __iomem *bbar; - unsigned char busnum; u64 start; struct iommu_table *tbl = dev->sysdata; - bbar = tbl->bbar; - busnum = dev->bus->number; - /* reserve bad_dma_address in case it's a legal address */ iommu_range_reserve(tbl, bad_dma_address, 1); -- cgit v1.2.3 From f3d73707a1e84f0687a05144b70b660441e999c7 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Thu, 7 Dec 2006 02:14:12 +0100 Subject: [PATCH] x86-64: Mark rdtsc as sync only for netburst, not for core2 On the Core2 cpus, the rdtsc instruction is not serializing (as defined in the architecture reference since rdtsc exists) and due to the deep speculation of these cores, it's possible that you can observe time go backwards between cores due to this speculation. Since the kernel already deals with this with the SYNC_RDTSC flag, the solution is simple, only assume that the instruction is serializing on family 15... The price one pays for this is a slightly slower gettimeofday (by a dozen or two cycles), but that increase is quite small to pay for a really-going-forward tsc counter. Signed-off-by: Arjan van de Ven Signed-off-by: Andi Kleen --- arch/x86_64/kernel/setup.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'arch/x86_64/kernel') diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c index 05eaca41802b..6595a4ebe7f1 100644 --- a/arch/x86_64/kernel/setup.c +++ b/arch/x86_64/kernel/setup.c @@ -860,7 +860,10 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability); if (c->x86 == 6) set_bit(X86_FEATURE_REP_GOOD, &c->x86_capability); - set_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability); + if (c->x86 == 15) + set_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability); + else + clear_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability); c->x86_max_cores = intel_num_cpu_cores(c); srat_detect_node(); -- cgit v1.2.3 From 446f713ba1afd68568139ae4691335ba273fa7f4 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 7 Dec 2006 02:14:12 +0100 Subject: [PATCH] unwinder: always use unlocked module list access in unwinder fallback We're already well protected against module unloads because module unload uses stop_machine(). The only exception is NMIs, but other users already risk lockless accesses here. This avoids some hackery in lockdep and also a potential deadlock This matches what i386 does. Signed-off-by: Andi Kleen --- arch/x86_64/kernel/traps.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86_64/kernel') diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c index 75ceccee178c..9864d195c408 100644 --- a/arch/x86_64/kernel/traps.c +++ b/arch/x86_64/kernel/traps.c @@ -317,9 +317,9 @@ void dump_trace(struct task_struct *tsk, struct pt_regs *regs, #define HANDLE_STACK(cond) \ do while (cond) { \ unsigned long addr = *stack++; \ - if (oops_in_progress ? \ - __kernel_text_address(addr) : \ - kernel_text_address(addr)) { \ + /* Use unlocked access here because except for NMIs \ + we should be already protected against module unloads */ \ + if (__kernel_text_address(addr)) { \ /* \ * If the address is either in the text segment of the \ * kernel, or in the region which contains vmalloc'ed \ -- cgit v1.2.3 From 359ad0d4015a9ab39243f2ebc4eb07915bd618b2 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Thu, 7 Dec 2006 02:14:13 +0100 Subject: [PATCH] unwinder: more sanity checks in Dwarf2 unwinder Tighten the requirements on both input to and output from the Dwarf2 unwinder. Signed-off-by: Jan Beulich Signed-off-by: Andi Kleen --- arch/i386/kernel/traps.c | 7 +++++++ arch/x86_64/kernel/traps.c | 7 +++++++ include/asm-i386/unwind.h | 12 ++++-------- include/asm-x86_64/unwind.h | 8 ++------ kernel/unwind.c | 16 +++++++++++++++- 5 files changed, 35 insertions(+), 15 deletions(-) (limited to 'arch/x86_64/kernel') diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c index 86d8476be4fe..c447807e2a45 100644 --- a/arch/i386/kernel/traps.c +++ b/arch/i386/kernel/traps.c @@ -161,12 +161,19 @@ dump_trace_unwind(struct unwind_frame_info *info, void *data) { struct ops_and_data *oad = (struct ops_and_data *)data; int n = 0; + unsigned long sp = UNW_SP(info); + if (arch_unw_user_mode(info)) + return -1; while (unwind(info) == 0 && UNW_PC(info)) { n++; oad->ops->address(oad->data, UNW_PC(info)); if (arch_unw_user_mode(info)) break; + if ((sp & ~(PAGE_SIZE - 1)) == (UNW_SP(info) & ~(PAGE_SIZE - 1)) + && sp > UNW_SP(info)) + break; + sp = UNW_SP(info); } return n; } diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c index 9864d195c408..4fdd162f0bef 100644 --- a/arch/x86_64/kernel/traps.c +++ b/arch/x86_64/kernel/traps.c @@ -225,12 +225,19 @@ static int dump_trace_unwind(struct unwind_frame_info *info, void *context) { struct ops_and_data *oad = (struct ops_and_data *)context; int n = 0; + unsigned long sp = UNW_SP(info); + if (arch_unw_user_mode(info)) + return -1; while (unwind(info) == 0 && UNW_PC(info)) { n++; oad->ops->address(oad->data, UNW_PC(info)); if (arch_unw_user_mode(info)) break; + if ((sp & ~(PAGE_SIZE - 1)) == (UNW_SP(info) & ~(PAGE_SIZE - 1)) + && sp > UNW_SP(info)) + break; + sp = UNW_SP(info); } return n; } diff --git a/include/asm-i386/unwind.h b/include/asm-i386/unwind.h index 601fc67bd775..aa2c931e30db 100644 --- a/include/asm-i386/unwind.h +++ b/include/asm-i386/unwind.h @@ -79,17 +79,13 @@ extern asmlinkage int arch_unwind_init_running(struct unwind_frame_info *, void *arg), void *arg); -static inline int arch_unw_user_mode(const struct unwind_frame_info *info) +static inline int arch_unw_user_mode(/*const*/ struct unwind_frame_info *info) { -#if 0 /* This can only work when selector register and EFLAGS saves/restores - are properly annotated (and tracked in UNW_REGISTER_INFO). */ - return user_mode_vm(&info->regs); -#else - return info->regs.eip < PAGE_OFFSET + return user_mode_vm(&info->regs) + || info->regs.eip < PAGE_OFFSET || (info->regs.eip >= __fix_to_virt(FIX_VDSO) - && info->regs.eip < __fix_to_virt(FIX_VDSO) + PAGE_SIZE) + && info->regs.eip < __fix_to_virt(FIX_VDSO) + PAGE_SIZE) || info->regs.esp < PAGE_OFFSET; -#endif } #else diff --git a/include/asm-x86_64/unwind.h b/include/asm-x86_64/unwind.h index 2e7ff10fd775..2f6349e48717 100644 --- a/include/asm-x86_64/unwind.h +++ b/include/asm-x86_64/unwind.h @@ -87,14 +87,10 @@ extern int arch_unwind_init_running(struct unwind_frame_info *, static inline int arch_unw_user_mode(const struct unwind_frame_info *info) { -#if 0 /* This can only work when selector register saves/restores - are properly annotated (and tracked in UNW_REGISTER_INFO). */ - return user_mode(&info->regs); -#else - return (long)info->regs.rip >= 0 + return user_mode(&info->regs) + || (long)info->regs.rip >= 0 || (info->regs.rip >= VSYSCALL_START && info->regs.rip < VSYSCALL_END) || (long)info->regs.rsp >= 0; -#endif } #else diff --git a/kernel/unwind.c b/kernel/unwind.c index af48168a3afb..7e721f104105 100644 --- a/kernel/unwind.c +++ b/kernel/unwind.c @@ -95,6 +95,7 @@ static const struct { typedef unsigned long uleb128_t; typedef signed long sleb128_t; +#define sleb128abs __builtin_labs static struct unwind_table { struct { @@ -787,7 +788,7 @@ int unwind(struct unwind_frame_info *frame) #define FRAME_REG(r, t) (((t *)frame)[reg_info[r].offs]) const u32 *fde = NULL, *cie = NULL; const u8 *ptr = NULL, *end = NULL; - unsigned long pc = UNW_PC(frame) - frame->call_frame; + unsigned long pc = UNW_PC(frame) - frame->call_frame, sp; unsigned long startLoc = 0, endLoc = 0, cfa; unsigned i; signed ptrType = -1; @@ -936,6 +937,9 @@ int unwind(struct unwind_frame_info *frame) state.dataAlign = get_sleb128(&ptr, end); if (state.codeAlign == 0 || state.dataAlign == 0 || ptr >= end) cie = NULL; + else if (UNW_PC(frame) % state.codeAlign + || UNW_SP(frame) % sleb128abs(state.dataAlign)) + return -EPERM; else { retAddrReg = state.version <= 1 ? *ptr++ : get_uleb128(&ptr, end); /* skip augmentation */ @@ -968,6 +972,8 @@ int unwind(struct unwind_frame_info *frame) #ifdef CONFIG_FRAME_POINTER unsigned long top, bottom; + if ((UNW_SP(frame) | UNW_FP(frame)) % sizeof(unsigned long)) + return -EPERM; top = STACK_TOP(frame->task); bottom = STACK_BOTTOM(frame->task); # if FRAME_RETADDR_OFFSET < 0 @@ -1018,6 +1024,7 @@ int unwind(struct unwind_frame_info *frame) || state.regs[retAddrReg].where == Nowhere || state.cfa.reg >= ARRAY_SIZE(reg_info) || reg_info[state.cfa.reg].width != sizeof(unsigned long) + || FRAME_REG(state.cfa.reg, unsigned long) % sizeof(unsigned long) || state.cfa.offs % sizeof(unsigned long)) return -EIO; /* update frame */ @@ -1038,6 +1045,8 @@ int unwind(struct unwind_frame_info *frame) #else # define CASES CASE(8); CASE(16); CASE(32); CASE(64) #endif + pc = UNW_PC(frame); + sp = UNW_SP(frame); for (i = 0; i < ARRAY_SIZE(state.regs); ++i) { if (REG_INVALID(i)) { if (state.regs[i].where == Nowhere) @@ -1118,6 +1127,11 @@ int unwind(struct unwind_frame_info *frame) } } + if (UNW_PC(frame) % state.codeAlign + || UNW_SP(frame) % sleb128abs(state.dataAlign) + || (pc == UNW_PC(frame) && sp == UNW_SP(frame))) + return -EIO; + return 0; #undef CASES #undef FRAME_REG -- cgit v1.2.3 From a0429d0d7a6116dedcb71d9128da904bf135f189 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 7 Dec 2006 02:14:13 +0100 Subject: [PATCH] x86-64: Remove unwind stack pointer alignment forcing again This was added as a workaround for the fallback unwinder not supporting unaligned stack pointers properly. But now it was fixed to do that, so it's not needed anymore Cc: mingo@elte.hu Signed-off-by: Andi Kleen --- arch/x86_64/kernel/traps.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'arch/x86_64/kernel') diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c index 4fdd162f0bef..a1641ffdffcf 100644 --- a/arch/x86_64/kernel/traps.c +++ b/arch/x86_64/kernel/traps.c @@ -309,12 +309,6 @@ void dump_trace(struct task_struct *tsk, struct pt_regs *regs, if (tsk && tsk != current) stack = (unsigned long *)tsk->thread.rsp; } - /* - * Align the stack pointer on word boundary, later loops - * rely on that (and corruption / debug info bugs can cause - * unaligned values here): - */ - stack = (unsigned long *)((unsigned long)stack & ~(sizeof(long)-1)); /* * Print function call entries within a stack. 'cond' is the -- cgit v1.2.3 From d331e739f5ad2aaa9d8553891ba6ca823bdbce37 Mon Sep 17 00:00:00 2001 From: Venkatesh Pallipadi Date: Thu, 7 Dec 2006 02:14:13 +0100 Subject: [PATCH] x86-64: Fix interrupt race in idle callback (3rd try) Idle callbacks has some races when enter_idle() sets isidle and subsequent interrupts that can happen on that CPU, before CPU goes to idle. Due to this, an IDLE_END can get called before IDLE_START. To avoid these races, disable interrupts before enter_idle and make sure that all idle routines do not enable interrupts before entering idle. Note that poll_idle() still has a this race as it has to enable interrupts before going to idle. But, all other idle routines have the race fixed. Signed-off-by: Venkatesh Pallipadi Signed-off-by: Andi Kleen --- arch/x86_64/kernel/process.c | 19 +++++++++++++++++-- include/asm-x86_64/processor.h | 8 ++++++++ 2 files changed, 25 insertions(+), 2 deletions(-) (limited to 'arch/x86_64/kernel') diff --git a/arch/x86_64/kernel/process.c b/arch/x86_64/kernel/process.c index 0b7b4caa4f74..a418ee4c8c62 100644 --- a/arch/x86_64/kernel/process.c +++ b/arch/x86_64/kernel/process.c @@ -127,6 +127,7 @@ static void default_idle(void) */ static void poll_idle (void) { + local_irq_enable(); cpu_relax(); } @@ -208,6 +209,12 @@ void cpu_idle (void) idle = default_idle; if (cpu_is_offline(smp_processor_id())) play_dead(); + /* + * Idle routines should keep interrupts disabled + * from here on, until they go to idle. + * Otherwise, idle callbacks can misfire. + */ + local_irq_disable(); enter_idle(); idle(); /* In many cases the interrupt that ended idle @@ -245,8 +252,16 @@ void mwait_idle_with_hints(unsigned long eax, unsigned long ecx) /* Default MONITOR/MWAIT with no hints, used for default C1 state */ static void mwait_idle(void) { - local_irq_enable(); - mwait_idle_with_hints(0,0); + if (!need_resched()) { + __monitor((void *)¤t_thread_info()->flags, 0, 0); + smp_mb(); + if (!need_resched()) + __sti_mwait(0, 0); + else + local_irq_enable(); + } else { + local_irq_enable(); + } } void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c) diff --git a/include/asm-x86_64/processor.h b/include/asm-x86_64/processor.h index cef17e0f828c..76552d72804c 100644 --- a/include/asm-x86_64/processor.h +++ b/include/asm-x86_64/processor.h @@ -475,6 +475,14 @@ static inline void __mwait(unsigned long eax, unsigned long ecx) : :"a" (eax), "c" (ecx)); } +static inline void __sti_mwait(unsigned long eax, unsigned long ecx) +{ + /* "mwait %eax,%ecx;" */ + asm volatile( + "sti; .byte 0x0f,0x01,0xc9;" + : :"a" (eax), "c" (ecx)); +} + extern void mwait_idle_with_hints(unsigned long eax, unsigned long ecx); #define stack_current() \ -- cgit v1.2.3 From 3807fd46e94ab9f09e5ee3bff5e6515a94e9b3c7 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 7 Dec 2006 02:14:13 +0100 Subject: [PATCH] x86-64: Clarify error message in GART code - Remove "Disabling IOMMU" message because it confuses people - Clarify that the GART IOMMU is refered to in other message Signed-off-by: Andi Kleen --- arch/x86_64/kernel/pci-gart.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/x86_64/kernel') diff --git a/arch/x86_64/kernel/pci-gart.c b/arch/x86_64/kernel/pci-gart.c index 16261a8a3303..fc1960f1f243 100644 --- a/arch/x86_64/kernel/pci-gart.c +++ b/arch/x86_64/kernel/pci-gart.c @@ -601,10 +601,9 @@ void __init gart_iommu_init(void) (!force_iommu && end_pfn <= MAX_DMA32_PFN) || !iommu_aperture || (no_agp && init_k8_gatt(&info) < 0)) { - printk(KERN_INFO "PCI-DMA: Disabling IOMMU.\n"); if (end_pfn > MAX_DMA32_PFN) { printk(KERN_ERR "WARNING more than 4GB of memory " - "but IOMMU not available.\n" + "but GART IOMMU not available.\n" KERN_ERR "WARNING 32bit PCI may malfunction.\n"); } return; -- cgit v1.2.3 From ad892f5e0d01f3c3b475a688d1ddc211cf3ea56d Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 7 Dec 2006 02:14:19 +0100 Subject: [PATCH] x86-64: check vector in setup_ioapic_dest to verify if need setup_IO_APIC_irq setup_IO_APIC_irqs could fail to get vector for some device when you have too many devices, because at that time only boot cpu is online. So check vector for irq in setup_ioapic_dest and call setup_IO_APIC_irq to make sure IO-APIC irq-routing table is initialized. Also seperate setup_IO_APIC_irq from setup_IO_APIC_irqs. Signed-off-by: Yinghai Lu Signed-off-by: Andi Kleen Cc: Andi Kleen Cc: "Eric W. Biederman" Cc: Ingo Molnar Signed-off-by: Andrew Morton --- arch/x86_64/kernel/io_apic.c | 104 +++++++++++++++++++++++++------------------ 1 file changed, 61 insertions(+), 43 deletions(-) (limited to 'arch/x86_64/kernel') diff --git a/arch/x86_64/kernel/io_apic.c b/arch/x86_64/kernel/io_apic.c index eaf0b708e67d..2a1dcd5f69c2 100644 --- a/arch/x86_64/kernel/io_apic.c +++ b/arch/x86_64/kernel/io_apic.c @@ -789,27 +789,65 @@ static void ioapic_register_intr(int irq, int vector, unsigned long trigger) handle_edge_irq, "edge"); } } - -static void __init setup_IO_APIC_irqs(void) +static void __init setup_IO_APIC_irq(int apic, int pin, int idx, int irq) { struct IO_APIC_route_entry entry; - int apic, pin, idx, irq, first_notcon = 1, vector; + int vector; unsigned long flags; - apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n"); - for (apic = 0; apic < nr_ioapics; apic++) { - for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { + /* + * add it to the IO-APIC irq-routing table: + */ + memset(&entry,0,sizeof(entry)); - /* - * add it to the IO-APIC irq-routing table: - */ - memset(&entry,0,sizeof(entry)); + entry.delivery_mode = INT_DELIVERY_MODE; + entry.dest_mode = INT_DEST_MODE; + entry.mask = 0; /* enable IRQ */ + entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS); + + entry.trigger = irq_trigger(idx); + entry.polarity = irq_polarity(idx); - entry.delivery_mode = INT_DELIVERY_MODE; - entry.dest_mode = INT_DEST_MODE; - entry.mask = 0; /* enable IRQ */ + if (irq_trigger(idx)) { + entry.trigger = 1; + entry.mask = 1; entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS); + } + + if (!apic && !IO_APIC_IRQ(irq)) + return; + + if (IO_APIC_IRQ(irq)) { + cpumask_t mask; + vector = assign_irq_vector(irq, TARGET_CPUS, &mask); + if (vector < 0) + return; + + entry.dest.logical.logical_dest = cpu_mask_to_apicid(mask); + entry.vector = vector; + + ioapic_register_intr(irq, vector, IOAPIC_AUTO); + if (!apic && (irq < 16)) + disable_8259A_irq(irq); + } + + ioapic_write_entry(apic, pin, entry); + + spin_lock_irqsave(&ioapic_lock, flags); + set_native_irq_info(irq, TARGET_CPUS); + spin_unlock_irqrestore(&ioapic_lock, flags); + +} + +static void __init setup_IO_APIC_irqs(void) +{ + int apic, pin, idx, irq, first_notcon = 1; + + apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n"); + + for (apic = 0; apic < nr_ioapics; apic++) { + for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { idx = find_irq_entry(apic,pin,mp_INT); if (idx == -1) { @@ -821,39 +859,11 @@ static void __init setup_IO_APIC_irqs(void) continue; } - entry.trigger = irq_trigger(idx); - entry.polarity = irq_polarity(idx); - - if (irq_trigger(idx)) { - entry.trigger = 1; - entry.mask = 1; - entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS); - } - irq = pin_2_irq(idx, apic, pin); add_pin_to_irq(irq, apic, pin); - if (!apic && !IO_APIC_IRQ(irq)) - continue; + setup_IO_APIC_irq(apic, pin, idx, irq); - if (IO_APIC_IRQ(irq)) { - cpumask_t mask; - vector = assign_irq_vector(irq, TARGET_CPUS, &mask); - if (vector < 0) - continue; - - entry.dest.logical.logical_dest = cpu_mask_to_apicid(mask); - entry.vector = vector; - - ioapic_register_intr(irq, vector, IOAPIC_AUTO); - if (!apic && (irq < 16)) - disable_8259A_irq(irq); - } - ioapic_write_entry(apic, pin, entry); - - spin_lock_irqsave(&ioapic_lock, flags); - set_native_irq_info(irq, TARGET_CPUS); - spin_unlock_irqrestore(&ioapic_lock, flags); } } @@ -2139,7 +2149,15 @@ void __init setup_ioapic_dest(void) if (irq_entry == -1) continue; irq = pin_2_irq(irq_entry, ioapic, pin); - set_ioapic_affinity_irq(irq, TARGET_CPUS); + + /* setup_IO_APIC_irqs could fail to get vector for some device + * when you have too many devices, because at that time only boot + * cpu is online. + */ + if(!irq_vector[irq]) + setup_IO_APIC_irq(ioapic, pin, irq_entry, irq); + else + set_ioapic_affinity_irq(irq, TARGET_CPUS); } } -- cgit v1.2.3 From b65780e123ba9b762276482bbfb52836e4d41fd9 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Thu, 7 Dec 2006 02:14:19 +0100 Subject: [PATCH] unwinder: move .eh_frame to RODATA The .eh_frame section contents is never written to, so it can as well benefit from CONFIG_DEBUG_RODATA. Diff-ed against firstfloor tree. Signed-off-by: Jan Beulich Signed-off-by: Andi Kleen --- arch/i386/kernel/vmlinux.lds.S | 9 --------- arch/x86_64/kernel/vmlinux.lds.S | 9 --------- include/asm-generic/vmlinux.lds.h | 17 ++++++++++++----- kernel/unwind.c | 2 +- 4 files changed, 13 insertions(+), 24 deletions(-) (limited to 'arch/x86_64/kernel') diff --git a/arch/i386/kernel/vmlinux.lds.S b/arch/i386/kernel/vmlinux.lds.S index 25581e87c60d..56e6ad5cb045 100644 --- a/arch/i386/kernel/vmlinux.lds.S +++ b/arch/i386/kernel/vmlinux.lds.S @@ -102,15 +102,6 @@ SECTIONS _edata = .; /* End of data section */ } -#ifdef CONFIG_STACK_UNWIND - . = ALIGN(4); - .eh_frame : AT(ADDR(.eh_frame) - LOAD_OFFSET) { - __start_unwind = .; - *(.eh_frame) - __end_unwind = .; - } -#endif - . = ALIGN(THREAD_SIZE); /* init_task */ .data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) { *(.data.init_task) diff --git a/arch/x86_64/kernel/vmlinux.lds.S b/arch/x86_64/kernel/vmlinux.lds.S index d9534e750d4f..6a1f8f491e5d 100644 --- a/arch/x86_64/kernel/vmlinux.lds.S +++ b/arch/x86_64/kernel/vmlinux.lds.S @@ -51,15 +51,6 @@ SECTIONS RODATA -#ifdef CONFIG_STACK_UNWIND - . = ALIGN(8); - .eh_frame : AT(ADDR(.eh_frame) - LOAD_OFFSET) { - __start_unwind = .; - *(.eh_frame) - __end_unwind = .; - } -#endif - . = ALIGN(PAGE_SIZE); /* Align data segment to page size boundary */ /* Data */ .data : AT(ADDR(.data) - LOAD_OFFSET) { diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 9f4747780dac..4d4c62d11059 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -119,8 +119,7 @@ *(__ksymtab_strings) \ } \ \ - /* Unwind data binary search table */ \ - EH_FRAME_HDR \ + EH_FRAME \ \ /* Built-in module parameters. */ \ __param : AT(ADDR(__param) - LOAD_OFFSET) { \ @@ -162,15 +161,23 @@ VMLINUX_SYMBOL(__kprobes_text_end) = .; #ifdef CONFIG_STACK_UNWIND - /* Unwind data binary search table */ -#define EH_FRAME_HDR \ +#define EH_FRAME \ + /* Unwind data binary search table */ \ + . = ALIGN(8); \ .eh_frame_hdr : AT(ADDR(.eh_frame_hdr) - LOAD_OFFSET) { \ VMLINUX_SYMBOL(__start_unwind_hdr) = .; \ *(.eh_frame_hdr) \ VMLINUX_SYMBOL(__end_unwind_hdr) = .; \ + } \ + /* Unwind data */ \ + . = ALIGN(8); \ + .eh_frame : AT(ADDR(.eh_frame) - LOAD_OFFSET) { \ + VMLINUX_SYMBOL(__start_unwind) = .; \ + *(.eh_frame) \ + VMLINUX_SYMBOL(__end_unwind) = .; \ } #else -#define EH_FRAME_HDR +#define EH_FRAME #endif /* DWARF debug sections. diff --git a/kernel/unwind.c b/kernel/unwind.c index 08645aa7c2d6..09c261329249 100644 --- a/kernel/unwind.c +++ b/kernel/unwind.c @@ -19,7 +19,7 @@ #include #include -extern char __start_unwind[], __end_unwind[]; +extern const char __start_unwind[], __end_unwind[]; extern const u8 __start_unwind_hdr[], __end_unwind_hdr[]; #define MAX_STACK_DEPTH 8 -- cgit v1.2.3 From 64a26a731235b59c9d73bbe82c1f896d57400d37 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 7 Dec 2006 02:14:19 +0100 Subject: [PATCH] x86-64: Export smp_call_function_single smp_call_function() is exported, makes sense to export this one too. Signed-off-by: Andi Kleen --- arch/x86_64/kernel/smp.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86_64/kernel') diff --git a/arch/x86_64/kernel/smp.c b/arch/x86_64/kernel/smp.c index 9f74c883568c..1bf0e094f439 100644 --- a/arch/x86_64/kernel/smp.c +++ b/arch/x86_64/kernel/smp.c @@ -385,6 +385,7 @@ int smp_call_function_single (int cpu, void (*func) (void *info), void *info, put_cpu(); return 0; } +EXPORT_SYMBOL(smp_call_function_single); /* * this function sends a 'generic call function' IPI to all other CPUs -- cgit v1.2.3