From 4be44fcd3bf648b782f4460fd06dfae6c42ded4b Mon Sep 17 00:00:00 2001
From: Len Brown
Date: Fri, 5 Aug 2005 00:44:28 -0400
Subject: [ACPI] Lindent all ACPI files

Signed-off-by: Len Brown
---
 arch/x86_64/kernel/acpi/sleep.c | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

(limited to 'arch/x86_64/kernel')

diff --git a/arch/x86_64/kernel/acpi/sleep.c b/arch/x86_64/kernel/acpi/sleep.c
index 7a275de6df22..148f6f7ea315 100644
--- a/arch/x86_64/kernel/acpi/sleep.c
+++ b/arch/x86_64/kernel/acpi/sleep.c
@@ -47,7 +47,6 @@
 #include
 #include
 
-
 /* --------------------------------------------------------------------------
                               Low-Level Sleep Support
    -------------------------------------------------------------------------- */
@@ -77,11 +76,12 @@ static void init_low_mapping(void)
  * Create an identity mapped page table and copy the wakeup routine to
  * low memory.
  */
-int acpi_save_state_mem (void)
+int acpi_save_state_mem(void)
 {
     init_low_mapping();
 
-    memcpy((void *) acpi_wakeup_address, &wakeup_start, &wakeup_end - &wakeup_start);
+    memcpy((void *)acpi_wakeup_address, &wakeup_start,
+           &wakeup_end - &wakeup_start);
     acpi_copy_wakeup_routine(acpi_wakeup_address);
 
     return 0;
@@ -90,7 +90,7 @@ int acpi_save_state_mem (void)
 /*
  * acpi_restore_state
  */
-void acpi_restore_state_mem (void)
+void acpi_restore_state_mem(void)
 {
     set_pgd(pgd_offset(current->mm, 0UL), low_ptr);
     flush_tlb_all();
@@ -108,7 +108,8 @@ void __init acpi_reserve_bootmem(void)
 {
     acpi_wakeup_address = (unsigned long)alloc_bootmem_low(PAGE_SIZE);
     if ((&wakeup_end - &wakeup_start) > PAGE_SIZE)
-        printk(KERN_CRIT "ACPI: Wakeup code way too big, will crash on attempt to suspend\n");
+        printk(KERN_CRIT
+               "ACPI: Wakeup code way too big, will crash on attempt to suspend\n");
 }
 
 static int __init acpi_sleep_setup(char *str)
@@ -127,6 +128,8 @@ static int __init acpi_sleep_setup(char *str)
 
 __setup("acpi_sleep=", acpi_sleep_setup);
 
-#endif /*CONFIG_ACPI_SLEEP*/
+#endif /*CONFIG_ACPI_SLEEP */
 
-void acpi_pci_link_exit(void) {}
+void acpi_pci_link_exit(void)
+{
+}
-- cgit v1.2.1

From 888ba6c62bc61a995d283977eb3a6cbafd6f4ac6 Mon Sep 17 00:00:00 2001
From: Len Brown
Date: Wed, 24 Aug 2005 12:07:20 -0400
Subject: [ACPI] delete CONFIG_ACPI_BOOT

It has been a synonym for CONFIG_ACPI since 2.6.12.

Signed-off-by: Len Brown
---
 arch/x86_64/kernel/Makefile      |  2 +-
 arch/x86_64/kernel/acpi/Makefile |  4 ++--
 arch/x86_64/kernel/io_apic.c     |  4 ++--
 arch/x86_64/kernel/mpparse.c     | 10 ++++------
 arch/x86_64/kernel/setup.c       |  8 ++++----
 5 files changed, 13 insertions(+), 15 deletions(-)

(limited to 'arch/x86_64/kernel')

diff --git a/arch/x86_64/kernel/Makefile b/arch/x86_64/kernel/Makefile
index 48f9e2c19cd6..0296ca6cbfa3 100644
--- a/arch/x86_64/kernel/Makefile
+++ b/arch/x86_64/kernel/Makefile
@@ -12,7 +12,7 @@ obj-y := process.o semaphore.o signal.o entry.o traps.o irq.o \
 obj-$(CONFIG_X86_MCE)         += mce.o
 obj-$(CONFIG_X86_MCE_INTEL)   += mce_intel.o
 obj-$(CONFIG_MTRR)            += ../../i386/kernel/cpu/mtrr/
-obj-$(CONFIG_ACPI_BOOT)       += acpi/
+obj-$(CONFIG_ACPI)            += acpi/
 obj-$(CONFIG_X86_MSR)         += msr.o
 obj-$(CONFIG_MICROCODE)       += microcode.o
 obj-$(CONFIG_X86_CPUID)       += cpuid.o
diff --git a/arch/x86_64/kernel/acpi/Makefile b/arch/x86_64/kernel/acpi/Makefile
index d2c2ee5f9a88..7da9ace890bd 100644
--- a/arch/x86_64/kernel/acpi/Makefile
+++ b/arch/x86_64/kernel/acpi/Makefile
@@ -1,3 +1,3 @@
-obj-$(CONFIG_ACPI_BOOT)  := boot.o
-boot-$(CONFIG_ACPI_BOOT) := ../../../i386/kernel/acpi/boot.o
+obj-y                    := boot.o
+boot-y                   := ../../../i386/kernel/acpi/boot.o
 obj-$(CONFIG_ACPI_SLEEP) += sleep.o wakeup.o
diff --git a/arch/x86_64/kernel/io_apic.c b/arch/x86_64/kernel/io_apic.c
index d206d7e49cf5..1013a8fe44d7 100644
--- a/arch/x86_64/kernel/io_apic.c
+++ b/arch/x86_64/kernel/io_apic.c
@@ -1841,7 +1841,7 @@ device_initcall(ioapic_init_sysfs);
                           ACPI-based IOAPIC Configuration
    -------------------------------------------------------------------------- */
 
-#ifdef CONFIG_ACPI_BOOT
+#ifdef CONFIG_ACPI
 
 #define IO_APIC_MAX_ID 0xFE
 
@@ -1923,7 +1923,7 @@ int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int a
     return 0;
 }
 
-#endif /*CONFIG_ACPI_BOOT*/
+#endif /* CONFIG_ACPI */
 
 /*
diff --git a/arch/x86_64/kernel/mpparse.c b/arch/x86_64/kernel/mpparse.c
index 79c362d03e2e..86445f320d86 100644
--- a/arch/x86_64/kernel/mpparse.c
+++ b/arch/x86_64/kernel/mpparse.c
@@ -74,7 +74,7 @@ static unsigned int num_processors = 0;
 physid_mask_t phys_cpu_present_map = PHYSID_MASK_NONE;
 
 /* ACPI MADT entry parsing functions */
-#ifdef CONFIG_ACPI_BOOT
+#ifdef CONFIG_ACPI
 extern struct acpi_boot_flags acpi_boot;
 #ifdef CONFIG_X86_LOCAL_APIC
 extern int acpi_parse_lapic (acpi_table_entry_header *header);
@@ -84,7 +84,7 @@ extern int acpi_parse_lapic_nmi (acpi_table_entry_header *header);
 #ifdef CONFIG_X86_IO_APIC
 extern int acpi_parse_ioapic (acpi_table_entry_header *header);
 #endif /*CONFIG_X86_IO_APIC*/
-#endif /*CONFIG_ACPI_BOOT*/
+#endif /*CONFIG_ACPI*/
 
 u8 bios_cpu_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
 
@@ -519,8 +519,6 @@ void __init get_smp_config (void)
     struct intel_mp_floating *mpf = mpf_found;
 
     /*
-     * ACPI may be used to obtain the entire SMP configuration or just to
-     * enumerate/configure processors (CONFIG_ACPI_BOOT). Note that
      * ACPI supports both logical (e.g. Hyper-Threading) and physical
      * processors, where MPS only supports physical.
      */
@@ -673,7 +671,7 @@ void __init find_smp_config (void)
                             ACPI-based MP Configuration
    -------------------------------------------------------------------------- */
 
-#ifdef CONFIG_ACPI_BOOT
+#ifdef CONFIG_ACPI
 
 void __init mp_register_lapic_address (
     u64 address)
@@ -994,4 +992,4 @@ int mp_register_gsi(u32 gsi, int edge_level, int active_high_low)
 }
 
 #endif /*CONFIG_X86_IO_APIC*/
-#endif /*CONFIG_ACPI_BOOT*/
+#endif /*CONFIG_ACPI*/
diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c
index 116a491e2961..cfcebc8ab7b4 100644
--- a/arch/x86_64/kernel/setup.c
+++ b/arch/x86_64/kernel/setup.c
@@ -71,7 +71,7 @@ unsigned long mmu_cr4_features;
 
 int acpi_disabled;
 EXPORT_SYMBOL(acpi_disabled);
-#ifdef CONFIG_ACPI_BOOT
+#ifdef CONFIG_ACPI
 extern int __initdata acpi_ht;
 extern acpi_interrupt_flags acpi_sci_flags;
 int __initdata acpi_force = 0;
@@ -294,7 +294,7 @@ static __init void parse_cmdline_early (char ** cmdline_p)
             maxcpus = simple_strtoul(from + 8, NULL, 0);
         }
 #endif
-#ifdef CONFIG_ACPI_BOOT
+#ifdef CONFIG_ACPI
         /* "acpi=off" disables both ACPI table parsing and interpreter init */
         if (!memcmp(from, "acpi=off", 8))
             disable_acpi();
@@ -566,7 +566,7 @@ void __init setup_arch(char **cmdline_p)
 
     init_memory_mapping(0, (end_pfn_map << PAGE_SHIFT));
 
-#ifdef CONFIG_ACPI_BOOT
+#ifdef CONFIG_ACPI
     /*
      * Initialize the ACPI boot-time table parser (gets the RSDP and SDT).
      * Call this early for SRAT node setup.
@@ -658,7 +658,7 @@ void __init setup_arch(char **cmdline_p)
 
     check_ioapic();
 
-#ifdef CONFIG_ACPI_BOOT
+#ifdef CONFIG_ACPI
     /*
      * Read APIC and some other early information from ACPI tables.
      */
-- cgit v1.2.1

From 76f58584824c61eb5b3bdbf019236815921d2e7c Mon Sep 17 00:00:00 2001
From: Len Brown
Date: Wed, 24 Aug 2005 12:10:49 -0400
Subject: [ACPI] delete CONFIG_ACPI_BUS

It is a synonym for CONFIG_ACPI.

Signed-off-by: Len Brown
---
 arch/x86_64/kernel/genapic.c | 4 ++--
 arch/x86_64/kernel/mpparse.c | 4 ----
 2 files changed, 2 insertions(+), 6 deletions(-)

(limited to 'arch/x86_64/kernel')

diff --git a/arch/x86_64/kernel/genapic.c b/arch/x86_64/kernel/genapic.c
index 30c843a5efdd..f031358906f5 100644
--- a/arch/x86_64/kernel/genapic.c
+++ b/arch/x86_64/kernel/genapic.c
@@ -20,7 +20,7 @@
 #include
 #include
 
-#if defined(CONFIG_ACPI_BUS)
+#if defined(CONFIG_ACPI)
 #include
 #endif
 
@@ -47,7 +47,7 @@ void __init clustered_apic_check(void)
     u8 cluster_cnt[NUM_APIC_CLUSTERS];
     int num_cpus = 0;
 
-#if defined(CONFIG_ACPI_BUS)
+#if defined(CONFIG_ACPI)
     /*
      * Some x86_64 machines use physical APIC mode regardless of how many
      * procs/clusters are present (x86_64 ES7000 is an example).
diff --git a/arch/x86_64/kernel/mpparse.c b/arch/x86_64/kernel/mpparse.c
index 86445f320d86..8d8ed6ae1d0c 100644
--- a/arch/x86_64/kernel/mpparse.c
+++ b/arch/x86_64/kernel/mpparse.c
@@ -927,11 +927,9 @@ int mp_register_gsi(u32 gsi, int edge_level, int active_high_low)
     if (acpi_irq_model != ACPI_IRQ_MODEL_IOAPIC)
         return gsi;
 
-#ifdef CONFIG_ACPI_BUS
     /* Don't set up the ACPI SCI because it's already set up */
     if (acpi_fadt.sci_int == gsi)
         return gsi;
-#endif
 
     ioapic = mp_find_ioapic(gsi);
     if (ioapic < 0) {
@@ -971,13 +969,11 @@ int mp_register_gsi(u32 gsi, int edge_level, int active_high_low)
     if (gsi < MAX_GSI_NUM) {
         if (gsi > 15)
             gsi = pci_irq++;
-#ifdef CONFIG_ACPI_BUS
         /*
          * Don't assign IRQ used by ACPI SCI
          */
         if (gsi == acpi_fadt.sci_int)
             gsi = pci_irq++;
-#endif
         gsi_to_irq[irq] = gsi;
     } else {
         printk(KERN_ERR "GSI %u is too high\n", gsi);
-- cgit v1.2.1

From 54d5d42404e7705cf3804593189e963350d470e5 Mon Sep 17 00:00:00 2001
From: Ashok Raj
Date: Tue, 6 Sep 2005 15:16:15 -0700
Subject: [PATCH] x86/x86_64: deferred handling of writes to /proc/irqxx/smp_affinity

When handling writes to /proc/irq, the current code re-programs RTE
entries directly. This is not recommended and could potentially cause
chipsets to lock up, or cause missing interrupts. CONFIG_IRQ_BALANCE
does this correctly, re-programming only when the interrupt is pending.
The same needs to be done for /proc/irq handling as well; otherwise
user-space IRQ balancers are really not doing the right thing.

- Changed pending_irq_balance_cpumask to pending_irq_migrate_cpumask
  for lack of a generic name.
- Moved move_irq out of IRQ_BALANCE, and added the same to x86_64.
- Added a new proc handler for writes, so we can do the deferred write
  at IRQ-handling time.
- /proc/irq/XX/smp_affinity used to display CPU_MASK_ALL; instead it
  now shows only the active CPU masks, or exactly what was set.
- Provided a common move_irq implementation, instead of duplicating it
  when using the generic IRQ framework.

Tested on i386/x86_64 and ia64 with CONFIG_PCI_MSI turned on and off.
Tested UP builds as well.

MSI testing: tbd: I have cards, need to look for an x-over cable,
although I did test an earlier version of this patch. Will test in a
couple days.
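
The defer-until-ack idea above can be modeled in a few lines of ordinary,
compilable C. All names below are illustrative, not the patch's actual
structures: the /proc write handler only records the requested mask, and
the RTE is rewritten from the ack path, when the interrupt is known to be
in service.

    #include <stdio.h>

    struct irq_slot {
        unsigned long cur_mask;     /* mask currently programmed in the RTE */
        unsigned long pending_mask; /* mask requested via /proc, not yet applied */
        int move_pending;
    };

    /* /proc write path: latch the request, do not touch the hardware */
    static void proc_write_affinity(struct irq_slot *s, unsigned long mask)
    {
        s->pending_mask = mask;
        s->move_pending = 1;
    }

    /* ack path: the interrupt is being handled, so reprogramming is safe */
    static void ack_irq(struct irq_slot *s)
    {
        if (s->move_pending) {
            s->cur_mask = s->pending_mask; /* stands in for rewriting the RTE */
            s->move_pending = 0;
        }
    }

    int main(void)
    {
        struct irq_slot s = { .cur_mask = 0x1 };
        proc_write_affinity(&s, 0x4);             /* user writes smp_affinity */
        printf("before ack: %#lx\n", s.cur_mask); /* still 0x1 */
        ack_irq(&s);                              /* next interrupt arrives */
        printf("after ack:  %#lx\n", s.cur_mask); /* now 0x4 */
        return 0;
    }

This is the same shape as move_irq() in the patch below: ack_edge_ioapic_irq()
and end_level_ioapic_irq() call it at interrupt-handling time, and only then
is the destination rewritten.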
Signed-off-by: Ashok Raj
Acked-by: Zwane Mwaikambo
Grudgingly-acked-by: Andi Kleen
Signed-off-by: Coywolf Qi Hunt
Signed-off-by: Ashok Raj
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 arch/x86_64/kernel/io_apic.c | 102 +++++++++++++++++++++++++++----------------
 1 file changed, 64 insertions(+), 38 deletions(-)

(limited to 'arch/x86_64/kernel')

diff --git a/arch/x86_64/kernel/io_apic.c b/arch/x86_64/kernel/io_apic.c
index d206d7e49cf5..76bcc4e6979d 100644
--- a/arch/x86_64/kernel/io_apic.c
+++ b/arch/x86_64/kernel/io_apic.c
@@ -78,6 +78,54 @@ int vector_irq[NR_VECTORS] = { [0 ... NR_VECTORS - 1] = -1};
 #define vector_to_irq(vector) (vector)
 #endif
 
+#define __DO_ACTION(R, ACTION, FINAL)                           \
+                                                                \
+{                                                               \
+    int pin;                                                    \
+    struct irq_pin_list *entry = irq_2_pin + irq;               \
+                                                                \
+    for (;;) {                                                  \
+        unsigned int reg;                                       \
+        pin = entry->pin;                                       \
+        if (pin == -1)                                          \
+            break;                                              \
+        reg = io_apic_read(entry->apic, 0x10 + R + pin*2);      \
+        reg ACTION;                                             \
+        io_apic_modify(entry->apic, reg);                       \
+        if (!entry->next)                                       \
+            break;                                              \
+        entry = irq_2_pin + entry->next;                        \
+    }                                                           \
+    FINAL;                                                      \
+}
+
+#ifdef CONFIG_SMP
+static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
+{
+    unsigned long flags;
+    unsigned int dest;
+    cpumask_t tmp;
+
+    cpus_and(tmp, mask, cpu_online_map);
+    if (cpus_empty(tmp))
+        tmp = TARGET_CPUS;
+
+    cpus_and(mask, tmp, CPU_MASK_ALL);
+
+    dest = cpu_mask_to_apicid(mask);
+
+    /*
+     * Only the high 8 bits are valid.
+     */
+    dest = SET_APIC_LOGICAL_ID(dest);
+
+    spin_lock_irqsave(&ioapic_lock, flags);
+    __DO_ACTION(1, = dest, )
+    set_irq_info(irq, mask);
+    spin_unlock_irqrestore(&ioapic_lock, flags);
+}
+#endif
+
 /*
  * The common case is 1:1 IRQ<->pin mappings. Sometimes there are
  * shared ISA-space IRQs, so we have to support them. We are super
@@ -101,26 +149,6 @@ static void add_pin_to_irq(unsigned int irq, int apic, int pin)
     entry->pin = pin;
 }
 
-#define __DO_ACTION(R, ACTION, FINAL)                           \
-                                                                \
-{                                                               \
-    int pin;                                                    \
-    struct irq_pin_list *entry = irq_2_pin + irq;               \
-                                                                \
-    for (;;) {                                                  \
-        unsigned int reg;                                       \
-        pin = entry->pin;                                       \
-        if (pin == -1)                                          \
-            break;                                              \
-        reg = io_apic_read(entry->apic, 0x10 + R + pin*2);      \
-        reg ACTION;                                             \
-        io_apic_modify(entry->apic, reg);                       \
-        if (!entry->next)                                       \
-            break;                                              \
-        entry = irq_2_pin + entry->next;                        \
-    }                                                           \
-    FINAL;                                                      \
-}
 
 #define DO_ACTION(name,R,ACTION, FINAL)                         \
                                                                 \
@@ -767,6 +795,7 @@ static void __init setup_IO_APIC_irqs(void)
         spin_lock_irqsave(&ioapic_lock, flags);
         io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1));
         io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0));
+        set_native_irq_info(irq, TARGET_CPUS);
         spin_unlock_irqrestore(&ioapic_lock, flags);
     }
 }
@@ -1314,6 +1343,7 @@ static unsigned int startup_edge_ioapic_irq(unsigned int irq)
  */
 static void ack_edge_ioapic_irq(unsigned int irq)
 {
+    move_irq(irq);
     if ((irq_desc[irq].status & (IRQ_PENDING | IRQ_DISABLED))
                     == (IRQ_PENDING | IRQ_DISABLED))
         mask_IO_APIC_irq(irq);
@@ -1343,26 +1373,10 @@ static unsigned int startup_level_ioapic_irq (unsigned int irq)
 
 static void end_level_ioapic_irq (unsigned int irq)
 {
+    move_irq(irq);
     ack_APIC_irq();
 }
 
-static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
-{
-    unsigned long flags;
-    unsigned int dest;
-
-    dest = cpu_mask_to_apicid(mask);
-
-    /*
-     * Only the high 8 bits are valid.
-     */
-    dest = SET_APIC_LOGICAL_ID(dest);
-
-    spin_lock_irqsave(&ioapic_lock, flags);
-    __DO_ACTION(1, = dest, )
-    spin_unlock_irqrestore(&ioapic_lock, flags);
-}
-
 #ifdef CONFIG_PCI_MSI
 static unsigned int startup_edge_ioapic_vector(unsigned int vector)
 {
@@ -1375,6 +1389,7 @@ static void ack_edge_ioapic_vector(unsigned int vector)
 {
     int irq = vector_to_irq(vector);
 
+    move_native_irq(vector);
     ack_edge_ioapic_irq(irq);
 }
 
@@ -1389,6 +1404,7 @@ static void end_level_ioapic_vector (unsigned int vector)
 {
     int irq = vector_to_irq(vector);
 
+    move_native_irq(vector);
     end_level_ioapic_irq(irq);
 }
 
@@ -1406,14 +1422,17 @@ static void unmask_IO_APIC_vector (unsigned int vector)
     unmask_IO_APIC_irq(irq);
 }
 
+#ifdef CONFIG_SMP
 static void set_ioapic_affinity_vector (unsigned int vector,
                                         cpumask_t cpu_mask)
 {
     int irq = vector_to_irq(vector);
 
+    set_native_irq_info(vector, cpu_mask);
     set_ioapic_affinity_irq(irq, cpu_mask);
 }
-#endif
+#endif // CONFIG_SMP
+#endif // CONFIG_PCI_MSI
 
 /*
  * Level and edge triggered IO-APIC interrupts need different handling,
@@ -1432,7 +1451,9 @@ static struct hw_interrupt_type ioapic_edge_type = {
     .disable    = disable_edge_ioapic,
     .ack        = ack_edge_ioapic,
     .end        = end_edge_ioapic,
+#ifdef CONFIG_SMP
     .set_affinity = set_ioapic_affinity,
+#endif
 };
 
 static struct hw_interrupt_type ioapic_level_type = {
@@ -1443,7 +1464,9 @@ static struct hw_interrupt_type ioapic_level_type = {
     .disable    = disable_level_ioapic,
     .ack        = mask_and_ack_level_ioapic,
     .end        = end_level_ioapic,
+#ifdef CONFIG_SMP
     .set_affinity = set_ioapic_affinity,
+#endif
 };
 
 static inline void init_IO_APIC_traps(void)
@@ -1918,6 +1941,7 @@ int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int a
     spin_lock_irqsave(&ioapic_lock, flags);
     io_apic_write(ioapic, 0x11+2*pin, *(((int *)&entry)+1));
     io_apic_write(ioapic, 0x10+2*pin, *(((int *)&entry)+0));
+    set_native_irq_info(use_pci_vector() ? entry.vector : irq, TARGET_CPUS);
     spin_unlock_irqrestore(&ioapic_lock, flags);
 
     return 0;
@@ -1931,6 +1955,7 @@ int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int a
  * we need to reprogram the ioredtbls to cater for the cpus which have come online
  * so mask in all cases should simply be TARGET_CPUS
  */
+#ifdef CONFIG_SMP
 void __init setup_ioapic_dest(void)
 {
     int pin, ioapic, irq, irq_entry;
@@ -1949,3 +1974,4 @@ void __init setup_ioapic_dest(void)
         }
     }
 }
+#endif
-- cgit v1.2.1

From c078d3266e58a3edd698b9f143f78fc585679031 Mon Sep 17 00:00:00 2001
From: Zwane Mwaikambo
Date: Tue, 6 Sep 2005 15:16:16 -0700
Subject: [PATCH] x86_64: print processor number in show_regs

To date I've been using the GS value to determine the processor number
in dumps from show_regs; however, this can be cumbersome to do if you
don't have the vmlinux to verify against the address of cpu_pda. How
about the following?

I considered using hard_smp_processor_id for robustness, but we already
dereference current, so we're already relying on MSR_GS_BASE being sane.
Signed-off-by: Zwane Mwaikambo
Acked-by: Andi Kleen
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 arch/x86_64/kernel/process.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch/x86_64/kernel')

diff --git a/arch/x86_64/kernel/process.c b/arch/x86_64/kernel/process.c
index 7577f9d7a75d..8661f82ac70b 100644
--- a/arch/x86_64/kernel/process.c
+++ b/arch/x86_64/kernel/process.c
@@ -310,6 +310,7 @@ void __show_regs(struct pt_regs * regs)
 
 void show_regs(struct pt_regs *regs)
 {
+    printk("CPU %d:", smp_processor_id());
     __show_regs(regs);
     show_trace(&regs->rsp);
 }
-- cgit v1.2.1

From 0c2b9d5c035f5a82c9c4ba717984d04838a0701e Mon Sep 17 00:00:00 2001
From: Ashok Raj
Date: Tue, 6 Sep 2005 15:16:18 -0700
Subject: [PATCH] x86_64: Fix cluster mode send_IPI_allbutself to use get_cpu()/put_cpu()

Need to ensure we don't get preempted while we clear ourself from the
mask when using the clustered-mode genapic code.

Signed-off-by: Ashok Raj
Acked-by: Andi Kleen
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 arch/x86_64/kernel/genapic_cluster.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'arch/x86_64/kernel')

diff --git a/arch/x86_64/kernel/genapic_cluster.c b/arch/x86_64/kernel/genapic_cluster.c
index 9703da7202e3..f6523dd1bc09 100644
--- a/arch/x86_64/kernel/genapic_cluster.c
+++ b/arch/x86_64/kernel/genapic_cluster.c
@@ -72,10 +72,14 @@ static void cluster_send_IPI_mask(cpumask_t mask, int vector)
 static void cluster_send_IPI_allbutself(int vector)
 {
     cpumask_t mask = cpu_online_map;
-    cpu_clear(smp_processor_id(), mask);
+    int me = get_cpu(); /* Ensure we are not preempted when we clear */
+
+    cpu_clear(me, mask);
 
     if (!cpus_empty(mask))
         cluster_send_IPI_mask(mask, vector);
+
+    put_cpu();
 }
 
 static void cluster_send_IPI_all(int vector)
-- cgit v1.2.1

From 48c8b1134249432318c8e5d19adc37c45242c4b1 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman"
Date: Tue, 6 Sep 2005 15:16:20 -0700
Subject: [PATCH] x86_64: Fix off by one in e820_mapped

This allows a valid iommu placed immediately after memory to work: it
is now recognized as lying after the last byte of memory, not
overlapping it.

Signed-off-by: Eric W. Biederman
Acked-by: Andi Kleen
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 arch/x86_64/kernel/e820.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86_64/kernel')

diff --git a/arch/x86_64/kernel/e820.c b/arch/x86_64/kernel/e820.c
index b548dea4e5b9..116ac5f53dce 100644
--- a/arch/x86_64/kernel/e820.c
+++ b/arch/x86_64/kernel/e820.c
@@ -85,7 +85,7 @@ int __init e820_mapped(unsigned long start, unsigned long end, unsigned type)
         struct e820entry *ei = &e820.map[i];
         if (type && ei->type != type)
             continue;
-        if (ei->addr >= end || ei->addr + ei->size < start)
+        if (ei->addr >= end || ei->addr + ei->size <= start)
             continue;
         return 1;
     }
-- cgit v1.2.1

From 8446f1d391f3d27e6bf9c43d4cbcdac0ca720417 Mon Sep 17 00:00:00 2001
From: Ingo Molnar
Date: Tue, 6 Sep 2005 15:16:27 -0700
Subject: [PATCH] detect soft lockups

This patch adds a new kernel debug feature: CONFIG_DETECT_SOFTLOCKUP.

When enabled, per-CPU watchdog threads are started, which try to run
once per second. If they get delayed for more than 10 seconds then a
callback from the timer interrupt detects this condition and prints out
a warning message and a stack dump (once per lockup incident). The
feature is otherwise non-intrusive: it doesn't try to unlock the box in
any way, it only gets the debug info out, automatically, and on all
CPUs affected by the lockup.
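
Stripped of the kernel plumbing, the detection logic reduces to a
timestamp comparison. The compilable toy below models it with
illustrative names, seconds instead of jiffies, and a single CPU:

    #include <stdio.h>

    static unsigned long touch_timestamp; /* last time the watchdog thread ran */
    static int warned;

    static void watchdog_thread_runs(unsigned long now)
    {
        touch_timestamp = now; /* the per-CPU thread "touches" its timestamp */
        warned = 0;
    }

    static void timer_interrupt_check(unsigned long now)
    {
        if (!warned && now - touch_timestamp > 10) {
            warned = 1; /* report once per lockup incident */
            printf("BUG: soft lockup detected, stuck for %lus\n",
                   now - touch_timestamp);
        }
    }

    int main(void)
    {
        unsigned long t;
        watchdog_thread_runs(0);
        for (t = 1; t <= 12; t++)
            timer_interrupt_check(t); /* watchdog never gets to run again */
        return 0;
    }

The two one-line hunks below follow from this design: anything that
legitimately stalls the watchdog thread (the NMI watchdog's touch path,
resume from suspend) must also refresh the softlockup timestamp, hence
the added touch_softlockup_watchdog() calls.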
Signed-off-by: Ingo Molnar
Signed-off-by: Nishanth Aravamudan
Signed-Off-By: Matthias Urlichs
Signed-off-by: Richard Purdie
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 arch/x86_64/kernel/nmi.c  | 2 ++
 arch/x86_64/kernel/time.c | 1 +
 2 files changed, 3 insertions(+)

(limited to 'arch/x86_64/kernel')

diff --git a/arch/x86_64/kernel/nmi.c b/arch/x86_64/kernel/nmi.c
index 64a8e05d5811..84cae81fff8b 100644
--- a/arch/x86_64/kernel/nmi.c
+++ b/arch/x86_64/kernel/nmi.c
@@ -463,6 +463,8 @@ void touch_nmi_watchdog (void)
      */
     for (i = 0; i < NR_CPUS; i++)
         per_cpu(nmi_touch, i) = 1;
+
+    touch_softlockup_watchdog();
 }
 
 void nmi_watchdog_tick (struct pt_regs * regs, unsigned reason)
diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c
index 66bf6ddeb0c3..2b5d9da912a2 100644
--- a/arch/x86_64/kernel/time.c
+++ b/arch/x86_64/kernel/time.c
@@ -1041,6 +1041,7 @@ static int timer_resume(struct sys_device *dev)
     write_sequnlock_irqrestore(&xtime_lock,flags);
     jiffies += sleep_length;
     wall_jiffies += sleep_length;
+    touch_softlockup_watchdog();
     return 0;
 }
-- cgit v1.2.1

From 19306059cd7fedaf96b4b0260a9a8a45e513c857 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney"
Date: Tue, 6 Sep 2005 15:16:35 -0700
Subject: [PATCH] NMI: Update NMI users of RCU to use new API

Uses of RCU for dynamically changeable NMI handlers need to use the new
rcu_dereference() and rcu_assign_pointer() facilities. This change makes
it clear that these uses are safe from a memory-barrier viewpoint, but
the main purpose is to document exactly what operations are being
protected by RCU. This has been tested on x86 and x86-64, which are the
only architectures affected by this change.

Signed-off-by: Paul E. McKenney
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 arch/x86_64/kernel/nmi.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch/x86_64/kernel')

diff --git a/arch/x86_64/kernel/nmi.c b/arch/x86_64/kernel/nmi.c
index 84cae81fff8b..caf164959e19 100644
--- a/arch/x86_64/kernel/nmi.c
+++ b/arch/x86_64/kernel/nmi.c
@@ -524,14 +524,14 @@ asmlinkage void do_nmi(struct pt_regs * regs, long error_code)
     nmi_enter();
     add_pda(__nmi_count,1);
 
-    if (!nmi_callback(regs, cpu))
+    if (!rcu_dereference(nmi_callback)(regs, cpu))
         default_do_nmi(regs);
 
     nmi_exit();
 }
 
 void set_nmi_callback(nmi_callback_t callback)
 {
-    nmi_callback = callback;
+    rcu_assign_pointer(nmi_callback, callback);
 }
 
 void unset_nmi_callback(void)
-- cgit v1.2.1

From 6c231b7bab0aa6860cd9da2de8a064eddc34c146 Mon Sep 17 00:00:00 2001
From: Ravikiran G Thirumalai
Date: Tue, 6 Sep 2005 15:17:45 -0700
Subject: [PATCH] Additions to .data.read_mostly section

Mark variables which are usually accessed for reads with __read_mostly.

Signed-off-by: Alok N Kataria
Signed-off-by: Shai Fultheim
Signed-off-by: Ravikiran Thirumalai
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 arch/x86_64/kernel/genapic.c |  2 +-
 arch/x86_64/kernel/io_apic.c | 10 +++++-----
 arch/x86_64/kernel/setup.c   |  2 +-
 arch/x86_64/kernel/setup64.c |  2 +-
 arch/x86_64/kernel/smpboot.c | 10 +++++-----
 5 files changed, 13 insertions(+), 13 deletions(-)

(limited to 'arch/x86_64/kernel')

diff --git a/arch/x86_64/kernel/genapic.c b/arch/x86_64/kernel/genapic.c
index 30c843a5efdd..f062aa03bab7 100644
--- a/arch/x86_64/kernel/genapic.c
+++ b/arch/x86_64/kernel/genapic.c
@@ -25,7 +25,7 @@
 #endif
 
 /* which logical CPU number maps to which CPU (physical APIC ID) */
-u8 x86_cpu_to_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
+u8 x86_cpu_to_apicid[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID };
 EXPORT_SYMBOL(x86_cpu_to_apicid);
 u8 x86_cpu_to_log_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
diff --git a/arch/x86_64/kernel/io_apic.c b/arch/x86_64/kernel/io_apic.c
index 76bcc4e6979d..ba1a744e9bf0 100644
--- a/arch/x86_64/kernel/io_apic.c
+++ b/arch/x86_64/kernel/io_apic.c
@@ -70,7 +70,7 @@ static struct irq_pin_list {
     short apic, pin, next;
 } irq_2_pin[PIN_MAP_SIZE];
 
-int vector_irq[NR_VECTORS] = { [0 ... NR_VECTORS - 1] = -1};
+int vector_irq[NR_VECTORS] __read_mostly = { [0 ... NR_VECTORS - 1] = -1};
 #ifdef CONFIG_PCI_MSI
 #define vector_to_irq(vector)   \
     (platform_legacy_irq(vector) ? vector : vector_irq[vector])
@@ -683,7 +683,7 @@ static inline int IO_APIC_irq_trigger(int irq)
 }
 
 /* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. */
-u8 irq_vector[NR_IRQ_VECTORS] = { FIRST_DEVICE_VECTOR , 0 };
+u8 irq_vector[NR_IRQ_VECTORS] __read_mostly = { FIRST_DEVICE_VECTOR , 0 };
 
 int assign_irq_vector(int irq)
 {
@@ -1443,7 +1443,7 @@ static void set_ioapic_affinity_vector (unsigned int vector,
  * races.
  */
 
-static struct hw_interrupt_type ioapic_edge_type = {
+static struct hw_interrupt_type ioapic_edge_type __read_mostly = {
     .typename = "IO-APIC-edge",
     .startup    = startup_edge_ioapic,
     .shutdown   = shutdown_edge_ioapic,
@@ -1456,7 +1456,7 @@ static struct hw_interrupt_type ioapic_edge_type = {
 #endif
 };
 
-static struct hw_interrupt_type ioapic_level_type = {
+static struct hw_interrupt_type ioapic_level_type __read_mostly = {
     .typename = "IO-APIC-level",
     .startup    = startup_level_ioapic,
     .shutdown   = shutdown_level_ioapic,
@@ -1529,7 +1529,7 @@ static void ack_lapic_irq (unsigned int irq)
 
 static void end_lapic_irq (unsigned int i) { /* nothing */ }
 
-static struct hw_interrupt_type lapic_irq_type = {
+static struct hw_interrupt_type lapic_irq_type __read_mostly = {
     .typename = "local-APIC-edge",
     .startup = NULL, /* startup_irq() not used for IRQ0 */
     .shutdown = NULL, /* shutdown_irq() not used for IRQ0 */
diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c
index 116a491e2961..b356f8e6adfe 100644
--- a/arch/x86_64/kernel/setup.c
+++ b/arch/x86_64/kernel/setup.c
@@ -65,7 +65,7 @@
  * Machine setup..
  */
 
-struct cpuinfo_x86 boot_cpu_data;
+struct cpuinfo_x86 boot_cpu_data __read_mostly;
 
 unsigned long mmu_cr4_features;
diff --git a/arch/x86_64/kernel/setup64.c b/arch/x86_64/kernel/setup64.c
index 34082c1cc41e..e3ffcacc8c90 100644
--- a/arch/x86_64/kernel/setup64.c
+++ b/arch/x86_64/kernel/setup64.c
@@ -36,7 +36,7 @@ struct desc_ptr idt_descr = { 256 * 16, (unsigned long) idt_table };
 
 char boot_cpu_stack[IRQSTACKSIZE] __attribute__((section(".bss.page_aligned")));
 
-unsigned long __supported_pte_mask = ~0UL;
+unsigned long __supported_pte_mask __read_mostly = ~0UL;
 static int do_not_nx __initdata = 0;
 
 /* noexec=on|off
diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c
index fa25e39fe54d..90aeccd15190 100644
--- a/arch/x86_64/kernel/smpboot.c
+++ b/arch/x86_64/kernel/smpboot.c
@@ -62,13 +62,13 @@
 /* Number of siblings per CPU package */
 int smp_num_siblings = 1;
 /* Package ID of each logical CPU */
-u8 phys_proc_id[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
-u8 cpu_core_id[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
+u8 phys_proc_id[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID };
+u8 cpu_core_id[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID };
 EXPORT_SYMBOL(phys_proc_id);
 EXPORT_SYMBOL(cpu_core_id);
 
 /* Bitmask of currently online CPUs */
-cpumask_t cpu_online_map;
+cpumask_t cpu_online_map __read_mostly;
 EXPORT_SYMBOL(cpu_online_map);
 
@@ -88,8 +88,8 @@ struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned;
 /* Set when the idlers are all forked */
 int smp_threads_ready;
 
-cpumask_t cpu_sibling_map[NR_CPUS] __cacheline_aligned;
-cpumask_t cpu_core_map[NR_CPUS] __cacheline_aligned;
+cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly;
+cpumask_t cpu_core_map[NR_CPUS] __read_mostly;
 EXPORT_SYMBOL(cpu_core_map);
 
 /*
-- cgit v1.2.1

From b149ee2233edf08fb59b11e879a2c5941929bcb8 Mon Sep 17 00:00:00 2001
From: john stultz
Date: Tue, 6 Sep 2005 15:17:46 -0700
Subject: [PATCH] NTP: ntp-helper functions

This patch cleans up a commonly repeated set of changes to the NTP
state variables by adding two helper inline functions:

ntp_clear():  Clears the NTP state variables.
ntp_synced(): Returns 1 if the system is synced with a time server.

This was compile-tested for alpha, arm, i386, x86-64, ppc64, s390,
sparc and sparc64.

Signed-off-by: John Stultz
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 arch/x86_64/kernel/time.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

(limited to 'arch/x86_64/kernel')

diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c
index 2b5d9da912a2..7b6abe058253 100644
--- a/arch/x86_64/kernel/time.c
+++ b/arch/x86_64/kernel/time.c
@@ -176,10 +176,7 @@ int do_settimeofday(struct timespec *tv)
     set_normalized_timespec(&xtime, sec, nsec);
     set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec);
 
-    time_adjust = 0;        /* stop active adjtime() */
-    time_status |= STA_UNSYNC;
-    time_maxerror = NTP_PHASE_LIMIT;
-    time_esterror = NTP_PHASE_LIMIT;
+    ntp_clear();
 
     write_sequnlock_irq(&xtime_lock);
 
     clock_was_set();
@@ -471,7 +468,7 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
      * off) isn't likely to go away much sooner anyway.
      */
 
-    if ((~time_status & STA_UNSYNC) && xtime.tv_sec > rtc_update &&
+    if (ntp_synced() && xtime.tv_sec > rtc_update &&
         abs(xtime.tv_nsec - 500000000) <= tick_nsec / 2) {
         set_rtc_mmss(xtime.tv_sec);
         rtc_update = xtime.tv_sec + 660;
-- cgit v1.2.1

From 0f2fbdcbb041f9087da42f8ac2e81f2817098d2a Mon Sep 17 00:00:00 2001
From: Prasanna S Panchamukhi
Date: Tue, 6 Sep 2005 15:19:28 -0700
Subject: [PATCH] kprobes: prevent possible race conditions, x86_64 changes

This patch contains the x86_64 architecture-specific changes to prevent
the possible race conditions.
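
The mechanism behind the annotations in the diff below: __kprobes moves a
function into a dedicated .kprobes.text section, so that kprobes' own code
paths (and the exception entry stubs, via KPROBE_ENTRY and KPROBES_TEXT)
can never themselves be probed, which is what made the recursion possible.
A simplified, kernel-flavored sketch of how this is used follows; it is not
buildable stand-alone, and in_probed_range() is an illustrative name:

    /* Place the function in .kprobes.text instead of ordinary .text. */
    #define __kprobes __attribute__((__section__(".kprobes.text")))

    /* The linker script bounds the section (see the vmlinux.lds.S hunk);
     * KPROBES_TEXT roughly expands to:
     *     __kprobes_text_start = .;
     *     *(.kprobes.text)
     *     __kprobes_text_end = .;
     */
    extern char __kprobes_text_start[], __kprobes_text_end[];

    /* Registration can then refuse any probe landing inside the section. */
    static int in_probed_range(unsigned long addr)
    {
        return addr >= (unsigned long)__kprobes_text_start &&
               addr <  (unsigned long)__kprobes_text_end;
    }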
Signed-off-by: Prasanna S Panchamukhi
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 arch/x86_64/kernel/entry.S       | 12 ++++++++----
 arch/x86_64/kernel/kprobes.c     | 35 ++++++++++++++++++-----------------
 arch/x86_64/kernel/traps.c       | 14 +++++++++-----
 arch/x86_64/kernel/vmlinux.lds.S |  1 +
 4 files changed, 36 insertions(+), 26 deletions(-)

(limited to 'arch/x86_64/kernel')

diff --git a/arch/x86_64/kernel/entry.S b/arch/x86_64/kernel/entry.S
index 096d470e280f..be51dbe1f75e 100644
--- a/arch/x86_64/kernel/entry.S
+++ b/arch/x86_64/kernel/entry.S
@@ -784,8 +784,9 @@ ENTRY(execve)
     ret
     CFI_ENDPROC
 
-ENTRY(page_fault)
+KPROBE_ENTRY(page_fault)
     errorentry do_page_fault
+    .previous .text
 
 ENTRY(coprocessor_error)
     zeroentry do_coprocessor_error
@@ -797,13 +798,14 @@ ENTRY(device_not_available)
     zeroentry math_state_restore
 
     /* runs on exception stack */
-ENTRY(debug)
+KPROBE_ENTRY(debug)
     CFI_STARTPROC
     pushq $0
     CFI_ADJUST_CFA_OFFSET 8
     paranoidentry do_debug
     jmp paranoid_exit
     CFI_ENDPROC
+    .previous .text
 
     /* runs on exception stack */
 ENTRY(nmi)
@@ -854,8 +856,9 @@ paranoid_schedule:
     jmp paranoid_userspace
     CFI_ENDPROC
 
-ENTRY(int3)
+KPROBE_ENTRY(int3)
     zeroentry do_int3
+    .previous .text
 
 ENTRY(overflow)
     zeroentry do_overflow
@@ -892,8 +895,9 @@ ENTRY(stack_segment)
     jmp paranoid_exit
     CFI_ENDPROC
 
-ENTRY(general_protection)
+KPROBE_ENTRY(general_protection)
     errorentry do_general_protection
+    .previous .text
 
 ENTRY(alignment_check)
     errorentry do_alignment_check
diff --git a/arch/x86_64/kernel/kprobes.c b/arch/x86_64/kernel/kprobes.c
index 5c6dc7051482..c21cceaea275 100644
--- a/arch/x86_64/kernel/kprobes.c
+++ b/arch/x86_64/kernel/kprobes.c
@@ -74,7 +74,7 @@ static inline int is_IF_modifier(kprobe_opcode_t *insn)
     return 0;
 }
 
-int arch_prepare_kprobe(struct kprobe *p)
+int __kprobes arch_prepare_kprobe(struct kprobe *p)
 {
     /* insn: must be on special executable page on x86_64. */
     up(&kprobe_mutex);
@@ -189,7 +189,7 @@ static inline s32 *is_riprel(u8 *insn)
     return NULL;
 }
 
-void arch_copy_kprobe(struct kprobe *p)
+void __kprobes arch_copy_kprobe(struct kprobe *p)
 {
     s32 *ripdisp;
     memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE);
@@ -215,21 +215,21 @@ void arch_copy_kprobe(struct kprobe *p)
     p->opcode = *p->addr;
 }
 
-void arch_arm_kprobe(struct kprobe *p)
+void __kprobes arch_arm_kprobe(struct kprobe *p)
 {
     *p->addr = BREAKPOINT_INSTRUCTION;
     flush_icache_range((unsigned long) p->addr,
                        (unsigned long) p->addr + sizeof(kprobe_opcode_t));
 }
 
-void arch_disarm_kprobe(struct kprobe *p)
+void __kprobes arch_disarm_kprobe(struct kprobe *p)
 {
     *p->addr = p->opcode;
     flush_icache_range((unsigned long) p->addr,
                        (unsigned long) p->addr + sizeof(kprobe_opcode_t));
 }
 
-void arch_remove_kprobe(struct kprobe *p)
+void __kprobes arch_remove_kprobe(struct kprobe *p)
 {
     up(&kprobe_mutex);
     free_insn_slot(p->ainsn.insn);
@@ -261,7 +261,7 @@ static inline void set_current_kprobe(struct kprobe *p, struct pt_regs *regs)
         kprobe_saved_rflags &= ~IF_MASK;
 }
 
-static void prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
+static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
 {
     regs->eflags |= TF_MASK;
     regs->eflags &= ~IF_MASK;
@@ -272,7 +272,8 @@ static void prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
         regs->rip = (unsigned long)p->ainsn.insn;
 }
 
-void arch_prepare_kretprobe(struct kretprobe *rp, struct pt_regs *regs)
+void __kprobes arch_prepare_kretprobe(struct kretprobe *rp,
+                                      struct pt_regs *regs)
 {
     unsigned long *sara = (unsigned long *)regs->rsp;
     struct kretprobe_instance *ri;
@@ -295,7 +296,7 @@ void arch_prepare_kretprobe(struct kretprobe *rp, struct pt_regs *regs)
  * Interrupts are disabled on entry as trap3 is an interrupt gate and they
  * remain disabled thorough out this function.
  */
-int kprobe_handler(struct pt_regs *regs)
+int __kprobes kprobe_handler(struct pt_regs *regs)
 {
     struct kprobe *p;
     int ret = 0;
@@ -399,7 +400,7 @@ no_kprobe:
 /*
  * Called when we hit the probe point at kretprobe_trampoline
  */
-int trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
+int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
 {
     struct kretprobe_instance *ri = NULL;
     struct hlist_head *head;
@@ -478,7 +479,7 @@ int trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
  * that is atop the stack is the address following the copied instruction.
 * We need to make it the address following the original instruction.
 */
-static void resume_execution(struct kprobe *p, struct pt_regs *regs)
+static void __kprobes resume_execution(struct kprobe *p, struct pt_regs *regs)
 {
     unsigned long *tos = (unsigned long *)regs->rsp;
     unsigned long next_rip = 0;
@@ -536,7 +537,7 @@ static void resume_execution(struct kprobe *p, struct pt_regs *regs)
 * Interrupts are disabled on entry as trap1 is an interrupt gate and they
 * remain disabled thoroughout this function. And we hold kprobe lock.
 */
-int post_kprobe_handler(struct pt_regs *regs)
+int __kprobes post_kprobe_handler(struct pt_regs *regs)
 {
     if (!kprobe_running())
         return 0;
@@ -571,7 +572,7 @@ out:
 }
 
 /* Interrupts disabled, kprobe_lock held. */
-int kprobe_fault_handler(struct pt_regs *regs, int trapnr)
+int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr)
 {
     if (current_kprobe->fault_handler
         && current_kprobe->fault_handler(current_kprobe, regs, trapnr))
@@ -590,8 +591,8 @@ int kprobe_fault_handler(struct pt_regs *regs, int trapnr)
 /*
  * Wrapper routine for handling exceptions.
  */
-int kprobe_exceptions_notify(struct notifier_block *self, unsigned long val,
-                             void *data)
+int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
+                                       unsigned long val, void *data)
 {
     struct die_args *args = (struct die_args *)data;
     switch (val) {
@@ -619,7 +620,7 @@ int kprobe_exceptions_notify(struct notifier_block *self, unsigned long val,
     return NOTIFY_DONE;
 }
 
-int setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
+int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
 {
     struct jprobe *jp = container_of(p, struct jprobe, kp);
     unsigned long addr;
@@ -640,7 +641,7 @@ int setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
     return 1;
 }
 
-void jprobe_return(void)
+void __kprobes jprobe_return(void)
 {
     preempt_enable_no_resched();
     asm volatile ("       xchg   %%rbx,%%rsp     \n"
                  ::"b" (jprobe_saved_rsp):"memory");
 }
 
@@ -651,7 +652,7 @@ void jprobe_return(void)
-int longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
+int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
 {
     u8 *addr = (u8 *) (regs->rip - 1);
     unsigned long stack_addr = (unsigned long)jprobe_saved_rsp;
diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c
index 6ead433a3885..f238d6078a5a 100644
--- a/arch/x86_64/kernel/traps.c
+++ b/arch/x86_64/kernel/traps.c
@@ -29,6 +29,7 @@
 #include
 #include
 #include
+#include
 
 #include
 #include
@@ -419,8 +420,9 @@ void die_nmi(char *str, struct pt_regs *regs)
     do_exit(SIGSEGV);
 }
 
-static void do_trap(int trapnr, int signr, char *str,
-                    struct pt_regs * regs, long error_code, siginfo_t *info)
+static void __kprobes do_trap(int trapnr, int signr, char *str,
+                              struct pt_regs * regs, long error_code,
+                              siginfo_t *info)
 {
     conditional_sti(regs);
 
@@ -504,7 +506,8 @@ DO_ERROR(18, SIGSEGV, "reserved", reserved)
 DO_ERROR(12, SIGBUS, "stack segment", stack_segment)
 DO_ERROR( 8, SIGSEGV, "double fault", double_fault)
 
-asmlinkage void do_general_protection(struct pt_regs * regs, long error_code)
+asmlinkage void __kprobes do_general_protection(struct pt_regs * regs,
+                                                long error_code)
 {
     conditional_sti(regs);
 
@@ -622,7 +625,7 @@ asmlinkage void default_do_nmi(struct pt_regs *regs)
         io_check_error(reason, regs);
 }
 
-asmlinkage void do_int3(struct pt_regs * regs, long error_code)
+asmlinkage void __kprobes do_int3(struct pt_regs * regs, long error_code)
 {
     if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP) == NOTIFY_STOP) {
         return;
@@ -653,7 +656,8 @@ asmlinkage struct pt_regs *sync_regs(struct pt_regs *eregs)
 }
 
 /* runs on IST stack. */
-asmlinkage void do_debug(struct pt_regs * regs, unsigned long error_code)
+asmlinkage void __kprobes do_debug(struct pt_regs * regs,
+                                   unsigned long error_code)
 {
     unsigned long condition;
     struct task_struct *tsk = current;
diff --git a/arch/x86_64/kernel/vmlinux.lds.S b/arch/x86_64/kernel/vmlinux.lds.S
index 2a94f9b60b2d..d4abb07af52d 100644
--- a/arch/x86_64/kernel/vmlinux.lds.S
+++ b/arch/x86_64/kernel/vmlinux.lds.S
@@ -21,6 +21,7 @@ SECTIONS
         *(.text)
         SCHED_TEXT
         LOCK_TEXT
+        KPROBES_TEXT
         *(.fixup)
         *(.gnu.warning)
         } = 0x9090
-- cgit v1.2.1

From bce0649417d6e71f6df8ab7b11103d247913b142 Mon Sep 17 00:00:00 2001
From: Jim Keniston
Date: Tue, 6 Sep 2005 15:19:34 -0700
Subject: [PATCH] kprobes: fix handling of simultaneous probe hit/unregister

This patch fixes a bug in kprobes's handling of a corner case on i386
and x86_64. On an SMP system, if one CPU unregisters a kprobe just
after another CPU hits that probepoint, kprobe_handler() on the latter
CPU sees that the kprobe has been unregistered, and attempts to let the
CPU continue as if the probepoint hadn't been hit.

The bug is that on i386 and x86_64, we were neglecting to set the IP
back to the beginning of the probed instruction. This could cause an
oops or crash. The bug doesn't exist on ppc64 and ia64, where a
breakpoint instruction leaves the IP pointing to the beginning of the
instruction. I don't know about sparc64. (Dave, could you please
advise?)

This fix has been tested on i386 and x86_64 SMP systems. To reproduce
the problem, set one CPU to work registering and unregistering a kprobe
repeatedly, and another CPU pounding the probepoint in a tight loop.

Acked-by: Prasanna S Panchamukhi
Signed-off-by: Jim Keniston
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 arch/x86_64/kernel/kprobes.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'arch/x86_64/kernel')

diff --git a/arch/x86_64/kernel/kprobes.c b/arch/x86_64/kernel/kprobes.c
index c21cceaea275..2d7658fbbb28 100644
--- a/arch/x86_64/kernel/kprobes.c
+++ b/arch/x86_64/kernel/kprobes.c
@@ -361,7 +361,10 @@ int __kprobes kprobe_handler(struct pt_regs *regs)
          * either a probepoint or a debugger breakpoint
          * at this address.  In either case, no further
          * handling of this interrupt is appropriate.
+         * Back up over the (now missing) int3 and run
+         * the original instruction.
          */
+        regs->rip = (unsigned long)addr;
         ret = 1;
     }
     /* Not one of ours: let kernel handle it */
-- cgit v1.2.1

From deac66ae454cacf942c051b86d9232af546fb187 Mon Sep 17 00:00:00 2001
From: Keshavamurthy Anil S
Date: Tue, 6 Sep 2005 15:19:35 -0700
Subject: [PATCH] kprobes: fix bug when probed on task and isr functions

This patch fixes a race condition wherein the system used to hang or
sometimes crash within minutes when kprobes were inserted on an ISR
routine and a task routine.

The fix has been stress-tested on i386, ia64, ppc64 and x86_64. To
reproduce the problem, insert kprobes on the schedule() and do_IRQ()
functions and you should see a hang or system crash.
Signed-off-by: Anil S Keshavamurthy
Signed-off-by: Ananth N Mavinakayanahalli
Acked-by: Prasanna S Panchamukhi
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 arch/x86_64/kernel/kprobes.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'arch/x86_64/kernel')

diff --git a/arch/x86_64/kernel/kprobes.c b/arch/x86_64/kernel/kprobes.c
index 2d7658fbbb28..df08c43276a0 100644
--- a/arch/x86_64/kernel/kprobes.c
+++ b/arch/x86_64/kernel/kprobes.c
@@ -311,7 +311,8 @@ int __kprobes kprobe_handler(struct pt_regs *regs)
            Disarm the probe we just hit, and ignore it. */
     p = get_kprobe(addr);
     if (p) {
-        if (kprobe_status == KPROBE_HIT_SS) {
+        if (kprobe_status == KPROBE_HIT_SS &&
+            *p->ainsn.insn == BREAKPOINT_INSTRUCTION) {
             regs->eflags &= ~TF_MASK;
             regs->eflags |= kprobe_saved_rflags;
             unlock_kprobes();
-- cgit v1.2.1
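
The narrowed condition in the patch above can be exercised in isolation.
In the compilable toy below all names are illustrative and only the shape
of the test matches the patch: while one probe is being single-stepped
(KPROBE_HIT_SS), a second breakpoint trap is treated as re-hitting the
stepped probe only if that probe's copied-instruction slot itself holds
the int3 opcode; a hit on an unrelated probe, say do_IRQ() firing while
schedule() is stepped, now takes the normal path instead of corrupting
the single-step state.

    #include <stdio.h>

    #define BREAKPOINT_INSTRUCTION 0xcc
    #define KPROBE_HIT_SS 2

    static int restore_path_old(int status, unsigned char insn_slot)
    {
        (void)insn_slot;               /* pre-patch: the slot is not examined */
        return status == KPROBE_HIT_SS;
    }

    static int restore_path_new(int status, unsigned char insn_slot)
    {
        return status == KPROBE_HIT_SS &&
               insn_slot == BREAKPOINT_INSTRUCTION;
    }

    int main(void)
    {
        unsigned char slot = 0x55;     /* e.g. push %rbp: a real first opcode */
        printf("old: %d  new: %d\n",
               restore_path_old(KPROBE_HIT_SS, slot),
               restore_path_new(KPROBE_HIT_SS, slot)); /* old: 1  new: 0 */
        return 0;
    }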