diff options
Diffstat (limited to 'arch')
119 files changed, 1928 insertions, 546 deletions
diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig index f7c96635d3b4..c5739d6309df 100644 --- a/arch/alpha/Kconfig +++ b/arch/alpha/Kconfig @@ -509,7 +509,7 @@ config NR_CPUS depends on SMP default "64" -config DISCONTIGMEM +config ARCH_DISCONTIGMEM_ENABLE bool "Discontiguous Memory Support (EXPERIMENTAL)" depends on EXPERIMENTAL help @@ -518,6 +518,8 @@ config DISCONTIGMEM or have huge holes in the physical address space for other reasons. See <file:Documentation/vm/numa> for more. +source "mm/Kconfig" + config NUMA bool "NUMA Support (EXPERIMENTAL)" depends on DISCONTIGMEM diff --git a/arch/alpha/defconfig b/arch/alpha/defconfig index 5e39b7a7c8f4..6da9c3dbde44 100644 --- a/arch/alpha/defconfig +++ b/arch/alpha/defconfig @@ -96,7 +96,7 @@ CONFIG_ALPHA_CORE_AGP=y CONFIG_ALPHA_BROKEN_IRQ_MASK=y CONFIG_EISA=y # CONFIG_SMP is not set -# CONFIG_DISCONTIGMEM is not set +# CONFIG_ARCH_DISCONTIGMEM_ENABLE is not set CONFIG_VERBOSE_MCHECK=y CONFIG_VERBOSE_MCHECK_ON=1 CONFIG_PCI_LEGACY_PROC=y diff --git a/arch/alpha/mm/numa.c b/arch/alpha/mm/numa.c index ba81c4422aaf..c7481d59b6df 100644 --- a/arch/alpha/mm/numa.c +++ b/arch/alpha/mm/numa.c @@ -327,8 +327,6 @@ void __init mem_init(void) extern char _text, _etext, _data, _edata; extern char __init_begin, __init_end; unsigned long nid, i; - struct page * lmem_map; - high_memory = (void *) __va(max_low_pfn << PAGE_SHIFT); reservedpages = 0; @@ -338,10 +336,10 @@ void __init mem_init(void) */ totalram_pages += free_all_bootmem_node(NODE_DATA(nid)); - lmem_map = node_mem_map(nid); pfn = NODE_DATA(nid)->node_start_pfn; for (i = 0; i < node_spanned_pages(nid); i++, pfn++) - if (page_is_ram(pfn) && PageReserved(lmem_map+i)) + if (page_is_ram(pfn) && + PageReserved(nid_page_nr(nid, i))) reservedpages++; } @@ -373,18 +371,18 @@ show_mem(void) show_free_areas(); printk("Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10)); for_each_online_node(nid) { - struct page * lmem_map = node_mem_map(nid); i = node_spanned_pages(nid); while (i-- > 0) { + struct page *page = nid_page_nr(nid, i); total++; - if (PageReserved(lmem_map+i)) + if (PageReserved(page)) reserved++; - else if (PageSwapCache(lmem_map+i)) + else if (PageSwapCache(page)) cached++; - else if (!page_count(lmem_map+i)) + else if (!page_count(page)) free++; else - shared += page_count(lmem_map + i) - 1; + shared += page_count(page) - 1; } } printk("%ld pages of RAM\n",total); diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index ee8a9ad7bbd9..07ba77c19f6c 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -346,7 +346,7 @@ config PREEMPT Say Y here if you are building a kernel for a desktop, embedded or real-time system. Say N if you are unsure. -config DISCONTIGMEM +config ARCH_DISCONTIGMEM_ENABLE bool default (ARCH_LH7A40X && !LH7A40X_CONTIGMEM) help @@ -355,6 +355,8 @@ config DISCONTIGMEM or have huge holes in the physical address space for other reasons. See <file:Documentation/vm/numa> for more. +source "mm/Kconfig" + config LEDS bool "Timer and CPU usage LEDs" depends on ARCH_CDB89712 || ARCH_CO285 || ARCH_EBSA110 || \ diff --git a/arch/arm/boot/install.sh b/arch/arm/boot/install.sh index 935bb27369e9..9f9bed207345 100644 --- a/arch/arm/boot/install.sh +++ b/arch/arm/boot/install.sh @@ -21,8 +21,8 @@ # # User may have a custom install script -if [ -x ~/bin/installkernel ]; then exec ~/bin/installkernel "$@"; fi -if [ -x /sbin/installkernel ]; then exec /sbin/installkernel "$@"; fi +if [ -x ~/bin/${CROSS_COMPILE}installkernel ]; then exec ~/bin/${CROSS_COMPILE}installkernel "$@"; fi +if [ -x /sbin/${CROSS_COMPILE}installkernel ]; then exec /sbin/${CROSS_COMPILE}installkernel "$@"; fi if [ "$(basename $2)" = "zImage" ]; then # Compressed install diff --git a/arch/arm26/Kconfig b/arch/arm26/Kconfig index 6caed90661fc..dc0c1936969b 100644 --- a/arch/arm26/Kconfig +++ b/arch/arm26/Kconfig @@ -179,6 +179,8 @@ config CMDLINE time by entering them here. As a minimum, you should specify the memory size and the root device (e.g., mem=64M root=/dev/nfs). +source "mm/Kconfig" + endmenu source "drivers/base/Kconfig" diff --git a/arch/arm26/boot/install.sh b/arch/arm26/boot/install.sh index c628328dd9ec..8a8399b26cf7 100644 --- a/arch/arm26/boot/install.sh +++ b/arch/arm26/boot/install.sh @@ -23,8 +23,8 @@ # User may have a custom install script -if [ -x /sbin/installkernel ]; then - exec /sbin/installkernel "$@" +if [ -x /sbin/${CROSS_COMPILE}installkernel ]; then + exec /sbin/${CROSS_COMPILE}installkernel "$@" fi if [ "$2" = "zImage" ]; then diff --git a/arch/cris/Kconfig b/arch/cris/Kconfig index 4332ca348d51..f848e3761491 100644 --- a/arch/cris/Kconfig +++ b/arch/cris/Kconfig @@ -74,6 +74,8 @@ config PREEMPT Say Y here if you are building a kernel for a desktop, embedded or real-time system. Say N if you are unsure. +source mm/Kconfig + endmenu menu "Hardware setup" diff --git a/arch/frv/Kconfig b/arch/frv/Kconfig index 2b19372767eb..c93f95146cc2 100644 --- a/arch/frv/Kconfig +++ b/arch/frv/Kconfig @@ -74,6 +74,8 @@ config HIGHPTE with a lot of RAM, this can be wasteful of precious low memory. Setting this option will put user-space page tables in high memory. +source "mm/Kconfig" + choice prompt "uClinux kernel load address" depends on !MMU diff --git a/arch/h8300/Kconfig.cpu b/arch/h8300/Kconfig.cpu index d9dd62a565a9..a380167a13cf 100644 --- a/arch/h8300/Kconfig.cpu +++ b/arch/h8300/Kconfig.cpu @@ -180,4 +180,7 @@ config CPU_H8S config PREEMPT bool "Preemptible Kernel" default n + +source "mm/Kconfig" + endmenu diff --git a/arch/h8300/platform/h8300h/ptrace_h8300h.c b/arch/h8300/platform/h8300h/ptrace_h8300h.c index 18e51a7167d3..6ac93c05a1ae 100644 --- a/arch/h8300/platform/h8300h/ptrace_h8300h.c +++ b/arch/h8300/platform/h8300h/ptrace_h8300h.c @@ -245,12 +245,12 @@ static unsigned short *getnextpc(struct task_struct *child, unsigned short *pc) addr = h8300_get_reg(child, regno-1+PT_ER1); return (unsigned short *)addr; case relb: - if ((inst = 0x55) || isbranch(child,inst & 0x0f)) + if (inst == 0x55 || isbranch(child,inst & 0x0f)) pc = (unsigned short *)((unsigned long)pc + ((signed char)(*fetch_p))); return pc+1; /* skip myself */ case relw: - if ((inst = 0x5c) || isbranch(child,(*fetch_p & 0xf0) >> 4)) + if (inst == 0x5c || isbranch(child,(*fetch_p & 0xf0) >> 4)) pc = (unsigned short *)((unsigned long)pc + ((signed short)(*(pc+1)))); return pc+2; /* skip myself */ diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig index dfd904f6883b..d4ae5f9ceae6 100644 --- a/arch/i386/Kconfig +++ b/arch/i386/Kconfig @@ -68,7 +68,6 @@ config X86_VOYAGER config X86_NUMAQ bool "NUMAQ (IBM/Sequent)" - select DISCONTIGMEM select NUMA help This option is used for getting Linux to run on a (IBM/Sequent) NUMA @@ -783,25 +782,48 @@ comment "NUMA (NUMA-Q) requires SMP, 64GB highmem support" comment "NUMA (Summit) requires SMP, 64GB highmem support, ACPI" depends on X86_SUMMIT && (!HIGHMEM64G || !ACPI) -config DISCONTIGMEM - bool - depends on NUMA - default y - config HAVE_ARCH_BOOTMEM_NODE bool depends on NUMA default y -config HAVE_MEMORY_PRESENT +config ARCH_HAVE_MEMORY_PRESENT bool depends on DISCONTIGMEM default y config NEED_NODE_MEMMAP_SIZE bool - depends on DISCONTIGMEM + depends on DISCONTIGMEM || SPARSEMEM + default y + +config HAVE_ARCH_ALLOC_REMAP + bool + depends on NUMA + default y + +config ARCH_DISCONTIGMEM_ENABLE + def_bool y + depends on NUMA + +config ARCH_DISCONTIGMEM_DEFAULT + def_bool y + depends on NUMA + +config ARCH_SPARSEMEM_ENABLE + def_bool y + depends on NUMA + +config ARCH_SELECT_MEMORY_MODEL + def_bool y + depends on ARCH_SPARSEMEM_ENABLE + +source "mm/Kconfig" + +config HAVE_ARCH_EARLY_PFN_TO_NID + bool default y + depends on NUMA config HIGHPTE bool "Allocate 3rd-level pagetables from highmem" @@ -939,6 +961,8 @@ config SECCOMP If unsure, say Y. Only embedded should say N here. +source kernel/Kconfig.hz + endmenu diff --git a/arch/i386/Makefile b/arch/i386/Makefile index 1c36ca332a96..bf7c9ba709f3 100644 --- a/arch/i386/Makefile +++ b/arch/i386/Makefile @@ -17,6 +17,13 @@ # 20050320 Kianusch Sayah Karadji <kianusch@sk-tech.net> # Added support for GEODE CPU +HAS_BIARCH := $(call cc-option-yn, -m32) +ifeq ($(HAS_BIARCH),y) +AS := $(AS) --32 +LD := $(LD) -m elf_i386 +CC := $(CC) -m32 +endif + LDFLAGS := -m elf_i386 OBJCOPYFLAGS := -O binary -R .note -R .comment -S LDFLAGS_vmlinux := diff --git a/arch/i386/boot/install.sh b/arch/i386/boot/install.sh index 90f2452b3b9e..f17b40dfc0f4 100644 --- a/arch/i386/boot/install.sh +++ b/arch/i386/boot/install.sh @@ -21,8 +21,8 @@ # User may have a custom install script -if [ -x ~/bin/installkernel ]; then exec ~/bin/installkernel "$@"; fi -if [ -x /sbin/installkernel ]; then exec /sbin/installkernel "$@"; fi +if [ -x ~/bin/${CROSS_COMPILE}installkernel ]; then exec ~/bin/${CROSS_COMPILE}installkernel "$@"; fi +if [ -x /sbin/${CROSS_COMPILE}installkernel ]; then exec /sbin/${CROSS_COMPILE}installkernel "$@"; fi # Default install - same as make zlilo diff --git a/arch/i386/kernel/apic.c b/arch/i386/kernel/apic.c index d509836b70c3..8d993fa71754 100644 --- a/arch/i386/kernel/apic.c +++ b/arch/i386/kernel/apic.c @@ -1133,7 +1133,7 @@ inline void smp_local_timer_interrupt(struct pt_regs * regs) } #ifdef CONFIG_SMP - update_process_times(user_mode(regs)); + update_process_times(user_mode_vm(regs)); #endif } diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c index d199e525680a..b9954248d0aa 100644 --- a/arch/i386/kernel/cpu/common.c +++ b/arch/i386/kernel/cpu/common.c @@ -635,7 +635,7 @@ void __init cpu_init (void) /* Clear all 6 debug registers: */ -#define CD(register) __asm__("movl %0,%%db" #register ::"r"(0) ); +#define CD(register) set_debugreg(0, register) CD(0); CD(1); CD(2); CD(3); /* no db4 and db5 */; CD(6); CD(7); diff --git a/arch/i386/kernel/cpu/mtrr/main.c b/arch/i386/kernel/cpu/mtrr/main.c index e1c2042b9b7e..d66b09e0c820 100644 --- a/arch/i386/kernel/cpu/mtrr/main.c +++ b/arch/i386/kernel/cpu/mtrr/main.c @@ -375,6 +375,19 @@ int mtrr_add_page(unsigned long base, unsigned long size, return error; } +static int mtrr_check(unsigned long base, unsigned long size) +{ + if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) { + printk(KERN_WARNING + "mtrr: size and base must be multiples of 4 kiB\n"); + printk(KERN_DEBUG + "mtrr: size: 0x%lx base: 0x%lx\n", size, base); + dump_stack(); + return -1; + } + return 0; +} + /** * mtrr_add - Add a memory type region * @base: Physical base address of region @@ -415,11 +428,8 @@ int mtrr_add(unsigned long base, unsigned long size, unsigned int type, char increment) { - if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) { - printk(KERN_WARNING "mtrr: size and base must be multiples of 4 kiB\n"); - printk(KERN_DEBUG "mtrr: size: 0x%lx base: 0x%lx\n", size, base); + if (mtrr_check(base, size)) return -EINVAL; - } return mtrr_add_page(base >> PAGE_SHIFT, size >> PAGE_SHIFT, type, increment); } @@ -511,11 +521,8 @@ int mtrr_del_page(int reg, unsigned long base, unsigned long size) int mtrr_del(int reg, unsigned long base, unsigned long size) { - if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) { - printk(KERN_INFO "mtrr: size and base must be multiples of 4 kiB\n"); - printk(KERN_DEBUG "mtrr: size: 0x%lx base: 0x%lx\n", size, base); + if (mtrr_check(base, size)) return -EINVAL; - } return mtrr_del_page(reg, base >> PAGE_SHIFT, size >> PAGE_SHIFT); } diff --git a/arch/i386/kernel/cpu/proc.c b/arch/i386/kernel/cpu/proc.c index 7323c19f354e..8bd77d948a84 100644 --- a/arch/i386/kernel/cpu/proc.c +++ b/arch/i386/kernel/cpu/proc.c @@ -86,7 +86,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) seq_printf(m, "stepping\t: unknown\n"); if ( cpu_has(c, X86_FEATURE_TSC) ) { - seq_printf(m, "cpu MHz\t\t: %lu.%03lu\n", + seq_printf(m, "cpu MHz\t\t: %u.%03u\n", cpu_khz / 1000, (cpu_khz % 1000)); } diff --git a/arch/i386/kernel/i386_ksyms.c b/arch/i386/kernel/i386_ksyms.c index 903190a4b3ff..180f070d03cb 100644 --- a/arch/i386/kernel/i386_ksyms.c +++ b/arch/i386/kernel/i386_ksyms.c @@ -1,97 +1,17 @@ #include <linux/config.h> #include <linux/module.h> -#include <linux/smp.h> -#include <linux/user.h> -#include <linux/elfcore.h> -#include <linux/mca.h> -#include <linux/sched.h> -#include <linux/in6.h> -#include <linux/interrupt.h> -#include <linux/smp_lock.h> -#include <linux/pm.h> -#include <linux/pci.h> -#include <linux/apm_bios.h> -#include <linux/kernel.h> -#include <linux/string.h> -#include <linux/tty.h> -#include <linux/highmem.h> -#include <linux/time.h> - -#include <asm/semaphore.h> -#include <asm/processor.h> -#include <asm/i387.h> -#include <asm/uaccess.h> #include <asm/checksum.h> -#include <asm/io.h> -#include <asm/delay.h> -#include <asm/irq.h> -#include <asm/mmx.h> #include <asm/desc.h> -#include <asm/pgtable.h> -#include <asm/tlbflush.h> -#include <asm/nmi.h> -#include <asm/ist.h> -#include <asm/kdebug.h> - -extern void dump_thread(struct pt_regs *, struct user *); -extern spinlock_t rtc_lock; /* This is definitely a GPL-only symbol */ EXPORT_SYMBOL_GPL(cpu_gdt_table); -#if defined(CONFIG_APM_MODULE) -extern void machine_real_restart(unsigned char *, int); -EXPORT_SYMBOL(machine_real_restart); -extern void default_idle(void); -EXPORT_SYMBOL(default_idle); -#endif - -#ifdef CONFIG_SMP -extern void FASTCALL( __write_lock_failed(rwlock_t *rw)); -extern void FASTCALL( __read_lock_failed(rwlock_t *rw)); -#endif - -#if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_HD) || defined(CONFIG_BLK_DEV_IDE_MODULE) || defined(CONFIG_BLK_DEV_HD_MODULE) -extern struct drive_info_struct drive_info; -EXPORT_SYMBOL(drive_info); -#endif - -extern unsigned long cpu_khz; -extern unsigned long get_cmos_time(void); - -/* platform dependent support */ -EXPORT_SYMBOL(boot_cpu_data); -#ifdef CONFIG_DISCONTIGMEM -EXPORT_SYMBOL(node_data); -EXPORT_SYMBOL(physnode_map); -#endif -#ifdef CONFIG_X86_NUMAQ -EXPORT_SYMBOL(xquad_portio); -#endif -EXPORT_SYMBOL(dump_thread); -EXPORT_SYMBOL(dump_fpu); -EXPORT_SYMBOL_GPL(kernel_fpu_begin); -EXPORT_SYMBOL(__ioremap); -EXPORT_SYMBOL(ioremap_nocache); -EXPORT_SYMBOL(iounmap); -EXPORT_SYMBOL(kernel_thread); -EXPORT_SYMBOL(pm_idle); -EXPORT_SYMBOL(pm_power_off); -EXPORT_SYMBOL(get_cmos_time); -EXPORT_SYMBOL(cpu_khz); -EXPORT_SYMBOL(apm_info); - EXPORT_SYMBOL(__down_failed); EXPORT_SYMBOL(__down_failed_interruptible); EXPORT_SYMBOL(__down_failed_trylock); EXPORT_SYMBOL(__up_wakeup); /* Networking helper routines. */ EXPORT_SYMBOL(csum_partial_copy_generic); -/* Delay loops */ -EXPORT_SYMBOL(__ndelay); -EXPORT_SYMBOL(__udelay); -EXPORT_SYMBOL(__delay); -EXPORT_SYMBOL(__const_udelay); EXPORT_SYMBOL(__get_user_1); EXPORT_SYMBOL(__get_user_2); @@ -105,87 +25,11 @@ EXPORT_SYMBOL(__put_user_8); EXPORT_SYMBOL(strpbrk); EXPORT_SYMBOL(strstr); -EXPORT_SYMBOL(strncpy_from_user); -EXPORT_SYMBOL(__strncpy_from_user); -EXPORT_SYMBOL(clear_user); -EXPORT_SYMBOL(__clear_user); -EXPORT_SYMBOL(__copy_from_user_ll); -EXPORT_SYMBOL(__copy_to_user_ll); -EXPORT_SYMBOL(strnlen_user); - -EXPORT_SYMBOL(dma_alloc_coherent); -EXPORT_SYMBOL(dma_free_coherent); - -#ifdef CONFIG_PCI -EXPORT_SYMBOL(pci_mem_start); -#endif - -#ifdef CONFIG_PCI_BIOS -EXPORT_SYMBOL(pcibios_set_irq_routing); -EXPORT_SYMBOL(pcibios_get_irq_routing_table); -#endif - -#ifdef CONFIG_X86_USE_3DNOW -EXPORT_SYMBOL(_mmx_memcpy); -EXPORT_SYMBOL(mmx_clear_page); -EXPORT_SYMBOL(mmx_copy_page); -#endif - -#ifdef CONFIG_X86_HT -EXPORT_SYMBOL(smp_num_siblings); -EXPORT_SYMBOL(cpu_sibling_map); -#endif - #ifdef CONFIG_SMP -EXPORT_SYMBOL(cpu_data); -EXPORT_SYMBOL(cpu_online_map); -EXPORT_SYMBOL(cpu_callout_map); +extern void FASTCALL( __write_lock_failed(rwlock_t *rw)); +extern void FASTCALL( __read_lock_failed(rwlock_t *rw)); EXPORT_SYMBOL(__write_lock_failed); EXPORT_SYMBOL(__read_lock_failed); - -/* Global SMP stuff */ -EXPORT_SYMBOL(smp_call_function); - -/* TLB flushing */ -EXPORT_SYMBOL(flush_tlb_page); -#endif - -#ifdef CONFIG_X86_IO_APIC -EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector); -#endif - -#ifdef CONFIG_MCA -EXPORT_SYMBOL(machine_id); -#endif - -#ifdef CONFIG_VT -EXPORT_SYMBOL(screen_info); -#endif - -EXPORT_SYMBOL(get_wchan); - -EXPORT_SYMBOL(rtc_lock); - -EXPORT_SYMBOL_GPL(set_nmi_callback); -EXPORT_SYMBOL_GPL(unset_nmi_callback); - -EXPORT_SYMBOL(register_die_notifier); -#ifdef CONFIG_HAVE_DEC_LOCK -EXPORT_SYMBOL(_atomic_dec_and_lock); -#endif - -EXPORT_SYMBOL(__PAGE_KERNEL); - -#ifdef CONFIG_HIGHMEM -EXPORT_SYMBOL(kmap); -EXPORT_SYMBOL(kunmap); -EXPORT_SYMBOL(kmap_atomic); -EXPORT_SYMBOL(kunmap_atomic); -EXPORT_SYMBOL(kmap_atomic_to_page); -#endif - -#if defined(CONFIG_X86_SPEEDSTEP_SMI) || defined(CONFIG_X86_SPEEDSTEP_SMI_MODULE) -EXPORT_SYMBOL(ist_info); #endif EXPORT_SYMBOL(csum_partial); diff --git a/arch/i386/kernel/i387.c b/arch/i386/kernel/i387.c index c55e037f08f7..b817168d9c62 100644 --- a/arch/i386/kernel/i387.c +++ b/arch/i386/kernel/i387.c @@ -10,6 +10,7 @@ #include <linux/config.h> #include <linux/sched.h> +#include <linux/module.h> #include <asm/processor.h> #include <asm/i387.h> #include <asm/math_emu.h> @@ -79,6 +80,7 @@ void kernel_fpu_begin(void) } clts(); } +EXPORT_SYMBOL_GPL(kernel_fpu_begin); void restore_fpu( struct task_struct *tsk ) { @@ -526,6 +528,7 @@ int dump_fpu( struct pt_regs *regs, struct user_i387_struct *fpu ) return fpvalid; } +EXPORT_SYMBOL(dump_fpu); int dump_task_fpu(struct task_struct *tsk, struct user_i387_struct *fpu) { diff --git a/arch/i386/kernel/io_apic.c b/arch/i386/kernel/io_apic.c index 7a324e8b86f9..08540bc4ba3e 100644 --- a/arch/i386/kernel/io_apic.c +++ b/arch/i386/kernel/io_apic.c @@ -31,7 +31,7 @@ #include <linux/mc146818rtc.h> #include <linux/compiler.h> #include <linux/acpi.h> - +#include <linux/module.h> #include <linux/sysdev.h> #include <asm/io.h> #include <asm/smp.h> @@ -812,6 +812,7 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin) } return best_guess; } +EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector); /* * This function currently is only a helper for the i386 smp boot process where @@ -1659,6 +1660,12 @@ static void __init setup_ioapic_ids_from_mpc(void) unsigned long flags; /* + * Don't check I/O APIC IDs for xAPIC systems. They have + * no meaning without the serial APIC bus. + */ + if (!(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && boot_cpu_data.x86 < 15)) + return; + /* * This is broken; anything with a real cpu count has to * circumvent this idiocy regardless. */ @@ -1684,10 +1691,6 @@ static void __init setup_ioapic_ids_from_mpc(void) mp_ioapics[apic].mpc_apicid = reg_00.bits.ID; } - /* Don't check I/O APIC IDs for some xAPIC systems. They have - * no meaning without the serial APIC bus. */ - if (NO_IOAPIC_CHECK) - continue; /* * Sanity check, is the ID really free? Every APIC in a * system must have a unique ID or we get lots of nice diff --git a/arch/i386/kernel/kprobes.c b/arch/i386/kernel/kprobes.c index 59ff9b455069..3762f6b35ab2 100644 --- a/arch/i386/kernel/kprobes.c +++ b/arch/i386/kernel/kprobes.c @@ -23,6 +23,9 @@ * Rusty Russell). * 2004-July Suparna Bhattacharya <suparna@in.ibm.com> added jumper probes * interface to access function arguments. + * 2005-May Hien Nguyen <hien@us.ibm.com>, Jim Keniston + * <jkenisto@us.ibm.com> and Prasanna S Panchamukhi + * <prasanna@in.ibm.com> added function-return probes. */ #include <linux/config.h> @@ -30,15 +33,14 @@ #include <linux/ptrace.h> #include <linux/spinlock.h> #include <linux/preempt.h> +#include <asm/cacheflush.h> #include <asm/kdebug.h> #include <asm/desc.h> -/* kprobe_status settings */ -#define KPROBE_HIT_ACTIVE 0x00000001 -#define KPROBE_HIT_SS 0x00000002 - static struct kprobe *current_kprobe; static unsigned long kprobe_status, kprobe_old_eflags, kprobe_saved_eflags; +static struct kprobe *kprobe_prev; +static unsigned long kprobe_status_prev, kprobe_old_eflags_prev, kprobe_saved_eflags_prev; static struct pt_regs jprobe_saved_regs; static long *jprobe_saved_esp; /* copy of the kernel stack at the probe fire time */ @@ -68,16 +70,50 @@ int arch_prepare_kprobe(struct kprobe *p) void arch_copy_kprobe(struct kprobe *p) { memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t)); + p->opcode = *p->addr; } -void arch_remove_kprobe(struct kprobe *p) +void arch_arm_kprobe(struct kprobe *p) { + *p->addr = BREAKPOINT_INSTRUCTION; + flush_icache_range((unsigned long) p->addr, + (unsigned long) p->addr + sizeof(kprobe_opcode_t)); } -static inline void disarm_kprobe(struct kprobe *p, struct pt_regs *regs) +void arch_disarm_kprobe(struct kprobe *p) { *p->addr = p->opcode; - regs->eip = (unsigned long)p->addr; + flush_icache_range((unsigned long) p->addr, + (unsigned long) p->addr + sizeof(kprobe_opcode_t)); +} + +void arch_remove_kprobe(struct kprobe *p) +{ +} + +static inline void save_previous_kprobe(void) +{ + kprobe_prev = current_kprobe; + kprobe_status_prev = kprobe_status; + kprobe_old_eflags_prev = kprobe_old_eflags; + kprobe_saved_eflags_prev = kprobe_saved_eflags; +} + +static inline void restore_previous_kprobe(void) +{ + current_kprobe = kprobe_prev; + kprobe_status = kprobe_status_prev; + kprobe_old_eflags = kprobe_old_eflags_prev; + kprobe_saved_eflags = kprobe_saved_eflags_prev; +} + +static inline void set_current_kprobe(struct kprobe *p, struct pt_regs *regs) +{ + current_kprobe = p; + kprobe_saved_eflags = kprobe_old_eflags + = (regs->eflags & (TF_MASK | IF_MASK)); + if (is_IF_modifier(p->opcode)) + kprobe_saved_eflags &= ~IF_MASK; } static inline void prepare_singlestep(struct kprobe *p, struct pt_regs *regs) @@ -91,6 +127,50 @@ static inline void prepare_singlestep(struct kprobe *p, struct pt_regs *regs) regs->eip = (unsigned long)&p->ainsn.insn; } +struct task_struct *arch_get_kprobe_task(void *ptr) +{ + return ((struct thread_info *) (((unsigned long) ptr) & + (~(THREAD_SIZE -1))))->task; +} + +void arch_prepare_kretprobe(struct kretprobe *rp, struct pt_regs *regs) +{ + unsigned long *sara = (unsigned long *)®s->esp; + struct kretprobe_instance *ri; + static void *orig_ret_addr; + + /* + * Save the return address when the return probe hits + * the first time, and use it to populate the (krprobe + * instance)->ret_addr for subsequent return probes at + * the same addrress since stack address would have + * the kretprobe_trampoline by then. + */ + if (((void*) *sara) != kretprobe_trampoline) + orig_ret_addr = (void*) *sara; + + if ((ri = get_free_rp_inst(rp)) != NULL) { + ri->rp = rp; + ri->stack_addr = sara; + ri->ret_addr = orig_ret_addr; + add_rp_inst(ri); + /* Replace the return addr with trampoline addr */ + *sara = (unsigned long) &kretprobe_trampoline; + } else { + rp->nmissed++; + } +} + +void arch_kprobe_flush_task(struct task_struct *tk) +{ + struct kretprobe_instance *ri; + while ((ri = get_rp_inst_tsk(tk)) != NULL) { + *((unsigned long *)(ri->stack_addr)) = + (unsigned long) ri->ret_addr; + recycle_rp_inst(ri); + } +} + /* * Interrupts are disabled on entry as trap3 is an interrupt gate and they * remain disabled thorough out this function. @@ -127,8 +207,18 @@ static int kprobe_handler(struct pt_regs *regs) unlock_kprobes(); goto no_kprobe; } - disarm_kprobe(p, regs); - ret = 1; + /* We have reentered the kprobe_handler(), since + * another probe was hit while within the handler. + * We here save the original kprobes variables and + * just single step on the instruction of the new probe + * without calling any user handlers. + */ + save_previous_kprobe(); + set_current_kprobe(p, regs); + p->nmissed++; + prepare_singlestep(p, regs); + kprobe_status = KPROBE_REENTER; + return 1; } else { p = current_kprobe; if (p->break_handler && p->break_handler(p, regs)) { @@ -163,11 +253,7 @@ static int kprobe_handler(struct pt_regs *regs) } kprobe_status = KPROBE_HIT_ACTIVE; - current_kprobe = p; - kprobe_saved_eflags = kprobe_old_eflags - = (regs->eflags & (TF_MASK | IF_MASK)); - if (is_IF_modifier(p->opcode)) - kprobe_saved_eflags &= ~IF_MASK; + set_current_kprobe(p, regs); if (p->pre_handler && p->pre_handler(p, regs)) /* handler has already set things up, so skip ss setup */ @@ -184,6 +270,55 @@ no_kprobe: } /* + * For function-return probes, init_kprobes() establishes a probepoint + * here. When a retprobed function returns, this probe is hit and + * trampoline_probe_handler() runs, calling the kretprobe's handler. + */ + void kretprobe_trampoline_holder(void) + { + asm volatile ( ".global kretprobe_trampoline\n" + "kretprobe_trampoline: \n" + "nop\n"); + } + +/* + * Called when we hit the probe point at kretprobe_trampoline + */ +int trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) +{ + struct task_struct *tsk; + struct kretprobe_instance *ri; + struct hlist_head *head; + struct hlist_node *node; + unsigned long *sara = ((unsigned long *) ®s->esp) - 1; + + tsk = arch_get_kprobe_task(sara); + head = kretprobe_inst_table_head(tsk); + + hlist_for_each_entry(ri, node, head, hlist) { + if (ri->stack_addr == sara && ri->rp) { + if (ri->rp->handler) + ri->rp->handler(ri, regs); + } + } + return 0; +} + +void trampoline_post_handler(struct kprobe *p, struct pt_regs *regs, + unsigned long flags) +{ + struct kretprobe_instance *ri; + /* RA already popped */ + unsigned long *sara = ((unsigned long *)®s->esp) - 1; + + while ((ri = get_rp_inst(sara))) { + regs->eip = (unsigned long)ri->ret_addr; + recycle_rp_inst(ri); + } + regs->eflags &= ~TF_MASK; +} + +/* * Called after single-stepping. p->addr is the address of the * instruction whose first byte has been replaced by the "int 3" * instruction. To avoid the SMP problems that can occur when we @@ -263,13 +398,22 @@ static inline int post_kprobe_handler(struct pt_regs *regs) if (!kprobe_running()) return 0; - if (current_kprobe->post_handler) + if ((kprobe_status != KPROBE_REENTER) && current_kprobe->post_handler) { + kprobe_status = KPROBE_HIT_SSDONE; current_kprobe->post_handler(current_kprobe, regs, 0); + } - resume_execution(current_kprobe, regs); + if (current_kprobe->post_handler != trampoline_post_handler) + resume_execution(current_kprobe, regs); regs->eflags |= kprobe_saved_eflags; + /*Restore back the original saved kprobes variables and continue. */ + if (kprobe_status == KPROBE_REENTER) { + restore_previous_kprobe(); + goto out; + } unlock_kprobes(); +out: preempt_enable_no_resched(); /* diff --git a/arch/i386/kernel/mpparse.c b/arch/i386/kernel/mpparse.c index 1347ab4939e7..383a11600d2c 100644 --- a/arch/i386/kernel/mpparse.c +++ b/arch/i386/kernel/mpparse.c @@ -914,7 +914,10 @@ void __init mp_register_ioapic ( mp_ioapics[idx].mpc_apicaddr = address; set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address); - mp_ioapics[idx].mpc_apicid = io_apic_get_unique_id(idx, id); + if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && (boot_cpu_data.x86 < 15)) + mp_ioapics[idx].mpc_apicid = io_apic_get_unique_id(idx, id); + else + mp_ioapics[idx].mpc_apicid = id; mp_ioapics[idx].mpc_apicver = io_apic_get_version(idx); /* @@ -1055,11 +1058,20 @@ void __init mp_config_acpi_legacy_irqs (void) } } +#define MAX_GSI_NUM 4096 + int mp_register_gsi (u32 gsi, int edge_level, int active_high_low) { int ioapic = -1; int ioapic_pin = 0; int idx, bit = 0; + static int pci_irq = 16; + /* + * Mapping between Global System Interrups, which + * represent all possible interrupts, and IRQs + * assigned to actual devices. + */ + static int gsi_to_irq[MAX_GSI_NUM]; #ifdef CONFIG_ACPI_BUS /* Don't set up the ACPI SCI because it's already set up */ @@ -1094,11 +1106,26 @@ int mp_register_gsi (u32 gsi, int edge_level, int active_high_low) if ((1<<bit) & mp_ioapic_routing[ioapic].pin_programmed[idx]) { Dprintk(KERN_DEBUG "Pin %d-%d already programmed\n", mp_ioapic_routing[ioapic].apic_id, ioapic_pin); - return gsi; + return gsi_to_irq[gsi]; } mp_ioapic_routing[ioapic].pin_programmed[idx] |= (1<<bit); + if (edge_level) { + /* + * For PCI devices assign IRQs in order, avoiding gaps + * due to unused I/O APIC pins. + */ + int irq = gsi; + if (gsi < MAX_GSI_NUM) { + gsi = pci_irq++; + gsi_to_irq[irq] = gsi; + } else { + printk(KERN_ERR "GSI %u is too high\n", gsi); + return gsi; + } + } + io_apic_set_pci_routing(ioapic, ioapic_pin, gsi, edge_level == ACPI_EDGE_SENSITIVE ? 0 : 1, active_high_low == ACPI_ACTIVE_HIGH ? 0 : 1); diff --git a/arch/i386/kernel/nmi.c b/arch/i386/kernel/nmi.c index 2c0ee9c2d020..da6c46d667cb 100644 --- a/arch/i386/kernel/nmi.c +++ b/arch/i386/kernel/nmi.c @@ -28,8 +28,7 @@ #include <linux/sysctl.h> #include <asm/smp.h> -#include <asm/mtrr.h> -#include <asm/mpspec.h> +#include <asm/div64.h> #include <asm/nmi.h> #include "mach_traps.h" @@ -324,6 +323,16 @@ static void clear_msr_range(unsigned int base, unsigned int n) wrmsr(base+i, 0, 0); } +static inline void write_watchdog_counter(const char *descr) +{ + u64 count = (u64)cpu_khz * 1000; + + do_div(count, nmi_hz); + if(descr) + Dprintk("setting %s to -0x%08Lx\n", descr, count); + wrmsrl(nmi_perfctr_msr, 0 - count); +} + static void setup_k7_watchdog(void) { unsigned int evntsel; @@ -339,8 +348,7 @@ static void setup_k7_watchdog(void) | K7_NMI_EVENT; wrmsr(MSR_K7_EVNTSEL0, evntsel, 0); - Dprintk("setting K7_PERFCTR0 to %08lx\n", -(cpu_khz/nmi_hz*1000)); - wrmsr(MSR_K7_PERFCTR0, -(cpu_khz/nmi_hz*1000), -1); + write_watchdog_counter("K7_PERFCTR0"); apic_write(APIC_LVTPC, APIC_DM_NMI); evntsel |= K7_EVNTSEL_ENABLE; wrmsr(MSR_K7_EVNTSEL0, evntsel, 0); @@ -361,8 +369,7 @@ static void setup_p6_watchdog(void) | P6_NMI_EVENT; wrmsr(MSR_P6_EVNTSEL0, evntsel, 0); - Dprintk("setting P6_PERFCTR0 to %08lx\n", -(cpu_khz/nmi_hz*1000)); - wrmsr(MSR_P6_PERFCTR0, -(cpu_khz/nmi_hz*1000), 0); + write_watchdog_counter("P6_PERFCTR0"); apic_write(APIC_LVTPC, APIC_DM_NMI); evntsel |= P6_EVNTSEL0_ENABLE; wrmsr(MSR_P6_EVNTSEL0, evntsel, 0); @@ -402,8 +409,7 @@ static int setup_p4_watchdog(void) wrmsr(MSR_P4_CRU_ESCR0, P4_NMI_CRU_ESCR0, 0); wrmsr(MSR_P4_IQ_CCCR0, P4_NMI_IQ_CCCR0 & ~P4_CCCR_ENABLE, 0); - Dprintk("setting P4_IQ_COUNTER0 to 0x%08lx\n", -(cpu_khz/nmi_hz*1000)); - wrmsr(MSR_P4_IQ_COUNTER0, -(cpu_khz/nmi_hz*1000), -1); + write_watchdog_counter("P4_IQ_COUNTER0"); apic_write(APIC_LVTPC, APIC_DM_NMI); wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0); return 1; @@ -518,7 +524,7 @@ void nmi_watchdog_tick (struct pt_regs * regs) * other P6 variant */ apic_write(APIC_LVTPC, APIC_DM_NMI); } - wrmsr(nmi_perfctr_msr, -(cpu_khz/nmi_hz*1000), -1); + write_watchdog_counter(NULL); } } diff --git a/arch/i386/kernel/pci-dma.c b/arch/i386/kernel/pci-dma.c index 4de2e03c7b45..1e51427cc9eb 100644 --- a/arch/i386/kernel/pci-dma.c +++ b/arch/i386/kernel/pci-dma.c @@ -11,6 +11,7 @@ #include <linux/mm.h> #include <linux/string.h> #include <linux/pci.h> +#include <linux/module.h> #include <asm/io.h> struct dma_coherent_mem { @@ -54,6 +55,7 @@ void *dma_alloc_coherent(struct device *dev, size_t size, } return ret; } +EXPORT_SYMBOL(dma_alloc_coherent); void dma_free_coherent(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle) @@ -68,6 +70,7 @@ void dma_free_coherent(struct device *dev, size_t size, } else free_pages((unsigned long)vaddr, order); } +EXPORT_SYMBOL(dma_free_coherent); int dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr, dma_addr_t device_addr, size_t size, int flags) diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c index 96e3ea6b17c7..aea2ce1145df 100644 --- a/arch/i386/kernel/process.c +++ b/arch/i386/kernel/process.c @@ -37,6 +37,7 @@ #include <linux/kallsyms.h> #include <linux/ptrace.h> #include <linux/random.h> +#include <linux/kprobes.h> #include <asm/uaccess.h> #include <asm/pgtable.h> @@ -73,6 +74,7 @@ unsigned long thread_saved_pc(struct task_struct *tsk) * Powermanagement idle function, if any.. */ void (*pm_idle)(void); +EXPORT_SYMBOL(pm_idle); static DEFINE_PER_CPU(unsigned int, cpu_idle_state); void disable_hlt(void) @@ -105,6 +107,9 @@ void default_idle(void) cpu_relax(); } } +#ifdef CONFIG_APM_MODULE +EXPORT_SYMBOL(default_idle); +#endif /* * On SMP it's slightly faster (but much more power-consuming!) @@ -262,7 +267,7 @@ void show_regs(struct pt_regs * regs) printk("EIP: %04x:[<%08lx>] CPU: %d\n",0xffff & regs->xcs,regs->eip, smp_processor_id()); print_symbol("EIP is at %s\n", regs->eip); - if (regs->xcs & 3) + if (user_mode(regs)) printk(" ESP: %04x:%08lx",0xffff & regs->xss,regs->esp); printk(" EFLAGS: %08lx %s (%s)\n", regs->eflags, print_tainted(), system_utsname.release); @@ -325,6 +330,7 @@ int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags) /* Ok, create the new process.. */ return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, ®s, 0, NULL, NULL); } +EXPORT_SYMBOL(kernel_thread); /* * Free current thread data structures etc.. @@ -334,6 +340,13 @@ void exit_thread(void) struct task_struct *tsk = current; struct thread_struct *t = &tsk->thread; + /* + * Remove function-return probe instances associated with this task + * and put them back on the free list. Do not insert an exit probe for + * this function, it will be disabled by kprobe_flush_task if you do. + */ + kprobe_flush_task(tsk); + /* The process may have allocated an io port bitmap... nuke it. */ if (unlikely(NULL != t->io_bitmap_ptr)) { int cpu = get_cpu(); @@ -357,6 +370,13 @@ void flush_thread(void) { struct task_struct *tsk = current; + /* + * Remove function-return probe instances associated with this task + * and put them back on the free list. Do not insert an exit probe for + * this function, it will be disabled by kprobe_flush_task if you do. + */ + kprobe_flush_task(tsk); + memset(tsk->thread.debugreg, 0, sizeof(unsigned long)*8); memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); /* @@ -508,6 +528,7 @@ void dump_thread(struct pt_regs * regs, struct user * dump) dump->u_fpvalid = dump_fpu (regs, &dump->i387); } +EXPORT_SYMBOL(dump_thread); /* * Capture the user space registers if the task is not running (in user space) @@ -627,13 +648,13 @@ struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct tas * Now maybe reload the debug registers */ if (unlikely(next->debugreg[7])) { - loaddebug(next, 0); - loaddebug(next, 1); - loaddebug(next, 2); - loaddebug(next, 3); + set_debugreg(current->thread.debugreg[0], 0); + set_debugreg(current->thread.debugreg[1], 1); + set_debugreg(current->thread.debugreg[2], 2); + set_debugreg(current->thread.debugreg[3], 3); /* no 4 and 5 */ - loaddebug(next, 6); - loaddebug(next, 7); + set_debugreg(current->thread.debugreg[6], 6); + set_debugreg(current->thread.debugreg[7], 7); } if (unlikely(prev->io_bitmap_ptr || next->io_bitmap_ptr)) @@ -731,6 +752,7 @@ unsigned long get_wchan(struct task_struct *p) } while (count++ < 16); return 0; } +EXPORT_SYMBOL(get_wchan); /* * sys_alloc_thread_area: get a yet unused TLS descriptor index. diff --git a/arch/i386/kernel/ptrace.c b/arch/i386/kernel/ptrace.c index e34f651fa13c..0da59b42843c 100644 --- a/arch/i386/kernel/ptrace.c +++ b/arch/i386/kernel/ptrace.c @@ -668,7 +668,7 @@ void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code) info.si_code = TRAP_BRKPT; /* User-mode eip? */ - info.si_addr = user_mode(regs) ? (void __user *) regs->eip : NULL; + info.si_addr = user_mode_vm(regs) ? (void __user *) regs->eip : NULL; /* Send us the fakey SIGTRAP */ force_sig_info(SIGTRAP, &info, tsk); diff --git a/arch/i386/kernel/reboot.c b/arch/i386/kernel/reboot.c index 6dc27eb70ee7..db912209a8d3 100644 --- a/arch/i386/kernel/reboot.c +++ b/arch/i386/kernel/reboot.c @@ -2,6 +2,7 @@ * linux/arch/i386/kernel/reboot.c */ +#include <linux/config.h> #include <linux/mm.h> #include <linux/module.h> #include <linux/delay.h> @@ -19,6 +20,7 @@ * Power off function, if any */ void (*pm_power_off)(void); +EXPORT_SYMBOL(pm_power_off); static int reboot_mode; static int reboot_thru_bios; @@ -295,6 +297,9 @@ void machine_real_restart(unsigned char *code, int length) : : "i" ((void *) (0x1000 - sizeof (real_mode_switch) - 100))); } +#ifdef CONFIG_APM_MODULE +EXPORT_SYMBOL(machine_real_restart); +#endif void machine_restart(char * __unused) { diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c index 2bfbddebdbf8..30406fd0b64c 100644 --- a/arch/i386/kernel/setup.c +++ b/arch/i386/kernel/setup.c @@ -23,8 +23,10 @@ * This file handles the architecture-dependent parts of initialization */ +#include <linux/config.h> #include <linux/sched.h> #include <linux/mm.h> +#include <linux/mmzone.h> #include <linux/tty.h> #include <linux/ioport.h> #include <linux/acpi.h> @@ -73,6 +75,7 @@ EXPORT_SYMBOL(efi_enabled); struct cpuinfo_x86 new_cpu_data __initdata = { 0, 0, 0, 0, -1, 1, 0, 0, -1 }; /* common cpu data for all cpus */ struct cpuinfo_x86 boot_cpu_data = { 0, 0, 0, 0, -1, 1, 0, 0, -1 }; +EXPORT_SYMBOL(boot_cpu_data); unsigned long mmu_cr4_features; @@ -90,12 +93,18 @@ extern acpi_interrupt_flags acpi_sci_flags; /* for MCA, but anyone else can use it if they want */ unsigned int machine_id; +#ifdef CONFIG_MCA +EXPORT_SYMBOL(machine_id); +#endif unsigned int machine_submodel_id; unsigned int BIOS_revision; unsigned int mca_pentium_flag; /* For PCI or other memory-mapped resources */ unsigned long pci_mem_start = 0x10000000; +#ifdef CONFIG_PCI +EXPORT_SYMBOL(pci_mem_start); +#endif /* Boot loader ID as an integer, for the benefit of proc_dointvec */ int bootloader_type; @@ -107,14 +116,26 @@ static unsigned int highmem_pages = -1; * Setup options */ struct drive_info_struct { char dummy[32]; } drive_info; +#if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_HD) || \ + defined(CONFIG_BLK_DEV_IDE_MODULE) || defined(CONFIG_BLK_DEV_HD_MODULE) +EXPORT_SYMBOL(drive_info); +#endif struct screen_info screen_info; +#ifdef CONFIG_VT +EXPORT_SYMBOL(screen_info); +#endif struct apm_info apm_info; +EXPORT_SYMBOL(apm_info); struct sys_desc_table_struct { unsigned short length; unsigned char table[0]; }; struct edid_info edid_info; struct ist_info ist_info; +#if defined(CONFIG_X86_SPEEDSTEP_SMI) || \ + defined(CONFIG_X86_SPEEDSTEP_SMI_MODULE) +EXPORT_SYMBOL(ist_info); +#endif struct e820map e820; extern void early_cpu_init(void); @@ -1022,7 +1043,7 @@ static void __init reserve_ebda_region(void) reserve_bootmem(addr, PAGE_SIZE); } -#ifndef CONFIG_DISCONTIGMEM +#ifndef CONFIG_NEED_MULTIPLE_NODES void __init setup_bootmem_allocator(void); static unsigned long __init setup_memory(void) { @@ -1072,9 +1093,9 @@ void __init zone_sizes_init(void) free_area_init(zones_size); } #else -extern unsigned long setup_memory(void); +extern unsigned long __init setup_memory(void); extern void zone_sizes_init(void); -#endif /* !CONFIG_DISCONTIGMEM */ +#endif /* !CONFIG_NEED_MULTIPLE_NODES */ void __init setup_bootmem_allocator(void) { @@ -1475,6 +1496,7 @@ void __init setup_arch(char **cmdline_p) #endif paging_init(); remapped_pgdat_init(); + sparse_init(); zone_sizes_init(); /* diff --git a/arch/i386/kernel/signal.c b/arch/i386/kernel/signal.c index ea46d028af08..b9b8f4e20fad 100644 --- a/arch/i386/kernel/signal.c +++ b/arch/i386/kernel/signal.c @@ -346,8 +346,8 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs * regs, size_t frame_size) extern void __user __kernel_sigreturn; extern void __user __kernel_rt_sigreturn; -static void setup_frame(int sig, struct k_sigaction *ka, - sigset_t *set, struct pt_regs * regs) +static int setup_frame(int sig, struct k_sigaction *ka, + sigset_t *set, struct pt_regs * regs) { void __user *restorer; struct sigframe __user *frame; @@ -429,13 +429,14 @@ static void setup_frame(int sig, struct k_sigaction *ka, current->comm, current->pid, frame, regs->eip, frame->pretcode); #endif - return; + return 1; give_sigsegv: force_sigsegv(sig, current); + return 0; } -static void setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, +static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, sigset_t *set, struct pt_regs * regs) { void __user *restorer; @@ -522,20 +523,23 @@ static void setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, current->comm, current->pid, frame, regs->eip, frame->pretcode); #endif - return; + return 1; give_sigsegv: force_sigsegv(sig, current); + return 0; } /* * OK, we're invoking a handler */ -static void +static int handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, sigset_t *oldset, struct pt_regs * regs) { + int ret; + /* Are we from a system call? */ if (regs->orig_eax >= 0) { /* If so, check system call restarting.. */ @@ -569,17 +573,19 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, /* Set up the stack frame */ if (ka->sa.sa_flags & SA_SIGINFO) - setup_rt_frame(sig, ka, info, oldset, regs); + ret = setup_rt_frame(sig, ka, info, oldset, regs); else - setup_frame(sig, ka, oldset, regs); + ret = setup_frame(sig, ka, oldset, regs); - if (!(ka->sa.sa_flags & SA_NODEFER)) { + if (ret && !(ka->sa.sa_flags & SA_NODEFER)) { spin_lock_irq(¤t->sighand->siglock); sigorsets(¤t->blocked,¤t->blocked,&ka->sa.sa_mask); sigaddset(¤t->blocked,sig); recalc_sigpending(); spin_unlock_irq(¤t->sighand->siglock); } + + return ret; } /* @@ -599,7 +605,7 @@ int fastcall do_signal(struct pt_regs *regs, sigset_t *oldset) * kernel mode. Just return without doing anything * if so. */ - if ((regs->xcs & 3) != 3) + if (!user_mode(regs)) return 1; if (current->flags & PF_FREEZE) { @@ -618,12 +624,11 @@ int fastcall do_signal(struct pt_regs *regs, sigset_t *oldset) * inside the kernel. */ if (unlikely(current->thread.debugreg[7])) { - loaddebug(¤t->thread, 7); + set_debugreg(current->thread.debugreg[7], 7); } /* Whee! Actually deliver the signal. */ - handle_signal(signr, &info, &ka, oldset, regs); - return 1; + return handle_signal(signr, &info, &ka, oldset, regs); } no_signal: diff --git a/arch/i386/kernel/smp.c b/arch/i386/kernel/smp.c index 6223c33ac91c..68be7d0c7238 100644 --- a/arch/i386/kernel/smp.c +++ b/arch/i386/kernel/smp.c @@ -19,6 +19,7 @@ #include <linux/mc146818rtc.h> #include <linux/cache.h> #include <linux/interrupt.h> +#include <linux/module.h> #include <asm/mtrr.h> #include <asm/tlbflush.h> @@ -452,6 +453,7 @@ void flush_tlb_page(struct vm_area_struct * vma, unsigned long va) preempt_enable(); } +EXPORT_SYMBOL(flush_tlb_page); static void do_flush_tlb_all(void* info) { @@ -547,6 +549,7 @@ int smp_call_function (void (*func) (void *info), void *info, int nonatomic, return 0; } +EXPORT_SYMBOL(smp_call_function); static void stop_this_cpu (void * dummy) { diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c index bc1bb6919e6a..c20d96d5c15c 100644 --- a/arch/i386/kernel/smpboot.c +++ b/arch/i386/kernel/smpboot.c @@ -60,6 +60,9 @@ static int __initdata smp_b_stepping; /* Number of siblings per CPU package */ int smp_num_siblings = 1; +#ifdef CONFIG_X86_HT +EXPORT_SYMBOL(smp_num_siblings); +#endif int phys_proc_id[NR_CPUS]; /* Package ID of each logical CPU */ EXPORT_SYMBOL(phys_proc_id); int cpu_core_id[NR_CPUS]; /* Core ID of each logical CPU */ @@ -67,13 +70,16 @@ EXPORT_SYMBOL(cpu_core_id); /* bitmap of online cpus */ cpumask_t cpu_online_map; +EXPORT_SYMBOL(cpu_online_map); cpumask_t cpu_callin_map; cpumask_t cpu_callout_map; +EXPORT_SYMBOL(cpu_callout_map); static cpumask_t smp_commenced_mask; /* Per CPU bogomips and other parameters */ struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned; +EXPORT_SYMBOL(cpu_data); u8 x86_cpu_to_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = 0xff }; @@ -199,7 +205,7 @@ static void __init synchronize_tsc_bp (void) unsigned long long t0; unsigned long long sum, avg; long long delta; - unsigned long one_usec; + unsigned int one_usec; int buggy = 0; printk(KERN_INFO "checking TSC synchronization across %u CPUs: ", num_booting_cpus()); @@ -885,8 +891,14 @@ static void smp_tune_scheduling (void) static int boot_cpu_logical_apicid; /* Where the IO area was mapped on multiquad, always 0 otherwise */ void *xquad_portio; +#ifdef CONFIG_X86_NUMAQ +EXPORT_SYMBOL(xquad_portio); +#endif cpumask_t cpu_sibling_map[NR_CPUS] __cacheline_aligned; +#ifdef CONFIG_X86_HT +EXPORT_SYMBOL(cpu_sibling_map); +#endif cpumask_t cpu_core_map[NR_CPUS] __cacheline_aligned; EXPORT_SYMBOL(cpu_core_map); diff --git a/arch/i386/kernel/time.c b/arch/i386/kernel/time.c index a0dcb7c87c30..e68d9fdb0759 100644 --- a/arch/i386/kernel/time.c +++ b/arch/i386/kernel/time.c @@ -77,11 +77,13 @@ u64 jiffies_64 = INITIAL_JIFFIES; EXPORT_SYMBOL(jiffies_64); -unsigned long cpu_khz; /* Detected as we calibrate the TSC */ +unsigned int cpu_khz; /* Detected as we calibrate the TSC */ +EXPORT_SYMBOL(cpu_khz); extern unsigned long wall_jiffies; DEFINE_SPINLOCK(rtc_lock); +EXPORT_SYMBOL(rtc_lock); DEFINE_SPINLOCK(i8253_lock); EXPORT_SYMBOL(i8253_lock); @@ -324,6 +326,8 @@ unsigned long get_cmos_time(void) return retval; } +EXPORT_SYMBOL(get_cmos_time); + static void sync_cmos_clock(unsigned long dummy); static struct timer_list sync_cmos_timer = diff --git a/arch/i386/kernel/timers/common.c b/arch/i386/kernel/timers/common.c index 8e201219f525..37353bd31803 100644 --- a/arch/i386/kernel/timers/common.c +++ b/arch/i386/kernel/timers/common.c @@ -139,6 +139,15 @@ bad_calibration: } #endif + +unsigned long read_timer_tsc(void) +{ + unsigned long retval; + rdtscl(retval); + return retval; +} + + /* calculate cpu_khz */ void init_cpu_khz(void) { @@ -154,7 +163,8 @@ void init_cpu_khz(void) :"=a" (cpu_khz), "=d" (edx) :"r" (tsc_quotient), "0" (eax), "1" (edx)); - printk("Detected %lu.%03lu MHz processor.\n", cpu_khz / 1000, cpu_khz % 1000); + printk("Detected %u.%03u MHz processor.\n", + cpu_khz / 1000, cpu_khz % 1000); } } } diff --git a/arch/i386/kernel/timers/timer.c b/arch/i386/kernel/timers/timer.c index a3d6a288088b..7e39ed8e33f8 100644 --- a/arch/i386/kernel/timers/timer.c +++ b/arch/i386/kernel/timers/timer.c @@ -64,3 +64,12 @@ struct timer_opts* __init select_timer(void) panic("select_timer: Cannot find a suitable timer\n"); return NULL; } + +int read_current_timer(unsigned long *timer_val) +{ + if (cur_timer->read_timer) { + *timer_val = cur_timer->read_timer(); + return 0; + } + return -1; +} diff --git a/arch/i386/kernel/timers/timer_hpet.c b/arch/i386/kernel/timers/timer_hpet.c index f778f471a09a..d766e0963ac1 100644 --- a/arch/i386/kernel/timers/timer_hpet.c +++ b/arch/i386/kernel/timers/timer_hpet.c @@ -158,7 +158,7 @@ static int __init init_hpet(char* override) { unsigned long eax=0, edx=1000; ASM_DIV64_REG(cpu_khz, edx, tsc_quotient, eax, edx); - printk("Detected %lu.%03lu MHz processor.\n", + printk("Detected %u.%03u MHz processor.\n", cpu_khz / 1000, cpu_khz % 1000); } set_cyc2ns_scale(cpu_khz/1000); @@ -186,6 +186,7 @@ static struct timer_opts timer_hpet = { .get_offset = get_offset_hpet, .monotonic_clock = monotonic_clock_hpet, .delay = delay_hpet, + .read_timer = read_timer_tsc, }; struct init_timer_opts __initdata timer_hpet_init = { diff --git a/arch/i386/kernel/timers/timer_pm.c b/arch/i386/kernel/timers/timer_pm.c index d77f22030fe6..4ef20e663498 100644 --- a/arch/i386/kernel/timers/timer_pm.c +++ b/arch/i386/kernel/timers/timer_pm.c @@ -246,6 +246,7 @@ static struct timer_opts timer_pmtmr = { .get_offset = get_offset_pmtmr, .monotonic_clock = monotonic_clock_pmtmr, .delay = delay_pmtmr, + .read_timer = read_timer_tsc, }; struct init_timer_opts __initdata timer_pmtmr_init = { diff --git a/arch/i386/kernel/timers/timer_tsc.c b/arch/i386/kernel/timers/timer_tsc.c index 180444d87824..54c36b182021 100644 --- a/arch/i386/kernel/timers/timer_tsc.c +++ b/arch/i386/kernel/timers/timer_tsc.c @@ -256,7 +256,7 @@ static unsigned long loops_per_jiffy_ref = 0; #ifndef CONFIG_SMP static unsigned long fast_gettimeoffset_ref = 0; -static unsigned long cpu_khz_ref = 0; +static unsigned int cpu_khz_ref = 0; #endif static int @@ -323,7 +323,7 @@ static inline void cpufreq_delayed_get(void) { return; } int recalibrate_cpu_khz(void) { #ifndef CONFIG_SMP - unsigned long cpu_khz_old = cpu_khz; + unsigned int cpu_khz_old = cpu_khz; if (cpu_has_tsc) { init_cpu_khz(); @@ -534,7 +534,8 @@ static int __init init_tsc(char* override) :"=a" (cpu_khz), "=d" (edx) :"r" (tsc_quotient), "0" (eax), "1" (edx)); - printk("Detected %lu.%03lu MHz processor.\n", cpu_khz / 1000, cpu_khz % 1000); + printk("Detected %u.%03u MHz processor.\n", + cpu_khz / 1000, cpu_khz % 1000); } set_cyc2ns_scale(cpu_khz/1000); return 0; @@ -572,6 +573,7 @@ static struct timer_opts timer_tsc = { .get_offset = get_offset_tsc, .monotonic_clock = monotonic_clock_tsc, .delay = delay_tsc, + .read_timer = read_timer_tsc, }; struct init_timer_opts __initdata timer_tsc_init = { diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c index 83c579e82a81..e4d4e2162c7a 100644 --- a/arch/i386/kernel/traps.c +++ b/arch/i386/kernel/traps.c @@ -104,6 +104,7 @@ int register_die_notifier(struct notifier_block *nb) spin_unlock_irqrestore(&die_notifier_lock, flags); return err; } +EXPORT_SYMBOL(register_die_notifier); static inline int valid_stack_ptr(struct thread_info *tinfo, void *p) { @@ -209,7 +210,7 @@ void show_registers(struct pt_regs *regs) esp = (unsigned long) (®s->esp); ss = __KERNEL_DS; - if (regs->xcs & 3) { + if (user_mode(regs)) { in_kernel = 0; esp = regs->esp; ss = regs->xss & 0xffff; @@ -265,7 +266,7 @@ static void handle_BUG(struct pt_regs *regs) char c; unsigned long eip; - if (regs->xcs & 3) + if (user_mode(regs)) goto no_bug; /* Not in kernel */ eip = regs->eip; @@ -353,7 +354,7 @@ void die(const char * str, struct pt_regs * regs, long err) static inline void die_if_kernel(const char * str, struct pt_regs * regs, long err) { - if (!(regs->eflags & VM_MASK) && !(3 & regs->xcs)) + if (!user_mode_vm(regs)) die(str, regs, err); } @@ -366,7 +367,7 @@ static void do_trap(int trapnr, int signr, char *str, int vm86, goto trap_signal; } - if (!(regs->xcs & 3)) + if (!user_mode(regs)) goto kernel_trap; trap_signal: { @@ -488,7 +489,7 @@ fastcall void do_general_protection(struct pt_regs * regs, long error_code) if (regs->eflags & VM_MASK) goto gp_in_vm86; - if (!(regs->xcs & 3)) + if (!user_mode(regs)) goto gp_in_kernel; current->thread.error_code = error_code; @@ -636,11 +637,13 @@ void set_nmi_callback(nmi_callback_t callback) { nmi_callback = callback; } +EXPORT_SYMBOL_GPL(set_nmi_callback); void unset_nmi_callback(void) { nmi_callback = dummy_nmi_callback; } +EXPORT_SYMBOL_GPL(unset_nmi_callback); #ifdef CONFIG_KPROBES fastcall void do_int3(struct pt_regs *regs, long error_code) @@ -682,7 +685,7 @@ fastcall void do_debug(struct pt_regs * regs, long error_code) unsigned int condition; struct task_struct *tsk = current; - __asm__ __volatile__("movl %%db6,%0" : "=r" (condition)); + get_debugreg(condition, 6); if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code, SIGTRAP) == NOTIFY_STOP) @@ -713,7 +716,7 @@ fastcall void do_debug(struct pt_regs * regs, long error_code) * check for kernel mode by just checking the CPL * of CS. */ - if ((regs->xcs & 3) == 0) + if (!user_mode(regs)) goto clear_TF_reenable; } @@ -724,9 +727,7 @@ fastcall void do_debug(struct pt_regs * regs, long error_code) * the signal is delivered. */ clear_dr7: - __asm__("movl %0,%%db7" - : /* no output */ - : "r" (0)); + set_debugreg(0, 7); return; debug_vm86: diff --git a/arch/i386/lib/dec_and_lock.c b/arch/i386/lib/dec_and_lock.c index ab43394dc775..8b81b2524fa6 100644 --- a/arch/i386/lib/dec_and_lock.c +++ b/arch/i386/lib/dec_and_lock.c @@ -8,6 +8,7 @@ */ #include <linux/spinlock.h> +#include <linux/module.h> #include <asm/atomic.h> int _atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock) @@ -38,3 +39,4 @@ slow_path: spin_unlock(lock); return 0; } +EXPORT_SYMBOL(_atomic_dec_and_lock); diff --git a/arch/i386/lib/delay.c b/arch/i386/lib/delay.c index eb0cdfe9280f..c49a6acbee56 100644 --- a/arch/i386/lib/delay.c +++ b/arch/i386/lib/delay.c @@ -13,6 +13,7 @@ #include <linux/config.h> #include <linux/sched.h> #include <linux/delay.h> +#include <linux/module.h> #include <asm/processor.h> #include <asm/delay.h> #include <asm/timer.h> @@ -47,3 +48,8 @@ void __ndelay(unsigned long nsecs) { __const_udelay(nsecs * 0x00005); /* 2**32 / 1000000000 (rounded up) */ } + +EXPORT_SYMBOL(__delay); +EXPORT_SYMBOL(__const_udelay); +EXPORT_SYMBOL(__udelay); +EXPORT_SYMBOL(__ndelay); diff --git a/arch/i386/lib/mmx.c b/arch/i386/lib/mmx.c index 01f8b1a2cc84..2afda94dffd3 100644 --- a/arch/i386/lib/mmx.c +++ b/arch/i386/lib/mmx.c @@ -3,6 +3,7 @@ #include <linux/string.h> #include <linux/sched.h> #include <linux/hardirq.h> +#include <linux/module.h> #include <asm/i387.h> @@ -397,3 +398,7 @@ void mmx_copy_page(void *to, void *from) else fast_copy_page(to, from); } + +EXPORT_SYMBOL(_mmx_memcpy); +EXPORT_SYMBOL(mmx_clear_page); +EXPORT_SYMBOL(mmx_copy_page); diff --git a/arch/i386/lib/usercopy.c b/arch/i386/lib/usercopy.c index 51aa2bbb0269..4cf981d70f45 100644 --- a/arch/i386/lib/usercopy.c +++ b/arch/i386/lib/usercopy.c @@ -84,6 +84,7 @@ __strncpy_from_user(char *dst, const char __user *src, long count) __do_strncpy_from_user(dst, src, count, res); return res; } +EXPORT_SYMBOL(__strncpy_from_user); /** * strncpy_from_user: - Copy a NUL terminated string from userspace. @@ -111,7 +112,7 @@ strncpy_from_user(char *dst, const char __user *src, long count) __do_strncpy_from_user(dst, src, count, res); return res; } - +EXPORT_SYMBOL(strncpy_from_user); /* * Zero Userspace @@ -157,6 +158,7 @@ clear_user(void __user *to, unsigned long n) __do_clear_user(to, n); return n; } +EXPORT_SYMBOL(clear_user); /** * __clear_user: - Zero a block of memory in user space, with less checking. @@ -175,6 +177,7 @@ __clear_user(void __user *to, unsigned long n) __do_clear_user(to, n); return n; } +EXPORT_SYMBOL(__clear_user); /** * strlen_user: - Get the size of a string in user space. @@ -218,6 +221,7 @@ long strnlen_user(const char __user *s, long n) :"cc"); return res & mask; } +EXPORT_SYMBOL(strnlen_user); #ifdef CONFIG_X86_INTEL_USERCOPY static unsigned long @@ -570,6 +574,7 @@ survive: n = __copy_user_intel(to, from, n); return n; } +EXPORT_SYMBOL(__copy_to_user_ll); unsigned long __copy_from_user_ll(void *to, const void __user *from, unsigned long n) @@ -581,6 +586,7 @@ __copy_from_user_ll(void *to, const void __user *from, unsigned long n) n = __copy_user_zeroing_intel(to, from, n); return n; } +EXPORT_SYMBOL(__copy_from_user_ll); /** * copy_to_user: - Copy a block of data into user space. diff --git a/arch/i386/mach-voyager/voyager_smp.c b/arch/i386/mach-voyager/voyager_smp.c index a6e0ddd65bd0..8c8527593da0 100644 --- a/arch/i386/mach-voyager/voyager_smp.c +++ b/arch/i386/mach-voyager/voyager_smp.c @@ -1288,7 +1288,7 @@ smp_local_timer_interrupt(struct pt_regs * regs) per_cpu(prof_counter, cpu); } - update_process_times(user_mode(regs)); + update_process_times(user_mode_vm(regs)); } if( ((1<<cpu) & voyager_extended_vic_processors) == 0) diff --git a/arch/i386/mm/Makefile b/arch/i386/mm/Makefile index fc3272506846..80908b5aa60f 100644 --- a/arch/i386/mm/Makefile +++ b/arch/i386/mm/Makefile @@ -4,7 +4,7 @@ obj-y := init.o pgtable.o fault.o ioremap.o extable.o pageattr.o mmap.o -obj-$(CONFIG_DISCONTIGMEM) += discontig.o +obj-$(CONFIG_NUMA) += discontig.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o obj-$(CONFIG_HIGHMEM) += highmem.o obj-$(CONFIG_BOOT_IOREMAP) += boot_ioremap.o diff --git a/arch/i386/mm/discontig.c b/arch/i386/mm/discontig.c index 1726b4096b10..f429c871e845 100644 --- a/arch/i386/mm/discontig.c +++ b/arch/i386/mm/discontig.c @@ -29,12 +29,14 @@ #include <linux/highmem.h> #include <linux/initrd.h> #include <linux/nodemask.h> +#include <linux/module.h> #include <asm/e820.h> #include <asm/setup.h> #include <asm/mmzone.h> #include <bios_ebda.h> struct pglist_data *node_data[MAX_NUMNODES]; +EXPORT_SYMBOL(node_data); bootmem_data_t node0_bdata; /* @@ -42,12 +44,16 @@ bootmem_data_t node0_bdata; * populated the following initialisation. * * 1) node_online_map - the map of all nodes configured (online) in the system - * 2) physnode_map - the mapping between a pfn and owning node - * 3) node_start_pfn - the starting page frame number for a node + * 2) node_start_pfn - the starting page frame number for a node * 3) node_end_pfn - the ending page fram number for a node */ +unsigned long node_start_pfn[MAX_NUMNODES]; +unsigned long node_end_pfn[MAX_NUMNODES]; + +#ifdef CONFIG_DISCONTIGMEM /* + * 4) physnode_map - the mapping between a pfn and owning node * physnode_map keeps track of the physical memory layout of a generic * numa node on a 256Mb break (each element of the array will * represent 256Mb of memory and will be marked by the node id. so, @@ -59,6 +65,7 @@ bootmem_data_t node0_bdata; * physnode_map[8- ] = -1; */ s8 physnode_map[MAX_ELEMENTS] = { [0 ... (MAX_ELEMENTS - 1)] = -1}; +EXPORT_SYMBOL(physnode_map); void memory_present(int nid, unsigned long start, unsigned long end) { @@ -85,9 +92,7 @@ unsigned long node_memmap_size_bytes(int nid, unsigned long start_pfn, return (nr_pages + 1) * sizeof(struct page); } - -unsigned long node_start_pfn[MAX_NUMNODES]; -unsigned long node_end_pfn[MAX_NUMNODES]; +#endif extern unsigned long find_max_low_pfn(void); extern void find_max_pfn(void); @@ -108,6 +113,9 @@ unsigned long node_remap_offset[MAX_NUMNODES]; void *node_remap_start_vaddr[MAX_NUMNODES]; void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags); +void *node_remap_end_vaddr[MAX_NUMNODES]; +void *node_remap_alloc_vaddr[MAX_NUMNODES]; + /* * FLAT - support for basic PC memory model with discontig enabled, essentially * a single node with all available processors in it with a flat @@ -146,6 +154,21 @@ static void __init find_max_pfn_node(int nid) BUG(); } +/* Find the owning node for a pfn. */ +int early_pfn_to_nid(unsigned long pfn) +{ + int nid; + + for_each_node(nid) { + if (node_end_pfn[nid] == 0) + break; + if (node_start_pfn[nid] <= pfn && node_end_pfn[nid] >= pfn) + return nid; + } + + return 0; +} + /* * Allocate memory for the pg_data_t for this node via a crude pre-bootmem * method. For node zero take this from the bottom of memory, for @@ -163,6 +186,21 @@ static void __init allocate_pgdat(int nid) } } +void *alloc_remap(int nid, unsigned long size) +{ + void *allocation = node_remap_alloc_vaddr[nid]; + + size = ALIGN(size, L1_CACHE_BYTES); + + if (!allocation || (allocation + size) >= node_remap_end_vaddr[nid]) + return 0; + + node_remap_alloc_vaddr[nid] += size; + memset(allocation, 0, size); + + return allocation; +} + void __init remap_numa_kva(void) { void *vaddr; @@ -170,8 +208,6 @@ void __init remap_numa_kva(void) int node; for_each_online_node(node) { - if (node == 0) - continue; for (pfn=0; pfn < node_remap_size[node]; pfn += PTRS_PER_PTE) { vaddr = node_remap_start_vaddr[node]+(pfn<<PAGE_SHIFT); set_pmd_pfn((ulong) vaddr, @@ -185,13 +221,9 @@ static unsigned long calculate_numa_remap_pages(void) { int nid; unsigned long size, reserve_pages = 0; + unsigned long pfn; for_each_online_node(nid) { - if (nid == 0) - continue; - if (!node_remap_size[nid]) - continue; - /* * The acpi/srat node info can show hot-add memroy zones * where memory could be added but not currently present. @@ -208,11 +240,24 @@ static unsigned long calculate_numa_remap_pages(void) size = (size + LARGE_PAGE_BYTES - 1) / LARGE_PAGE_BYTES; /* now the roundup is correct, convert to PAGE_SIZE pages */ size = size * PTRS_PER_PTE; + + /* + * Validate the region we are allocating only contains valid + * pages. + */ + for (pfn = node_end_pfn[nid] - size; + pfn < node_end_pfn[nid]; pfn++) + if (!page_is_ram(pfn)) + break; + + if (pfn != node_end_pfn[nid]) + size = 0; + printk("Reserving %ld pages of KVA for lmem_map of node %d\n", size, nid); node_remap_size[nid] = size; - reserve_pages += size; node_remap_offset[nid] = reserve_pages; + reserve_pages += size; printk("Shrinking node %d from %ld pages to %ld pages\n", nid, node_end_pfn[nid], node_end_pfn[nid] - size); node_end_pfn[nid] -= size; @@ -265,12 +310,18 @@ unsigned long __init setup_memory(void) (ulong) pfn_to_kaddr(max_low_pfn)); for_each_online_node(nid) { node_remap_start_vaddr[nid] = pfn_to_kaddr( - (highstart_pfn + reserve_pages) - node_remap_offset[nid]); + highstart_pfn + node_remap_offset[nid]); + /* Init the node remap allocator */ + node_remap_end_vaddr[nid] = node_remap_start_vaddr[nid] + + (node_remap_size[nid] * PAGE_SIZE); + node_remap_alloc_vaddr[nid] = node_remap_start_vaddr[nid] + + ALIGN(sizeof(pg_data_t), PAGE_SIZE); + allocate_pgdat(nid); printk ("node %d will remap to vaddr %08lx - %08lx\n", nid, (ulong) node_remap_start_vaddr[nid], - (ulong) pfn_to_kaddr(highstart_pfn + reserve_pages - - node_remap_offset[nid] + node_remap_size[nid])); + (ulong) pfn_to_kaddr(highstart_pfn + + node_remap_offset[nid] + node_remap_size[nid])); } printk("High memory starts at vaddr %08lx\n", (ulong) pfn_to_kaddr(highstart_pfn)); @@ -333,23 +384,9 @@ void __init zone_sizes_init(void) } zholes_size = get_zholes_size(nid); - /* - * We let the lmem_map for node 0 be allocated from the - * normal bootmem allocator, but other nodes come from the - * remapped KVA area - mbligh - */ - if (!nid) - free_area_init_node(nid, NODE_DATA(nid), - zones_size, start, zholes_size); - else { - unsigned long lmem_map; - lmem_map = (unsigned long)node_remap_start_vaddr[nid]; - lmem_map += sizeof(pg_data_t) + PAGE_SIZE - 1; - lmem_map &= PAGE_MASK; - NODE_DATA(nid)->node_mem_map = (struct page *)lmem_map; - free_area_init_node(nid, NODE_DATA(nid), zones_size, - start, zholes_size); - } + + free_area_init_node(nid, NODE_DATA(nid), zones_size, start, + zholes_size); } return; } @@ -358,24 +395,26 @@ void __init set_highmem_pages_init(int bad_ppro) { #ifdef CONFIG_HIGHMEM struct zone *zone; + struct page *page; for_each_zone(zone) { - unsigned long node_pfn, node_high_size, zone_start_pfn; - struct page * zone_mem_map; - + unsigned long node_pfn, zone_start_pfn, zone_end_pfn; + if (!is_highmem(zone)) continue; - printk("Initializing %s for node %d\n", zone->name, - zone->zone_pgdat->node_id); - - node_high_size = zone->spanned_pages; - zone_mem_map = zone->zone_mem_map; zone_start_pfn = zone->zone_start_pfn; + zone_end_pfn = zone_start_pfn + zone->spanned_pages; + + printk("Initializing %s for node %d (%08lx:%08lx)\n", + zone->name, zone->zone_pgdat->node_id, + zone_start_pfn, zone_end_pfn); - for (node_pfn = 0; node_pfn < node_high_size; node_pfn++) { - one_highpage_init((struct page *)(zone_mem_map + node_pfn), - zone_start_pfn + node_pfn, bad_ppro); + for (node_pfn = zone_start_pfn; node_pfn < zone_end_pfn; node_pfn++) { + if (!pfn_valid(node_pfn)) + continue; + page = pfn_to_page(node_pfn); + one_highpage_init(page, node_pfn, bad_ppro); } } totalram_pages += totalhigh_pages; diff --git a/arch/i386/mm/highmem.c b/arch/i386/mm/highmem.c index fc4c4cad4e98..4b7aaf99d7ea 100644 --- a/arch/i386/mm/highmem.c +++ b/arch/i386/mm/highmem.c @@ -1,4 +1,5 @@ #include <linux/highmem.h> +#include <linux/module.h> void *kmap(struct page *page) { @@ -87,3 +88,8 @@ struct page *kmap_atomic_to_page(void *ptr) return pte_page(*pte); } +EXPORT_SYMBOL(kmap); +EXPORT_SYMBOL(kunmap); +EXPORT_SYMBOL(kmap_atomic); +EXPORT_SYMBOL(kunmap_atomic); +EXPORT_SYMBOL(kmap_atomic_to_page); diff --git a/arch/i386/mm/init.c b/arch/i386/mm/init.c index 8766c771bb45..3672e2ef51ae 100644 --- a/arch/i386/mm/init.c +++ b/arch/i386/mm/init.c @@ -191,7 +191,7 @@ static inline int page_kills_ppro(unsigned long pagenr) extern int is_available_memory(efi_memory_desc_t *); -static inline int page_is_ram(unsigned long pagenr) +int page_is_ram(unsigned long pagenr) { int i; unsigned long addr, end; @@ -276,7 +276,9 @@ void __init one_highpage_init(struct page *page, int pfn, int bad_ppro) SetPageReserved(page); } -#ifndef CONFIG_DISCONTIGMEM +#ifdef CONFIG_NUMA +extern void set_highmem_pages_init(int); +#else static void __init set_highmem_pages_init(int bad_ppro) { int pfn; @@ -284,9 +286,7 @@ static void __init set_highmem_pages_init(int bad_ppro) one_highpage_init(pfn_to_page(pfn), pfn, bad_ppro); totalram_pages += totalhigh_pages; } -#else -extern void set_highmem_pages_init(int); -#endif /* !CONFIG_DISCONTIGMEM */ +#endif /* CONFIG_FLATMEM */ #else #define kmap_init() do { } while (0) @@ -295,12 +295,13 @@ extern void set_highmem_pages_init(int); #endif /* CONFIG_HIGHMEM */ unsigned long long __PAGE_KERNEL = _PAGE_KERNEL; +EXPORT_SYMBOL(__PAGE_KERNEL); unsigned long long __PAGE_KERNEL_EXEC = _PAGE_KERNEL_EXEC; -#ifndef CONFIG_DISCONTIGMEM -#define remap_numa_kva() do {} while (0) -#else +#ifdef CONFIG_NUMA extern void __init remap_numa_kva(void); +#else +#define remap_numa_kva() do {} while (0) #endif static void __init pagetable_init (void) @@ -525,7 +526,7 @@ static void __init set_max_mapnr_init(void) #else num_physpages = max_low_pfn; #endif -#ifndef CONFIG_DISCONTIGMEM +#ifdef CONFIG_FLATMEM max_mapnr = num_physpages; #endif } @@ -539,7 +540,7 @@ void __init mem_init(void) int tmp; int bad_ppro; -#ifndef CONFIG_DISCONTIGMEM +#ifdef CONFIG_FLATMEM if (!mem_map) BUG(); #endif diff --git a/arch/i386/mm/ioremap.c b/arch/i386/mm/ioremap.c index ab542792b27b..d393eefc7052 100644 --- a/arch/i386/mm/ioremap.c +++ b/arch/i386/mm/ioremap.c @@ -11,6 +11,7 @@ #include <linux/vmalloc.h> #include <linux/init.h> #include <linux/slab.h> +#include <linux/module.h> #include <asm/io.h> #include <asm/fixmap.h> #include <asm/cacheflush.h> @@ -165,7 +166,7 @@ void __iomem * __ioremap(unsigned long phys_addr, unsigned long size, unsigned l } return (void __iomem *) (offset + (char __iomem *)addr); } - +EXPORT_SYMBOL(__ioremap); /** * ioremap_nocache - map bus memory into CPU space @@ -222,6 +223,7 @@ void __iomem *ioremap_nocache (unsigned long phys_addr, unsigned long size) return p; } +EXPORT_SYMBOL(ioremap_nocache); void iounmap(volatile void __iomem *addr) { @@ -255,6 +257,7 @@ out_unlock: write_unlock(&vmlist_lock); kfree(p); } +EXPORT_SYMBOL(iounmap); void __init *bt_ioremap(unsigned long phys_addr, unsigned long size) { diff --git a/arch/i386/mm/pgtable.c b/arch/i386/mm/pgtable.c index dd81479ff88a..270c59f099a4 100644 --- a/arch/i386/mm/pgtable.c +++ b/arch/i386/mm/pgtable.c @@ -30,13 +30,14 @@ void show_mem(void) struct page *page; pg_data_t *pgdat; unsigned long i; + struct page_state ps; printk("Mem-info:\n"); show_free_areas(); printk("Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10)); for_each_pgdat(pgdat) { for (i = 0; i < pgdat->node_spanned_pages; ++i) { - page = pgdat->node_mem_map + i; + page = pgdat_page_nr(pgdat, i); total++; if (PageHighMem(page)) highmem++; @@ -53,6 +54,13 @@ void show_mem(void) printk("%d reserved pages\n",reserved); printk("%d pages shared\n",shared); printk("%d pages swap cached\n",cached); + + get_page_state(&ps); + printk("%lu pages dirty\n", ps.nr_dirty); + printk("%lu pages writeback\n", ps.nr_writeback); + printk("%lu pages mapped\n", ps.nr_mapped); + printk("%lu pages slab\n", ps.nr_slab); + printk("%lu pages pagetables\n", ps.nr_page_table_pages); } /* diff --git a/arch/i386/oprofile/backtrace.c b/arch/i386/oprofile/backtrace.c index 52d72e074f7f..65dfd2edb671 100644 --- a/arch/i386/oprofile/backtrace.c +++ b/arch/i386/oprofile/backtrace.c @@ -91,7 +91,7 @@ x86_backtrace(struct pt_regs * const regs, unsigned int depth) head = (struct frame_head *)regs->ebp; #endif - if (!user_mode(regs)) { + if (!user_mode_vm(regs)) { while (depth-- && valid_kernel_stack(head, regs)) head = dump_backtrace(head); return; diff --git a/arch/i386/pci/irq.c b/arch/i386/pci/irq.c index da21b1d07c15..83458f81e661 100644 --- a/arch/i386/pci/irq.c +++ b/arch/i386/pci/irq.c @@ -227,6 +227,24 @@ static int pirq_via_set(struct pci_dev *router, struct pci_dev *dev, int pirq, i } /* + * The VIA pirq rules are nibble-based, like ALI, + * but without the ugly irq number munging. + * However, for 82C586, nibble map is different . + */ +static int pirq_via586_get(struct pci_dev *router, struct pci_dev *dev, int pirq) +{ + static unsigned int pirqmap[4] = { 3, 2, 5, 1 }; + return read_config_nybble(router, 0x55, pirqmap[pirq-1]); +} + +static int pirq_via586_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) +{ + static unsigned int pirqmap[4] = { 3, 2, 5, 1 }; + write_config_nybble(router, 0x55, pirqmap[pirq-1], irq); + return 1; +} + +/* * ITE 8330G pirq rules are nibble-based * FIXME: pirqmap may be { 1, 0, 3, 2 }, * 2+3 are both mapped to irq 9 on my system @@ -512,6 +530,10 @@ static __init int via_router_probe(struct irq_router *r, struct pci_dev *router, switch(device) { case PCI_DEVICE_ID_VIA_82C586_0: + r->name = "VIA"; + r->get = pirq_via586_get; + r->set = pirq_via586_set; + return 1; case PCI_DEVICE_ID_VIA_82C596: case PCI_DEVICE_ID_VIA_82C686: case PCI_DEVICE_ID_VIA_8231: diff --git a/arch/i386/pci/pcbios.c b/arch/i386/pci/pcbios.c index 141421b673b0..b9d65f0bc2d1 100644 --- a/arch/i386/pci/pcbios.c +++ b/arch/i386/pci/pcbios.c @@ -4,6 +4,7 @@ #include <linux/pci.h> #include <linux/init.h> +#include <linux/module.h> #include "pci.h" #include "pci-functions.h" @@ -456,7 +457,7 @@ struct irq_routing_table * __devinit pcibios_get_irq_routing_table(void) free_page(page); return rt; } - +EXPORT_SYMBOL(pcibios_get_irq_routing_table); int pcibios_set_irq_routing(struct pci_dev *dev, int pin, int irq) { @@ -473,6 +474,7 @@ int pcibios_set_irq_routing(struct pci_dev *dev, int pin, int irq) "S" (&pci_indirect)); return !(ret & 0xff00); } +EXPORT_SYMBOL(pcibios_set_irq_routing); static int __init pci_pcbios_init(void) { diff --git a/arch/i386/power/cpu.c b/arch/i386/power/cpu.c index cf337c673d92..6f521cf19a13 100644 --- a/arch/i386/power/cpu.c +++ b/arch/i386/power/cpu.c @@ -94,13 +94,13 @@ static void fix_processor_context(void) * Now maybe reload the debug registers */ if (current->thread.debugreg[7]){ - loaddebug(¤t->thread, 0); - loaddebug(¤t->thread, 1); - loaddebug(¤t->thread, 2); - loaddebug(¤t->thread, 3); - /* no 4 and 5 */ - loaddebug(¤t->thread, 6); - loaddebug(¤t->thread, 7); + set_debugreg(current->thread.debugreg[0], 0); + set_debugreg(current->thread.debugreg[1], 1); + set_debugreg(current->thread.debugreg[2], 2); + set_debugreg(current->thread.debugreg[3], 3); + /* no 4 and 5 */ + set_debugreg(current->thread.debugreg[6], 6); + set_debugreg(current->thread.debugreg[7], 7); } } diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index ce4dfa8b834d..01b78e7f992e 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -161,6 +161,8 @@ config IA64_PAGE_SIZE_64KB endchoice +source kernel/Kconfig.hz + config IA64_BRL_EMU bool depends on ITANIUM @@ -197,7 +199,7 @@ config HOLES_IN_ZONE bool default y if VIRTUAL_MEM_MAP -config DISCONTIGMEM +config ARCH_DISCONTIGMEM_ENABLE bool "Discontiguous memory support" depends on (IA64_DIG || IA64_SGI_SN2 || IA64_GENERIC || IA64_HP_ZX1 || IA64_HP_ZX1_SWIOTLB) && NUMA && VIRTUAL_MEM_MAP default y if (IA64_SGI_SN2 || IA64_GENERIC) && NUMA @@ -300,6 +302,8 @@ config PREEMPT Say Y here if you are building a kernel for a desktop, embedded or real-time system. Say N if you are unsure. +source "mm/Kconfig" + config HAVE_DEC_LOCK bool depends on (SMP || PREEMPT) diff --git a/arch/ia64/Kconfig.debug b/arch/ia64/Kconfig.debug index de9d507ba0fd..fda67ac993d7 100644 --- a/arch/ia64/Kconfig.debug +++ b/arch/ia64/Kconfig.debug @@ -2,6 +2,17 @@ menu "Kernel hacking" source "lib/Kconfig.debug" +config KPROBES + bool "Kprobes" + depends on DEBUG_KERNEL + help + Kprobes allows you to trap at almost any kernel address and + execute a callback function. register_kprobe() establishes + a probepoint and specifies the callback. Kprobes is useful + for kernel debugging, non-intrusive instrumentation and testing. + If in doubt, say "N". + + choice prompt "Physical memory granularity" default IA64_GRANULE_64MB diff --git a/arch/ia64/configs/sn2_defconfig b/arch/ia64/configs/sn2_defconfig index a01bb02d074d..487d2e36b0a6 100644 --- a/arch/ia64/configs/sn2_defconfig +++ b/arch/ia64/configs/sn2_defconfig @@ -78,7 +78,7 @@ CONFIG_IA64_L1_CACHE_SHIFT=7 CONFIG_NUMA=y CONFIG_VIRTUAL_MEM_MAP=y CONFIG_HOLES_IN_ZONE=y -CONFIG_DISCONTIGMEM=y +CONFIG_ARCH_DISCONTIGMEM_ENABLE=y # CONFIG_IA64_CYCLONE is not set CONFIG_IOSAPIC=y CONFIG_IA64_SGI_SN_SIM=y diff --git a/arch/ia64/defconfig b/arch/ia64/defconfig index 7be8096e0561..8444add76380 100644 --- a/arch/ia64/defconfig +++ b/arch/ia64/defconfig @@ -84,7 +84,7 @@ CONFIG_IA64_L1_CACHE_SHIFT=7 CONFIG_NUMA=y CONFIG_VIRTUAL_MEM_MAP=y CONFIG_HOLES_IN_ZONE=y -CONFIG_DISCONTIGMEM=y +CONFIG_ARCH_DISCONTIGMEM_ENABLE=y CONFIG_IA64_CYCLONE=y CONFIG_IOSAPIC=y CONFIG_FORCE_MAX_ZONEORDER=18 diff --git a/arch/ia64/ia32/ia32priv.h b/arch/ia64/ia32/ia32priv.h index b2de948bdaea..e3e9290e3ff2 100644 --- a/arch/ia64/ia32/ia32priv.h +++ b/arch/ia64/ia32/ia32priv.h @@ -241,7 +241,7 @@ typedef struct compat_siginfo { /* POSIX.1b timers */ struct { - timer_t _tid; /* timer id */ + compat_timer_t _tid; /* timer id */ int _overrun; /* overrun count */ char _pad[sizeof(unsigned int) - sizeof(int)]; compat_sigval_t _sigval; /* same as below */ diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile index 4c73d8ba2e3d..b2e2f6509eb0 100644 --- a/arch/ia64/kernel/Makefile +++ b/arch/ia64/kernel/Makefile @@ -20,6 +20,7 @@ obj-$(CONFIG_SMP) += smp.o smpboot.o domain.o obj-$(CONFIG_PERFMON) += perfmon_default_smpl.o obj-$(CONFIG_IA64_CYCLONE) += cyclone.o obj-$(CONFIG_IA64_MCA_RECOVERY) += mca_recovery.o +obj-$(CONFIG_KPROBES) += kprobes.o jprobes.o obj-$(CONFIG_IA64_UNCACHED_ALLOCATOR) += uncached.o mca_recovery-y += mca_drv.o mca_drv_asm.o diff --git a/arch/ia64/kernel/jprobes.S b/arch/ia64/kernel/jprobes.S new file mode 100644 index 000000000000..b7fa3ccd2b0f --- /dev/null +++ b/arch/ia64/kernel/jprobes.S @@ -0,0 +1,61 @@ +/* + * Jprobe specific operations + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) Intel Corporation, 2005 + * + * 2005-May Rusty Lynch <rusty.lynch@intel.com> and Anil S Keshavamurthy + * <anil.s.keshavamurthy@intel.com> initial implementation + * + * Jprobes (a.k.a. "jump probes" which is built on-top of kprobes) allow a + * probe to be inserted into the beginning of a function call. The fundamental + * difference between a jprobe and a kprobe is the jprobe handler is executed + * in the same context as the target function, while the kprobe handlers + * are executed in interrupt context. + * + * For jprobes we initially gain control by placing a break point in the + * first instruction of the targeted function. When we catch that specific + * break, we: + * * set the return address to our jprobe_inst_return() function + * * jump to the jprobe handler function + * + * Since we fixed up the return address, the jprobe handler will return to our + * jprobe_inst_return() function, giving us control again. At this point we + * are back in the parents frame marker, so we do yet another call to our + * jprobe_break() function to fix up the frame marker as it would normally + * exist in the target function. + * + * Our jprobe_return function then transfers control back to kprobes.c by + * executing a break instruction using one of our reserved numbers. When we + * catch that break in kprobes.c, we continue like we do for a normal kprobe + * by single stepping the emulated instruction, and then returning execution + * to the correct location. + */ +#include <asm/asmmacro.h> + + /* + * void jprobe_break(void) + */ +ENTRY(jprobe_break) + break.m 0x80300 +END(jprobe_break) + + /* + * void jprobe_inst_return(void) + */ +GLOBAL_ENTRY(jprobe_inst_return) + br.call.sptk.many b0=jprobe_break +END(jprobe_inst_return) diff --git a/arch/ia64/kernel/kprobes.c b/arch/ia64/kernel/kprobes.c new file mode 100644 index 000000000000..5978823d5c63 --- /dev/null +++ b/arch/ia64/kernel/kprobes.c @@ -0,0 +1,601 @@ +/* + * Kernel Probes (KProbes) + * arch/ia64/kernel/kprobes.c + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) IBM Corporation, 2002, 2004 + * Copyright (C) Intel Corporation, 2005 + * + * 2005-Apr Rusty Lynch <rusty.lynch@intel.com> and Anil S Keshavamurthy + * <anil.s.keshavamurthy@intel.com> adapted from i386 + */ + +#include <linux/config.h> +#include <linux/kprobes.h> +#include <linux/ptrace.h> +#include <linux/spinlock.h> +#include <linux/string.h> +#include <linux/slab.h> +#include <linux/preempt.h> +#include <linux/moduleloader.h> + +#include <asm/pgtable.h> +#include <asm/kdebug.h> + +extern void jprobe_inst_return(void); + +/* kprobe_status settings */ +#define KPROBE_HIT_ACTIVE 0x00000001 +#define KPROBE_HIT_SS 0x00000002 + +static struct kprobe *current_kprobe, *kprobe_prev; +static unsigned long kprobe_status, kprobe_status_prev; +static struct pt_regs jprobe_saved_regs; + +enum instruction_type {A, I, M, F, B, L, X, u}; +static enum instruction_type bundle_encoding[32][3] = { + { M, I, I }, /* 00 */ + { M, I, I }, /* 01 */ + { M, I, I }, /* 02 */ + { M, I, I }, /* 03 */ + { M, L, X }, /* 04 */ + { M, L, X }, /* 05 */ + { u, u, u }, /* 06 */ + { u, u, u }, /* 07 */ + { M, M, I }, /* 08 */ + { M, M, I }, /* 09 */ + { M, M, I }, /* 0A */ + { M, M, I }, /* 0B */ + { M, F, I }, /* 0C */ + { M, F, I }, /* 0D */ + { M, M, F }, /* 0E */ + { M, M, F }, /* 0F */ + { M, I, B }, /* 10 */ + { M, I, B }, /* 11 */ + { M, B, B }, /* 12 */ + { M, B, B }, /* 13 */ + { u, u, u }, /* 14 */ + { u, u, u }, /* 15 */ + { B, B, B }, /* 16 */ + { B, B, B }, /* 17 */ + { M, M, B }, /* 18 */ + { M, M, B }, /* 19 */ + { u, u, u }, /* 1A */ + { u, u, u }, /* 1B */ + { M, F, B }, /* 1C */ + { M, F, B }, /* 1D */ + { u, u, u }, /* 1E */ + { u, u, u }, /* 1F */ +}; + +/* + * In this function we check to see if the instruction + * is IP relative instruction and update the kprobe + * inst flag accordingly + */ +static void update_kprobe_inst_flag(uint template, uint slot, uint major_opcode, + unsigned long kprobe_inst, struct kprobe *p) +{ + p->ainsn.inst_flag = 0; + p->ainsn.target_br_reg = 0; + + if (bundle_encoding[template][slot] == B) { + switch (major_opcode) { + case INDIRECT_CALL_OPCODE: + p->ainsn.inst_flag |= INST_FLAG_FIX_BRANCH_REG; + p->ainsn.target_br_reg = ((kprobe_inst >> 6) & 0x7); + break; + case IP_RELATIVE_PREDICT_OPCODE: + case IP_RELATIVE_BRANCH_OPCODE: + p->ainsn.inst_flag |= INST_FLAG_FIX_RELATIVE_IP_ADDR; + break; + case IP_RELATIVE_CALL_OPCODE: + p->ainsn.inst_flag |= INST_FLAG_FIX_RELATIVE_IP_ADDR; + p->ainsn.inst_flag |= INST_FLAG_FIX_BRANCH_REG; + p->ainsn.target_br_reg = ((kprobe_inst >> 6) & 0x7); + break; + } + } else if (bundle_encoding[template][slot] == X) { + switch (major_opcode) { + case LONG_CALL_OPCODE: + p->ainsn.inst_flag |= INST_FLAG_FIX_BRANCH_REG; + p->ainsn.target_br_reg = ((kprobe_inst >> 6) & 0x7); + break; + } + } + return; +} + +/* + * In this function we check to see if the instruction + * on which we are inserting kprobe is supported. + * Returns 0 if supported + * Returns -EINVAL if unsupported + */ +static int unsupported_inst(uint template, uint slot, uint major_opcode, + unsigned long kprobe_inst, struct kprobe *p) +{ + unsigned long addr = (unsigned long)p->addr; + + if (bundle_encoding[template][slot] == I) { + switch (major_opcode) { + case 0x0: //I_UNIT_MISC_OPCODE: + /* + * Check for Integer speculation instruction + * - Bit 33-35 to be equal to 0x1 + */ + if (((kprobe_inst >> 33) & 0x7) == 1) { + printk(KERN_WARNING + "Kprobes on speculation inst at <0x%lx> not supported\n", + addr); + return -EINVAL; + } + + /* + * IP relative mov instruction + * - Bit 27-35 to be equal to 0x30 + */ + if (((kprobe_inst >> 27) & 0x1FF) == 0x30) { + printk(KERN_WARNING + "Kprobes on \"mov r1=ip\" at <0x%lx> not supported\n", + addr); + return -EINVAL; + + } + } + } + return 0; +} + + +/* + * In this function we check to see if the instruction + * (qp) cmpx.crel.ctype p1,p2=r2,r3 + * on which we are inserting kprobe is cmp instruction + * with ctype as unc. + */ +static uint is_cmp_ctype_unc_inst(uint template, uint slot, uint major_opcode, +unsigned long kprobe_inst) +{ + cmp_inst_t cmp_inst; + uint ctype_unc = 0; + + if (!((bundle_encoding[template][slot] == I) || + (bundle_encoding[template][slot] == M))) + goto out; + + if (!((major_opcode == 0xC) || (major_opcode == 0xD) || + (major_opcode == 0xE))) + goto out; + + cmp_inst.l = kprobe_inst; + if ((cmp_inst.f.x2 == 0) || (cmp_inst.f.x2 == 1)) { + /* Integere compare - Register Register (A6 type)*/ + if ((cmp_inst.f.tb == 0) && (cmp_inst.f.ta == 0) + &&(cmp_inst.f.c == 1)) + ctype_unc = 1; + } else if ((cmp_inst.f.x2 == 2)||(cmp_inst.f.x2 == 3)) { + /* Integere compare - Immediate Register (A8 type)*/ + if ((cmp_inst.f.ta == 0) &&(cmp_inst.f.c == 1)) + ctype_unc = 1; + } +out: + return ctype_unc; +} + +/* + * In this function we override the bundle with + * the break instruction at the given slot. + */ +static void prepare_break_inst(uint template, uint slot, uint major_opcode, + unsigned long kprobe_inst, struct kprobe *p) +{ + unsigned long break_inst = BREAK_INST; + bundle_t *bundle = &p->ainsn.insn.bundle; + + /* + * Copy the original kprobe_inst qualifying predicate(qp) + * to the break instruction iff !is_cmp_ctype_unc_inst + * because for cmp instruction with ctype equal to unc, + * which is a special instruction always needs to be + * executed regradless of qp + */ + if (!is_cmp_ctype_unc_inst(template, slot, major_opcode, kprobe_inst)) + break_inst |= (0x3f & kprobe_inst); + + switch (slot) { + case 0: + bundle->quad0.slot0 = break_inst; + break; + case 1: + bundle->quad0.slot1_p0 = break_inst; + bundle->quad1.slot1_p1 = break_inst >> (64-46); + break; + case 2: + bundle->quad1.slot2 = break_inst; + break; + } + + /* + * Update the instruction flag, so that we can + * emulate the instruction properly after we + * single step on original instruction + */ + update_kprobe_inst_flag(template, slot, major_opcode, kprobe_inst, p); +} + +static inline void get_kprobe_inst(bundle_t *bundle, uint slot, + unsigned long *kprobe_inst, uint *major_opcode) +{ + unsigned long kprobe_inst_p0, kprobe_inst_p1; + unsigned int template; + + template = bundle->quad0.template; + + switch (slot) { + case 0: + *major_opcode = (bundle->quad0.slot0 >> SLOT0_OPCODE_SHIFT); + *kprobe_inst = bundle->quad0.slot0; + break; + case 1: + *major_opcode = (bundle->quad1.slot1_p1 >> SLOT1_p1_OPCODE_SHIFT); + kprobe_inst_p0 = bundle->quad0.slot1_p0; + kprobe_inst_p1 = bundle->quad1.slot1_p1; + *kprobe_inst = kprobe_inst_p0 | (kprobe_inst_p1 << (64-46)); + break; + case 2: + *major_opcode = (bundle->quad1.slot2 >> SLOT2_OPCODE_SHIFT); + *kprobe_inst = bundle->quad1.slot2; + break; + } +} + +static int valid_kprobe_addr(int template, int slot, unsigned long addr) +{ + if ((slot > 2) || ((bundle_encoding[template][1] == L) && slot > 1)) { + printk(KERN_WARNING "Attempting to insert unaligned kprobe at 0x%lx\n", + addr); + return -EINVAL; + } + return 0; +} + +static inline void save_previous_kprobe(void) +{ + kprobe_prev = current_kprobe; + kprobe_status_prev = kprobe_status; +} + +static inline void restore_previous_kprobe(void) +{ + current_kprobe = kprobe_prev; + kprobe_status = kprobe_status_prev; +} + +static inline void set_current_kprobe(struct kprobe *p) +{ + current_kprobe = p; +} + +int arch_prepare_kprobe(struct kprobe *p) +{ + unsigned long addr = (unsigned long) p->addr; + unsigned long *kprobe_addr = (unsigned long *)(addr & ~0xFULL); + unsigned long kprobe_inst=0; + unsigned int slot = addr & 0xf, template, major_opcode = 0; + bundle_t *bundle = &p->ainsn.insn.bundle; + + memcpy(&p->opcode.bundle, kprobe_addr, sizeof(bundle_t)); + memcpy(&p->ainsn.insn.bundle, kprobe_addr, sizeof(bundle_t)); + + template = bundle->quad0.template; + + if(valid_kprobe_addr(template, slot, addr)) + return -EINVAL; + + /* Move to slot 2, if bundle is MLX type and kprobe slot is 1 */ + if (slot == 1 && bundle_encoding[template][1] == L) + slot++; + + /* Get kprobe_inst and major_opcode from the bundle */ + get_kprobe_inst(bundle, slot, &kprobe_inst, &major_opcode); + + if (unsupported_inst(template, slot, major_opcode, kprobe_inst, p)) + return -EINVAL; + + prepare_break_inst(template, slot, major_opcode, kprobe_inst, p); + + return 0; +} + +void arch_arm_kprobe(struct kprobe *p) +{ + unsigned long addr = (unsigned long)p->addr; + unsigned long arm_addr = addr & ~0xFULL; + + memcpy((char *)arm_addr, &p->ainsn.insn.bundle, sizeof(bundle_t)); + flush_icache_range(arm_addr, arm_addr + sizeof(bundle_t)); +} + +void arch_disarm_kprobe(struct kprobe *p) +{ + unsigned long addr = (unsigned long)p->addr; + unsigned long arm_addr = addr & ~0xFULL; + + /* p->opcode contains the original unaltered bundle */ + memcpy((char *) arm_addr, (char *) &p->opcode.bundle, sizeof(bundle_t)); + flush_icache_range(arm_addr, arm_addr + sizeof(bundle_t)); +} + +void arch_remove_kprobe(struct kprobe *p) +{ +} + +/* + * We are resuming execution after a single step fault, so the pt_regs + * structure reflects the register state after we executed the instruction + * located in the kprobe (p->ainsn.insn.bundle). We still need to adjust + * the ip to point back to the original stack address. To set the IP address + * to original stack address, handle the case where we need to fixup the + * relative IP address and/or fixup branch register. + */ +static void resume_execution(struct kprobe *p, struct pt_regs *regs) +{ + unsigned long bundle_addr = ((unsigned long) (&p->opcode.bundle)) & ~0xFULL; + unsigned long resume_addr = (unsigned long)p->addr & ~0xFULL; + unsigned long template; + int slot = ((unsigned long)p->addr & 0xf); + + template = p->opcode.bundle.quad0.template; + + if (slot == 1 && bundle_encoding[template][1] == L) + slot = 2; + + if (p->ainsn.inst_flag) { + + if (p->ainsn.inst_flag & INST_FLAG_FIX_RELATIVE_IP_ADDR) { + /* Fix relative IP address */ + regs->cr_iip = (regs->cr_iip - bundle_addr) + resume_addr; + } + + if (p->ainsn.inst_flag & INST_FLAG_FIX_BRANCH_REG) { + /* + * Fix target branch register, software convention is + * to use either b0 or b6 or b7, so just checking + * only those registers + */ + switch (p->ainsn.target_br_reg) { + case 0: + if ((regs->b0 == bundle_addr) || + (regs->b0 == bundle_addr + 0x10)) { + regs->b0 = (regs->b0 - bundle_addr) + + resume_addr; + } + break; + case 6: + if ((regs->b6 == bundle_addr) || + (regs->b6 == bundle_addr + 0x10)) { + regs->b6 = (regs->b6 - bundle_addr) + + resume_addr; + } + break; + case 7: + if ((regs->b7 == bundle_addr) || + (regs->b7 == bundle_addr + 0x10)) { + regs->b7 = (regs->b7 - bundle_addr) + + resume_addr; + } + break; + } /* end switch */ + } + goto turn_ss_off; + } + + if (slot == 2) { + if (regs->cr_iip == bundle_addr + 0x10) { + regs->cr_iip = resume_addr + 0x10; + } + } else { + if (regs->cr_iip == bundle_addr) { + regs->cr_iip = resume_addr; + } + } + +turn_ss_off: + /* Turn off Single Step bit */ + ia64_psr(regs)->ss = 0; +} + +static void prepare_ss(struct kprobe *p, struct pt_regs *regs) +{ + unsigned long bundle_addr = (unsigned long) &p->opcode.bundle; + unsigned long slot = (unsigned long)p->addr & 0xf; + + /* Update instruction pointer (IIP) and slot number (IPSR.ri) */ + regs->cr_iip = bundle_addr & ~0xFULL; + + if (slot > 2) + slot = 0; + + ia64_psr(regs)->ri = slot; + + /* turn on single stepping */ + ia64_psr(regs)->ss = 1; +} + +static int pre_kprobes_handler(struct die_args *args) +{ + struct kprobe *p; + int ret = 0; + struct pt_regs *regs = args->regs; + kprobe_opcode_t *addr = (kprobe_opcode_t *)instruction_pointer(regs); + + preempt_disable(); + + /* Handle recursion cases */ + if (kprobe_running()) { + p = get_kprobe(addr); + if (p) { + if (kprobe_status == KPROBE_HIT_SS) { + unlock_kprobes(); + goto no_kprobe; + } + /* We have reentered the pre_kprobe_handler(), since + * another probe was hit while within the handler. + * We here save the original kprobes variables and + * just single step on the instruction of the new probe + * without calling any user handlers. + */ + save_previous_kprobe(); + set_current_kprobe(p); + p->nmissed++; + prepare_ss(p, regs); + kprobe_status = KPROBE_REENTER; + return 1; + } else if (args->err == __IA64_BREAK_JPROBE) { + /* + * jprobe instrumented function just completed + */ + p = current_kprobe; + if (p->break_handler && p->break_handler(p, regs)) { + goto ss_probe; + } + } else { + /* Not our break */ + goto no_kprobe; + } + } + + lock_kprobes(); + p = get_kprobe(addr); + if (!p) { + unlock_kprobes(); + goto no_kprobe; + } + + kprobe_status = KPROBE_HIT_ACTIVE; + set_current_kprobe(p); + + if (p->pre_handler && p->pre_handler(p, regs)) + /* + * Our pre-handler is specifically requesting that we just + * do a return. This is handling the case where the + * pre-handler is really our special jprobe pre-handler. + */ + return 1; + +ss_probe: + prepare_ss(p, regs); + kprobe_status = KPROBE_HIT_SS; + return 1; + +no_kprobe: + preempt_enable_no_resched(); + return ret; +} + +static int post_kprobes_handler(struct pt_regs *regs) +{ + if (!kprobe_running()) + return 0; + + if ((kprobe_status != KPROBE_REENTER) && current_kprobe->post_handler) { + kprobe_status = KPROBE_HIT_SSDONE; + current_kprobe->post_handler(current_kprobe, regs, 0); + } + + resume_execution(current_kprobe, regs); + + /*Restore back the original saved kprobes variables and continue. */ + if (kprobe_status == KPROBE_REENTER) { + restore_previous_kprobe(); + goto out; + } + + unlock_kprobes(); + +out: + preempt_enable_no_resched(); + return 1; +} + +static int kprobes_fault_handler(struct pt_regs *regs, int trapnr) +{ + if (!kprobe_running()) + return 0; + + if (current_kprobe->fault_handler && + current_kprobe->fault_handler(current_kprobe, regs, trapnr)) + return 1; + + if (kprobe_status & KPROBE_HIT_SS) { + resume_execution(current_kprobe, regs); + unlock_kprobes(); + preempt_enable_no_resched(); + } + + return 0; +} + +int kprobe_exceptions_notify(struct notifier_block *self, unsigned long val, + void *data) +{ + struct die_args *args = (struct die_args *)data; + switch(val) { + case DIE_BREAK: + if (pre_kprobes_handler(args)) + return NOTIFY_STOP; + break; + case DIE_SS: + if (post_kprobes_handler(args->regs)) + return NOTIFY_STOP; + break; + case DIE_PAGE_FAULT: + if (kprobes_fault_handler(args->regs, args->trapnr)) + return NOTIFY_STOP; + default: + break; + } + return NOTIFY_DONE; +} + +int setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) +{ + struct jprobe *jp = container_of(p, struct jprobe, kp); + unsigned long addr = ((struct fnptr *)(jp->entry))->ip; + + /* save architectural state */ + jprobe_saved_regs = *regs; + + /* after rfi, execute the jprobe instrumented function */ + regs->cr_iip = addr & ~0xFULL; + ia64_psr(regs)->ri = addr & 0xf; + regs->r1 = ((struct fnptr *)(jp->entry))->gp; + + /* + * fix the return address to our jprobe_inst_return() function + * in the jprobes.S file + */ + regs->b0 = ((struct fnptr *)(jprobe_inst_return))->ip; + + return 1; +} + +int longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) +{ + *regs = jprobe_saved_regs; + return 1; +} diff --git a/arch/ia64/kernel/traps.c b/arch/ia64/kernel/traps.c index 1861173bd4f6..e7e520d90f03 100644 --- a/arch/ia64/kernel/traps.c +++ b/arch/ia64/kernel/traps.c @@ -21,12 +21,26 @@ #include <asm/intrinsics.h> #include <asm/processor.h> #include <asm/uaccess.h> +#include <asm/kdebug.h> extern spinlock_t timerlist_lock; fpswa_interface_t *fpswa_interface; EXPORT_SYMBOL(fpswa_interface); +struct notifier_block *ia64die_chain; +static DEFINE_SPINLOCK(die_notifier_lock); + +int register_die_notifier(struct notifier_block *nb) +{ + int err = 0; + unsigned long flags; + spin_lock_irqsave(&die_notifier_lock, flags); + err = notifier_chain_register(&ia64die_chain, nb); + spin_unlock_irqrestore(&die_notifier_lock, flags); + return err; +} + void __init trap_init (void) { @@ -137,6 +151,10 @@ ia64_bad_break (unsigned long break_num, struct pt_regs *regs) switch (break_num) { case 0: /* unknown error (used by GCC for __builtin_abort()) */ + if (notify_die(DIE_BREAK, "break 0", regs, break_num, TRAP_BRKPT, SIGTRAP) + == NOTIFY_STOP) { + return; + } die_if_kernel("bugcheck!", regs, break_num); sig = SIGILL; code = ILL_ILLOPC; break; @@ -189,6 +207,15 @@ ia64_bad_break (unsigned long break_num, struct pt_regs *regs) sig = SIGILL; code = __ILL_BNDMOD; break; + case 0x80200: + case 0x80300: + if (notify_die(DIE_BREAK, "kprobe", regs, break_num, TRAP_BRKPT, SIGTRAP) + == NOTIFY_STOP) { + return; + } + sig = SIGTRAP; code = TRAP_BRKPT; + break; + default: if (break_num < 0x40000 || break_num > 0x100000) die_if_kernel("Bad break", regs, break_num); @@ -548,7 +575,11 @@ ia64_fault (unsigned long vector, unsigned long isr, unsigned long ifa, #endif break; case 35: siginfo.si_code = TRAP_BRANCH; ifa = 0; break; - case 36: siginfo.si_code = TRAP_TRACE; ifa = 0; break; + case 36: + if (notify_die(DIE_SS, "ss", ®s, vector, + vector, SIGTRAP) == NOTIFY_STOP) + return; + siginfo.si_code = TRAP_TRACE; ifa = 0; break; } siginfo.si_signo = SIGTRAP; siginfo.si_errno = 0; diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c index c00710929390..f3fd528ead3b 100644 --- a/arch/ia64/mm/discontig.c +++ b/arch/ia64/mm/discontig.c @@ -560,14 +560,15 @@ void show_mem(void) int shared = 0, cached = 0, reserved = 0; printk("Node ID: %d\n", pgdat->node_id); for(i = 0; i < pgdat->node_spanned_pages; i++) { + struct page *page = pgdat_page_nr(pgdat, i); if (!ia64_pfn_valid(pgdat->node_start_pfn+i)) continue; - if (PageReserved(pgdat->node_mem_map+i)) + if (PageReserved(page)) reserved++; - else if (PageSwapCache(pgdat->node_mem_map+i)) + else if (PageSwapCache(page)) cached++; - else if (page_count(pgdat->node_mem_map+i)) - shared += page_count(pgdat->node_mem_map+i)-1; + else if (page_count(page)) + shared += page_count(page)-1; } total_present += present; total_reserved += reserved; diff --git a/arch/ia64/mm/fault.c b/arch/ia64/mm/fault.c index 4174ec999dde..ff62551eb3a1 100644 --- a/arch/ia64/mm/fault.c +++ b/arch/ia64/mm/fault.c @@ -14,6 +14,7 @@ #include <asm/processor.h> #include <asm/system.h> #include <asm/uaccess.h> +#include <asm/kdebug.h> extern void die (char *, struct pt_regs *, long); @@ -102,6 +103,13 @@ ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_regs *re goto bad_area_no_up; #endif + /* + * This is to handle the kprobes on user space access instructions + */ + if (notify_die(DIE_PAGE_FAULT, "page fault", regs, code, TRAP_BRKPT, + SIGSEGV) == NOTIFY_STOP) + return; + down_read(&mm->mmap_sem); vma = find_vma_prev(mm, address, &prev_vma); diff --git a/arch/m32r/Kconfig b/arch/m32r/Kconfig index 64c133344afe..42ca8a39798d 100644 --- a/arch/m32r/Kconfig +++ b/arch/m32r/Kconfig @@ -172,11 +172,13 @@ config NOHIGHMEM bool default y -config DISCONTIGMEM +config ARCH_DISCONTIGMEM_ENABLE bool "Internal RAM Support" depends on CHIP_M32700 || CHIP_M32102 || CHIP_VDEC2 || CHIP_OPSP default y +source "mm/Kconfig" + config IRAM_START hex "Internal memory start address (hex)" default "00f00000" diff --git a/arch/m32r/mm/init.c b/arch/m32r/mm/init.c index bc423d838fb8..d9a40b1fe8ba 100644 --- a/arch/m32r/mm/init.c +++ b/arch/m32r/mm/init.c @@ -49,7 +49,7 @@ void show_mem(void) printk("Free swap: %6ldkB\n",nr_swap_pages<<(PAGE_SHIFT-10)); for_each_pgdat(pgdat) { for (i = 0; i < pgdat->node_spanned_pages; ++i) { - page = pgdat->node_mem_map + i; + page = pgdat_page_nr(pgdat, i); total++; if (PageHighMem(page)) highmem++; @@ -152,7 +152,7 @@ int __init reservedpages_count(void) reservedpages = 0; for_each_online_node(nid) for (i = 0 ; i < MAX_LOW_PFN(nid) - START_PFN(nid) ; i++) - if (PageReserved(NODE_DATA(nid)->node_mem_map + i)) + if (PageReserved(nid_page_nr(nid, i))) reservedpages++; return reservedpages; diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig index d0713c7d9f0a..691a2469ff36 100644 --- a/arch/m68k/Kconfig +++ b/arch/m68k/Kconfig @@ -357,6 +357,8 @@ config 060_WRITETHROUGH is hardwired on. The 53c710 SCSI driver is known to suffer from this problem. +source "mm/Kconfig" + endmenu menu "General setup" diff --git a/arch/m68knommu/Kconfig b/arch/m68knommu/Kconfig index e729bd280623..dbfcdc8e6087 100644 --- a/arch/m68knommu/Kconfig +++ b/arch/m68knommu/Kconfig @@ -532,6 +532,8 @@ config ROMKERNEL endchoice +source "mm/Kconfig" + endmenu config ISA_DMA_API diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index ab9944693f1f..94f5a8eb2c22 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -492,7 +492,7 @@ config SGI_SN0_N_MODE which allows for more memory. Your system is most probably running in M-Mode, so you should say N here. -config DISCONTIGMEM +config ARCH_DISCONTIGMEM_ENABLE bool default y if SGI_IP27 help diff --git a/arch/mips/configs/ip27_defconfig b/arch/mips/configs/ip27_defconfig index 13472292d0ec..b5bab3a42fc4 100644 --- a/arch/mips/configs/ip27_defconfig +++ b/arch/mips/configs/ip27_defconfig @@ -82,7 +82,7 @@ CONFIG_STOP_MACHINE=y # CONFIG_SGI_IP22 is not set CONFIG_SGI_IP27=y # CONFIG_SGI_SN0_N_MODE is not set -CONFIG_DISCONTIGMEM=y +CONFIG_ARCH_DISCONTIGMEM_ENABLE=y CONFIG_NUMA=y # CONFIG_MAPPED_KERNEL is not set # CONFIG_REPLICATE_KTEXT is not set diff --git a/arch/mips/sgi-ip27/ip27-memory.c b/arch/mips/sgi-ip27/ip27-memory.c index 0a44a98d7adc..a160d04f7dbe 100644 --- a/arch/mips/sgi-ip27/ip27-memory.c +++ b/arch/mips/sgi-ip27/ip27-memory.c @@ -549,9 +549,8 @@ void __init mem_init(void) */ numslots = node_getlastslot(node); for (slot = 1; slot <= numslots; slot++) { - p = NODE_DATA(node)->node_mem_map + - (slot_getbasepfn(node, slot) - - slot_getbasepfn(node, 0)); + p = nid_page_nr(node, slot_getbasepfn(node, slot) - + slot_getbasepfn(node, 0)); /* * Free valid memory in current slot. diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index e7e7c56fc212..ce327c799b44 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -148,7 +148,7 @@ config HOTPLUG_CPU default y if SMP select HOTPLUG -config DISCONTIGMEM +config ARCH_DISCONTIGMEM_ENABLE bool "Discontiguous memory support (EXPERIMENTAL)" depends on EXPERIMENTAL help @@ -157,6 +157,8 @@ config DISCONTIGMEM or have huge holes in the physical address space for other reasons. See <file:Documentation/vm/numa> for more. +source "mm/Kconfig" + config PREEMPT bool # bool "Preemptible Kernel" diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c index cac37589e35c..2886ad70db48 100644 --- a/arch/parisc/mm/init.c +++ b/arch/parisc/mm/init.c @@ -506,7 +506,7 @@ void show_mem(void) for (j = node_start_pfn(i); j < node_end_pfn(i); j++) { struct page *p; - p = node_mem_map(i) + j - node_start_pfn(i); + p = nid_page_nr(i, j) - node_start_pfn(i); total++; if (PageReserved(p)) diff --git a/arch/ppc/Kconfig b/arch/ppc/Kconfig index 10162b187bcf..848f43970a4b 100644 --- a/arch/ppc/Kconfig +++ b/arch/ppc/Kconfig @@ -905,6 +905,8 @@ config PREEMPT config HIGHMEM bool "High memory support" +source "mm/Kconfig" + source "fs/Kconfig.binfmt" config PROC_DEVICETREE diff --git a/arch/ppc/boot/simple/misc.c b/arch/ppc/boot/simple/misc.c index ab0f9902cb67..e02de5b467a4 100644 --- a/arch/ppc/boot/simple/misc.c +++ b/arch/ppc/boot/simple/misc.c @@ -222,7 +222,7 @@ decompress_kernel(unsigned long load_addr, int num_words, unsigned long cksum) puts("\n"); puts("Uncompressing Linux..."); - gunzip(0x0, 0x400000, zimage_start, &zimage_size); + gunzip(NULL, 0x400000, zimage_start, &zimage_size); puts("done.\n"); /* get the bi_rec address */ diff --git a/arch/ppc/boot/simple/mpc10x_memory.c b/arch/ppc/boot/simple/mpc10x_memory.c index 977daedc14c0..20d92a34ceb8 100644 --- a/arch/ppc/boot/simple/mpc10x_memory.c +++ b/arch/ppc/boot/simple/mpc10x_memory.c @@ -33,7 +33,7 @@ #define MPC10X_PCI_OP(rw, size, type, op, mask) \ static void \ -mpc10x_##rw##_config_##size(unsigned int *cfg_addr, \ +mpc10x_##rw##_config_##size(unsigned int __iomem *cfg_addr, \ unsigned int *cfg_data, int devfn, int offset, \ type val) \ { \ diff --git a/arch/ppc64/Kconfig b/arch/ppc64/Kconfig index bae56ec76ea7..cb27068bfcd4 100644 --- a/arch/ppc64/Kconfig +++ b/arch/ppc64/Kconfig @@ -217,13 +217,49 @@ config HMT This option enables hardware multithreading on RS64 cpus. pSeries systems p620 and p660 have such a cpu type. -config DISCONTIGMEM - bool "Discontiguous Memory Support" +config ARCH_SELECT_MEMORY_MODEL + def_bool y + +config ARCH_FLATMEM_ENABLE + def_bool y + depends on !NUMA + +config ARCH_DISCONTIGMEM_ENABLE + def_bool y depends on SMP && PPC_PSERIES +config ARCH_DISCONTIGMEM_DEFAULT + def_bool y + depends on ARCH_DISCONTIGMEM_ENABLE + +config ARCH_FLATMEM_ENABLE + def_bool y + +config ARCH_SPARSEMEM_ENABLE + def_bool y + depends on ARCH_DISCONTIGMEM_ENABLE + +source "mm/Kconfig" + +config HAVE_ARCH_EARLY_PFN_TO_NID + def_bool y + depends on NEED_MULTIPLE_NODES + +# Some NUMA nodes have memory ranges that span +# other nodes. Even though a pfn is valid and +# between a node's start and end pfns, it may not +# reside on that node. +# +# This is a relatively temporary hack that should +# be able to go away when sparsemem is fully in +# place +config NODES_SPAN_OTHER_NODES + def_bool y + depends on NEED_MULTIPLE_NODES + config NUMA bool "NUMA support" - depends on DISCONTIGMEM + default y if DISCONTIGMEM || SPARSEMEM config SCHED_SMT bool "SMT (Hyperthreading) scheduler support" diff --git a/arch/ppc64/boot/install.sh b/arch/ppc64/boot/install.sh index 955c5681db6c..cb2d6626b555 100644 --- a/arch/ppc64/boot/install.sh +++ b/arch/ppc64/boot/install.sh @@ -22,8 +22,8 @@ # User may have a custom install script -if [ -x ~/bin/installkernel ]; then exec ~/bin/installkernel "$@"; fi -if [ -x /sbin/installkernel ]; then exec /sbin/installkernel "$@"; fi +if [ -x ~/bin/${CROSS_COMPILE}installkernel ]; then exec ~/bin/${CROSS_COMPILE}installkernel "$@"; fi +if [ -x /sbin/${CROSS_COMPILE}installkernel ]; then exec /sbin/${CROSS_COMPILE}installkernel "$@"; fi # Default install diff --git a/arch/ppc64/configs/pSeries_defconfig b/arch/ppc64/configs/pSeries_defconfig index 3eb5ef25d3a3..d0db8b5966c0 100644 --- a/arch/ppc64/configs/pSeries_defconfig +++ b/arch/ppc64/configs/pSeries_defconfig @@ -88,7 +88,7 @@ CONFIG_IBMVIO=y CONFIG_IOMMU_VMERGE=y CONFIG_SMP=y CONFIG_NR_CPUS=128 -CONFIG_DISCONTIGMEM=y +CONFIG_ARCH_DISCONTIGMEM_ENABLE=y CONFIG_NUMA=y CONFIG_SCHED_SMT=y # CONFIG_PREEMPT is not set diff --git a/arch/ppc64/defconfig b/arch/ppc64/defconfig index 2f31bf3046f9..b8e2066dde77 100644 --- a/arch/ppc64/defconfig +++ b/arch/ppc64/defconfig @@ -89,7 +89,7 @@ CONFIG_BOOTX_TEXT=y CONFIG_IOMMU_VMERGE=y CONFIG_SMP=y CONFIG_NR_CPUS=32 -CONFIG_DISCONTIGMEM=y +CONFIG_ARCH_DISCONTIGMEM_ENABLE=y # CONFIG_NUMA is not set # CONFIG_SCHED_SMT is not set # CONFIG_PREEMPT is not set diff --git a/arch/ppc64/kernel/kprobes.c b/arch/ppc64/kernel/kprobes.c index e950a2058a19..782ce3efa2c1 100644 --- a/arch/ppc64/kernel/kprobes.c +++ b/arch/ppc64/kernel/kprobes.c @@ -32,15 +32,14 @@ #include <linux/ptrace.h> #include <linux/spinlock.h> #include <linux/preempt.h> +#include <asm/cacheflush.h> #include <asm/kdebug.h> #include <asm/sstep.h> -/* kprobe_status settings */ -#define KPROBE_HIT_ACTIVE 0x00000001 -#define KPROBE_HIT_SS 0x00000002 - static struct kprobe *current_kprobe; static unsigned long kprobe_status, kprobe_saved_msr; +static struct kprobe *kprobe_prev; +static unsigned long kprobe_status_prev, kprobe_saved_msr_prev; static struct pt_regs jprobe_saved_regs; int arch_prepare_kprobe(struct kprobe *p) @@ -61,16 +60,25 @@ int arch_prepare_kprobe(struct kprobe *p) void arch_copy_kprobe(struct kprobe *p) { memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t)); + p->opcode = *p->addr; } -void arch_remove_kprobe(struct kprobe *p) +void arch_arm_kprobe(struct kprobe *p) { + *p->addr = BREAKPOINT_INSTRUCTION; + flush_icache_range((unsigned long) p->addr, + (unsigned long) p->addr + sizeof(kprobe_opcode_t)); } -static inline void disarm_kprobe(struct kprobe *p, struct pt_regs *regs) +void arch_disarm_kprobe(struct kprobe *p) { *p->addr = p->opcode; - regs->nip = (unsigned long)p->addr; + flush_icache_range((unsigned long) p->addr, + (unsigned long) p->addr + sizeof(kprobe_opcode_t)); +} + +void arch_remove_kprobe(struct kprobe *p) +{ } static inline void prepare_singlestep(struct kprobe *p, struct pt_regs *regs) @@ -83,6 +91,20 @@ static inline void prepare_singlestep(struct kprobe *p, struct pt_regs *regs) regs->nip = (unsigned long)&p->ainsn.insn; } +static inline void save_previous_kprobe(void) +{ + kprobe_prev = current_kprobe; + kprobe_status_prev = kprobe_status; + kprobe_saved_msr_prev = kprobe_saved_msr; +} + +static inline void restore_previous_kprobe(void) +{ + current_kprobe = kprobe_prev; + kprobe_status = kprobe_status_prev; + kprobe_saved_msr = kprobe_saved_msr_prev; +} + static inline int kprobe_handler(struct pt_regs *regs) { struct kprobe *p; @@ -101,8 +123,19 @@ static inline int kprobe_handler(struct pt_regs *regs) unlock_kprobes(); goto no_kprobe; } - disarm_kprobe(p, regs); - ret = 1; + /* We have reentered the kprobe_handler(), since + * another probe was hit while within the handler. + * We here save the original kprobes variables and + * just single step on the instruction of the new probe + * without calling any user handlers. + */ + save_previous_kprobe(); + current_kprobe = p; + kprobe_saved_msr = regs->msr; + p->nmissed++; + prepare_singlestep(p, regs); + kprobe_status = KPROBE_REENTER; + return 1; } else { p = current_kprobe; if (p->break_handler && p->break_handler(p, regs)) { @@ -184,13 +217,21 @@ static inline int post_kprobe_handler(struct pt_regs *regs) if (!kprobe_running()) return 0; - if (current_kprobe->post_handler) + if ((kprobe_status != KPROBE_REENTER) && current_kprobe->post_handler) { + kprobe_status = KPROBE_HIT_SSDONE; current_kprobe->post_handler(current_kprobe, regs, 0); + } resume_execution(current_kprobe, regs); regs->msr |= kprobe_saved_msr; + /*Restore back the original saved kprobes variables and continue. */ + if (kprobe_status == KPROBE_REENTER) { + restore_previous_kprobe(); + goto out; + } unlock_kprobes(); +out: preempt_enable_no_resched(); /* diff --git a/arch/ppc64/kernel/setup.c b/arch/ppc64/kernel/setup.c index 7d060ddb5e93..0a47a5ef428d 100644 --- a/arch/ppc64/kernel/setup.c +++ b/arch/ppc64/kernel/setup.c @@ -1074,6 +1074,7 @@ void __init setup_arch(char **cmdline_p) /* set up the bootmem stuff with available memory */ do_init_bootmem(); + sparse_init(); /* initialize the syscall map in systemcfg */ setup_syscall_map(); diff --git a/arch/ppc64/mm/Makefile b/arch/ppc64/mm/Makefile index ac522d57b2a7..3695d00d347f 100644 --- a/arch/ppc64/mm/Makefile +++ b/arch/ppc64/mm/Makefile @@ -6,6 +6,6 @@ EXTRA_CFLAGS += -mno-minimal-toc obj-y := fault.o init.o imalloc.o hash_utils.o hash_low.o tlb.o \ slb_low.o slb.o stab.o mmap.o -obj-$(CONFIG_DISCONTIGMEM) += numa.o +obj-$(CONFIG_NEED_MULTIPLE_NODES) += numa.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o obj-$(CONFIG_PPC_MULTIPLATFORM) += hash_native.o diff --git a/arch/ppc64/mm/init.c b/arch/ppc64/mm/init.c index 6fa1e6490b57..b50b3a446dbe 100644 --- a/arch/ppc64/mm/init.c +++ b/arch/ppc64/mm/init.c @@ -98,7 +98,7 @@ void show_mem(void) printk("Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10)); for_each_pgdat(pgdat) { for (i = 0; i < pgdat->node_spanned_pages; i++) { - page = pgdat->node_mem_map + i; + page = pgdat_page_nr(pgdat, i); total++; if (PageReserved(page)) reserved++; @@ -531,7 +531,7 @@ EXPORT_SYMBOL(page_is_ram); * Initialize the bootmem system and give it all the memory we * have available. */ -#ifndef CONFIG_DISCONTIGMEM +#ifndef CONFIG_NEED_MULTIPLE_NODES void __init do_init_bootmem(void) { unsigned long i; @@ -553,12 +553,20 @@ void __init do_init_bootmem(void) max_pfn = max_low_pfn; - /* add all physical memory to the bootmem map. Also find the first */ + /* Add all physical memory to the bootmem map, mark each area + * present. + */ for (i=0; i < lmb.memory.cnt; i++) { unsigned long physbase, size; + unsigned long start_pfn, end_pfn; physbase = lmb.memory.region[i].physbase; size = lmb.memory.region[i].size; + + start_pfn = physbase >> PAGE_SHIFT; + end_pfn = start_pfn + (size >> PAGE_SHIFT); + memory_present(0, start_pfn, end_pfn); + free_bootmem(physbase, size); } @@ -597,7 +605,7 @@ void __init paging_init(void) free_area_init_node(0, NODE_DATA(0), zones_size, __pa(PAGE_OFFSET) >> PAGE_SHIFT, zholes_size); } -#endif /* CONFIG_DISCONTIGMEM */ +#endif /* ! CONFIG_NEED_MULTIPLE_NODES */ static struct kcore_list kcore_vmem; @@ -628,7 +636,7 @@ module_init(setup_kcore); void __init mem_init(void) { -#ifdef CONFIG_DISCONTIGMEM +#ifdef CONFIG_NEED_MULTIPLE_NODES int nid; #endif pg_data_t *pgdat; @@ -639,7 +647,7 @@ void __init mem_init(void) num_physpages = max_low_pfn; /* RAM is assumed contiguous */ high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); -#ifdef CONFIG_DISCONTIGMEM +#ifdef CONFIG_NEED_MULTIPLE_NODES for_each_online_node(nid) { if (NODE_DATA(nid)->node_spanned_pages != 0) { printk("freeing bootmem node %x\n", nid); @@ -654,7 +662,7 @@ void __init mem_init(void) for_each_pgdat(pgdat) { for (i = 0; i < pgdat->node_spanned_pages; i++) { - page = pgdat->node_mem_map + i; + page = pgdat_page_nr(pgdat, i); if (PageReserved(page)) reservedpages++; } diff --git a/arch/ppc64/mm/numa.c b/arch/ppc64/mm/numa.c index ea862ec643d3..cafd91aef289 100644 --- a/arch/ppc64/mm/numa.c +++ b/arch/ppc64/mm/numa.c @@ -440,6 +440,8 @@ new_range: for (i = start ; i < (start+size); i += MEMORY_INCREMENT) numa_memory_lookup_table[i >> MEMORY_INCREMENT_SHIFT] = numa_domain; + memory_present(numa_domain, start >> PAGE_SHIFT, + (start + size) >> PAGE_SHIFT); if (--ranges) goto new_range; @@ -481,6 +483,7 @@ static void __init setup_nonnuma(void) for (i = 0 ; i < top_of_ram; i += MEMORY_INCREMENT) numa_memory_lookup_table[i >> MEMORY_INCREMENT_SHIFT] = 0; + memory_present(0, 0, init_node_data[0].node_end_pfn); } static void __init dump_numa_topology(void) diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index ab79af84699a..32696c1d9280 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -226,6 +226,8 @@ config WARN_STACK_SIZE This allows you to specify the maximum frame size a function may have without the compiler complaining about it. +source "mm/Kconfig" + comment "I/O subsystem configuration" config MACHCHK_WARNING diff --git a/arch/s390/boot/install.sh b/arch/s390/boot/install.sh index 278a8139cb18..d4026f62cb06 100644 --- a/arch/s390/boot/install.sh +++ b/arch/s390/boot/install.sh @@ -21,8 +21,8 @@ # User may have a custom install script -if [ -x ~/bin/installkernel ]; then exec ~/bin/installkernel "$@"; fi -if [ -x /sbin/installkernel ]; then exec /sbin/installkernel "$@"; fi +if [ -x ~/bin/${CROSS_COMPILE}installkernel ]; then exec ~/bin/${CROSS_COMPILE}installkernel "$@"; fi +if [ -x /sbin/${CROSS_COMPILE}installkernel ]; then exec /sbin/${CROSS_COMPILE}installkernel "$@"; fi # Default install - same as make zlilo diff --git a/arch/s390/kernel/compat_linux.h b/arch/s390/kernel/compat_linux.h index bf33dcfec7db..3898f66d0b2f 100644 --- a/arch/s390/kernel/compat_linux.h +++ b/arch/s390/kernel/compat_linux.h @@ -45,7 +45,7 @@ typedef struct compat_siginfo { /* POSIX.1b timers */ struct { - timer_t _tid; /* timer id */ + compat_timer_t _tid; /* timer id */ int _overrun; /* overrun count */ compat_sigval_t _sigval; /* same as below */ int _sys_private; /* not to be passed to user */ diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index 3468d5127223..a7c8bfc11604 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -486,7 +486,7 @@ config CPU_SUBTYPE_ST40 depends on CPU_SUBTYPE_ST40STB1 || CPU_SUBTYPE_ST40GX1 default y -config DISCONTIGMEM +config ARCH_DISCONTIGMEM_ENABLE bool depends on SH_HP690 default y @@ -496,6 +496,8 @@ config DISCONTIGMEM or have huge holes in the physical address space for other reasons. See <file:Documentation/vm/numa> for more. +source "mm/Kconfig" + config ZERO_PAGE_OFFSET hex "Zero page offset" default "0x00001000" if !(SH_MPC1211 || SH_SH03) diff --git a/arch/sh64/Kconfig b/arch/sh64/Kconfig index 76eb81fba45e..708e59736a4d 100644 --- a/arch/sh64/Kconfig +++ b/arch/sh64/Kconfig @@ -217,6 +217,8 @@ config PREEMPT bool "Preemptible Kernel (EXPERIMENTAL)" depends on EXPERIMENTAL +source "mm/Kconfig" + endmenu menu "Bus options (PCI, PCMCIA, EISA, MCA, ISA)" diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index 237f922520fd..262e13d086fe 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -264,7 +264,11 @@ config SUNOS_EMUL want to run SunOS binaries on an Ultra you must also say Y to "Kernel support for 32-bit a.out binaries" above. -source "drivers/parport/Kconfig" +source "mm/Kconfig" + +endmenu + +source "drivers/Kconfig" config PRINTER tristate "Parallel printer support" @@ -291,6 +295,8 @@ config PRINTER If you have more than 8 printers, you need to increase the LP_NO macro in lp.c and the PARPORT_MAX macro in parport.h. +source "mm/Kconfig" + endmenu source "drivers/base/Kconfig" @@ -372,18 +378,8 @@ config UNIX98_PTY_COUNT endmenu -source "drivers/input/Kconfig" - source "fs/Kconfig" -source "sound/Kconfig" - -source "drivers/usb/Kconfig" - -source "drivers/infiniband/Kconfig" - -source "drivers/char/watchdog/Kconfig" - source "arch/sparc/Kconfig.debug" source "security/Kconfig" diff --git a/arch/sparc64/Kconfig b/arch/sparc64/Kconfig index a72fd15d5ea8..e2b050eb3b96 100644 --- a/arch/sparc64/Kconfig +++ b/arch/sparc64/Kconfig @@ -484,6 +484,8 @@ config CMDLINE NOTE: This option WILL override the PROM bootargs setting! +source "mm/Kconfig" + endmenu source "drivers/base/Kconfig" diff --git a/arch/sparc64/kernel/kprobes.c b/arch/sparc64/kernel/kprobes.c index 7066d7ba667a..bdac631cf011 100644 --- a/arch/sparc64/kernel/kprobes.c +++ b/arch/sparc64/kernel/kprobes.c @@ -6,7 +6,6 @@ #include <linux/config.h> #include <linux/kernel.h> #include <linux/kprobes.h> - #include <asm/kdebug.h> #include <asm/signal.h> @@ -47,25 +46,59 @@ void arch_copy_kprobe(struct kprobe *p) { p->ainsn.insn[0] = *p->addr; p->ainsn.insn[1] = BREAKPOINT_INSTRUCTION_2; + p->opcode = *p->addr; } -void arch_remove_kprobe(struct kprobe *p) +void arch_arm_kprobe(struct kprobe *p) { + *p->addr = BREAKPOINT_INSTRUCTION; + flushi(p->addr); } -/* kprobe_status settings */ -#define KPROBE_HIT_ACTIVE 0x00000001 -#define KPROBE_HIT_SS 0x00000002 +void arch_disarm_kprobe(struct kprobe *p) +{ + *p->addr = p->opcode; + flushi(p->addr); +} + +void arch_remove_kprobe(struct kprobe *p) +{ +} static struct kprobe *current_kprobe; static unsigned long current_kprobe_orig_tnpc; static unsigned long current_kprobe_orig_tstate_pil; static unsigned int kprobe_status; +static struct kprobe *kprobe_prev; +static unsigned long kprobe_orig_tnpc_prev; +static unsigned long kprobe_orig_tstate_pil_prev; +static unsigned int kprobe_status_prev; -static inline void prepare_singlestep(struct kprobe *p, struct pt_regs *regs) +static inline void save_previous_kprobe(void) +{ + kprobe_status_prev = kprobe_status; + kprobe_orig_tnpc_prev = current_kprobe_orig_tnpc; + kprobe_orig_tstate_pil_prev = current_kprobe_orig_tstate_pil; + kprobe_prev = current_kprobe; +} + +static inline void restore_previous_kprobe(void) +{ + kprobe_status = kprobe_status_prev; + current_kprobe_orig_tnpc = kprobe_orig_tnpc_prev; + current_kprobe_orig_tstate_pil = kprobe_orig_tstate_pil_prev; + current_kprobe = kprobe_prev; +} + +static inline void set_current_kprobe(struct kprobe *p, struct pt_regs *regs) { current_kprobe_orig_tnpc = regs->tnpc; current_kprobe_orig_tstate_pil = (regs->tstate & TSTATE_PIL); + current_kprobe = p; +} + +static inline void prepare_singlestep(struct kprobe *p, struct pt_regs *regs) +{ regs->tstate |= TSTATE_PIL; /*single step inline, if it a breakpoint instruction*/ @@ -78,17 +111,6 @@ static inline void prepare_singlestep(struct kprobe *p, struct pt_regs *regs) } } -static inline void disarm_kprobe(struct kprobe *p, struct pt_regs *regs) -{ - *p->addr = p->opcode; - flushi(p->addr); - - regs->tpc = (unsigned long) p->addr; - regs->tnpc = current_kprobe_orig_tnpc; - regs->tstate = ((regs->tstate & ~TSTATE_PIL) | - current_kprobe_orig_tstate_pil); -} - static int kprobe_handler(struct pt_regs *regs) { struct kprobe *p; @@ -109,8 +131,18 @@ static int kprobe_handler(struct pt_regs *regs) unlock_kprobes(); goto no_kprobe; } - disarm_kprobe(p, regs); - ret = 1; + /* We have reentered the kprobe_handler(), since + * another probe was hit while within the handler. + * We here save the original kprobes variables and + * just single step on the instruction of the new probe + * without calling any user handlers. + */ + save_previous_kprobe(); + set_current_kprobe(p, regs); + p->nmissed++; + kprobe_status = KPROBE_REENTER; + prepare_singlestep(p, regs); + return 1; } else { p = current_kprobe; if (p->break_handler && p->break_handler(p, regs)) @@ -138,8 +170,8 @@ static int kprobe_handler(struct pt_regs *regs) goto no_kprobe; } + set_current_kprobe(p, regs); kprobe_status = KPROBE_HIT_ACTIVE; - current_kprobe = p; if (p->pre_handler && p->pre_handler(p, regs)) return 1; @@ -245,12 +277,20 @@ static inline int post_kprobe_handler(struct pt_regs *regs) if (!kprobe_running()) return 0; - if (current_kprobe->post_handler) + if ((kprobe_status != KPROBE_REENTER) && current_kprobe->post_handler) { + kprobe_status = KPROBE_HIT_SSDONE; current_kprobe->post_handler(current_kprobe, regs, 0); + } resume_execution(current_kprobe, regs); + /*Restore back the original saved kprobes variables and continue. */ + if (kprobe_status == KPROBE_REENTER) { + restore_previous_kprobe(); + goto out; + } unlock_kprobes(); +out: preempt_enable_no_resched(); return 1; @@ -392,3 +432,4 @@ int longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) } return 0; } + diff --git a/arch/sparc64/kernel/signal32.c b/arch/sparc64/kernel/signal32.c index 9a375e975cff..f28428f4170e 100644 --- a/arch/sparc64/kernel/signal32.c +++ b/arch/sparc64/kernel/signal32.c @@ -102,7 +102,7 @@ typedef struct compat_siginfo{ /* POSIX.1b timers */ struct { - timer_t _tid; /* timer id */ + compat_timer_t _tid; /* timer id */ int _overrun; /* overrun count */ compat_sigval_t _sigval; /* same as below */ int _sys_private; /* not to be passed to user */ diff --git a/arch/um/Kconfig b/arch/um/Kconfig index b8e952c88fd1..9469e77303e6 100644 --- a/arch/um/Kconfig +++ b/arch/um/Kconfig @@ -74,6 +74,7 @@ config MODE_SKAS option will shrink the UML binary slightly. source "arch/um/Kconfig_arch" +source "mm/Kconfig" config LD_SCRIPT_STATIC bool diff --git a/arch/um/kernel/process_kern.c b/arch/um/kernel/process_kern.c index 804c6bbdf67c..157584ae4792 100644 --- a/arch/um/kernel/process_kern.c +++ b/arch/um/kernel/process_kern.c @@ -8,6 +8,7 @@ #include "linux/kernel.h" #include "linux/sched.h" #include "linux/interrupt.h" +#include "linux/string.h" #include "linux/mm.h" #include "linux/slab.h" #include "linux/utsname.h" @@ -322,12 +323,7 @@ void do_uml_exitcalls(void) char *uml_strdup(char *string) { - char *new; - - new = kmalloc(strlen(string) + 1, GFP_KERNEL); - if(new == NULL) return(NULL); - strcpy(new, string); - return(new); + return kstrdup(string, GFP_KERNEL); } int copy_to_user_proc(void __user *to, void *from, int size) diff --git a/arch/v850/Kconfig b/arch/v850/Kconfig index 90cd4baa75ee..27febd6ffa80 100644 --- a/arch/v850/Kconfig +++ b/arch/v850/Kconfig @@ -218,6 +218,8 @@ menu "Processor type and features" a lot of RAM, and you need to able to allocate very large contiguous chunks. If unsure, say N. +source "mm/Kconfig" + endmenu diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig index 289f448ac89c..db259757dc8a 100644 --- a/arch/x86_64/Kconfig +++ b/arch/x86_64/Kconfig @@ -265,7 +265,7 @@ config NUMA_EMU into virtual nodes when booted with "numa=fake=N", where N is the number of nodes. This is only useful for debugging. -config DISCONTIGMEM +config ARCH_DISCONTIGMEM_ENABLE bool depends on NUMA default y @@ -274,6 +274,27 @@ config NUMA bool default n +config ARCH_DISCONTIGMEM_ENABLE + def_bool y + depends on NUMA + +config ARCH_DISCONTIGMEM_DEFAULT + def_bool y + depends on NUMA + +config ARCH_SPARSEMEM_ENABLE + def_bool y + depends on NUMA + +config ARCH_FLATMEM_ENABLE + def_bool y + depends on !NUMA + +source "mm/Kconfig" + +config HAVE_ARCH_EARLY_PFN_TO_NID + def_bool y + config HAVE_DEC_LOCK bool depends on SMP @@ -381,6 +402,8 @@ config SECCOMP If unsure, say Y. Only embedded should say N here. +source kernel/Kconfig.hz + endmenu # diff --git a/arch/x86_64/boot/install.sh b/arch/x86_64/boot/install.sh index 90f2452b3b9e..f17b40dfc0f4 100644 --- a/arch/x86_64/boot/install.sh +++ b/arch/x86_64/boot/install.sh @@ -21,8 +21,8 @@ # User may have a custom install script -if [ -x ~/bin/installkernel ]; then exec ~/bin/installkernel "$@"; fi -if [ -x /sbin/installkernel ]; then exec /sbin/installkernel "$@"; fi +if [ -x ~/bin/${CROSS_COMPILE}installkernel ]; then exec ~/bin/${CROSS_COMPILE}installkernel "$@"; fi +if [ -x /sbin/${CROSS_COMPILE}installkernel ]; then exec /sbin/${CROSS_COMPILE}installkernel "$@"; fi # Default install - same as make zlilo diff --git a/arch/x86_64/ia32/ia32_signal.c b/arch/x86_64/ia32/ia32_signal.c index fbd09b5126ce..66e2821533db 100644 --- a/arch/x86_64/ia32/ia32_signal.c +++ b/arch/x86_64/ia32/ia32_signal.c @@ -428,8 +428,8 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs * regs, size_t frame_size) return (void __user *)((rsp - frame_size) & -8UL); } -void ia32_setup_frame(int sig, struct k_sigaction *ka, - compat_sigset_t *set, struct pt_regs * regs) +int ia32_setup_frame(int sig, struct k_sigaction *ka, + compat_sigset_t *set, struct pt_regs * regs) { struct sigframe __user *frame; int err = 0; @@ -514,14 +514,15 @@ void ia32_setup_frame(int sig, struct k_sigaction *ka, current->comm, current->pid, frame, regs->rip, frame->pretcode); #endif - return; + return 1; give_sigsegv: force_sigsegv(sig, current); + return 0; } -void ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, - compat_sigset_t *set, struct pt_regs * regs) +int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, + compat_sigset_t *set, struct pt_regs * regs) { struct rt_sigframe __user *frame; int err = 0; @@ -613,9 +614,9 @@ void ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, current->comm, current->pid, frame, regs->rip, frame->pretcode); #endif - return; + return 1; give_sigsegv: force_sigsegv(sig, current); + return 0; } - diff --git a/arch/x86_64/kernel/aperture.c b/arch/x86_64/kernel/aperture.c index 504e63474993..c9a6b812e926 100644 --- a/arch/x86_64/kernel/aperture.c +++ b/arch/x86_64/kernel/aperture.c @@ -40,11 +40,7 @@ int fix_aperture __initdata = 1; static u32 __init allocate_aperture(void) { -#ifdef CONFIG_DISCONTIGMEM pg_data_t *nd0 = NODE_DATA(0); -#else - pg_data_t *nd0 = &contig_page_data; -#endif u32 aper_size; void *p; diff --git a/arch/x86_64/kernel/early_printk.c b/arch/x86_64/kernel/early_printk.c index e3a19e8ebbf8..9631c747c5e3 100644 --- a/arch/x86_64/kernel/early_printk.c +++ b/arch/x86_64/kernel/early_printk.c @@ -2,20 +2,24 @@ #include <linux/kernel.h> #include <linux/init.h> #include <linux/string.h> +#include <linux/tty.h> #include <asm/io.h> #include <asm/processor.h> /* Simple VGA output */ #ifdef __i386__ +#include <asm/setup.h> #define VGABASE (__ISA_IO_base + 0xb8000) #else +#include <asm/bootsetup.h> #define VGABASE ((void __iomem *)0xffffffff800b8000UL) #endif -#define MAX_YPOS 25 -#define MAX_XPOS 80 +#define MAX_YPOS max_ypos +#define MAX_XPOS max_xpos +static int max_ypos = 25, max_xpos = 80; static int current_ypos = 1, current_xpos = 0; static void early_vga_write(struct console *con, const char *str, unsigned n) @@ -196,7 +200,10 @@ int __init setup_early_printk(char *opt) } else if (!strncmp(buf, "ttyS", 4)) { early_serial_init(buf); early_console = &early_serial_console; - } else if (!strncmp(buf, "vga", 3)) { + } else if (!strncmp(buf, "vga", 3) + && SCREEN_INFO.orig_video_isVGA == 1) { + max_xpos = SCREEN_INFO.orig_video_cols; + max_ypos = SCREEN_INFO.orig_video_lines; early_console = &early_vga_console; } early_console_initialized = 1; diff --git a/arch/x86_64/kernel/head64.c b/arch/x86_64/kernel/head64.c index 0f8c78dcd38c..cf6ab147a2a5 100644 --- a/arch/x86_64/kernel/head64.c +++ b/arch/x86_64/kernel/head64.c @@ -94,7 +94,7 @@ void __init x86_64_start_kernel(char * real_mode_data) s = strstr(saved_command_line, "earlyprintk="); if (s != NULL) setup_early_printk(s); -#ifdef CONFIG_DISCONTIGMEM +#ifdef CONFIG_NUMA s = strstr(saved_command_line, "numa="); if (s != NULL) numa_setup(s+5); diff --git a/arch/x86_64/kernel/i8259.c b/arch/x86_64/kernel/i8259.c index 7873d9ba8814..19eafa0aa95c 100644 --- a/arch/x86_64/kernel/i8259.c +++ b/arch/x86_64/kernel/i8259.c @@ -157,14 +157,13 @@ static unsigned int startup_8259A_irq(unsigned int irq) } static struct hw_interrupt_type i8259A_irq_type = { - "XT-PIC", - startup_8259A_irq, - shutdown_8259A_irq, - enable_8259A_irq, - disable_8259A_irq, - mask_and_ack_8259A, - end_8259A_irq, - NULL + .typename = "XT-PIC", + .startup = startup_8259A_irq, + .shutdown = shutdown_8259A_irq, + .enable = enable_8259A_irq, + .disable = disable_8259A_irq, + .ack = mask_and_ack_8259A, + .end = end_8259A_irq, }; /* diff --git a/arch/x86_64/kernel/kprobes.c b/arch/x86_64/kernel/kprobes.c index f77f8a0ff187..4e680f87a75f 100644 --- a/arch/x86_64/kernel/kprobes.c +++ b/arch/x86_64/kernel/kprobes.c @@ -27,6 +27,8 @@ * <prasanna@in.ibm.com> adapted for x86_64 * 2005-Mar Roland McGrath <roland@redhat.com> * Fixed to handle %rip-relative addressing mode correctly. + * 2005-May Rusty Lynch <rusty.lynch@intel.com> + * Added function return probes functionality */ #include <linux/config.h> @@ -37,18 +39,16 @@ #include <linux/slab.h> #include <linux/preempt.h> #include <linux/moduleloader.h> - +#include <asm/cacheflush.h> #include <asm/pgtable.h> #include <asm/kdebug.h> static DECLARE_MUTEX(kprobe_mutex); -/* kprobe_status settings */ -#define KPROBE_HIT_ACTIVE 0x00000001 -#define KPROBE_HIT_SS 0x00000002 - static struct kprobe *current_kprobe; static unsigned long kprobe_status, kprobe_old_rflags, kprobe_saved_rflags; +static struct kprobe *kprobe_prev; +static unsigned long kprobe_status_prev, kprobe_old_rflags_prev, kprobe_saved_rflags_prev; static struct pt_regs jprobe_saved_regs; static long *jprobe_saved_rsp; static kprobe_opcode_t *get_insn_slot(void); @@ -214,6 +214,21 @@ void arch_copy_kprobe(struct kprobe *p) BUG_ON((s64) (s32) disp != disp); /* Sanity check. */ *ripdisp = disp; } + p->opcode = *p->addr; +} + +void arch_arm_kprobe(struct kprobe *p) +{ + *p->addr = BREAKPOINT_INSTRUCTION; + flush_icache_range((unsigned long) p->addr, + (unsigned long) p->addr + sizeof(kprobe_opcode_t)); +} + +void arch_disarm_kprobe(struct kprobe *p) +{ + *p->addr = p->opcode; + flush_icache_range((unsigned long) p->addr, + (unsigned long) p->addr + sizeof(kprobe_opcode_t)); } void arch_remove_kprobe(struct kprobe *p) @@ -223,10 +238,29 @@ void arch_remove_kprobe(struct kprobe *p) down(&kprobe_mutex); } -static inline void disarm_kprobe(struct kprobe *p, struct pt_regs *regs) +static inline void save_previous_kprobe(void) { - *p->addr = p->opcode; - regs->rip = (unsigned long)p->addr; + kprobe_prev = current_kprobe; + kprobe_status_prev = kprobe_status; + kprobe_old_rflags_prev = kprobe_old_rflags; + kprobe_saved_rflags_prev = kprobe_saved_rflags; +} + +static inline void restore_previous_kprobe(void) +{ + current_kprobe = kprobe_prev; + kprobe_status = kprobe_status_prev; + kprobe_old_rflags = kprobe_old_rflags_prev; + kprobe_saved_rflags = kprobe_saved_rflags_prev; +} + +static inline void set_current_kprobe(struct kprobe *p, struct pt_regs *regs) +{ + current_kprobe = p; + kprobe_saved_rflags = kprobe_old_rflags + = (regs->eflags & (TF_MASK | IF_MASK)); + if (is_IF_modifier(p->ainsn.insn)) + kprobe_saved_rflags &= ~IF_MASK; } static void prepare_singlestep(struct kprobe *p, struct pt_regs *regs) @@ -240,6 +274,50 @@ static void prepare_singlestep(struct kprobe *p, struct pt_regs *regs) regs->rip = (unsigned long)p->ainsn.insn; } +struct task_struct *arch_get_kprobe_task(void *ptr) +{ + return ((struct thread_info *) (((unsigned long) ptr) & + (~(THREAD_SIZE -1))))->task; +} + +void arch_prepare_kretprobe(struct kretprobe *rp, struct pt_regs *regs) +{ + unsigned long *sara = (unsigned long *)regs->rsp; + struct kretprobe_instance *ri; + static void *orig_ret_addr; + + /* + * Save the return address when the return probe hits + * the first time, and use it to populate the (krprobe + * instance)->ret_addr for subsequent return probes at + * the same addrress since stack address would have + * the kretprobe_trampoline by then. + */ + if (((void*) *sara) != kretprobe_trampoline) + orig_ret_addr = (void*) *sara; + + if ((ri = get_free_rp_inst(rp)) != NULL) { + ri->rp = rp; + ri->stack_addr = sara; + ri->ret_addr = orig_ret_addr; + add_rp_inst(ri); + /* Replace the return addr with trampoline addr */ + *sara = (unsigned long) &kretprobe_trampoline; + } else { + rp->nmissed++; + } +} + +void arch_kprobe_flush_task(struct task_struct *tk) +{ + struct kretprobe_instance *ri; + while ((ri = get_rp_inst_tsk(tk)) != NULL) { + *((unsigned long *)(ri->stack_addr)) = + (unsigned long) ri->ret_addr; + recycle_rp_inst(ri); + } +} + /* * Interrupts are disabled on entry as trap3 is an interrupt gate and they * remain disabled thorough out this function. @@ -264,9 +342,30 @@ int kprobe_handler(struct pt_regs *regs) regs->eflags |= kprobe_saved_rflags; unlock_kprobes(); goto no_kprobe; + } else if (kprobe_status == KPROBE_HIT_SSDONE) { + /* TODO: Provide re-entrancy from + * post_kprobes_handler() and avoid exception + * stack corruption while single-stepping on + * the instruction of the new probe. + */ + arch_disarm_kprobe(p); + regs->rip = (unsigned long)p->addr; + ret = 1; + } else { + /* We have reentered the kprobe_handler(), since + * another probe was hit while within the + * handler. We here save the original kprobe + * variables and just single step on instruction + * of the new probe without calling any user + * handlers. + */ + save_previous_kprobe(); + set_current_kprobe(p, regs); + p->nmissed++; + prepare_singlestep(p, regs); + kprobe_status = KPROBE_REENTER; + return 1; } - disarm_kprobe(p, regs); - ret = 1; } else { p = current_kprobe; if (p->break_handler && p->break_handler(p, regs)) { @@ -296,11 +395,7 @@ int kprobe_handler(struct pt_regs *regs) } kprobe_status = KPROBE_HIT_ACTIVE; - current_kprobe = p; - kprobe_saved_rflags = kprobe_old_rflags - = (regs->eflags & (TF_MASK | IF_MASK)); - if (is_IF_modifier(p->ainsn.insn)) - kprobe_saved_rflags &= ~IF_MASK; + set_current_kprobe(p, regs); if (p->pre_handler && p->pre_handler(p, regs)) /* handler has already set things up, so skip ss setup */ @@ -317,6 +412,55 @@ no_kprobe: } /* + * For function-return probes, init_kprobes() establishes a probepoint + * here. When a retprobed function returns, this probe is hit and + * trampoline_probe_handler() runs, calling the kretprobe's handler. + */ + void kretprobe_trampoline_holder(void) + { + asm volatile ( ".global kretprobe_trampoline\n" + "kretprobe_trampoline: \n" + "nop\n"); + } + +/* + * Called when we hit the probe point at kretprobe_trampoline + */ +int trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) +{ + struct task_struct *tsk; + struct kretprobe_instance *ri; + struct hlist_head *head; + struct hlist_node *node; + unsigned long *sara = (unsigned long *)regs->rsp - 1; + + tsk = arch_get_kprobe_task(sara); + head = kretprobe_inst_table_head(tsk); + + hlist_for_each_entry(ri, node, head, hlist) { + if (ri->stack_addr == sara && ri->rp) { + if (ri->rp->handler) + ri->rp->handler(ri, regs); + } + } + return 0; +} + +void trampoline_post_handler(struct kprobe *p, struct pt_regs *regs, + unsigned long flags) +{ + struct kretprobe_instance *ri; + /* RA already popped */ + unsigned long *sara = ((unsigned long *)regs->rsp) - 1; + + while ((ri = get_rp_inst(sara))) { + regs->rip = (unsigned long)ri->ret_addr; + recycle_rp_inst(ri); + } + regs->eflags &= ~TF_MASK; +} + +/* * Called after single-stepping. p->addr is the address of the * instruction whose first byte has been replaced by the "int 3" * instruction. To avoid the SMP problems that can occur when we @@ -401,13 +545,23 @@ int post_kprobe_handler(struct pt_regs *regs) if (!kprobe_running()) return 0; - if (current_kprobe->post_handler) + if ((kprobe_status != KPROBE_REENTER) && current_kprobe->post_handler) { + kprobe_status = KPROBE_HIT_SSDONE; current_kprobe->post_handler(current_kprobe, regs, 0); + } - resume_execution(current_kprobe, regs); + if (current_kprobe->post_handler != trampoline_post_handler) + resume_execution(current_kprobe, regs); regs->eflags |= kprobe_saved_rflags; - unlock_kprobes(); + /* Restore the original saved kprobes variables and continue. */ + if (kprobe_status == KPROBE_REENTER) { + restore_previous_kprobe(); + goto out; + } else { + unlock_kprobes(); + } +out: preempt_enable_no_resched(); /* diff --git a/arch/x86_64/kernel/mpparse.c b/arch/x86_64/kernel/mpparse.c index 61a63be6b294..9c5aa2a790c7 100644 --- a/arch/x86_64/kernel/mpparse.c +++ b/arch/x86_64/kernel/mpparse.c @@ -23,6 +23,7 @@ #include <linux/kernel_stat.h> #include <linux/mc146818rtc.h> #include <linux/acpi.h> +#include <linux/module.h> #include <asm/smp.h> #include <asm/mtrr.h> @@ -45,7 +46,8 @@ int acpi_found_madt; int apic_version [MAX_APICS]; unsigned char mp_bus_id_to_type [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 }; int mp_bus_id_to_pci_bus [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 }; -cpumask_t pci_bus_to_cpumask [256] = { [0 ... 255] = CPU_MASK_ALL }; +unsigned char pci_bus_to_node [256]; +EXPORT_SYMBOL(pci_bus_to_node); static int mp_current_pci_id = 0; /* I/O APIC entries */ @@ -904,11 +906,20 @@ void __init mp_config_acpi_legacy_irqs (void) return; } +#define MAX_GSI_NUM 4096 + int mp_register_gsi(u32 gsi, int edge_level, int active_high_low) { int ioapic = -1; int ioapic_pin = 0; int idx, bit = 0; + static int pci_irq = 16; + /* + * Mapping between Global System Interrupts, which + * represent all possible interrupts, to the IRQs + * assigned to actual devices. + */ + static int gsi_to_irq[MAX_GSI_NUM]; if (acpi_irq_model != ACPI_IRQ_MODEL_IOAPIC) return gsi; @@ -943,11 +954,21 @@ int mp_register_gsi(u32 gsi, int edge_level, int active_high_low) if ((1<<bit) & mp_ioapic_routing[ioapic].pin_programmed[idx]) { Dprintk(KERN_DEBUG "Pin %d-%d already programmed\n", mp_ioapic_routing[ioapic].apic_id, ioapic_pin); - return gsi; + return gsi_to_irq[gsi]; } mp_ioapic_routing[ioapic].pin_programmed[idx] |= (1<<bit); + if (edge_level) { + /* + * For PCI devices assign IRQs in order, avoiding gaps + * due to unused I/O APIC pins. + */ + int irq = gsi; + gsi = pci_irq++; + gsi_to_irq[irq] = gsi; + } + io_apic_set_pci_routing(ioapic, ioapic_pin, gsi, edge_level == ACPI_EDGE_SENSITIVE ? 0 : 1, active_high_low == ACPI_ACTIVE_HIGH ? 0 : 1); diff --git a/arch/x86_64/kernel/process.c b/arch/x86_64/kernel/process.c index dce8bab4306c..e59d1f9d6163 100644 --- a/arch/x86_64/kernel/process.c +++ b/arch/x86_64/kernel/process.c @@ -34,6 +34,7 @@ #include <linux/ptrace.h> #include <linux/utsname.h> #include <linux/random.h> +#include <linux/kprobes.h> #include <asm/uaccess.h> #include <asm/pgtable.h> @@ -293,6 +294,14 @@ void exit_thread(void) { struct task_struct *me = current; struct thread_struct *t = &me->thread; + + /* + * Remove function-return probe instances associated with this task + * and put them back on the free list. Do not insert an exit probe for + * this function, it will be disabled by kprobe_flush_task if you do. + */ + kprobe_flush_task(me); + if (me->thread.io_bitmap_ptr) { struct tss_struct *tss = &per_cpu(init_tss, get_cpu()); @@ -312,6 +321,13 @@ void flush_thread(void) struct task_struct *tsk = current; struct thread_info *t = current_thread_info(); + /* + * Remove function-return probe instances associated with this task + * and put them back on the free list. Do not insert an exit probe for + * this function, it will be disabled by kprobe_flush_task if you do. + */ + kprobe_flush_task(tsk); + if (t->flags & _TIF_ABI_PENDING) t->flags ^= (_TIF_ABI_PENDING | _TIF_IA32); diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c index 99f038ede23c..000015dd5a8b 100644 --- a/arch/x86_64/kernel/setup.c +++ b/arch/x86_64/kernel/setup.c @@ -40,6 +40,8 @@ #include <linux/acpi.h> #include <linux/kallsyms.h> #include <linux/edd.h> +#include <linux/mmzone.h> + #include <asm/mtrr.h> #include <asm/uaccess.h> #include <asm/system.h> @@ -348,7 +350,7 @@ static __init void parse_cmdline_early (char ** cmdline_p) if (!memcmp(from, "mem=", 4)) parse_memopt(from+4, &from); -#ifdef CONFIG_DISCONTIGMEM +#ifdef CONFIG_NUMA if (!memcmp(from, "numa=", 5)) numa_setup(from+5); #endif @@ -377,17 +379,20 @@ static __init void parse_cmdline_early (char ** cmdline_p) *cmdline_p = command_line; } -#ifndef CONFIG_DISCONTIGMEM -static void __init contig_initmem_init(void) +#ifndef CONFIG_NUMA +static void __init +contig_initmem_init(unsigned long start_pfn, unsigned long end_pfn) { - unsigned long bootmap_size, bootmap; - bootmap_size = bootmem_bootmap_pages(end_pfn)<<PAGE_SHIFT; - bootmap = find_e820_area(0, end_pfn<<PAGE_SHIFT, bootmap_size); - if (bootmap == -1L) - panic("Cannot find bootmem map of size %ld\n",bootmap_size); - bootmap_size = init_bootmem(bootmap >> PAGE_SHIFT, end_pfn); - e820_bootmem_free(&contig_page_data, 0, end_pfn << PAGE_SHIFT); - reserve_bootmem(bootmap, bootmap_size); + unsigned long bootmap_size, bootmap; + + memory_present(0, start_pfn, end_pfn); + bootmap_size = bootmem_bootmap_pages(end_pfn)<<PAGE_SHIFT; + bootmap = find_e820_area(0, end_pfn<<PAGE_SHIFT, bootmap_size); + if (bootmap == -1L) + panic("Cannot find bootmem map of size %ld\n",bootmap_size); + bootmap_size = init_bootmem(bootmap >> PAGE_SHIFT, end_pfn); + e820_bootmem_free(NODE_DATA(0), 0, end_pfn << PAGE_SHIFT); + reserve_bootmem(bootmap, bootmap_size); } #endif @@ -554,10 +559,10 @@ void __init setup_arch(char **cmdline_p) acpi_numa_init(); #endif -#ifdef CONFIG_DISCONTIGMEM +#ifdef CONFIG_NUMA numa_initmem_init(0, end_pfn); #else - contig_initmem_init(); + contig_initmem_init(0, end_pfn); #endif /* Reserve direct mapping */ @@ -618,6 +623,8 @@ void __init setup_arch(char **cmdline_p) } } #endif + + sparse_init(); paging_init(); check_ioapic(); diff --git a/arch/x86_64/kernel/signal.c b/arch/x86_64/kernel/signal.c index 429c0269dc4e..98b7ba95d581 100644 --- a/arch/x86_64/kernel/signal.c +++ b/arch/x86_64/kernel/signal.c @@ -34,9 +34,9 @@ #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) -void ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, +int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, sigset_t *set, struct pt_regs * regs); -void ia32_setup_frame(int sig, struct k_sigaction *ka, +int ia32_setup_frame(int sig, struct k_sigaction *ka, sigset_t *set, struct pt_regs * regs); asmlinkage long @@ -238,7 +238,7 @@ get_stack(struct k_sigaction *ka, struct pt_regs *regs, unsigned long size) return (void __user *)round_down(rsp - size, 16); } -static void setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, +static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, sigset_t *set, struct pt_regs * regs) { struct rt_sigframe __user *frame; @@ -327,20 +327,23 @@ static void setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, current->comm, current->pid, frame, regs->rip, frame->pretcode); #endif - return; + return 1; give_sigsegv: force_sigsegv(sig, current); + return 0; } /* * OK, we're invoking a handler */ -static void +static int handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, sigset_t *oldset, struct pt_regs *regs) { + int ret; + #ifdef DEBUG_SIG printk("handle_signal pid:%d sig:%lu rip:%lx rsp:%lx regs=%p\n", current->pid, sig, @@ -384,20 +387,22 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, #ifdef CONFIG_IA32_EMULATION if (test_thread_flag(TIF_IA32)) { if (ka->sa.sa_flags & SA_SIGINFO) - ia32_setup_rt_frame(sig, ka, info, oldset, regs); + ret = ia32_setup_rt_frame(sig, ka, info, oldset, regs); else - ia32_setup_frame(sig, ka, oldset, regs); + ret = ia32_setup_frame(sig, ka, oldset, regs); } else #endif - setup_rt_frame(sig, ka, info, oldset, regs); + ret = setup_rt_frame(sig, ka, info, oldset, regs); - if (!(ka->sa.sa_flags & SA_NODEFER)) { + if (ret && !(ka->sa.sa_flags & SA_NODEFER)) { spin_lock_irq(¤t->sighand->siglock); sigorsets(¤t->blocked,¤t->blocked,&ka->sa.sa_mask); sigaddset(¤t->blocked,sig); recalc_sigpending(); spin_unlock_irq(¤t->sighand->siglock); } + + return ret; } /* @@ -417,7 +422,7 @@ int do_signal(struct pt_regs *regs, sigset_t *oldset) * kernel mode. Just return without doing anything * if so. */ - if ((regs->cs & 3) != 3) + if (!user_mode(regs)) return 1; if (try_to_freeze(0)) @@ -434,11 +439,10 @@ int do_signal(struct pt_regs *regs, sigset_t *oldset) * inside the kernel. */ if (current->thread.debugreg7) - asm volatile("movq %0,%%db7" : : "r" (current->thread.debugreg7)); + set_debugreg(current->thread.debugreg7, 7); /* Whee! Actually deliver the signal. */ - handle_signal(signr, &info, &ka, oldset, regs); - return 1; + return handle_signal(signr, &info, &ka, oldset, regs); } no_signal: diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c index fb8c809b4cd9..66bf6ddeb0c3 100644 --- a/arch/x86_64/kernel/time.c +++ b/arch/x86_64/kernel/time.c @@ -64,6 +64,7 @@ static int notsc __initdata = 0; unsigned int cpu_khz; /* TSC clocks / usec, not used here */ static unsigned long hpet_period; /* fsecs / HPET clock */ unsigned long hpet_tick; /* HPET clocks / interrupt */ +static int hpet_use_timer; unsigned long vxtime_hz = PIT_TICK_RATE; int report_lost_ticks; /* command line option */ unsigned long long monotonic_base; @@ -105,7 +106,9 @@ static inline unsigned int do_gettimeoffset_tsc(void) static inline unsigned int do_gettimeoffset_hpet(void) { - return ((hpet_readl(HPET_COUNTER) - vxtime.last) * vxtime.quot) >> 32; + /* cap counter read to one tick to avoid inconsistencies */ + unsigned long counter = hpet_readl(HPET_COUNTER) - vxtime.last; + return (min(counter,hpet_tick) * vxtime.quot) >> 32; } unsigned int (*do_gettimeoffset)(void) = do_gettimeoffset_tsc; @@ -301,7 +304,7 @@ unsigned long long monotonic_clock(void) last_offset = vxtime.last; base = monotonic_base; - this_offset = hpet_readl(HPET_T0_CMP) - hpet_tick; + this_offset = hpet_readl(HPET_COUNTER); } while (read_seqretry(&xtime_lock, seq)); offset = (this_offset - last_offset); @@ -377,7 +380,14 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) write_seqlock(&xtime_lock); - if (vxtime.hpet_address) { + if (vxtime.hpet_address) + offset = hpet_readl(HPET_COUNTER); + + if (hpet_use_timer) { + /* if we're using the hpet timer functionality, + * we can more accurately know the counter value + * when the timer interrupt occured. + */ offset = hpet_readl(HPET_T0_CMP) - hpet_tick; delay = hpet_readl(HPET_COUNTER) - offset; } else { @@ -803,17 +813,18 @@ static int hpet_timer_stop_set_go(unsigned long tick) * Set up timer 0, as periodic with first interrupt to happen at hpet_tick, * and period also hpet_tick. */ - - hpet_writel(HPET_TN_ENABLE | HPET_TN_PERIODIC | HPET_TN_SETVAL | + if (hpet_use_timer) { + hpet_writel(HPET_TN_ENABLE | HPET_TN_PERIODIC | HPET_TN_SETVAL | HPET_TN_32BIT, HPET_T0_CFG); - hpet_writel(hpet_tick, HPET_T0_CMP); - hpet_writel(hpet_tick, HPET_T0_CMP); /* AK: why twice? */ - + hpet_writel(hpet_tick, HPET_T0_CMP); + hpet_writel(hpet_tick, HPET_T0_CMP); /* AK: why twice? */ + cfg |= HPET_CFG_LEGACY; + } /* * Go! */ - cfg |= HPET_CFG_ENABLE | HPET_CFG_LEGACY; + cfg |= HPET_CFG_ENABLE; hpet_writel(cfg, HPET_CFG); return 0; @@ -834,8 +845,7 @@ static int hpet_init(void) id = hpet_readl(HPET_ID); - if (!(id & HPET_ID_VENDOR) || !(id & HPET_ID_NUMBER) || - !(id & HPET_ID_LEGSUP)) + if (!(id & HPET_ID_VENDOR) || !(id & HPET_ID_NUMBER)) return -1; hpet_period = hpet_readl(HPET_PERIOD); @@ -845,6 +855,8 @@ static int hpet_init(void) hpet_tick = (1000000000L * (USEC_PER_SEC / HZ) + hpet_period / 2) / hpet_period; + hpet_use_timer = (id & HPET_ID_LEGSUP); + return hpet_timer_stop_set_go(hpet_tick); } @@ -901,9 +913,11 @@ void __init time_init(void) set_normalized_timespec(&wall_to_monotonic, -xtime.tv_sec, -xtime.tv_nsec); - if (!hpet_init()) { + if (!hpet_init()) vxtime_hz = (1000000000000000L + hpet_period / 2) / hpet_period; + + if (hpet_use_timer) { cpu_khz = hpet_calibrate_tsc(); timename = "HPET"; #ifdef CONFIG_X86_PM_TIMER @@ -968,7 +982,7 @@ void __init time_init_gtod(void) if (unsynchronized_tsc()) notsc = 1; if (vxtime.hpet_address && notsc) { - timetype = "HPET"; + timetype = hpet_use_timer ? "HPET" : "PIT/HPET"; vxtime.last = hpet_readl(HPET_T0_CMP) - hpet_tick; vxtime.mode = VXTIME_HPET; do_gettimeoffset = do_gettimeoffset_hpet; @@ -983,7 +997,7 @@ void __init time_init_gtod(void) printk(KERN_INFO "Disabling vsyscall due to use of PM timer\n"); #endif } else { - timetype = vxtime.hpet_address ? "HPET/TSC" : "PIT/TSC"; + timetype = hpet_use_timer ? "HPET/TSC" : "PIT/TSC"; vxtime.mode = VXTIME_TSC; } diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c index 3dfec8fdabcd..121646fc43f6 100644 --- a/arch/x86_64/kernel/traps.c +++ b/arch/x86_64/kernel/traps.c @@ -274,7 +274,7 @@ EXPORT_SYMBOL(dump_stack); void show_registers(struct pt_regs *regs) { int i; - int in_kernel = (regs->cs & 3) == 0; + int in_kernel = !user_mode(regs); unsigned long rsp; const int cpu = safe_smp_processor_id(); struct task_struct *cur = cpu_pda[cpu].pcurrent; @@ -318,7 +318,7 @@ void handle_BUG(struct pt_regs *regs) struct bug_frame f; char tmp; - if (regs->cs & 3) + if (user_mode(regs)) return; if (__copy_from_user(&f, (struct bug_frame *) regs->rip, sizeof(struct bug_frame))) @@ -437,7 +437,7 @@ static void do_trap(int trapnr, int signr, char *str, } #endif - if ((regs->cs & 3) != 0) { + if (user_mode(regs)) { struct task_struct *tsk = current; if (exception_trace && unhandled_signal(tsk, signr)) @@ -522,7 +522,7 @@ asmlinkage void do_general_protection(struct pt_regs * regs, long error_code) } #endif - if ((regs->cs & 3)!=0) { + if (user_mode(regs)) { struct task_struct *tsk = current; if (exception_trace && unhandled_signal(tsk, SIGSEGV)) @@ -638,7 +638,7 @@ asmlinkage struct pt_regs *sync_regs(struct pt_regs *eregs) if (eregs == (struct pt_regs *)eregs->rsp) ; /* Exception from user space */ - else if (eregs->cs & 3) + else if (user_mode(eregs)) regs = ((struct pt_regs *)current->thread.rsp0) - 1; /* Exception from kernel and interrupts are enabled. Move to kernel process stack. */ @@ -669,7 +669,7 @@ asmlinkage void do_debug(struct pt_regs * regs, unsigned long error_code) } #endif - asm("movq %%db6,%0" : "=r" (condition)); + get_debugreg(condition, 6); if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code, SIGTRAP) == NOTIFY_STOP) @@ -697,7 +697,7 @@ asmlinkage void do_debug(struct pt_regs * regs, unsigned long error_code) * allowing programs to debug themselves without the ptrace() * interface. */ - if ((regs->cs & 3) == 0) + if (!user_mode(regs)) goto clear_TF_reenable; /* * Was the TF flag set by a debugger? If so, clear it now, @@ -715,13 +715,13 @@ asmlinkage void do_debug(struct pt_regs * regs, unsigned long error_code) info.si_signo = SIGTRAP; info.si_errno = 0; info.si_code = TRAP_BRKPT; - if ((regs->cs & 3) == 0) + if (!user_mode(regs)) goto clear_dr7; info.si_addr = (void __user *)regs->rip; force_sig_info(SIGTRAP, &info, tsk); clear_dr7: - asm volatile("movq %0,%%db7"::"r"(0UL)); + set_debugreg(0UL, 7); return; clear_TF_reenable: @@ -756,7 +756,7 @@ asmlinkage void do_coprocessor_error(struct pt_regs *regs) unsigned short cwd, swd; conditional_sti(regs); - if ((regs->cs & 3) == 0 && + if (!user_mode(regs) && kernel_math_error(regs, "kernel x87 math error")) return; @@ -822,7 +822,7 @@ asmlinkage void do_simd_coprocessor_error(struct pt_regs *regs) unsigned short mxcsr; conditional_sti(regs); - if ((regs->cs & 3) == 0 && + if (!user_mode(regs) && kernel_math_error(regs, "kernel simd math error")) return; diff --git a/arch/x86_64/lib/delay.c b/arch/x86_64/lib/delay.c index aed61a668a1b..33a873a3c223 100644 --- a/arch/x86_64/lib/delay.c +++ b/arch/x86_64/lib/delay.c @@ -12,6 +12,7 @@ #include <linux/sched.h> #include <linux/delay.h> #include <asm/delay.h> +#include <asm/msr.h> #ifdef CONFIG_SMP #include <asm/smp.h> @@ -19,6 +20,12 @@ int x86_udelay_tsc = 0; /* Delay via TSC */ +int read_current_timer(unsigned long *timer_value) +{ + rdtscll(*timer_value); + return 0; +} + void __delay(unsigned long loops) { unsigned bclock, now; diff --git a/arch/x86_64/mm/Makefile b/arch/x86_64/mm/Makefile index 66c354ad80ca..1d232a87f113 100644 --- a/arch/x86_64/mm/Makefile +++ b/arch/x86_64/mm/Makefile @@ -4,7 +4,7 @@ obj-y := init.o fault.o ioremap.o extable.o pageattr.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o -obj-$(CONFIG_DISCONTIGMEM) += numa.o +obj-$(CONFIG_NUMA) += numa.o obj-$(CONFIG_K8_NUMA) += k8topology.o obj-$(CONFIG_ACPI_NUMA) += srat.o diff --git a/arch/x86_64/mm/fault.c b/arch/x86_64/mm/fault.c index 57d3ab15a5c7..2f187986f940 100644 --- a/arch/x86_64/mm/fault.c +++ b/arch/x86_64/mm/fault.c @@ -74,7 +74,7 @@ static noinline int is_prefetch(struct pt_regs *regs, unsigned long addr, instr = (unsigned char *)convert_rip_to_linear(current, regs); max_instr = instr + 15; - if ((regs->cs & 3) != 0 && instr >= (unsigned char *)TASK_SIZE) + if (user_mode(regs) && instr >= (unsigned char *)TASK_SIZE) return 0; while (scan_more && instr < max_instr) { @@ -106,7 +106,7 @@ static noinline int is_prefetch(struct pt_regs *regs, unsigned long addr, /* Could check the LDT for lm, but for now it's good enough to assume that long mode only uses well known segments or kernel. */ - scan_more = ((regs->cs & 3) == 0) || (regs->cs == __USER_CS); + scan_more = (!user_mode(regs)) || (regs->cs == __USER_CS); break; case 0x60: diff --git a/arch/x86_64/mm/init.c b/arch/x86_64/mm/init.c index dbe53b4c7e66..72e4b364ed73 100644 --- a/arch/x86_64/mm/init.c +++ b/arch/x86_64/mm/init.c @@ -318,7 +318,7 @@ void zap_low_mappings(void) flush_tlb_all(); } -#ifndef CONFIG_DISCONTIGMEM +#ifndef CONFIG_NUMA void __init paging_init(void) { { @@ -427,13 +427,16 @@ void __init mem_init(void) reservedpages = 0; /* this will put all low memory onto the freelists */ -#ifdef CONFIG_DISCONTIGMEM +#ifdef CONFIG_NUMA totalram_pages += numa_free_all_bootmem(); tmp = 0; /* should count reserved pages here for all nodes */ #else + +#ifdef CONFIG_FLATMEM max_mapnr = end_pfn; if (!mem_map) BUG(); +#endif totalram_pages += free_all_bootmem(); @@ -515,7 +518,7 @@ void free_initrd_mem(unsigned long start, unsigned long end) void __init reserve_bootmem_generic(unsigned long phys, unsigned len) { /* Should check here against the e820 map to avoid double free */ -#ifdef CONFIG_DISCONTIGMEM +#ifdef CONFIG_NUMA int nid = phys_to_nid(phys); reserve_bootmem_node(NODE_DATA(nid), phys, len); #else diff --git a/arch/x86_64/mm/ioremap.c b/arch/x86_64/mm/ioremap.c index 58aac23760ef..6972df480d2b 100644 --- a/arch/x86_64/mm/ioremap.c +++ b/arch/x86_64/mm/ioremap.c @@ -178,7 +178,7 @@ void __iomem * __ioremap(unsigned long phys_addr, unsigned long size, unsigned l if (phys_addr >= ISA_START_ADDRESS && last_addr < ISA_END_ADDRESS) return (__force void __iomem *)phys_to_virt(phys_addr); -#ifndef CONFIG_DISCONTIGMEM +#ifdef CONFIG_FLATMEM /* * Don't allow anybody to remap normal RAM that we're using.. */ diff --git a/arch/x86_64/mm/numa.c b/arch/x86_64/mm/numa.c index fd9f25d7a6c4..84cde796ecb1 100644 --- a/arch/x86_64/mm/numa.c +++ b/arch/x86_64/mm/numa.c @@ -66,6 +66,13 @@ int __init compute_hash_shift(struct node *nodes, int numnodes) return -1; } +#ifdef CONFIG_SPARSEMEM +int early_pfn_to_nid(unsigned long pfn) +{ + return phys_to_nid(pfn << PAGE_SHIFT); +} +#endif + /* Initialize bootmem allocator for a node */ void __init setup_node_bootmem(int nodeid, unsigned long start, unsigned long end) { @@ -80,6 +87,7 @@ void __init setup_node_bootmem(int nodeid, unsigned long start, unsigned long en start_pfn = start >> PAGE_SHIFT; end_pfn = end >> PAGE_SHIFT; + memory_present(nodeid, start_pfn, end_pfn); nodedata_phys = find_e820_area(start, end, pgdat_size); if (nodedata_phys == -1L) panic("Cannot find memory pgdat in node %d\n", nodeid); diff --git a/arch/x86_64/pci/k8-bus.c b/arch/x86_64/pci/k8-bus.c index 62349c78db57..7e7d0c2a0025 100644 --- a/arch/x86_64/pci/k8-bus.c +++ b/arch/x86_64/pci/k8-bus.c @@ -53,25 +53,11 @@ fill_mp_bus_to_cpumask(void) for (j = SECONDARY_LDT_BUS_NUMBER(ldtbus); j <= SUBORDINATE_LDT_BUS_NUMBER(ldtbus); j++) - pci_bus_to_cpumask[j] = - node_to_cpumask(NODE_ID(nid)); + pci_bus_to_node[j] = NODE_ID(nid); } } } - /* quick sanity check */ - printed = 0; - for (i = 0; i < 256; i++) { - if (cpus_empty(pci_bus_to_cpumask[i])) { - pci_bus_to_cpumask[i] = CPU_MASK_ALL; - if (printed) - continue; - printk(KERN_ERR - "k8-bus.c: some busses have empty cpu mask\n"); - printed = 1; - } - } - return 0; } |