diff options
Diffstat (limited to 'arch/ia64')
35 files changed, 937 insertions, 355 deletions
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index db274da7dba1..70f7eb9fed35 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -66,15 +66,6 @@ config IA64_UNCACHED_ALLOCATOR bool select GENERIC_ALLOCATOR -config DMA_IS_DMA32 - bool - default y - -config DMA_IS_NORMAL - bool - depends on IA64_SGI_SN2 - default y - config AUDIT_ARCH bool default y @@ -365,6 +356,9 @@ config NODES_SHIFT MAX_NUMNODES will be 2^(This value). If in doubt, use the default. +config ARCH_POPULATES_NODE_MAP + def_bool y + # VIRTUAL_MEM_MAP and FLAT_NODE_MEM_MAP are functionally equivalent. # VIRTUAL_MEM_MAP has been retained for historical reasons. config VIRTUAL_MEM_MAP @@ -429,6 +423,14 @@ config IA64_PALINFO config SGI_SN def_bool y if (IA64_SGI_SN2 || IA64_GENERIC) +config IA64_ESI + bool "ESI (Extensible SAL Interface) support" + help + If you say Y here, support is built into the kernel to + make ESI calls. ESI calls are used to support vendor-specific + firmware extensions, such as the ability to inject memory-errors + for test-purposes. If you're unsure, say N. + source "drivers/sn/Kconfig" source "drivers/firmware/Kconfig" @@ -514,7 +516,7 @@ source "arch/ia64/oprofile/Kconfig" config KPROBES bool "Kprobes (EXPERIMENTAL)" - depends on EXPERIMENTAL && MODULES + depends on KALLSYMS && EXPERIMENTAL && MODULES help Kprobes allows you to trap at almost any kernel address and execute a callback function. register_kprobe() establishes diff --git a/arch/ia64/hp/sim/simeth.c b/arch/ia64/hp/sim/simeth.c index b5195be62818..e1a1b11473e2 100644 --- a/arch/ia64/hp/sim/simeth.c +++ b/arch/ia64/hp/sim/simeth.c @@ -320,7 +320,7 @@ simeth_device_event(struct notifier_block *this,unsigned long event, void *ptr) } printk(KERN_INFO "simeth_device_event: %s ipaddr=0x%x\n", - dev->name, htonl(ifa->ifa_local)); + dev->name, ntohl(ifa->ifa_local)); /* * XXX Fix me @@ -331,7 +331,7 @@ simeth_device_event(struct notifier_block *this,unsigned long event, void *ptr) local = dev->priv; /* now do it for real */ r = event == NETDEV_UP ? - netdev_attach(local->simfd, dev->irq, htonl(ifa->ifa_local)): + netdev_attach(local->simfd, dev->irq, ntohl(ifa->ifa_local)): netdev_detach(local->simfd); printk(KERN_INFO "simeth: netdev_attach/detach: event=%s ->%d\n", diff --git a/arch/ia64/hp/sim/simserial.c b/arch/ia64/hp/sim/simserial.c index 0daacc20ed36..246eb3d3757a 100644 --- a/arch/ia64/hp/sim/simserial.c +++ b/arch/ia64/hp/sim/simserial.c @@ -940,7 +940,7 @@ static inline void show_serial_version(void) printk(KERN_INFO " no serial options enabled\n"); } -static struct tty_operations hp_ops = { +static const struct tty_operations hp_ops = { .open = rs_open, .close = rs_close, .write = rs_write, diff --git a/arch/ia64/ia32/sys_ia32.c b/arch/ia64/ia32/sys_ia32.c index 6aa3c51619ca..9d6a3f210148 100644 --- a/arch/ia64/ia32/sys_ia32.c +++ b/arch/ia64/ia32/sys_ia32.c @@ -125,6 +125,7 @@ sys32_execve (char __user *name, compat_uptr_t __user *argv, compat_uptr_t __use int cp_compat_stat(struct kstat *stat, struct compat_stat __user *ubuf) { + compat_ino_t ino; int err; if ((u64) stat->size > MAX_NON_LFS || @@ -132,11 +133,15 @@ int cp_compat_stat(struct kstat *stat, struct compat_stat __user *ubuf) !old_valid_dev(stat->rdev)) return -EOVERFLOW; + ino = stat->ino; + if (sizeof(ino) < sizeof(stat->ino) && ino != stat->ino) + return -EOVERFLOW; + if (clear_user(ubuf, sizeof(*ubuf))) return -EFAULT; err = __put_user(old_encode_dev(stat->dev), &ubuf->st_dev); - err |= __put_user(stat->ino, &ubuf->st_ino); + err |= __put_user(ino, &ubuf->st_ino); err |= __put_user(stat->mode, &ubuf->st_mode); err |= __put_user(stat->nlink, &ubuf->st_nlink); err |= __put_user(high2lowuid(stat->uid), &ubuf->st_uid); @@ -1222,16 +1227,20 @@ struct readdir32_callback { }; static int -filldir32 (void *__buf, const char *name, int namlen, loff_t offset, ino_t ino, +filldir32 (void *__buf, const char *name, int namlen, loff_t offset, u64 ino, unsigned int d_type) { struct compat_dirent __user * dirent; struct getdents32_callback * buf = (struct getdents32_callback *) __buf; int reclen = ROUND_UP(offsetof(struct compat_dirent, d_name) + namlen + 1, 4); + u32 d_ino; buf->error = -EINVAL; /* only used if we fail.. */ if (reclen > buf->count) return -EINVAL; + d_ino = ino; + if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) + return -EOVERFLOW; buf->error = -EFAULT; /* only used if we fail.. */ dirent = buf->previous; if (dirent) @@ -1239,7 +1248,7 @@ filldir32 (void *__buf, const char *name, int namlen, loff_t offset, ino_t ino, return -EFAULT; dirent = buf->current_dir; buf->previous = dirent; - if (put_user(ino, &dirent->d_ino) + if (put_user(d_ino, &dirent->d_ino) || put_user(reclen, &dirent->d_reclen) || copy_to_user(dirent->d_name, name, namlen) || put_user(0, dirent->d_name + namlen)) @@ -1287,17 +1296,21 @@ out: } static int -fillonedir32 (void * __buf, const char * name, int namlen, loff_t offset, ino_t ino, +fillonedir32 (void * __buf, const char * name, int namlen, loff_t offset, u64 ino, unsigned int d_type) { struct readdir32_callback * buf = (struct readdir32_callback *) __buf; struct old_linux32_dirent __user * dirent; + u32 d_ino; if (buf->count) return -EINVAL; + d_ino = ino; + if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) + return -EOVERFLOW; buf->count++; dirent = buf->dirent; - if (put_user(ino, &dirent->d_ino) + if (put_user(d_ino, &dirent->d_ino) || put_user(offset, &dirent->d_offset) || put_user(namlen, &dirent->d_namlen) || copy_to_user(dirent->d_name, name, namlen) @@ -1942,7 +1955,7 @@ struct sysctl32 { unsigned int __unused[4]; }; -#ifdef CONFIG_SYSCTL +#ifdef CONFIG_SYSCTL_SYSCALL asmlinkage long sys32_sysctl (struct sysctl32 __user *args) { diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile index ad8215a3c586..31497496eb4b 100644 --- a/arch/ia64/kernel/Makefile +++ b/arch/ia64/kernel/Makefile @@ -32,6 +32,11 @@ obj-$(CONFIG_IA64_UNCACHED_ALLOCATOR) += uncached.o obj-$(CONFIG_AUDIT) += audit.o mca_recovery-y += mca_drv.o mca_drv_asm.o +obj-$(CONFIG_IA64_ESI) += esi.o +ifneq ($(CONFIG_IA64_ESI),) +obj-y += esi_stub.o # must be in kernel proper +endif + # The gate DSO image is built using a special linker script. targets += gate.so gate-syms.o diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c index 0176556aeecc..32c3abededc6 100644 --- a/arch/ia64/kernel/acpi.c +++ b/arch/ia64/kernel/acpi.c @@ -771,16 +771,19 @@ int acpi_map_cpu2node(acpi_handle handle, int cpu, long physid) { #ifdef CONFIG_ACPI_NUMA int pxm_id; + int nid; pxm_id = acpi_get_pxm(handle); - /* - * Assuming that the container driver would have set the proximity - * domain and would have initialized pxm_to_node(pxm_id) && pxm_flag + * We don't have cpu-only-node hotadd. But if the system equips + * SRAT table, pxm is already found and node is ready. + * So, just pxm_to_nid(pxm) is OK. + * This code here is for the system which doesn't have full SRAT + * table for possible cpus. */ - node_cpuid[cpu].nid = (pxm_id < 0) ? 0 : pxm_to_node(pxm_id); - + nid = acpi_map_pxm_to_node(pxm_id); node_cpuid[cpu].phys_id = physid; + node_cpuid[cpu].nid = nid; #endif return (0); } diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S index fef06571be99..e5b1be51b197 100644 --- a/arch/ia64/kernel/entry.S +++ b/arch/ia64/kernel/entry.S @@ -492,11 +492,11 @@ GLOBAL_ENTRY(prefetch_stack) br.ret.sptk.many rp END(prefetch_stack) -GLOBAL_ENTRY(execve) +GLOBAL_ENTRY(kernel_execve) mov r15=__NR_execve // put syscall number in place break __BREAK_SYSCALL br.ret.sptk.many rp -END(execve) +END(kernel_execve) GLOBAL_ENTRY(clone) mov r15=__NR_clone // put syscall number in place @@ -1605,8 +1605,8 @@ sys_call_table: data8 sys_ni_syscall // 1295 reserved for ppoll data8 sys_unshare data8 sys_splice - data8 sys_ni_syscall // reserved for set_robust_list - data8 sys_ni_syscall // reserved for get_robust_list + data8 sys_set_robust_list + data8 sys_get_robust_list data8 sys_sync_file_range // 1300 data8 sys_tee data8 sys_vmsplice diff --git a/arch/ia64/kernel/esi.c b/arch/ia64/kernel/esi.c new file mode 100644 index 000000000000..ebf4e988e78c --- /dev/null +++ b/arch/ia64/kernel/esi.c @@ -0,0 +1,205 @@ +/* + * Extensible SAL Interface (ESI) support routines. + * + * Copyright (C) 2006 Hewlett-Packard Co + * Alex Williamson <alex.williamson@hp.com> + */ +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/string.h> + +#include <asm/esi.h> +#include <asm/sal.h> + +MODULE_AUTHOR("Alex Williamson <alex.williamson@hp.com>"); +MODULE_DESCRIPTION("Extensible SAL Interface (ESI) support"); +MODULE_LICENSE("GPL"); + +#define MODULE_NAME "esi" + +#define ESI_TABLE_GUID \ + EFI_GUID(0x43EA58DC, 0xCF28, 0x4b06, 0xB3, \ + 0x91, 0xB7, 0x50, 0x59, 0x34, 0x2B, 0xD4) + +enum esi_systab_entry_type { + ESI_DESC_ENTRY_POINT = 0 +}; + +/* + * Entry type: Size: + * 0 48 + */ +#define ESI_DESC_SIZE(type) "\060"[(unsigned) (type)] + +typedef struct ia64_esi_desc_entry_point { + u8 type; + u8 reserved1[15]; + u64 esi_proc; + u64 gp; + efi_guid_t guid; +} ia64_esi_desc_entry_point_t; + +struct pdesc { + void *addr; + void *gp; +}; + +static struct ia64_sal_systab *esi_systab; + +static int __init esi_init (void) +{ + efi_config_table_t *config_tables; + struct ia64_sal_systab *systab; + unsigned long esi = 0; + char *p; + int i; + + config_tables = __va(efi.systab->tables); + + for (i = 0; i < (int) efi.systab->nr_tables; ++i) { + if (efi_guidcmp(config_tables[i].guid, ESI_TABLE_GUID) == 0) { + esi = config_tables[i].table; + break; + } + } + + if (!esi) + return -ENODEV;; + + systab = __va(esi); + + if (strncmp(systab->signature, "ESIT", 4) != 0) { + printk(KERN_ERR "bad signature in ESI system table!"); + return -ENODEV; + } + + p = (char *) (systab + 1); + for (i = 0; i < systab->entry_count; i++) { + /* + * The first byte of each entry type contains the type + * descriptor. + */ + switch (*p) { + case ESI_DESC_ENTRY_POINT: + break; + default: + printk(KERN_WARNING "Unkown table type %d found in " + "ESI table, ignoring rest of table\n", *p); + return -ENODEV; + } + + p += ESI_DESC_SIZE(*p); + } + + esi_systab = systab; + return 0; +} + + +int ia64_esi_call (efi_guid_t guid, struct ia64_sal_retval *isrvp, + enum esi_proc_type proc_type, u64 func, + u64 arg1, u64 arg2, u64 arg3, u64 arg4, u64 arg5, u64 arg6, + u64 arg7) +{ + struct ia64_fpreg fr[6]; + unsigned long flags = 0; + int i; + char *p; + + if (!esi_systab) + return -1; + + p = (char *) (esi_systab + 1); + for (i = 0; i < esi_systab->entry_count; i++) { + if (*p == ESI_DESC_ENTRY_POINT) { + ia64_esi_desc_entry_point_t *esi = (void *)p; + if (!efi_guidcmp(guid, esi->guid)) { + ia64_sal_handler esi_proc; + struct pdesc pdesc; + + pdesc.addr = __va(esi->esi_proc); + pdesc.gp = __va(esi->gp); + + esi_proc = (ia64_sal_handler) &pdesc; + + ia64_save_scratch_fpregs(fr); + if (proc_type == ESI_PROC_SERIALIZED) + spin_lock_irqsave(&sal_lock, flags); + else if (proc_type == ESI_PROC_MP_SAFE) + local_irq_save(flags); + else + preempt_disable(); + *isrvp = (*esi_proc)(func, arg1, arg2, arg3, + arg4, arg5, arg6, arg7); + if (proc_type == ESI_PROC_SERIALIZED) + spin_unlock_irqrestore(&sal_lock, + flags); + else if (proc_type == ESI_PROC_MP_SAFE) + local_irq_restore(flags); + else + preempt_enable(); + ia64_load_scratch_fpregs(fr); + return 0; + } + } + p += ESI_DESC_SIZE(*p); + } + return -1; +} +EXPORT_SYMBOL_GPL(ia64_esi_call); + +int ia64_esi_call_phys (efi_guid_t guid, struct ia64_sal_retval *isrvp, + u64 func, u64 arg1, u64 arg2, u64 arg3, u64 arg4, + u64 arg5, u64 arg6, u64 arg7) +{ + struct ia64_fpreg fr[6]; + unsigned long flags; + u64 esi_params[8]; + char *p; + int i; + + if (!esi_systab) + return -1; + + p = (char *) (esi_systab + 1); + for (i = 0; i < esi_systab->entry_count; i++) { + if (*p == ESI_DESC_ENTRY_POINT) { + ia64_esi_desc_entry_point_t *esi = (void *)p; + if (!efi_guidcmp(guid, esi->guid)) { + ia64_sal_handler esi_proc; + struct pdesc pdesc; + + pdesc.addr = (void *)esi->esi_proc; + pdesc.gp = (void *)esi->gp; + + esi_proc = (ia64_sal_handler) &pdesc; + + esi_params[0] = func; + esi_params[1] = arg1; + esi_params[2] = arg2; + esi_params[3] = arg3; + esi_params[4] = arg4; + esi_params[5] = arg5; + esi_params[6] = arg6; + esi_params[7] = arg7; + ia64_save_scratch_fpregs(fr); + spin_lock_irqsave(&sal_lock, flags); + *isrvp = esi_call_phys(esi_proc, esi_params); + spin_unlock_irqrestore(&sal_lock, flags); + ia64_load_scratch_fpregs(fr); + return 0; + } + } + p += ESI_DESC_SIZE(*p); + } + return -1; +} +EXPORT_SYMBOL_GPL(ia64_esi_call_phys); + +static void __exit esi_exit (void) +{ +} + +module_init(esi_init); +module_exit(esi_exit); /* makes module removable... */ diff --git a/arch/ia64/kernel/esi_stub.S b/arch/ia64/kernel/esi_stub.S new file mode 100644 index 000000000000..6b3d6c1f99b6 --- /dev/null +++ b/arch/ia64/kernel/esi_stub.S @@ -0,0 +1,96 @@ +/* + * ESI call stub. + * + * Copyright (C) 2005 Hewlett-Packard Co + * Alex Williamson <alex.williamson@hp.com> + * + * Based on EFI call stub by David Mosberger. The stub is virtually + * identical to the one for EFI phys-mode calls, except that ESI + * calls may have up to 8 arguments, so they get passed to this routine + * through memory. + * + * This stub allows us to make ESI calls in physical mode with interrupts + * turned off. ESI calls may not support calling from virtual mode. + * + * Google for "Extensible SAL specification" for a document describing the + * ESI standard. + */ + +/* + * PSR settings as per SAL spec (Chapter 8 in the "IA-64 System + * Abstraction Layer Specification", revision 2.6e). Note that + * psr.dfl and psr.dfh MUST be cleared, despite what this manual says. + * Otherwise, SAL dies whenever it's trying to do an IA-32 BIOS call + * (the br.ia instruction fails unless psr.dfl and psr.dfh are + * cleared). Fortunately, SAL promises not to touch the floating + * point regs, so at least we don't have to save f2-f127. + */ +#define PSR_BITS_TO_CLEAR \ + (IA64_PSR_I | IA64_PSR_IT | IA64_PSR_DT | IA64_PSR_RT | \ + IA64_PSR_DD | IA64_PSR_SS | IA64_PSR_RI | IA64_PSR_ED | \ + IA64_PSR_DFL | IA64_PSR_DFH) + +#define PSR_BITS_TO_SET \ + (IA64_PSR_BN) + +#include <asm/processor.h> +#include <asm/asmmacro.h> + +/* + * Inputs: + * in0 = address of function descriptor of ESI routine to call + * in1 = address of array of ESI parameters + * + * Outputs: + * r8 = result returned by called function + */ +GLOBAL_ENTRY(esi_call_phys) + .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(2) + alloc loc1=ar.pfs,2,7,8,0 + ld8 r2=[in0],8 // load ESI function's entry point + mov loc0=rp + .body + ;; + ld8 out0=[in1],8 // ESI params loaded from array + ;; // passing all as inputs doesn't work + ld8 out1=[in1],8 + ;; + ld8 out2=[in1],8 + ;; + ld8 out3=[in1],8 + ;; + ld8 out4=[in1],8 + ;; + ld8 out5=[in1],8 + ;; + ld8 out6=[in1],8 + ;; + ld8 out7=[in1] + mov loc2=gp // save global pointer + mov loc4=ar.rsc // save RSE configuration + mov ar.rsc=0 // put RSE in enforced lazy, LE mode + ;; + ld8 gp=[in0] // load ESI function's global pointer + movl r16=PSR_BITS_TO_CLEAR + mov loc3=psr // save processor status word + movl r17=PSR_BITS_TO_SET + ;; + or loc3=loc3,r17 + mov b6=r2 + ;; + andcm r16=loc3,r16 // get psr with IT, DT, and RT bits cleared + br.call.sptk.many rp=ia64_switch_mode_phys +.ret0: mov loc5=r19 // old ar.bsp + mov loc6=r20 // old sp + br.call.sptk.many rp=b6 // call the ESI function +.ret1: mov ar.rsc=0 // put RSE in enforced lazy, LE mode + mov r16=loc3 // save virtual mode psr + mov r19=loc5 // save virtual mode bspstore + mov r20=loc6 // save virtual mode sp + br.call.sptk.many rp=ia64_switch_mode_virt // return to virtual mode +.ret2: mov ar.rsc=loc4 // restore RSE configuration + mov ar.pfs=loc1 + mov rp=loc0 + mov gp=loc2 + br.ret.sptk.many rp +END(esi_call_phys) diff --git a/arch/ia64/kernel/ia64_ksyms.c b/arch/ia64/kernel/ia64_ksyms.c index 3ead20fb6f4b..879c1817bd1c 100644 --- a/arch/ia64/kernel/ia64_ksyms.c +++ b/arch/ia64/kernel/ia64_ksyms.c @@ -105,5 +105,9 @@ EXPORT_SYMBOL(ia64_spinlock_contention); # endif #endif +#if defined(CONFIG_IA64_ESI) || defined(CONFIG_IA64_ESI_MODULE) +extern void esi_call_phys (void); +EXPORT_SYMBOL_GPL(esi_call_phys); +#endif extern char ia64_ivt[]; EXPORT_SYMBOL(ia64_ivt); diff --git a/arch/ia64/kernel/kprobes.c b/arch/ia64/kernel/kprobes.c index 781960f80b6f..51217d63285e 100644 --- a/arch/ia64/kernel/kprobes.c +++ b/arch/ia64/kernel/kprobes.c @@ -90,7 +90,7 @@ static void __kprobes update_kprobe_inst_flag(uint template, uint slot, p->ainsn.target_br_reg = 0; /* Check for Break instruction - * Bits 37:40 Major opcode to be zero + * Bits 37:40 Major opcode to be zero * Bits 27:32 X6 to be zero * Bits 32:35 X3 to be zero */ @@ -104,19 +104,19 @@ static void __kprobes update_kprobe_inst_flag(uint template, uint slot, switch (major_opcode) { case INDIRECT_CALL_OPCODE: p->ainsn.inst_flag |= INST_FLAG_FIX_BRANCH_REG; - p->ainsn.target_br_reg = ((kprobe_inst >> 6) & 0x7); - break; + p->ainsn.target_br_reg = ((kprobe_inst >> 6) & 0x7); + break; case IP_RELATIVE_PREDICT_OPCODE: case IP_RELATIVE_BRANCH_OPCODE: p->ainsn.inst_flag |= INST_FLAG_FIX_RELATIVE_IP_ADDR; - break; + break; case IP_RELATIVE_CALL_OPCODE: - p->ainsn.inst_flag |= INST_FLAG_FIX_RELATIVE_IP_ADDR; - p->ainsn.inst_flag |= INST_FLAG_FIX_BRANCH_REG; - p->ainsn.target_br_reg = ((kprobe_inst >> 6) & 0x7); - break; + p->ainsn.inst_flag |= INST_FLAG_FIX_RELATIVE_IP_ADDR; + p->ainsn.inst_flag |= INST_FLAG_FIX_BRANCH_REG; + p->ainsn.target_br_reg = ((kprobe_inst >> 6) & 0x7); + break; } - } else if (bundle_encoding[template][slot] == X) { + } else if (bundle_encoding[template][slot] == X) { switch (major_opcode) { case LONG_CALL_OPCODE: p->ainsn.inst_flag |= INST_FLAG_FIX_BRANCH_REG; @@ -136,10 +136,8 @@ static void __kprobes update_kprobe_inst_flag(uint template, uint slot, static int __kprobes unsupported_inst(uint template, uint slot, uint major_opcode, unsigned long kprobe_inst, - struct kprobe *p) + unsigned long addr) { - unsigned long addr = (unsigned long)p->addr; - if (bundle_encoding[template][slot] == I) { switch (major_opcode) { case 0x0: //I_UNIT_MISC_OPCODE: @@ -217,7 +215,7 @@ static void __kprobes prepare_break_inst(uint template, uint slot, struct kprobe *p) { unsigned long break_inst = BREAK_INST; - bundle_t *bundle = &p->ainsn.insn.bundle; + bundle_t *bundle = &p->opcode.bundle; /* * Copy the original kprobe_inst qualifying predicate(qp) @@ -260,18 +258,18 @@ static void __kprobes get_kprobe_inst(bundle_t *bundle, uint slot, switch (slot) { case 0: - *major_opcode = (bundle->quad0.slot0 >> SLOT0_OPCODE_SHIFT); - *kprobe_inst = bundle->quad0.slot0; - break; + *major_opcode = (bundle->quad0.slot0 >> SLOT0_OPCODE_SHIFT); + *kprobe_inst = bundle->quad0.slot0; + break; case 1: - *major_opcode = (bundle->quad1.slot1_p1 >> SLOT1_p1_OPCODE_SHIFT); - kprobe_inst_p0 = bundle->quad0.slot1_p0; - kprobe_inst_p1 = bundle->quad1.slot1_p1; - *kprobe_inst = kprobe_inst_p0 | (kprobe_inst_p1 << (64-46)); + *major_opcode = (bundle->quad1.slot1_p1 >> SLOT1_p1_OPCODE_SHIFT); + kprobe_inst_p0 = bundle->quad0.slot1_p0; + kprobe_inst_p1 = bundle->quad1.slot1_p1; + *kprobe_inst = kprobe_inst_p0 | (kprobe_inst_p1 << (64-46)); break; case 2: - *major_opcode = (bundle->quad1.slot2 >> SLOT2_OPCODE_SHIFT); - *kprobe_inst = bundle->quad1.slot2; + *major_opcode = (bundle->quad1.slot2 >> SLOT2_OPCODE_SHIFT); + *kprobe_inst = bundle->quad1.slot2; break; } } @@ -292,11 +290,11 @@ static int __kprobes valid_kprobe_addr(int template, int slot, return -EINVAL; } - if (in_ivt_functions(addr)) { - printk(KERN_WARNING "Kprobes can't be inserted inside " + if (in_ivt_functions(addr)) { + printk(KERN_WARNING "Kprobes can't be inserted inside " "IVT functions at 0x%lx\n", addr); - return -EINVAL; - } + return -EINVAL; + } if (slot == 1 && bundle_encoding[template][1] != L) { printk(KERN_WARNING "Inserting kprobes on slot #1 " @@ -340,12 +338,13 @@ static void kretprobe_trampoline(void) int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) { struct kretprobe_instance *ri = NULL; - struct hlist_head *head; + struct hlist_head *head, empty_rp; struct hlist_node *node, *tmp; unsigned long flags, orig_ret_address = 0; unsigned long trampoline_address = ((struct fnptr *)kretprobe_trampoline)->ip; + INIT_HLIST_HEAD(&empty_rp); spin_lock_irqsave(&kretprobe_lock, flags); head = kretprobe_inst_table_head(current); @@ -371,7 +370,7 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) ri->rp->handler(ri, regs); orig_ret_address = (unsigned long)ri->ret_addr; - recycle_rp_inst(ri); + recycle_rp_inst(ri, &empty_rp); if (orig_ret_address != trampoline_address) /* @@ -389,6 +388,10 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) spin_unlock_irqrestore(&kretprobe_lock, flags); preempt_enable_no_resched(); + hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) { + hlist_del(&ri->hlist); + kfree(ri); + } /* * By returning a non-zero value, we are telling * kprobe_handler() that we don't want the post_handler @@ -423,37 +426,34 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p) unsigned long *kprobe_addr = (unsigned long *)(addr & ~0xFULL); unsigned long kprobe_inst=0; unsigned int slot = addr & 0xf, template, major_opcode = 0; - bundle_t *bundle = &p->ainsn.insn.bundle; + bundle_t *bundle; - memcpy(&p->opcode.bundle, kprobe_addr, sizeof(bundle_t)); - memcpy(&p->ainsn.insn.bundle, kprobe_addr, sizeof(bundle_t)); - - template = bundle->quad0.template; + bundle = &((kprobe_opcode_t *)kprobe_addr)->bundle; + template = bundle->quad0.template; if(valid_kprobe_addr(template, slot, addr)) return -EINVAL; /* Move to slot 2, if bundle is MLX type and kprobe slot is 1 */ - if (slot == 1 && bundle_encoding[template][1] == L) - slot++; + if (slot == 1 && bundle_encoding[template][1] == L) + slot++; /* Get kprobe_inst and major_opcode from the bundle */ get_kprobe_inst(bundle, slot, &kprobe_inst, &major_opcode); - if (unsupported_inst(template, slot, major_opcode, kprobe_inst, p)) + if (unsupported_inst(template, slot, major_opcode, kprobe_inst, addr)) return -EINVAL; - prepare_break_inst(template, slot, major_opcode, kprobe_inst, p); - return 0; -} + p->ainsn.insn = get_insn_slot(); + if (!p->ainsn.insn) + return -ENOMEM; + memcpy(&p->opcode, kprobe_addr, sizeof(kprobe_opcode_t)); + memcpy(p->ainsn.insn, kprobe_addr, sizeof(kprobe_opcode_t)); -void __kprobes flush_insn_slot(struct kprobe *p) -{ - unsigned long arm_addr; + prepare_break_inst(template, slot, major_opcode, kprobe_inst, p); - arm_addr = ((unsigned long)&p->opcode.bundle) & ~0xFULL; - flush_icache_range(arm_addr, arm_addr + sizeof(bundle_t)); + return 0; } void __kprobes arch_arm_kprobe(struct kprobe *p) @@ -461,9 +461,10 @@ void __kprobes arch_arm_kprobe(struct kprobe *p) unsigned long addr = (unsigned long)p->addr; unsigned long arm_addr = addr & ~0xFULL; - flush_insn_slot(p); - memcpy((char *)arm_addr, &p->ainsn.insn.bundle, sizeof(bundle_t)); - flush_icache_range(arm_addr, arm_addr + sizeof(bundle_t)); + flush_icache_range((unsigned long)p->ainsn.insn, + (unsigned long)p->ainsn.insn + sizeof(kprobe_opcode_t)); + memcpy((char *)arm_addr, &p->opcode, sizeof(kprobe_opcode_t)); + flush_icache_range(arm_addr, arm_addr + sizeof(kprobe_opcode_t)); } void __kprobes arch_disarm_kprobe(struct kprobe *p) @@ -471,11 +472,18 @@ void __kprobes arch_disarm_kprobe(struct kprobe *p) unsigned long addr = (unsigned long)p->addr; unsigned long arm_addr = addr & ~0xFULL; - /* p->opcode contains the original unaltered bundle */ - memcpy((char *) arm_addr, (char *) &p->opcode.bundle, sizeof(bundle_t)); - flush_icache_range(arm_addr, arm_addr + sizeof(bundle_t)); + /* p->ainsn.insn contains the original unaltered kprobe_opcode_t */ + memcpy((char *) arm_addr, (char *) p->ainsn.insn, + sizeof(kprobe_opcode_t)); + flush_icache_range(arm_addr, arm_addr + sizeof(kprobe_opcode_t)); } +void __kprobes arch_remove_kprobe(struct kprobe *p) +{ + mutex_lock(&kprobe_mutex); + free_insn_slot(p->ainsn.insn); + mutex_unlock(&kprobe_mutex); +} /* * We are resuming execution after a single step fault, so the pt_regs * structure reflects the register state after we executed the instruction @@ -486,21 +494,22 @@ void __kprobes arch_disarm_kprobe(struct kprobe *p) */ static void __kprobes resume_execution(struct kprobe *p, struct pt_regs *regs) { - unsigned long bundle_addr = ((unsigned long) (&p->opcode.bundle)) & ~0xFULL; - unsigned long resume_addr = (unsigned long)p->addr & ~0xFULL; - unsigned long template; - int slot = ((unsigned long)p->addr & 0xf); + unsigned long bundle_addr = (unsigned long) (&p->ainsn.insn->bundle); + unsigned long resume_addr = (unsigned long)p->addr & ~0xFULL; + unsigned long template; + int slot = ((unsigned long)p->addr & 0xf); - template = p->opcode.bundle.quad0.template; + template = p->ainsn.insn->bundle.quad0.template; - if (slot == 1 && bundle_encoding[template][1] == L) - slot = 2; + if (slot == 1 && bundle_encoding[template][1] == L) + slot = 2; if (p->ainsn.inst_flag) { if (p->ainsn.inst_flag & INST_FLAG_FIX_RELATIVE_IP_ADDR) { /* Fix relative IP address */ - regs->cr_iip = (regs->cr_iip - bundle_addr) + resume_addr; + regs->cr_iip = (regs->cr_iip - bundle_addr) + + resume_addr; } if (p->ainsn.inst_flag & INST_FLAG_FIX_BRANCH_REG) { @@ -537,23 +546,23 @@ static void __kprobes resume_execution(struct kprobe *p, struct pt_regs *regs) } if (slot == 2) { - if (regs->cr_iip == bundle_addr + 0x10) { - regs->cr_iip = resume_addr + 0x10; - } - } else { - if (regs->cr_iip == bundle_addr) { - regs->cr_iip = resume_addr; - } + if (regs->cr_iip == bundle_addr + 0x10) { + regs->cr_iip = resume_addr + 0x10; + } + } else { + if (regs->cr_iip == bundle_addr) { + regs->cr_iip = resume_addr; + } } turn_ss_off: - /* Turn off Single Step bit */ - ia64_psr(regs)->ss = 0; + /* Turn off Single Step bit */ + ia64_psr(regs)->ss = 0; } static void __kprobes prepare_ss(struct kprobe *p, struct pt_regs *regs) { - unsigned long bundle_addr = (unsigned long) &p->opcode.bundle; + unsigned long bundle_addr = (unsigned long) &p->ainsn.insn->bundle; unsigned long slot = (unsigned long)p->addr & 0xf; /* single step inline if break instruction */ @@ -584,7 +593,7 @@ static int __kprobes is_ia64_break_inst(struct pt_regs *regs) /* Move to slot 2, if bundle is MLX type and kprobe slot is 1 */ if (slot == 1 && bundle_encoding[template][1] == L) - slot++; + slot++; /* Get Kprobe probe instruction at given slot*/ get_kprobe_inst(&bundle, slot, &kprobe_inst, &major_opcode); @@ -624,7 +633,7 @@ static int __kprobes pre_kprobes_handler(struct die_args *args) if (p) { if ((kcb->kprobe_status == KPROBE_HIT_SS) && (p->ainsn.inst_flag == INST_FLAG_BREAK_INST)) { - ia64_psr(regs)->ss = 0; + ia64_psr(regs)->ss = 0; goto no_kprobe; } /* We have reentered the pre_kprobe_handler(), since @@ -768,6 +777,12 @@ static int __kprobes kprobes_fault_handler(struct pt_regs *regs, int trapnr) */ if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr)) return 1; + /* + * In case the user-specified fault handler returned + * zero, try to fix up. + */ + if (ia64_done_with_exception(regs)) + return 1; /* * Let ia64_do_page_fault() fix it. @@ -878,7 +893,7 @@ int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) * fix the return address to our jprobe_inst_return() function * in the jprobes.S file */ - regs->b0 = ((struct fnptr *)(jprobe_inst_return))->ip; + regs->b0 = ((struct fnptr *)(jprobe_inst_return))->ip; return 1; } diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c index 2fbe4536fe18..663230183254 100644 --- a/arch/ia64/kernel/mca.c +++ b/arch/ia64/kernel/mca.c @@ -54,6 +54,9 @@ * * 2005-10-07 Keith Owens <kaos@sgi.com> * Add notify_die() hooks. + * + * 2006-09-15 Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com> + * Add printing support for MCA/INIT. */ #include <linux/types.h> #include <linux/init.h> @@ -136,11 +139,175 @@ extern void salinfo_log_wakeup(int type, u8 *buffer, u64 size, int irqsafe); static int mca_init __initdata; +/* + * limited & delayed printing support for MCA/INIT handler + */ + +#define mprintk(fmt...) ia64_mca_printk(fmt) + +#define MLOGBUF_SIZE (512+256*NR_CPUS) +#define MLOGBUF_MSGMAX 256 +static char mlogbuf[MLOGBUF_SIZE]; +static DEFINE_SPINLOCK(mlogbuf_wlock); /* mca context only */ +static DEFINE_SPINLOCK(mlogbuf_rlock); /* normal context only */ +static unsigned long mlogbuf_start; +static unsigned long mlogbuf_end; +static unsigned int mlogbuf_finished = 0; +static unsigned long mlogbuf_timestamp = 0; + +static int loglevel_save = -1; +#define BREAK_LOGLEVEL(__console_loglevel) \ + oops_in_progress = 1; \ + if (loglevel_save < 0) \ + loglevel_save = __console_loglevel; \ + __console_loglevel = 15; + +#define RESTORE_LOGLEVEL(__console_loglevel) \ + if (loglevel_save >= 0) { \ + __console_loglevel = loglevel_save; \ + loglevel_save = -1; \ + } \ + mlogbuf_finished = 0; \ + oops_in_progress = 0; + +/* + * Push messages into buffer, print them later if not urgent. + */ +void ia64_mca_printk(const char *fmt, ...) +{ + va_list args; + int printed_len; + char temp_buf[MLOGBUF_MSGMAX]; + char *p; + + va_start(args, fmt); + printed_len = vscnprintf(temp_buf, sizeof(temp_buf), fmt, args); + va_end(args); + + /* Copy the output into mlogbuf */ + if (oops_in_progress) { + /* mlogbuf was abandoned, use printk directly instead. */ + printk(temp_buf); + } else { + spin_lock(&mlogbuf_wlock); + for (p = temp_buf; *p; p++) { + unsigned long next = (mlogbuf_end + 1) % MLOGBUF_SIZE; + if (next != mlogbuf_start) { + mlogbuf[mlogbuf_end] = *p; + mlogbuf_end = next; + } else { + /* buffer full */ + break; + } + } + mlogbuf[mlogbuf_end] = '\0'; + spin_unlock(&mlogbuf_wlock); + } +} +EXPORT_SYMBOL(ia64_mca_printk); + +/* + * Print buffered messages. + * NOTE: call this after returning normal context. (ex. from salinfod) + */ +void ia64_mlogbuf_dump(void) +{ + char temp_buf[MLOGBUF_MSGMAX]; + char *p; + unsigned long index; + unsigned long flags; + unsigned int printed_len; + + /* Get output from mlogbuf */ + while (mlogbuf_start != mlogbuf_end) { + temp_buf[0] = '\0'; + p = temp_buf; + printed_len = 0; + + spin_lock_irqsave(&mlogbuf_rlock, flags); + + index = mlogbuf_start; + while (index != mlogbuf_end) { + *p = mlogbuf[index]; + index = (index + 1) % MLOGBUF_SIZE; + if (!*p) + break; + p++; + if (++printed_len >= MLOGBUF_MSGMAX - 1) + break; + } + *p = '\0'; + if (temp_buf[0]) + printk(temp_buf); + mlogbuf_start = index; + + mlogbuf_timestamp = 0; + spin_unlock_irqrestore(&mlogbuf_rlock, flags); + } +} +EXPORT_SYMBOL(ia64_mlogbuf_dump); + +/* + * Call this if system is going to down or if immediate flushing messages to + * console is required. (ex. recovery was failed, crash dump is going to be + * invoked, long-wait rendezvous etc.) + * NOTE: this should be called from monarch. + */ +static void ia64_mlogbuf_finish(int wait) +{ + BREAK_LOGLEVEL(console_loglevel); + + spin_lock_init(&mlogbuf_rlock); + ia64_mlogbuf_dump(); + printk(KERN_EMERG "mlogbuf_finish: printing switched to urgent mode, " + "MCA/INIT might be dodgy or fail.\n"); + + if (!wait) + return; + + /* wait for console */ + printk("Delaying for 5 seconds...\n"); + udelay(5*1000000); + + mlogbuf_finished = 1; +} +EXPORT_SYMBOL(ia64_mlogbuf_finish); + +/* + * Print buffered messages from INIT context. + */ +static void ia64_mlogbuf_dump_from_init(void) +{ + if (mlogbuf_finished) + return; + + if (mlogbuf_timestamp && (mlogbuf_timestamp + 30*HZ > jiffies)) { + printk(KERN_ERR "INIT: mlogbuf_dump is interrupted by INIT " + " and the system seems to be messed up.\n"); + ia64_mlogbuf_finish(0); + return; + } + + if (!spin_trylock(&mlogbuf_rlock)) { + printk(KERN_ERR "INIT: mlogbuf_dump is interrupted by INIT. " + "Generated messages other than stack dump will be " + "buffered to mlogbuf and will be printed later.\n"); + printk(KERN_ERR "INIT: If messages would not printed after " + "this INIT, wait 30sec and assert INIT again.\n"); + if (!mlogbuf_timestamp) + mlogbuf_timestamp = jiffies; + return; + } + spin_unlock(&mlogbuf_rlock); + ia64_mlogbuf_dump(); +} static void inline ia64_mca_spin(const char *func) { - printk(KERN_EMERG "%s: spinning here, not returning to SAL\n", func); + if (monarch_cpu == smp_processor_id()) + ia64_mlogbuf_finish(0); + mprintk(KERN_EMERG "%s: spinning here, not returning to SAL\n", func); while (1) cpu_relax(); } @@ -221,7 +388,7 @@ ia64_log_get(int sal_info_type, u8 **buffer, int irq_safe) { sal_log_record_header_t *log_buffer; u64 total_len = 0; - int s; + unsigned long s; IA64_LOG_LOCK(sal_info_type); @@ -344,9 +511,6 @@ ia64_mca_cpe_int_handler (int cpe_irq, void *arg, struct pt_regs *ptregs) /* SAL spec states this should run w/ interrupts enabled */ local_irq_enable(); - /* Get the CPE error record and log it */ - ia64_mca_log_sal_error_record(SAL_INFO_TYPE_CPE); - spin_lock(&cpe_history_lock); if (!cpe_poll_enabled && cpe_vector >= 0) { @@ -375,7 +539,7 @@ ia64_mca_cpe_int_handler (int cpe_irq, void *arg, struct pt_regs *ptregs) mod_timer(&cpe_poll_timer, jiffies + MIN_CPE_POLL_INTERVAL); /* lock already released, get out now */ - return IRQ_HANDLED; + goto out; } else { cpe_history[index++] = now; if (index == CPE_HISTORY_LENGTH) @@ -383,6 +547,10 @@ ia64_mca_cpe_int_handler (int cpe_irq, void *arg, struct pt_regs *ptregs) } } spin_unlock(&cpe_history_lock); +out: + /* Get the CPE error record and log it */ + ia64_mca_log_sal_error_record(SAL_INFO_TYPE_CPE); + return IRQ_HANDLED; } @@ -988,18 +1156,22 @@ ia64_wait_for_slaves(int monarch, const char *type) } if (!missing) goto all_in; - printk(KERN_INFO "OS %s slave did not rendezvous on cpu", type); + /* + * Maybe slave(s) dead. Print buffered messages immediately. + */ + ia64_mlogbuf_finish(0); + mprintk(KERN_INFO "OS %s slave did not rendezvous on cpu", type); for_each_online_cpu(c) { if (c == monarch) continue; if (ia64_mc_info.imi_rendez_checkin[c] == IA64_MCA_RENDEZ_CHECKIN_NOTDONE) - printk(" %d", c); + mprintk(" %d", c); } - printk("\n"); + mprintk("\n"); return; all_in: - printk(KERN_INFO "All OS %s slaves have reached rendezvous\n", type); + mprintk(KERN_INFO "All OS %s slaves have reached rendezvous\n", type); return; } @@ -1027,10 +1199,8 @@ ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw, struct ia64_mca_notify_die nd = { .sos = sos, .monarch_cpu = &monarch_cpu }; - oops_in_progress = 1; /* FIXME: make printk NMI/MCA/INIT safe */ - console_loglevel = 15; /* make sure printks make it to console */ - printk(KERN_INFO "Entered OS MCA handler. PSP=%lx cpu=%d monarch=%ld\n", - sos->proc_state_param, cpu, sos->monarch); + mprintk(KERN_INFO "Entered OS MCA handler. PSP=%lx cpu=%d " + "monarch=%ld\n", sos->proc_state_param, cpu, sos->monarch); previous_current = ia64_mca_modify_original_stack(regs, sw, sos, "MCA"); monarch_cpu = cpu; @@ -1066,6 +1236,9 @@ ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw, rh->severity = sal_log_severity_corrected; ia64_sal_clear_state_info(SAL_INFO_TYPE_MCA); sos->os_status = IA64_MCA_CORRECTED; + } else { + /* Dump buffered message to console */ + ia64_mlogbuf_finish(1); } if (notify_die(DIE_MCA_MONARCH_LEAVE, "MCA", regs, (long)&nd, 0, recover) == NOTIFY_STOP) @@ -1106,9 +1279,6 @@ ia64_mca_cmc_int_handler(int cmc_irq, void *arg, struct pt_regs *ptregs) /* SAL spec states this should run w/ interrupts enabled */ local_irq_enable(); - /* Get the CMC error record and log it */ - ia64_mca_log_sal_error_record(SAL_INFO_TYPE_CMC); - spin_lock(&cmc_history_lock); if (!cmc_polling_enabled) { int i, count = 1; /* we know 1 happened now */ @@ -1141,7 +1311,7 @@ ia64_mca_cmc_int_handler(int cmc_irq, void *arg, struct pt_regs *ptregs) mod_timer(&cmc_poll_timer, jiffies + CMC_POLL_INTERVAL); /* lock already released, get out now */ - return IRQ_HANDLED; + goto out; } else { cmc_history[index++] = now; if (index == CMC_HISTORY_LENGTH) @@ -1149,6 +1319,10 @@ ia64_mca_cmc_int_handler(int cmc_irq, void *arg, struct pt_regs *ptregs) } } spin_unlock(&cmc_history_lock); +out: + /* Get the CMC error record and log it */ + ia64_mca_log_sal_error_record(SAL_INFO_TYPE_CMC); + return IRQ_HANDLED; } @@ -1305,6 +1479,15 @@ default_monarch_init_process(struct notifier_block *self, unsigned long val, voi struct task_struct *g, *t; if (val != DIE_INIT_MONARCH_PROCESS) return NOTIFY_DONE; + + /* + * FIXME: mlogbuf will brim over with INIT stack dumps. + * To enable show_stack from INIT, we use oops_in_progress which should + * be used in real oops. This would cause something wrong after INIT. + */ + BREAK_LOGLEVEL(console_loglevel); + ia64_mlogbuf_dump_from_init(); + printk(KERN_ERR "Processes interrupted by INIT -"); for_each_online_cpu(c) { struct ia64_sal_os_state *s; @@ -1326,6 +1509,8 @@ default_monarch_init_process(struct notifier_block *self, unsigned long val, voi } while_each_thread (g, t); read_unlock(&tasklist_lock); } + /* FIXME: This will not restore zapped printk locks. */ + RESTORE_LOGLEVEL(console_loglevel); return NOTIFY_DONE; } @@ -1357,12 +1542,9 @@ ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw, struct ia64_mca_notify_die nd = { .sos = sos, .monarch_cpu = &monarch_cpu }; - oops_in_progress = 1; /* FIXME: make printk NMI/MCA/INIT safe */ - console_loglevel = 15; /* make sure printks make it to console */ - (void) notify_die(DIE_INIT_ENTER, "INIT", regs, (long)&nd, 0, 0); - printk(KERN_INFO "Entered OS INIT handler. PSP=%lx cpu=%d monarch=%ld\n", + mprintk(KERN_INFO "Entered OS INIT handler. PSP=%lx cpu=%d monarch=%ld\n", sos->proc_state_param, cpu, sos->monarch); salinfo_log_wakeup(SAL_INFO_TYPE_INIT, NULL, 0, 0); @@ -1375,7 +1557,7 @@ ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw, * fix their proms and get their customers updated. */ if (!sos->monarch && atomic_add_return(1, &slaves) == num_online_cpus()) { - printk(KERN_WARNING "%s: Promoting cpu %d to monarch.\n", + mprintk(KERN_WARNING "%s: Promoting cpu %d to monarch.\n", __FUNCTION__, cpu); atomic_dec(&slaves); sos->monarch = 1; @@ -1387,7 +1569,7 @@ ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw, * fix their proms and get their customers updated. */ if (sos->monarch && atomic_add_return(1, &monarchs) > 1) { - printk(KERN_WARNING "%s: Demoting cpu %d to slave.\n", + mprintk(KERN_WARNING "%s: Demoting cpu %d to slave.\n", __FUNCTION__, cpu); atomic_dec(&monarchs); sos->monarch = 0; @@ -1408,7 +1590,7 @@ ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw, if (notify_die(DIE_INIT_SLAVE_LEAVE, "INIT", regs, (long)&nd, 0, 0) == NOTIFY_STOP) ia64_mca_spin(__FUNCTION__); - printk("Slave on cpu %d returning to normal service.\n", cpu); + mprintk("Slave on cpu %d returning to normal service.\n", cpu); set_curr_task(cpu, previous_current); ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_NOTDONE; atomic_dec(&slaves); @@ -1426,7 +1608,7 @@ ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw, * same serial line, the user will need some time to switch out of the BMC before * the dump begins. */ - printk("Delaying for 5 seconds...\n"); + mprintk("Delaying for 5 seconds...\n"); udelay(5*1000000); ia64_wait_for_slaves(cpu, "INIT"); /* If nobody intercepts DIE_INIT_MONARCH_PROCESS then we drop through @@ -1439,7 +1621,7 @@ ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw, if (notify_die(DIE_INIT_MONARCH_LEAVE, "INIT", regs, (long)&nd, 0, 0) == NOTIFY_STOP) ia64_mca_spin(__FUNCTION__); - printk("\nINIT dump complete. Monarch on cpu %d returning to normal service.\n", cpu); + mprintk("\nINIT dump complete. Monarch on cpu %d returning to normal service.\n", cpu); atomic_dec(&monarchs); set_curr_task(cpu, previous_current); monarch_cpu = -1; diff --git a/arch/ia64/kernel/mca_asm.S b/arch/ia64/kernel/mca_asm.S index 96047491d1b9..c6b607c00dee 100644 --- a/arch/ia64/kernel/mca_asm.S +++ b/arch/ia64/kernel/mca_asm.S @@ -1025,18 +1025,13 @@ ia64_old_stack: ia64_set_kernel_registers: add temp3=MCA_SP_OFFSET, r3 - add temp4=MCA_SOS_OFFSET+SOS(OS_GP), r3 mov b0=r2 // save return address GET_IA64_MCA_DATA(temp1) ;; - add temp4=temp4, temp1 // &struct ia64_sal_os_state.os_gp add r12=temp1, temp3 // kernel stack pointer on MCA/INIT stack add r13=temp1, r3 // set current to start of MCA/INIT stack add r20=temp1, r3 // physical start of MCA/INIT stack ;; - ld8 r1=[temp4] // OS GP from SAL OS state - ;; - DATA_PA_TO_VA(r1,temp1) DATA_PA_TO_VA(r12,temp2) DATA_PA_TO_VA(r13,temp3) ;; @@ -1067,6 +1062,10 @@ ia64_set_kernel_registers: mov cr.itir=r18 mov cr.ifa=r13 mov r20=IA64_TR_CURRENT_STACK + + movl r17=FPSR_DEFAULT + ;; + mov.m ar.fpsr=r17 // set ar.fpsr to kernel default value ;; itr.d dtr[r20]=r21 ;; diff --git a/arch/ia64/kernel/mca_drv.c b/arch/ia64/kernel/mca_drv.c index 8db6e0cedadc..a45009d2bc90 100644 --- a/arch/ia64/kernel/mca_drv.c +++ b/arch/ia64/kernel/mca_drv.c @@ -79,14 +79,30 @@ static int fatal_mca(const char *fmt, ...) { va_list args; + char buf[256]; va_start(args, fmt); - vprintk(fmt, args); + vsnprintf(buf, sizeof(buf), fmt, args); va_end(args); + ia64_mca_printk(KERN_ALERT "MCA: %s\n", buf); return MCA_NOT_RECOVERED; } +static int +mca_recovered(const char *fmt, ...) +{ + va_list args; + char buf[256]; + + va_start(args, fmt); + vsnprintf(buf, sizeof(buf), fmt, args); + va_end(args); + ia64_mca_printk(KERN_INFO "MCA: %s\n", buf); + + return MCA_RECOVERED; +} + /** * mca_page_isolate - isolate a poisoned page in order not to use it later * @paddr: poisoned memory location @@ -140,6 +156,7 @@ mca_page_isolate(unsigned long paddr) void mca_handler_bh(unsigned long paddr, void *iip, unsigned long ipsr) { + ia64_mlogbuf_dump(); printk(KERN_ERR "OS_MCA: process [cpu %d, pid: %d, uid: %d, " "iip: %p, psr: 0x%lx,paddr: 0x%lx](%s) encounters MCA.\n", raw_smp_processor_id(), current->pid, current->uid, @@ -440,7 +457,7 @@ recover_from_read_error(slidx_table_t *slidx, /* Is target address valid? */ if (!pbci->tv) - return fatal_mca(KERN_ALERT "MCA: target address not valid\n"); + return fatal_mca("target address not valid"); /* * cpu read or memory-mapped io read @@ -458,7 +475,7 @@ recover_from_read_error(slidx_table_t *slidx, /* Is minstate valid? */ if (!peidx_bottom(peidx) || !(peidx_bottom(peidx)->valid.minstate)) - return fatal_mca(KERN_ALERT "MCA: minstate not valid\n"); + return fatal_mca("minstate not valid"); psr1 =(struct ia64_psr *)&(peidx_minstate_area(peidx)->pmsa_ipsr); psr2 =(struct ia64_psr *)&(peidx_minstate_area(peidx)->pmsa_xpsr); @@ -492,13 +509,14 @@ recover_from_read_error(slidx_table_t *slidx, psr2->bn = 1; psr2->i = 0; - return MCA_RECOVERED; + return mca_recovered("user memory corruption. " + "kill affected process - recovered."); } } - return fatal_mca(KERN_ALERT "MCA: kernel context not recovered," - " iip 0x%lx\n", pmsa->pmsa_iip); + return fatal_mca("kernel context not recovered, iip 0x%lx\n", + pmsa->pmsa_iip); } /** @@ -584,13 +602,13 @@ recover_from_processor_error(int platform, slidx_table_t *slidx, * The machine check is corrected. */ if (psp->cm == 1) - return MCA_RECOVERED; + return mca_recovered("machine check is already corrected."); /* * The error was not contained. Software must be reset. */ if (psp->us || psp->ci == 0) - return fatal_mca(KERN_ALERT "MCA: error not contained\n"); + return fatal_mca("error not contained"); /* * The cache check and bus check bits have four possible states @@ -601,22 +619,22 @@ recover_from_processor_error(int platform, slidx_table_t *slidx, * 1 1 Memory error, attempt recovery */ if (psp->bc == 0 || pbci == NULL) - return fatal_mca(KERN_ALERT "MCA: No bus check\n"); + return fatal_mca("No bus check"); /* * Sorry, we cannot handle so many. */ if (peidx_bus_check_num(peidx) > 1) - return fatal_mca(KERN_ALERT "MCA: Too many bus checks\n"); + return fatal_mca("Too many bus checks"); /* * Well, here is only one bus error. */ if (pbci->ib) - return fatal_mca(KERN_ALERT "MCA: Internal Bus error\n"); + return fatal_mca("Internal Bus error"); if (pbci->cc) - return fatal_mca(KERN_ALERT "MCA: Cache-cache error\n"); + return fatal_mca("Cache-cache error"); if (pbci->eb && pbci->bsi > 0) - return fatal_mca(KERN_ALERT "MCA: External bus check fatal status\n"); + return fatal_mca("External bus check fatal status"); /* * This is a local MCA and estimated as recoverble external bus error. @@ -628,7 +646,7 @@ recover_from_processor_error(int platform, slidx_table_t *slidx, /* * On account of strange SAL error record, we cannot recover. */ - return fatal_mca(KERN_ALERT "MCA: Strange SAL record\n"); + return fatal_mca("Strange SAL record"); } /** @@ -657,10 +675,10 @@ mca_try_to_recover(void *rec, struct ia64_sal_os_state *sos) /* Now, OS can recover when there is one processor error section */ if (n_proc_err > 1) - return fatal_mca(KERN_ALERT "MCA: Too Many Errors\n"); + return fatal_mca("Too Many Errors"); else if (n_proc_err == 0) - /* Weird SAL record ... We need not to recover */ - return fatal_mca(KERN_ALERT "MCA: Weird SAL record\n"); + /* Weird SAL record ... We can't do anything */ + return fatal_mca("Weird SAL record"); /* Make index of processor error section */ mca_make_peidx((sal_log_processor_info_t*) @@ -671,7 +689,7 @@ mca_try_to_recover(void *rec, struct ia64_sal_os_state *sos) /* Check whether MCA is global or not */ if (is_mca_global(&peidx, &pbci, sos)) - return fatal_mca(KERN_ALERT "MCA: global MCA\n"); + return fatal_mca("global MCA"); /* Try to recover a processor error */ return recover_from_processor_error(platform_err, &slidx, &peidx, diff --git a/arch/ia64/kernel/mca_drv.h b/arch/ia64/kernel/mca_drv.h index 31a2e52bb16f..c85e943ba5fd 100644 --- a/arch/ia64/kernel/mca_drv.h +++ b/arch/ia64/kernel/mca_drv.h @@ -118,3 +118,7 @@ struct mca_table_entry { extern const struct mca_table_entry *search_mca_tables (unsigned long addr); extern int mca_recover_range(unsigned long); +extern void ia64_mca_printk(const char * fmt, ...) + __attribute__ ((format (printf, 1, 2))); +extern void ia64_mlogbuf_dump(void); + diff --git a/arch/ia64/kernel/numa.c b/arch/ia64/kernel/numa.c index 1cc360c83e7a..a78b45f5fe2f 100644 --- a/arch/ia64/kernel/numa.c +++ b/arch/ia64/kernel/numa.c @@ -28,6 +28,37 @@ u16 cpu_to_node_map[NR_CPUS] __cacheline_aligned; EXPORT_SYMBOL(cpu_to_node_map); cpumask_t node_to_cpu_mask[MAX_NUMNODES] __cacheline_aligned; +EXPORT_SYMBOL(node_to_cpu_mask); + +void __cpuinit map_cpu_to_node(int cpu, int nid) +{ + int oldnid; + if (nid < 0) { /* just initialize by zero */ + cpu_to_node_map[cpu] = 0; + return; + } + /* sanity check first */ + oldnid = cpu_to_node_map[cpu]; + if (cpu_isset(cpu, node_to_cpu_mask[oldnid])) { + return; /* nothing to do */ + } + /* we don't have cpu-driven node hot add yet... + In usual case, node is created from SRAT at boot time. */ + if (!node_online(nid)) + nid = first_online_node; + cpu_to_node_map[cpu] = nid; + cpu_set(cpu, node_to_cpu_mask[nid]); + return; +} + +void __cpuinit unmap_cpu_from_node(int cpu, int nid) +{ + WARN_ON(!cpu_isset(cpu, node_to_cpu_mask[nid])); + WARN_ON(cpu_to_node_map[cpu] != nid); + cpu_to_node_map[cpu] = 0; + cpu_clear(cpu, node_to_cpu_mask[nid]); +} + /** * build_cpu_to_node_map - setup cpu to node and node to cpumask arrays @@ -49,8 +80,6 @@ void __init build_cpu_to_node_map(void) node = node_cpuid[i].nid; break; } - cpu_to_node_map[cpu] = (node >= 0) ? node : 0; - if (node >= 0) - cpu_set(cpu, node_to_cpu_mask[node]); + map_cpu_to_node(cpu, node); } } diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c index 84a7e52f56f6..281004ff7b00 100644 --- a/arch/ia64/kernel/perfmon.c +++ b/arch/ia64/kernel/perfmon.c @@ -34,6 +34,7 @@ #include <linux/file.h> #include <linux/poll.h> #include <linux/vfs.h> +#include <linux/smp.h> #include <linux/pagemap.h> #include <linux/mount.h> #include <linux/bitops.h> @@ -62,6 +63,9 @@ #define PFM_INVALID_ACTIVATION (~0UL) +#define PFM_NUM_PMC_REGS 64 /* PMC save area for ctxsw */ +#define PFM_NUM_PMD_REGS 64 /* PMD save area for ctxsw */ + /* * depth of message queue */ @@ -296,14 +300,17 @@ typedef struct pfm_context { unsigned long ctx_reload_pmcs[4]; /* bitmask of force reload PMC on ctxsw in */ unsigned long ctx_used_monitors[4]; /* bitmask of monitor PMC being used */ - unsigned long ctx_pmcs[IA64_NUM_PMC_REGS]; /* saved copies of PMC values */ + unsigned long ctx_pmcs[PFM_NUM_PMC_REGS]; /* saved copies of PMC values */ unsigned int ctx_used_ibrs[1]; /* bitmask of used IBR (speedup ctxsw in) */ unsigned int ctx_used_dbrs[1]; /* bitmask of used DBR (speedup ctxsw in) */ unsigned long ctx_dbrs[IA64_NUM_DBG_REGS]; /* DBR values (cache) when not loaded */ unsigned long ctx_ibrs[IA64_NUM_DBG_REGS]; /* IBR values (cache) when not loaded */ - pfm_counter_t ctx_pmds[IA64_NUM_PMD_REGS]; /* software state for PMDS */ + pfm_counter_t ctx_pmds[PFM_NUM_PMD_REGS]; /* software state for PMDS */ + + unsigned long th_pmcs[PFM_NUM_PMC_REGS]; /* PMC thread save state */ + unsigned long th_pmds[PFM_NUM_PMD_REGS]; /* PMD thread save state */ u64 ctx_saved_psr_up; /* only contains psr.up value */ @@ -867,7 +874,6 @@ static void pfm_mask_monitoring(struct task_struct *task) { pfm_context_t *ctx = PFM_GET_CTX(task); - struct thread_struct *th = &task->thread; unsigned long mask, val, ovfl_mask; int i; @@ -888,7 +894,7 @@ pfm_mask_monitoring(struct task_struct *task) * So in both cases, the live register contains the owner's * state. We can ONLY touch the PMU registers and NOT the PSR. * - * As a consequence to this call, the thread->pmds[] array + * As a consequence to this call, the ctx->th_pmds[] array * contains stale information which must be ignored * when context is reloaded AND monitoring is active (see * pfm_restart). @@ -923,9 +929,9 @@ pfm_mask_monitoring(struct task_struct *task) mask = ctx->ctx_used_monitors[0] >> PMU_FIRST_COUNTER; for(i= PMU_FIRST_COUNTER; mask; i++, mask>>=1) { if ((mask & 0x1) == 0UL) continue; - ia64_set_pmc(i, th->pmcs[i] & ~0xfUL); - th->pmcs[i] &= ~0xfUL; - DPRINT_ovfl(("pmc[%d]=0x%lx\n", i, th->pmcs[i])); + ia64_set_pmc(i, ctx->th_pmcs[i] & ~0xfUL); + ctx->th_pmcs[i] &= ~0xfUL; + DPRINT_ovfl(("pmc[%d]=0x%lx\n", i, ctx->th_pmcs[i])); } /* * make all of this visible @@ -942,7 +948,6 @@ static void pfm_restore_monitoring(struct task_struct *task) { pfm_context_t *ctx = PFM_GET_CTX(task); - struct thread_struct *th = &task->thread; unsigned long mask, ovfl_mask; unsigned long psr, val; int i, is_system; @@ -1008,9 +1013,9 @@ pfm_restore_monitoring(struct task_struct *task) mask = ctx->ctx_used_monitors[0] >> PMU_FIRST_COUNTER; for(i= PMU_FIRST_COUNTER; mask; i++, mask>>=1) { if ((mask & 0x1) == 0UL) continue; - th->pmcs[i] = ctx->ctx_pmcs[i]; - ia64_set_pmc(i, th->pmcs[i]); - DPRINT(("[%d] pmc[%d]=0x%lx\n", task->pid, i, th->pmcs[i])); + ctx->th_pmcs[i] = ctx->ctx_pmcs[i]; + ia64_set_pmc(i, ctx->th_pmcs[i]); + DPRINT(("[%d] pmc[%d]=0x%lx\n", task->pid, i, ctx->th_pmcs[i])); } ia64_srlz_d(); @@ -1069,7 +1074,6 @@ pfm_restore_pmds(unsigned long *pmds, unsigned long mask) static inline void pfm_copy_pmds(struct task_struct *task, pfm_context_t *ctx) { - struct thread_struct *thread = &task->thread; unsigned long ovfl_val = pmu_conf->ovfl_val; unsigned long mask = ctx->ctx_all_pmds[0]; unsigned long val; @@ -1091,11 +1095,11 @@ pfm_copy_pmds(struct task_struct *task, pfm_context_t *ctx) ctx->ctx_pmds[i].val = val & ~ovfl_val; val &= ovfl_val; } - thread->pmds[i] = val; + ctx->th_pmds[i] = val; DPRINT(("pmd[%d]=0x%lx soft_val=0x%lx\n", i, - thread->pmds[i], + ctx->th_pmds[i], ctx->ctx_pmds[i].val)); } } @@ -1106,7 +1110,6 @@ pfm_copy_pmds(struct task_struct *task, pfm_context_t *ctx) static inline void pfm_copy_pmcs(struct task_struct *task, pfm_context_t *ctx) { - struct thread_struct *thread = &task->thread; unsigned long mask = ctx->ctx_all_pmcs[0]; int i; @@ -1114,8 +1117,8 @@ pfm_copy_pmcs(struct task_struct *task, pfm_context_t *ctx) for (i=0; mask; i++, mask>>=1) { /* masking 0 with ovfl_val yields 0 */ - thread->pmcs[i] = ctx->ctx_pmcs[i]; - DPRINT(("pmc[%d]=0x%lx\n", i, thread->pmcs[i])); + ctx->th_pmcs[i] = ctx->ctx_pmcs[i]; + DPRINT(("pmc[%d]=0x%lx\n", i, ctx->th_pmcs[i])); } } @@ -2859,7 +2862,6 @@ pfm_reset_regs(pfm_context_t *ctx, unsigned long *ovfl_regs, int is_long_reset) static int pfm_write_pmcs(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) { - struct thread_struct *thread = NULL; struct task_struct *task; pfarg_reg_t *req = (pfarg_reg_t *)arg; unsigned long value, pmc_pm; @@ -2880,7 +2882,6 @@ pfm_write_pmcs(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) if (state == PFM_CTX_ZOMBIE) return -EINVAL; if (is_loaded) { - thread = &task->thread; /* * In system wide and when the context is loaded, access can only happen * when the caller is running on the CPU being monitored by the session. @@ -3035,7 +3036,7 @@ pfm_write_pmcs(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) * * The value in ctx_pmcs[] can only be changed in pfm_write_pmcs(). * - * The value in thread->pmcs[] may be modified on overflow, i.e., when + * The value in th_pmcs[] may be modified on overflow, i.e., when * monitoring needs to be stopped. */ if (is_monitor) CTX_USED_MONITOR(ctx, 1UL << cnum); @@ -3049,7 +3050,7 @@ pfm_write_pmcs(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) /* * write thread state */ - if (is_system == 0) thread->pmcs[cnum] = value; + if (is_system == 0) ctx->th_pmcs[cnum] = value; /* * write hardware register if we can @@ -3101,7 +3102,6 @@ error: static int pfm_write_pmds(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) { - struct thread_struct *thread = NULL; struct task_struct *task; pfarg_reg_t *req = (pfarg_reg_t *)arg; unsigned long value, hw_value, ovfl_mask; @@ -3125,7 +3125,6 @@ pfm_write_pmds(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) * the owner of the local PMU. */ if (likely(is_loaded)) { - thread = &task->thread; /* * In system wide and when the context is loaded, access can only happen * when the caller is running on the CPU being monitored by the session. @@ -3233,7 +3232,7 @@ pfm_write_pmds(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) /* * write thread state */ - if (is_system == 0) thread->pmds[cnum] = hw_value; + if (is_system == 0) ctx->th_pmds[cnum] = hw_value; /* * write hardware register if we can @@ -3299,7 +3298,6 @@ abort_mission: static int pfm_read_pmds(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) { - struct thread_struct *thread = NULL; struct task_struct *task; unsigned long val = 0UL, lval, ovfl_mask, sval; pfarg_reg_t *req = (pfarg_reg_t *)arg; @@ -3323,7 +3321,6 @@ pfm_read_pmds(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) if (state == PFM_CTX_ZOMBIE) return -EINVAL; if (likely(is_loaded)) { - thread = &task->thread; /* * In system wide and when the context is loaded, access can only happen * when the caller is running on the CPU being monitored by the session. @@ -3385,7 +3382,7 @@ pfm_read_pmds(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) * if context is zombie, then task does not exist anymore. * In this case, we use the full value saved in the context (pfm_flush_regs()). */ - val = is_loaded ? thread->pmds[cnum] : 0UL; + val = is_loaded ? ctx->th_pmds[cnum] : 0UL; } rd_func = pmu_conf->pmd_desc[cnum].read_check; @@ -4354,8 +4351,8 @@ pfm_context_load(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) pfm_copy_pmds(task, ctx); pfm_copy_pmcs(task, ctx); - pmcs_source = thread->pmcs; - pmds_source = thread->pmds; + pmcs_source = ctx->th_pmcs; + pmds_source = ctx->th_pmds; /* * always the case for system-wide @@ -5864,14 +5861,12 @@ void pfm_save_regs(struct task_struct *task) { pfm_context_t *ctx; - struct thread_struct *t; unsigned long flags; u64 psr; ctx = PFM_GET_CTX(task); if (ctx == NULL) return; - t = &task->thread; /* * we always come here with interrupts ALREADY disabled by @@ -5929,19 +5924,19 @@ pfm_save_regs(struct task_struct *task) * guarantee we will be schedule at that same * CPU again. */ - pfm_save_pmds(t->pmds, ctx->ctx_used_pmds[0]); + pfm_save_pmds(ctx->th_pmds, ctx->ctx_used_pmds[0]); /* * save pmc0 ia64_srlz_d() done in pfm_save_pmds() * we will need it on the restore path to check * for pending overflow. */ - t->pmcs[0] = ia64_get_pmc(0); + ctx->th_pmcs[0] = ia64_get_pmc(0); /* * unfreeze PMU if had pending overflows */ - if (t->pmcs[0] & ~0x1UL) pfm_unfreeze_pmu(); + if (ctx->th_pmcs[0] & ~0x1UL) pfm_unfreeze_pmu(); /* * finally, allow context access. @@ -5986,7 +5981,6 @@ static void pfm_lazy_save_regs (struct task_struct *task) { pfm_context_t *ctx; - struct thread_struct *t; unsigned long flags; { u64 psr = pfm_get_psr(); @@ -5994,7 +5988,6 @@ pfm_lazy_save_regs (struct task_struct *task) } ctx = PFM_GET_CTX(task); - t = &task->thread; /* * we need to mask PMU overflow here to @@ -6019,19 +6012,19 @@ pfm_lazy_save_regs (struct task_struct *task) /* * save all the pmds we use */ - pfm_save_pmds(t->pmds, ctx->ctx_used_pmds[0]); + pfm_save_pmds(ctx->th_pmds, ctx->ctx_used_pmds[0]); /* * save pmc0 ia64_srlz_d() done in pfm_save_pmds() * it is needed to check for pended overflow * on the restore path */ - t->pmcs[0] = ia64_get_pmc(0); + ctx->th_pmcs[0] = ia64_get_pmc(0); /* * unfreeze PMU if had pending overflows */ - if (t->pmcs[0] & ~0x1UL) pfm_unfreeze_pmu(); + if (ctx->th_pmcs[0] & ~0x1UL) pfm_unfreeze_pmu(); /* * now get can unmask PMU interrupts, they will @@ -6050,7 +6043,6 @@ void pfm_load_regs (struct task_struct *task) { pfm_context_t *ctx; - struct thread_struct *t; unsigned long pmc_mask = 0UL, pmd_mask = 0UL; unsigned long flags; u64 psr, psr_up; @@ -6061,11 +6053,10 @@ pfm_load_regs (struct task_struct *task) BUG_ON(GET_PMU_OWNER()); - t = &task->thread; /* * possible on unload */ - if (unlikely((t->flags & IA64_THREAD_PM_VALID) == 0)) return; + if (unlikely((task->thread.flags & IA64_THREAD_PM_VALID) == 0)) return; /* * we always come here with interrupts ALREADY disabled by @@ -6147,21 +6138,21 @@ pfm_load_regs (struct task_struct *task) * * XXX: optimize here */ - if (pmd_mask) pfm_restore_pmds(t->pmds, pmd_mask); - if (pmc_mask) pfm_restore_pmcs(t->pmcs, pmc_mask); + if (pmd_mask) pfm_restore_pmds(ctx->th_pmds, pmd_mask); + if (pmc_mask) pfm_restore_pmcs(ctx->th_pmcs, pmc_mask); /* * check for pending overflow at the time the state * was saved. */ - if (unlikely(PMC0_HAS_OVFL(t->pmcs[0]))) { + if (unlikely(PMC0_HAS_OVFL(ctx->th_pmcs[0]))) { /* * reload pmc0 with the overflow information * On McKinley PMU, this will trigger a PMU interrupt */ - ia64_set_pmc(0, t->pmcs[0]); + ia64_set_pmc(0, ctx->th_pmcs[0]); ia64_srlz_d(); - t->pmcs[0] = 0UL; + ctx->th_pmcs[0] = 0UL; /* * will replay the PMU interrupt @@ -6214,7 +6205,6 @@ pfm_load_regs (struct task_struct *task) void pfm_load_regs (struct task_struct *task) { - struct thread_struct *t; pfm_context_t *ctx; struct task_struct *owner; unsigned long pmd_mask, pmc_mask; @@ -6223,7 +6213,6 @@ pfm_load_regs (struct task_struct *task) owner = GET_PMU_OWNER(); ctx = PFM_GET_CTX(task); - t = &task->thread; psr = pfm_get_psr(); BUG_ON(psr & (IA64_PSR_UP|IA64_PSR_PP)); @@ -6286,22 +6275,22 @@ pfm_load_regs (struct task_struct *task) */ pmc_mask = ctx->ctx_all_pmcs[0]; - pfm_restore_pmds(t->pmds, pmd_mask); - pfm_restore_pmcs(t->pmcs, pmc_mask); + pfm_restore_pmds(ctx->th_pmds, pmd_mask); + pfm_restore_pmcs(ctx->th_pmcs, pmc_mask); /* * check for pending overflow at the time the state * was saved. */ - if (unlikely(PMC0_HAS_OVFL(t->pmcs[0]))) { + if (unlikely(PMC0_HAS_OVFL(ctx->th_pmcs[0]))) { /* * reload pmc0 with the overflow information * On McKinley PMU, this will trigger a PMU interrupt */ - ia64_set_pmc(0, t->pmcs[0]); + ia64_set_pmc(0, ctx->th_pmcs[0]); ia64_srlz_d(); - t->pmcs[0] = 0UL; + ctx->th_pmcs[0] = 0UL; /* * will replay the PMU interrupt @@ -6376,11 +6365,11 @@ pfm_flush_pmds(struct task_struct *task, pfm_context_t *ctx) */ pfm_unfreeze_pmu(); } else { - pmc0 = task->thread.pmcs[0]; + pmc0 = ctx->th_pmcs[0]; /* * clear whatever overflow status bits there were */ - task->thread.pmcs[0] = 0; + ctx->th_pmcs[0] = 0; } ovfl_val = pmu_conf->ovfl_val; /* @@ -6401,7 +6390,7 @@ pfm_flush_pmds(struct task_struct *task, pfm_context_t *ctx) /* * can access PMU always true in system wide mode */ - val = pmd_val = can_access_pmu ? ia64_get_pmd(i) : task->thread.pmds[i]; + val = pmd_val = can_access_pmu ? ia64_get_pmd(i) : ctx->th_pmds[i]; if (PMD_IS_COUNTING(i)) { DPRINT(("[%d] pmd[%d] ctx_pmd=0x%lx hw_pmd=0x%lx\n", @@ -6433,7 +6422,7 @@ pfm_flush_pmds(struct task_struct *task, pfm_context_t *ctx) DPRINT(("[%d] ctx_pmd[%d]=0x%lx pmd_val=0x%lx\n", task->pid, i, val, pmd_val)); - if (is_self) task->thread.pmds[i] = pmd_val; + if (is_self) ctx->th_pmds[i] = pmd_val; ctx->ctx_pmds[i].val = val; } @@ -6677,7 +6666,7 @@ pfm_init(void) ffz(pmu_conf->ovfl_val)); /* sanity check */ - if (pmu_conf->num_pmds >= IA64_NUM_PMD_REGS || pmu_conf->num_pmcs >= IA64_NUM_PMC_REGS) { + if (pmu_conf->num_pmds >= PFM_NUM_PMD_REGS || pmu_conf->num_pmcs >= PFM_NUM_PMC_REGS) { printk(KERN_ERR "perfmon: not enough pmc/pmd, perfmon disabled\n"); pmu_conf = NULL; return -1; @@ -6752,7 +6741,6 @@ void dump_pmu_state(const char *from) { struct task_struct *task; - struct thread_struct *t; struct pt_regs *regs; pfm_context_t *ctx; unsigned long psr, dcr, info, flags; @@ -6797,16 +6785,14 @@ dump_pmu_state(const char *from) ia64_psr(regs)->up = 0; ia64_psr(regs)->pp = 0; - t = ¤t->thread; - for (i=1; PMC_IS_LAST(i) == 0; i++) { if (PMC_IS_IMPL(i) == 0) continue; - printk("->CPU%d pmc[%d]=0x%lx thread_pmc[%d]=0x%lx\n", this_cpu, i, ia64_get_pmc(i), i, t->pmcs[i]); + printk("->CPU%d pmc[%d]=0x%lx thread_pmc[%d]=0x%lx\n", this_cpu, i, ia64_get_pmc(i), i, ctx->th_pmcs[i]); } for (i=1; PMD_IS_LAST(i) == 0; i++) { if (PMD_IS_IMPL(i) == 0) continue; - printk("->CPU%d pmd[%d]=0x%lx thread_pmd[%d]=0x%lx\n", this_cpu, i, ia64_get_pmd(i), i, t->pmds[i]); + printk("->CPU%d pmd[%d]=0x%lx thread_pmd[%d]=0x%lx\n", this_cpu, i, ia64_get_pmd(i), i, ctx->th_pmds[i]); } if (ctx) { diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c index ea914cc6812a..51922b98086a 100644 --- a/arch/ia64/kernel/process.c +++ b/arch/ia64/kernel/process.c @@ -8,8 +8,6 @@ * 2005-10-07 Keith Owens <kaos@sgi.com> * Add notify_die() hooks. */ -#define __KERNEL_SYSCALLS__ /* see <asm/unistd.h> */ - #include <linux/cpu.h> #include <linux/pm.h> #include <linux/elf.h> diff --git a/arch/ia64/kernel/salinfo.c b/arch/ia64/kernel/salinfo.c index 9065f0f01ba3..e63b8ca5344a 100644 --- a/arch/ia64/kernel/salinfo.c +++ b/arch/ia64/kernel/salinfo.c @@ -266,6 +266,7 @@ salinfo_log_wakeup(int type, u8 *buffer, u64 size, int irqsafe) /* Check for outstanding MCA/INIT records every minute (arbitrary) */ #define SALINFO_TIMER_DELAY (60*HZ) static struct timer_list salinfo_timer; +extern void ia64_mlogbuf_dump(void); static void salinfo_timeout_check(struct salinfo_data *data) @@ -283,6 +284,7 @@ salinfo_timeout_check(struct salinfo_data *data) static void salinfo_timeout (unsigned long arg) { + ia64_mlogbuf_dump(); salinfo_timeout_check(salinfo_data + SAL_INFO_TYPE_MCA); salinfo_timeout_check(salinfo_data + SAL_INFO_TYPE_INIT); salinfo_timer.expires = jiffies + SALINFO_TIMER_DELAY; @@ -332,6 +334,8 @@ retry: if (cpu == -1) goto retry; + ia64_mlogbuf_dump(); + /* for next read, start checking at next CPU */ data->cpu_check = cpu; if (++data->cpu_check == NR_CPUS) diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c index 7ad0d9cc6db6..c4caa8003492 100644 --- a/arch/ia64/kernel/setup.c +++ b/arch/ia64/kernel/setup.c @@ -54,7 +54,6 @@ #include <asm/processor.h> #include <asm/sal.h> #include <asm/sections.h> -#include <asm/serial.h> #include <asm/setup.h> #include <asm/smp.h> #include <asm/system.h> @@ -509,7 +508,7 @@ show_cpuinfo (struct seq_file *m, void *v) { 1UL << 1, "spontaneous deferral"}, { 1UL << 2, "16-byte atomic ops" } }; - char family[32], features[128], *cp, sep; + char features[128], *cp, sep; struct cpuinfo_ia64 *c = v; unsigned long mask; unsigned long proc_freq; @@ -517,12 +516,6 @@ show_cpuinfo (struct seq_file *m, void *v) mask = c->features; - switch (c->family) { - case 0x07: memcpy(family, "Itanium", 8); break; - case 0x1f: memcpy(family, "Itanium 2", 10); break; - default: sprintf(family, "%u", c->family); break; - } - /* build the feature string: */ memcpy(features, " standard", 10); cp = features; @@ -553,8 +546,9 @@ show_cpuinfo (struct seq_file *m, void *v) "processor : %d\n" "vendor : %s\n" "arch : IA-64\n" - "family : %s\n" + "family : %u\n" "model : %u\n" + "model name : %s\n" "revision : %u\n" "archrev : %u\n" "features :%s\n" /* don't change this---it _is_ right! */ @@ -563,7 +557,8 @@ show_cpuinfo (struct seq_file *m, void *v) "cpu MHz : %lu.%06lu\n" "itc MHz : %lu.%06lu\n" "BogoMIPS : %lu.%02lu\n", - cpunum, c->vendor, family, c->model, c->revision, c->archrev, + cpunum, c->vendor, c->family, c->model, + c->model_name, c->revision, c->archrev, features, c->ppn, c->number, proc_freq / 1000, proc_freq % 1000, c->itc_freq / 1000000, c->itc_freq % 1000000, @@ -611,6 +606,31 @@ struct seq_operations cpuinfo_op = { .show = show_cpuinfo }; +static char brandname[128]; + +static char * __cpuinit +get_model_name(__u8 family, __u8 model) +{ + char brand[128]; + + if (ia64_pal_get_brand_info(brand)) { + if (family == 0x7) + memcpy(brand, "Merced", 7); + else if (family == 0x1f) switch (model) { + case 0: memcpy(brand, "McKinley", 9); break; + case 1: memcpy(brand, "Madison", 8); break; + case 2: memcpy(brand, "Madison up to 9M cache", 23); break; + } else + memcpy(brand, "Unknown", 8); + } + if (brandname[0] == '\0') + return strcpy(brandname, brand); + else if (strcmp(brandname, brand) == 0) + return brandname; + else + return kstrdup(brand, GFP_KERNEL); +} + static void __cpuinit identify_cpu (struct cpuinfo_ia64 *c) { @@ -640,7 +660,6 @@ identify_cpu (struct cpuinfo_ia64 *c) pal_status_t status; unsigned long impl_va_msb = 50, phys_addr_size = 44; /* Itanium defaults */ int i; - for (i = 0; i < 5; ++i) cpuid.bits[i] = ia64_get_cpuid(i); @@ -663,6 +682,7 @@ identify_cpu (struct cpuinfo_ia64 *c) c->family = cpuid.field.family; c->archrev = cpuid.field.archrev; c->features = cpuid.field.features; + c->model_name = get_model_name(c->family, c->model); status = ia64_pal_vm_summary(&vm1, &vm2); if (status == PAL_STATUS_SUCCESS) { diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c index 6203ed4ec8cf..f7d7f5668144 100644 --- a/arch/ia64/kernel/smpboot.c +++ b/arch/ia64/kernel/smpboot.c @@ -879,3 +879,27 @@ identify_siblings(struct cpuinfo_ia64 *c) c->core_id = info.log1_cid; c->thread_id = info.log1_tid; } + +/* + * returns non zero, if multi-threading is enabled + * on at least one physical package. Due to hotplug cpu + * and (maxcpus=), all threads may not necessarily be enabled + * even though the processor supports multi-threading. + */ +int is_multithreading_enabled(void) +{ + int i, j; + + for_each_present_cpu(i) { + for_each_present_cpu(j) { + if (j == i) + continue; + if ((cpu_data(j)->socket_id == cpu_data(i)->socket_id)) { + if (cpu_data(j)->core_id == cpu_data(i)->core_id) + return 1; + } + } + } + return 0; +} +EXPORT_SYMBOL_GPL(is_multithreading_enabled); diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c index 6928ef0d64d8..62e07f906e05 100644 --- a/arch/ia64/kernel/time.c +++ b/arch/ia64/kernel/time.c @@ -29,8 +29,6 @@ #include <asm/sections.h> #include <asm/system.h> -extern unsigned long wall_jiffies; - volatile int time_keeper_id = 0; /* smp_processor_id() of time-keeper */ #ifdef CONFIG_IA64_DEBUG_IRQ @@ -78,7 +76,7 @@ timer_interrupt (int irq, void *dev_id, struct pt_regs *regs) * xtime_lock. */ write_seqlock(&xtime_lock); - do_timer(regs); + do_timer(1); local_cpu_data->itm_next = new_itm; write_sequnlock(&xtime_lock); } else diff --git a/arch/ia64/kernel/topology.c b/arch/ia64/kernel/topology.c index f648c610b10c..5629b45e89c6 100644 --- a/arch/ia64/kernel/topology.c +++ b/arch/ia64/kernel/topology.c @@ -36,6 +36,7 @@ int arch_register_cpu(int num) */ if (!can_cpei_retarget() && is_cpu_cpei_target(num)) sysfs_cpus[num].cpu.no_control = 1; + map_cpu_to_node(num, node_cpuid[num].nid); #endif return register_cpu(&sysfs_cpus[num].cpu, num); @@ -45,7 +46,8 @@ int arch_register_cpu(int num) void arch_unregister_cpu(int num) { - return unregister_cpu(&sysfs_cpus[num].cpu); + unregister_cpu(&sysfs_cpus[num].cpu); + unmap_cpu_from_node(num, cpu_to_node(num)); } EXPORT_SYMBOL(arch_register_cpu); EXPORT_SYMBOL(arch_unregister_cpu); diff --git a/arch/ia64/kernel/uncached.c b/arch/ia64/kernel/uncached.c index 4c73a6763669..c58e933694d5 100644 --- a/arch/ia64/kernel/uncached.c +++ b/arch/ia64/kernel/uncached.c @@ -98,7 +98,7 @@ static int uncached_add_chunk(struct uncached_pool *uc_pool, int nid) /* attempt to allocate a granule's worth of cached memory pages */ - page = alloc_pages_node(nid, GFP_KERNEL | __GFP_ZERO, + page = alloc_pages_node(nid, GFP_KERNEL | __GFP_ZERO | GFP_THISNODE, IA64_GRANULE_SHIFT-PAGE_SHIFT); if (!page) { mutex_unlock(&uc_pool->add_chunk_mutex); diff --git a/arch/ia64/kernel/vmlinux.lds.S b/arch/ia64/kernel/vmlinux.lds.S index 5b0d5f64a9b1..b3b2e389d6b2 100644 --- a/arch/ia64/kernel/vmlinux.lds.S +++ b/arch/ia64/kernel/vmlinux.lds.S @@ -184,7 +184,9 @@ SECTIONS *(.data.gate) __stop_gate_section = .; } - . = ALIGN(PAGE_SIZE); /* make sure the gate page doesn't expose kernel data */ + . = ALIGN(PAGE_SIZE); /* make sure the gate page doesn't expose + * kernel data + */ .data.read_mostly : AT(ADDR(.data.read_mostly) - LOAD_OFFSET) { *(.data.read_mostly) } @@ -202,7 +204,9 @@ SECTIONS *(.data.percpu) __per_cpu_end = .; } - . = __phys_per_cpu_start + PERCPU_PAGE_SIZE; /* ensure percpu data fits into percpu page size */ + . = __phys_per_cpu_start + PERCPU_PAGE_SIZE; /* ensure percpu data fits + * into percpu page size + */ data : { } :data .data : AT(ADDR(.data) - LOAD_OFFSET) diff --git a/arch/ia64/mm/contig.c b/arch/ia64/mm/contig.c index e004143ba86b..daf977ff2920 100644 --- a/arch/ia64/mm/contig.c +++ b/arch/ia64/mm/contig.c @@ -26,7 +26,6 @@ #include <asm/mca.h> #ifdef CONFIG_VIRTUAL_MEM_MAP -static unsigned long num_dma_physpages; static unsigned long max_gap; #endif @@ -41,10 +40,11 @@ show_mem (void) int i, total = 0, reserved = 0; int shared = 0, cached = 0; - printk("Mem-info:\n"); + printk(KERN_INFO "Mem-info:\n"); show_free_areas(); - printk("Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10)); + printk(KERN_INFO "Free swap: %6ldkB\n", + nr_swap_pages<<(PAGE_SHIFT-10)); i = max_mapnr; for (i = 0; i < max_mapnr; i++) { if (!pfn_valid(i)) { @@ -63,12 +63,12 @@ show_mem (void) else if (page_count(mem_map + i)) shared += page_count(mem_map + i) - 1; } - printk("%d pages of RAM\n", total); - printk("%d reserved pages\n", reserved); - printk("%d pages shared\n", shared); - printk("%d pages swap cached\n", cached); - printk("%ld pages in page table cache\n", - pgtable_quicklist_total_size()); + printk(KERN_INFO "%d pages of RAM\n", total); + printk(KERN_INFO "%d reserved pages\n", reserved); + printk(KERN_INFO "%d pages shared\n", shared); + printk(KERN_INFO "%d pages swap cached\n", cached); + printk(KERN_INFO "%ld pages in page table cache\n", + pgtable_quicklist_total_size()); } /* physical address where the bootmem map is located */ @@ -218,18 +218,6 @@ count_pages (u64 start, u64 end, void *arg) return 0; } -#ifdef CONFIG_VIRTUAL_MEM_MAP -static int -count_dma_pages (u64 start, u64 end, void *arg) -{ - unsigned long *count = arg; - - if (start < MAX_DMA_ADDRESS) - *count += (min(end, MAX_DMA_ADDRESS) - start) >> PAGE_SHIFT; - return 0; -} -#endif - /* * Set up the page tables. */ @@ -238,45 +226,22 @@ void __init paging_init (void) { unsigned long max_dma; - unsigned long zones_size[MAX_NR_ZONES]; -#ifdef CONFIG_VIRTUAL_MEM_MAP - unsigned long zholes_size[MAX_NR_ZONES]; -#endif - - /* initialize mem_map[] */ - - memset(zones_size, 0, sizeof(zones_size)); + unsigned long nid = 0; + unsigned long max_zone_pfns[MAX_NR_ZONES]; num_physpages = 0; efi_memmap_walk(count_pages, &num_physpages); max_dma = virt_to_phys((void *) MAX_DMA_ADDRESS) >> PAGE_SHIFT; + max_zone_pfns[ZONE_DMA] = max_dma; + max_zone_pfns[ZONE_NORMAL] = max_low_pfn; #ifdef CONFIG_VIRTUAL_MEM_MAP - memset(zholes_size, 0, sizeof(zholes_size)); - - num_dma_physpages = 0; - efi_memmap_walk(count_dma_pages, &num_dma_physpages); - - if (max_low_pfn < max_dma) { - zones_size[ZONE_DMA] = max_low_pfn; - zholes_size[ZONE_DMA] = max_low_pfn - num_dma_physpages; - } else { - zones_size[ZONE_DMA] = max_dma; - zholes_size[ZONE_DMA] = max_dma - num_dma_physpages; - if (num_physpages > num_dma_physpages) { - zones_size[ZONE_NORMAL] = max_low_pfn - max_dma; - zholes_size[ZONE_NORMAL] = - ((max_low_pfn - max_dma) - - (num_physpages - num_dma_physpages)); - } - } - + efi_memmap_walk(register_active_ranges, &nid); efi_memmap_walk(find_largest_hole, (u64 *)&max_gap); if (max_gap < LARGE_GAP) { vmem_map = (struct page *) 0; - free_area_init_node(0, NODE_DATA(0), zones_size, 0, - zholes_size); + free_area_init_nodes(max_zone_pfns); } else { unsigned long map_size; @@ -288,20 +253,19 @@ paging_init (void) vmem_map = (struct page *) vmalloc_end; efi_memmap_walk(create_mem_map_page_table, NULL); - NODE_DATA(0)->node_mem_map = vmem_map; - free_area_init_node(0, NODE_DATA(0), zones_size, - 0, zholes_size); + /* + * alloc_node_mem_map makes an adjustment for mem_map + * which isn't compatible with vmem_map. + */ + NODE_DATA(0)->node_mem_map = vmem_map + + find_min_pfn_with_active_regions(); + free_area_init_nodes(max_zone_pfns); printk("Virtual mem_map starts at 0x%p\n", mem_map); } #else /* !CONFIG_VIRTUAL_MEM_MAP */ - if (max_low_pfn < max_dma) - zones_size[ZONE_DMA] = max_low_pfn; - else { - zones_size[ZONE_DMA] = max_dma; - zones_size[ZONE_NORMAL] = max_low_pfn - max_dma; - } - free_area_init(zones_size); + add_active_range(0, 0, max_low_pfn); + free_area_init_nodes(max_zone_pfns); #endif /* !CONFIG_VIRTUAL_MEM_MAP */ zero_page_memmap_ptr = virt_to_page(ia64_imva(empty_zero_page)); } diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c index d260bffa01ab..d497b6b0f5b2 100644 --- a/arch/ia64/mm/discontig.c +++ b/arch/ia64/mm/discontig.c @@ -547,15 +547,16 @@ void show_mem(void) unsigned long total_present = 0; pg_data_t *pgdat; - printk("Mem-info:\n"); + printk(KERN_INFO "Mem-info:\n"); show_free_areas(); - printk("Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10)); + printk(KERN_INFO "Free swap: %6ldkB\n", + nr_swap_pages<<(PAGE_SHIFT-10)); + printk(KERN_INFO "Node memory in pages:\n"); for_each_online_pgdat(pgdat) { unsigned long present; unsigned long flags; int shared = 0, cached = 0, reserved = 0; - printk("Node ID: %d\n", pgdat->node_id); pgdat_resize_lock(pgdat, &flags); present = pgdat->node_present_pages; for(i = 0; i < pgdat->node_spanned_pages; i++) { @@ -579,18 +580,17 @@ void show_mem(void) total_reserved += reserved; total_cached += cached; total_shared += shared; - printk("\t%ld pages of RAM\n", present); - printk("\t%d reserved pages\n", reserved); - printk("\t%d pages shared\n", shared); - printk("\t%d pages swap cached\n", cached); + printk(KERN_INFO "Node %4d: RAM: %11ld, rsvd: %8d, " + "shrd: %10d, swpd: %10d\n", pgdat->node_id, + present, reserved, shared, cached); } - printk("%ld pages of RAM\n", total_present); - printk("%d reserved pages\n", total_reserved); - printk("%d pages shared\n", total_shared); - printk("%d pages swap cached\n", total_cached); - printk("Total of %ld pages in page table cache\n", - pgtable_quicklist_total_size()); - printk("%d free buffer pages\n", nr_free_buffer_pages()); + printk(KERN_INFO "%ld pages of RAM\n", total_present); + printk(KERN_INFO "%d reserved pages\n", total_reserved); + printk(KERN_INFO "%d pages shared\n", total_shared); + printk(KERN_INFO "%d pages swap cached\n", total_cached); + printk(KERN_INFO "Total of %ld pages in page table cache\n", + pgtable_quicklist_total_size()); + printk(KERN_INFO "%d free buffer pages\n", nr_free_buffer_pages()); } /** @@ -654,6 +654,7 @@ static __init int count_node_pages(unsigned long start, unsigned long len, int n { unsigned long end = start + len; + add_active_range(node, start >> PAGE_SHIFT, end >> PAGE_SHIFT); mem_data[node].num_physpages += len >> PAGE_SHIFT; if (start <= __pa(MAX_DMA_ADDRESS)) mem_data[node].num_dma_physpages += @@ -678,10 +679,10 @@ static __init int count_node_pages(unsigned long start, unsigned long len, int n void __init paging_init(void) { unsigned long max_dma; - unsigned long zones_size[MAX_NR_ZONES]; - unsigned long zholes_size[MAX_NR_ZONES]; unsigned long pfn_offset = 0; + unsigned long max_pfn = 0; int node; + unsigned long max_zone_pfns[MAX_NR_ZONES]; max_dma = virt_to_phys((void *) MAX_DMA_ADDRESS) >> PAGE_SHIFT; @@ -698,47 +699,20 @@ void __init paging_init(void) #endif for_each_online_node(node) { - memset(zones_size, 0, sizeof(zones_size)); - memset(zholes_size, 0, sizeof(zholes_size)); - num_physpages += mem_data[node].num_physpages; - - if (mem_data[node].min_pfn >= max_dma) { - /* All of this node's memory is above ZONE_DMA */ - zones_size[ZONE_NORMAL] = mem_data[node].max_pfn - - mem_data[node].min_pfn; - zholes_size[ZONE_NORMAL] = mem_data[node].max_pfn - - mem_data[node].min_pfn - - mem_data[node].num_physpages; - } else if (mem_data[node].max_pfn < max_dma) { - /* All of this node's memory is in ZONE_DMA */ - zones_size[ZONE_DMA] = mem_data[node].max_pfn - - mem_data[node].min_pfn; - zholes_size[ZONE_DMA] = mem_data[node].max_pfn - - mem_data[node].min_pfn - - mem_data[node].num_dma_physpages; - } else { - /* This node has memory in both zones */ - zones_size[ZONE_DMA] = max_dma - - mem_data[node].min_pfn; - zholes_size[ZONE_DMA] = zones_size[ZONE_DMA] - - mem_data[node].num_dma_physpages; - zones_size[ZONE_NORMAL] = mem_data[node].max_pfn - - max_dma; - zholes_size[ZONE_NORMAL] = zones_size[ZONE_NORMAL] - - (mem_data[node].num_physpages - - mem_data[node].num_dma_physpages); - } - pfn_offset = mem_data[node].min_pfn; #ifdef CONFIG_VIRTUAL_MEM_MAP NODE_DATA(node)->node_mem_map = vmem_map + pfn_offset; #endif - free_area_init_node(node, NODE_DATA(node), zones_size, - pfn_offset, zholes_size); + if (mem_data[node].max_pfn > max_pfn) + max_pfn = mem_data[node].max_pfn; } + max_zone_pfns[ZONE_DMA] = max_dma; + max_zone_pfns[ZONE_NORMAL] = max_pfn; + free_area_init_nodes(max_zone_pfns); + zero_page_memmap_ptr = virt_to_page(ia64_imva(empty_zero_page)); } diff --git a/arch/ia64/mm/fault.c b/arch/ia64/mm/fault.c index 14ef7cceb208..59f3ab937615 100644 --- a/arch/ia64/mm/fault.c +++ b/arch/ia64/mm/fault.c @@ -146,9 +146,11 @@ ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_regs *re # error File is out of sync with <linux/mm.h>. Please update. # endif + if (((isr >> IA64_ISR_R_BIT) & 1UL) && (!(vma->vm_flags & (VM_READ | VM_WRITE)))) + goto bad_area; + mask = ( (((isr >> IA64_ISR_X_BIT) & 1UL) << VM_EXEC_BIT) - | (((isr >> IA64_ISR_W_BIT) & 1UL) << VM_WRITE_BIT) - | (((isr >> IA64_ISR_R_BIT) & 1UL) << VM_READ_BIT)); + | (((isr >> IA64_ISR_W_BIT) & 1UL) << VM_WRITE_BIT)); if ((vma->vm_flags & mask) != mask) goto bad_area; @@ -278,7 +280,7 @@ ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_regs *re out_of_memory: up_read(&mm->mmap_sem); - if (current->pid == 1) { + if (is_init(current)) { yield(); down_read(&mm->mmap_sem); goto survive; diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index 30617ccb4f7e..ff87a5cba399 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -593,6 +593,18 @@ find_largest_hole (u64 start, u64 end, void *arg) last_end = end; return 0; } + +int __init +register_active_ranges(u64 start, u64 end, void *nid) +{ + BUG_ON(nid == NULL); + BUG_ON(*(unsigned long *)nid >= MAX_NUMNODES); + + add_active_range(*(unsigned long *)nid, + __pa(start) >> PAGE_SHIFT, + __pa(end) >> PAGE_SHIFT); + return 0; +} #endif /* CONFIG_VIRTUAL_MEM_MAP */ static int __init diff --git a/arch/ia64/mm/numa.c b/arch/ia64/mm/numa.c index 64e4c21f311c..7807fc5c0422 100644 --- a/arch/ia64/mm/numa.c +++ b/arch/ia64/mm/numa.c @@ -16,6 +16,7 @@ #include <linux/node.h> #include <linux/init.h> #include <linux/bootmem.h> +#include <linux/module.h> #include <asm/mmzone.h> #include <asm/numa.h> @@ -69,4 +70,21 @@ int early_pfn_to_nid(unsigned long pfn) return 0; } + +#ifdef CONFIG_MEMORY_HOTPLUG +/* + * SRAT information is stored in node_memblk[], then we can use SRAT + * information at memory-hot-add if necessary. + */ + +int memory_add_physaddr_to_nid(u64 addr) +{ + int nid = paddr_to_nid(addr); + if (nid < 0) + return 0; + return nid; +} + +EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid); +#endif #endif diff --git a/arch/ia64/pci/pci.c b/arch/ia64/pci/pci.c index 60b45e79f080..15c7c670da39 100644 --- a/arch/ia64/pci/pci.c +++ b/arch/ia64/pci/pci.c @@ -562,7 +562,8 @@ pcibios_enable_device (struct pci_dev *dev, int mask) void pcibios_disable_device (struct pci_dev *dev) { - acpi_pci_irq_disable(dev); + if (dev->is_enabled) + acpi_pci_irq_disable(dev); } void diff --git a/arch/ia64/sn/kernel/bte.c b/arch/ia64/sn/kernel/bte.c index 27dee4584061..7f73ad4408aa 100644 --- a/arch/ia64/sn/kernel/bte.c +++ b/arch/ia64/sn/kernel/bte.c @@ -277,8 +277,7 @@ bte_result_t bte_unaligned_copy(u64 src, u64 dest, u64 len, u64 mode) } /* temporary buffer used during unaligned transfers */ - bteBlock_unaligned = kmalloc(len + 3 * L1_CACHE_BYTES, - GFP_KERNEL | GFP_DMA); + bteBlock_unaligned = kmalloc(len + 3 * L1_CACHE_BYTES, GFP_KERNEL); if (bteBlock_unaligned == NULL) { return BTEFAIL_NOTAVAIL; } diff --git a/arch/ia64/sn/kernel/sn2/sn_hwperf.c b/arch/ia64/sn/kernel/sn2/sn_hwperf.c index 9a8a29339d2d..462ea178f49a 100644 --- a/arch/ia64/sn/kernel/sn2/sn_hwperf.c +++ b/arch/ia64/sn/kernel/sn2/sn_hwperf.c @@ -32,9 +32,10 @@ #include <linux/cpumask.h> #include <linux/smp_lock.h> #include <linux/nodemask.h> +#include <linux/smp.h> + #include <asm/processor.h> #include <asm/topology.h> -#include <asm/smp.h> #include <asm/semaphore.h> #include <asm/uaccess.h> #include <asm/sal.h> @@ -422,7 +423,7 @@ static int sn_topology_show(struct seq_file *s, void *d) "coherency_domain %d, " "region_size %d\n", - partid, system_utsname.nodename, + partid, utsname()->nodename, shubtype ? "shub2" : "shub1", (u64)nasid_mask << nasid_shift, nasid_msb, nasid_shift, system_size, sharing_size, coher, region_size); diff --git a/arch/ia64/sn/pci/pcibr/pcibr_ate.c b/arch/ia64/sn/pci/pcibr/pcibr_ate.c index 1f0253bfe0a0..5eb1e1e078b4 100644 --- a/arch/ia64/sn/pci/pcibr/pcibr_ate.c +++ b/arch/ia64/sn/pci/pcibr/pcibr_ate.c @@ -160,7 +160,7 @@ void pcibr_ate_free(struct pcibus_info *pcibus_info, int index) volatile u64 ate; int count; - u64 flags; + unsigned long flags; if (pcibr_invalidate_ate) { /* For debugging purposes, clear the valid bit in the ATE */ diff --git a/arch/ia64/sn/pci/pcibr/pcibr_dma.c b/arch/ia64/sn/pci/pcibr/pcibr_dma.c index a86c7b945962..1ee977fb6ebb 100644 --- a/arch/ia64/sn/pci/pcibr/pcibr_dma.c +++ b/arch/ia64/sn/pci/pcibr/pcibr_dma.c @@ -237,7 +237,7 @@ void sn_dma_flush(u64 addr) int is_tio; int wid_num; int i, j; - u64 flags; + unsigned long flags; u64 itte; struct hubdev_info *hubinfo; struct sn_flush_device_kernel *p; |