diff options
author | Wim Van Sebroeck <wim@iguana.be> | 2007-05-11 19:03:13 +0000 |
---|---|---|
committer | Wim Van Sebroeck <wim@iguana.be> | 2007-05-11 19:03:13 +0000 |
commit | 5c34202b8bf942da411b6599668a76b07449bbfd (patch) | |
tree | 5719c361321eaddc8e4f1b0c8a7994f0e9a6fdd3 /arch/ia64 | |
parent | 0d4804b31f91cfbcff6d62af0bc09a893a1c8ae0 (diff) | |
parent | 1f8a6b658a943b4f04a1fc7b3a420360202c86cd (diff) | |
download | blackbird-op-linux-5c34202b8bf942da411b6599668a76b07449bbfd.tar.gz blackbird-op-linux-5c34202b8bf942da411b6599668a76b07449bbfd.zip |
Merge /pub/scm/linux/kernel/git/torvalds/linux-2.6
Diffstat (limited to 'arch/ia64')
52 files changed, 801 insertions, 354 deletions
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index e19185d26554..6e41471449c0 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -14,6 +14,7 @@ config IA64 select PCI if (!IA64_HP_SIM) select ACPI if (!IA64_HP_SIM) select PM if (!IA64_HP_SIM) + select ARCH_SUPPORTS_MSI default y help The Itanium Processor Family is Intel's 64-bit successor to @@ -438,6 +439,16 @@ config IA64_PALINFO To use this option, you have to ensure that the "/proc file system support" (CONFIG_PROC_FS) is enabled, too. +config IA64_MC_ERR_INJECT + tristate "MC error injection support" + help + Selets whether support for MC error injection. By enabling the + support, kernel provide sysfs interface for user application to + call MC error injection PAL procedure to inject various errors. + This is a useful tool for MCA testing. + + If you're unsure, do not select this option. + config SGI_SN def_bool y if (IA64_SGI_SN2 || IA64_GENERIC) @@ -457,7 +468,7 @@ config KEXEC help kexec is a system call that implements the ability to shutdown your current kernel, and to start another kernel. It is like a reboot - but it is indepedent of the system firmware. And like a reboot + but it is independent of the system firmware. And like a reboot you can start any kernel with it, not just Linux. The name comes from the similiarity to the exec system call. diff --git a/arch/ia64/defconfig b/arch/ia64/defconfig index 153bfdc0182d..90bd9601cdde 100644 --- a/arch/ia64/defconfig +++ b/arch/ia64/defconfig @@ -164,6 +164,7 @@ CONFIG_COMPAT=y CONFIG_IA64_MCA_RECOVERY=y CONFIG_PERFMON=y CONFIG_IA64_PALINFO=y +# CONFIG_MC_ERR_INJECT is not set CONFIG_SGI_SN=y # CONFIG_IA64_ESI is not set diff --git a/arch/ia64/hp/sim/boot/fw-emu.c b/arch/ia64/hp/sim/boot/fw-emu.c index 5a0a7afcfc3a..300acd913d9c 100644 --- a/arch/ia64/hp/sim/boot/fw-emu.c +++ b/arch/ia64/hp/sim/boot/fw-emu.c @@ -287,7 +287,7 @@ sys_fw_init (const char *args, int arglen) memset(efi_systab, 0, sizeof(efi_systab)); efi_systab->hdr.signature = EFI_SYSTEM_TABLE_SIGNATURE; - efi_systab->hdr.revision = EFI_SYSTEM_TABLE_REVISION; + efi_systab->hdr.revision = ((1 << 16) | 00); efi_systab->hdr.headersize = sizeof(efi_systab->hdr); efi_systab->fw_vendor = __pa("H\0e\0w\0l\0e\0t\0t\0-\0P\0a\0c\0k\0a\0r\0d\0\0"); efi_systab->fw_revision = 1; diff --git a/arch/ia64/ia32/audit.c b/arch/ia64/ia32/audit.c index 92d7d0c8d93f..8850fe40ea34 100644 --- a/arch/ia64/ia32/audit.c +++ b/arch/ia64/ia32/audit.c @@ -20,6 +20,11 @@ unsigned ia32_read_class[] = { ~0U }; +unsigned ia32_signal_class[] = { +#include <asm-generic/audit_signal.h> +~0U +}; + int ia32_classify_syscall(unsigned syscall) { switch(syscall) { diff --git a/arch/ia64/ia32/ia32_entry.S b/arch/ia64/ia32/ia32_entry.S index 687e5fdc9683..99b665e2b1d5 100644 --- a/arch/ia64/ia32/ia32_entry.S +++ b/arch/ia64/ia32/ia32_entry.S @@ -52,43 +52,6 @@ ENTRY(ia32_clone) br.ret.sptk.many rp END(ia32_clone) -ENTRY(sys32_rt_sigsuspend) - .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8) - alloc loc1=ar.pfs,8,2,3,0 // preserve all eight input regs - mov loc0=rp - mov out0=in0 // mask - mov out1=in1 // sigsetsize - mov out2=sp // out2 = &sigscratch - .fframe 16 - adds sp=-16,sp // allocate dummy "sigscratch" - ;; - .body - br.call.sptk.many rp=ia32_rt_sigsuspend -1: .restore sp - adds sp=16,sp - mov rp=loc0 - mov ar.pfs=loc1 - br.ret.sptk.many rp -END(sys32_rt_sigsuspend) - -ENTRY(sys32_sigsuspend) - .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8) - alloc loc1=ar.pfs,8,2,3,0 // preserve all eight input regs - mov loc0=rp - mov out0=in2 // mask (first two args are ignored) - ;; - mov out1=sp // out1 = &sigscratch - .fframe 16 - adds sp=-16,sp // allocate dummy "sigscratch" - .body - br.call.sptk.many rp=ia32_sigsuspend -1: .restore sp - adds sp=16,sp - mov rp=loc0 - mov ar.pfs=loc1 - br.ret.sptk.many rp -END(sys32_sigsuspend) - GLOBAL_ENTRY(ia32_ret_from_clone) PT_REGS_UNWIND_INFO(0) { /* @@ -389,7 +352,7 @@ ia32_syscall_table: data8 sys_rt_sigpending data8 compat_sys_rt_sigtimedwait data8 sys32_rt_sigqueueinfo - data8 sys32_rt_sigsuspend + data8 compat_sys_rt_sigsuspend data8 sys32_pread /* 180 */ data8 sys32_pwrite data8 sys_chown /* 16-bit version */ diff --git a/arch/ia64/ia32/ia32_ldt.c b/arch/ia64/ia32/ia32_ldt.c index a152738c7d0d..16d51c146849 100644 --- a/arch/ia64/ia32/ia32_ldt.c +++ b/arch/ia64/ia32/ia32_ldt.c @@ -10,7 +10,6 @@ #include <linux/string.h> #include <linux/mm.h> #include <linux/smp.h> -#include <linux/smp_lock.h> #include <linux/vmalloc.h> #include <asm/uaccess.h> diff --git a/arch/ia64/ia32/ia32_signal.c b/arch/ia64/ia32/ia32_signal.c index b3355a9ca2c3..85e82f32e480 100644 --- a/arch/ia64/ia32/ia32_signal.c +++ b/arch/ia64/ia32/ia32_signal.c @@ -18,7 +18,6 @@ #include <linux/sched.h> #include <linux/signal.h> #include <linux/smp.h> -#include <linux/smp_lock.h> #include <linux/stddef.h> #include <linux/syscalls.h> #include <linux/unistd.h> @@ -452,59 +451,20 @@ sigact_set_handler (struct k_sigaction *sa, unsigned int handler, unsigned int r sa->sa.sa_handler = (__sighandler_t) (((unsigned long) restorer << 32) | handler); } -long -__ia32_rt_sigsuspend (compat_sigset_t *sset, unsigned int sigsetsize, struct sigscratch *scr) +asmlinkage long +sys32_sigsuspend (int history0, int history1, old_sigset_t mask) { - extern long ia64_do_signal (sigset_t *oldset, struct sigscratch *scr, long in_syscall); - sigset_t oldset, set; - - scr->scratch_unat = 0; /* avoid leaking kernel bits to user level */ - memset(&set, 0, sizeof(set)); - - memcpy(&set.sig, &sset->sig, sigsetsize); - - sigdelsetmask(&set, ~_BLOCKABLE); - + mask &= _BLOCKABLE; spin_lock_irq(¤t->sighand->siglock); - { - oldset = current->blocked; - current->blocked = set; - recalc_sigpending(); - } + current->saved_sigmask = current->blocked; + siginitset(¤t->blocked, mask); + recalc_sigpending(); spin_unlock_irq(¤t->sighand->siglock); - /* - * The return below usually returns to the signal handler. We need to pre-set the - * correct error code here to ensure that the right values get saved in sigcontext - * by ia64_do_signal. - */ - scr->pt.r8 = -EINTR; - while (1) { - current->state = TASK_INTERRUPTIBLE; - schedule(); - if (ia64_do_signal(&oldset, scr, 1)) - return -EINTR; - } -} - -asmlinkage long -ia32_rt_sigsuspend (compat_sigset_t __user *uset, unsigned int sigsetsize, struct sigscratch *scr) -{ - compat_sigset_t set; - - if (sigsetsize > sizeof(compat_sigset_t)) - return -EINVAL; - - if (copy_from_user(&set.sig, &uset->sig, sigsetsize)) - return -EFAULT; - - return __ia32_rt_sigsuspend(&set, sigsetsize, scr); -} - -asmlinkage long -ia32_sigsuspend (unsigned int mask, struct sigscratch *scr) -{ - return __ia32_rt_sigsuspend((compat_sigset_t *) &mask, sizeof(mask), scr); + current->state = TASK_INTERRUPTIBLE; + schedule(); + set_thread_flag(TIF_RESTORE_SIGMASK); + return -ERESTARTNOHAND; } asmlinkage long @@ -811,7 +771,11 @@ get_sigframe (struct k_sigaction *ka, struct pt_regs * regs, size_t frame_size) } /* Legacy stack switching not supported */ - return (void __user *)((esp - frame_size) & -8ul); + esp -= frame_size; + /* Align the stack pointer according to the i386 ABI, + * i.e. so that on function entry ((sp + 4) & 15) == 0. */ + esp = ((esp + 4) & -16ul) - 4; + return (void __user *) esp; } static int diff --git a/arch/ia64/ia32/ia32_support.c b/arch/ia64/ia32/ia32_support.c index 6af400a12ca1..beea7a0b9dc6 100644 --- a/arch/ia64/ia32/ia32_support.c +++ b/arch/ia64/ia32/ia32_support.c @@ -252,10 +252,8 @@ ia32_init (void) extern struct kmem_cache *partial_page_cachep; partial_page_cachep = kmem_cache_create("partial_page_cache", - sizeof(struct partial_page), 0, 0, - NULL, NULL); - if (!partial_page_cachep) - panic("Cannot create partial page SLAB cache"); + sizeof(struct partial_page), + 0, SLAB_PANIC, NULL, NULL); } #endif return 0; diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile index 098ee605bf5e..33e5a598672d 100644 --- a/arch/ia64/kernel/Makefile +++ b/arch/ia64/kernel/Makefile @@ -34,6 +34,7 @@ obj-$(CONFIG_IA64_UNCACHED_ALLOCATOR) += uncached.o obj-$(CONFIG_AUDIT) += audit.o obj-$(CONFIG_PCI_MSI) += msi_ia64.o mca_recovery-y += mca_drv.o mca_drv_asm.o +obj-$(CONFIG_IA64_MC_ERR_INJECT)+= err_inject.o obj-$(CONFIG_IA64_ESI) += esi.o ifneq ($(CONFIG_IA64_ESI),) diff --git a/arch/ia64/kernel/audit.c b/arch/ia64/kernel/audit.c index 04682555a28c..f3802ae89b10 100644 --- a/arch/ia64/kernel/audit.c +++ b/arch/ia64/kernel/audit.c @@ -23,6 +23,20 @@ static unsigned chattr_class[] = { ~0U }; +static unsigned signal_class[] = { +#include <asm-generic/audit_signal.h> +~0U +}; + +int audit_classify_arch(int arch) +{ +#ifdef CONFIG_IA32_SUPPORT + if (arch == AUDIT_ARCH_I386) + return 1; +#endif + return 0; +} + int audit_classify_syscall(int abi, unsigned syscall) { #ifdef CONFIG_IA32_SUPPORT @@ -49,15 +63,18 @@ static int __init audit_classes_init(void) extern __u32 ia32_write_class[]; extern __u32 ia32_read_class[]; extern __u32 ia32_chattr_class[]; + extern __u32 ia32_signal_class[]; audit_register_class(AUDIT_CLASS_WRITE_32, ia32_write_class); audit_register_class(AUDIT_CLASS_READ_32, ia32_read_class); audit_register_class(AUDIT_CLASS_DIR_WRITE_32, ia32_dir_class); audit_register_class(AUDIT_CLASS_CHATTR_32, ia32_chattr_class); + audit_register_class(AUDIT_CLASS_SIGNAL_32, ia32_signal_class); #endif audit_register_class(AUDIT_CLASS_WRITE, write_class); audit_register_class(AUDIT_CLASS_READ, read_class); audit_register_class(AUDIT_CLASS_DIR_WRITE, dir_class); audit_register_class(AUDIT_CLASS_CHATTR, chattr_class); + audit_register_class(AUDIT_CLASS_SIGNAL, signal_class); return 0; } diff --git a/arch/ia64/kernel/crash.c b/arch/ia64/kernel/crash.c index 80a94e707827..aeb79fb28f0b 100644 --- a/arch/ia64/kernel/crash.c +++ b/arch/ia64/kernel/crash.c @@ -16,8 +16,8 @@ #include <linux/elfcore.h> #include <linux/sysctl.h> #include <linux/init.h> +#include <linux/kdebug.h> -#include <asm/kdebug.h> #include <asm/mca.h> int kdump_status[NR_CPUS]; @@ -74,7 +74,7 @@ crash_save_this_cpu(void) buf = (u64 *) per_cpu_ptr(crash_notes, cpu); if (!buf) return; - buf = append_elf_note(buf, "CORE", NT_PRSTATUS, prstatus, + buf = append_elf_note(buf, KEXEC_CORE_NOTE_NAME, NT_PRSTATUS, prstatus, sizeof(*prstatus)); final_note(buf); } diff --git a/arch/ia64/kernel/efi.c b/arch/ia64/kernel/efi.c index f45f91d38cab..75ec3478d8a2 100644 --- a/arch/ia64/kernel/efi.c +++ b/arch/ia64/kernel/efi.c @@ -445,11 +445,11 @@ efi_init (void) panic("Woah! Can't find EFI system table.\n"); if (efi.systab->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE) panic("Woah! EFI system table signature incorrect\n"); - if ((efi.systab->hdr.revision ^ EFI_SYSTEM_TABLE_REVISION) >> 16 != 0) - printk(KERN_WARNING "Warning: EFI system table major version mismatch: " - "got %d.%02d, expected %d.%02d\n", - efi.systab->hdr.revision >> 16, efi.systab->hdr.revision & 0xffff, - EFI_SYSTEM_TABLE_REVISION >> 16, EFI_SYSTEM_TABLE_REVISION & 0xffff); + if ((efi.systab->hdr.revision >> 16) == 0) + printk(KERN_WARNING "Warning: EFI system table version " + "%d.%02d, expected 1.00 or greater\n", + efi.systab->hdr.revision >> 16, + efi.systab->hdr.revision & 0xffff); config_tables = __va(efi.systab->tables); @@ -660,6 +660,29 @@ efi_memory_descriptor (unsigned long phys_addr) return NULL; } +static int +efi_memmap_intersects (unsigned long phys_addr, unsigned long size) +{ + void *efi_map_start, *efi_map_end, *p; + efi_memory_desc_t *md; + u64 efi_desc_size; + unsigned long end; + + efi_map_start = __va(ia64_boot_param->efi_memmap); + efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size; + efi_desc_size = ia64_boot_param->efi_memdesc_size; + + end = phys_addr + size; + + for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) { + md = p; + + if (md->phys_addr < end && efi_md_end(md) > phys_addr) + return 1; + } + return 0; +} + u32 efi_mem_type (unsigned long phys_addr) { @@ -766,11 +789,28 @@ valid_phys_addr_range (unsigned long phys_addr, unsigned long size) int valid_mmap_phys_addr_range (unsigned long pfn, unsigned long size) { + unsigned long phys_addr = pfn << PAGE_SHIFT; + u64 attr; + + attr = efi_mem_attribute(phys_addr, size); + /* - * MMIO regions are often missing from the EFI memory map. - * We must allow mmap of them for programs like X, so we - * currently can't do any useful validation. + * /dev/mem mmap uses normal user pages, so we don't need the entire + * granule, but the entire region we're mapping must support the same + * attribute. */ + if (attr & EFI_MEMORY_WB || attr & EFI_MEMORY_UC) + return 1; + + /* + * Intel firmware doesn't tell us about all the MMIO regions, so + * in general we have to allow mmap requests. But if EFI *does* + * tell us about anything inside this region, we should deny it. + * The user can always map a smaller region to avoid the overlap. + */ + if (efi_memmap_intersects(phys_addr, size)) + return 0; + return 1; } diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S index e7873eeae448..b50bf208678e 100644 --- a/arch/ia64/kernel/entry.S +++ b/arch/ia64/kernel/entry.S @@ -767,7 +767,7 @@ ENTRY(ia64_leave_syscall) ld8.fill r15=[r3] // M0|1 restore r15 mov b6=r18 // I0 restore b6 - addl r17=THIS_CPU(ia64_phys_stacked_size_p8),r0 // A + LOAD_PHYS_STACK_REG_SIZE(r17) mov f9=f0 // F clear f9 (pKStk) br.cond.dpnt.many skip_rbs_switch // B @@ -775,7 +775,6 @@ ENTRY(ia64_leave_syscall) shr.u r18=r19,16 // I0|1 get byte size of existing "dirty" partition cover // B add current frame into dirty partition & set cr.ifs ;; -(pUStk) ld4 r17=[r17] // M0|1 r17 = cpu_data->phys_stacked_size_p8 mov r19=ar.bsp // M2 get new backing store pointer mov f10=f0 // F clear f10 @@ -953,9 +952,7 @@ GLOBAL_ENTRY(ia64_leave_kernel) shr.u r18=r19,16 // get byte size of existing "dirty" partition ;; mov r16=ar.bsp // get existing backing store pointer - addl r17=THIS_CPU(ia64_phys_stacked_size_p8),r0 - ;; - ld4 r17=[r17] // r17 = cpu_data->phys_stacked_size_p8 + LOAD_PHYS_STACK_REG_SIZE(r17) (pKStk) br.cond.dpnt skip_rbs_switch /* @@ -1202,32 +1199,6 @@ ENTRY(notify_resume_user) br.ret.sptk.many rp END(notify_resume_user) -GLOBAL_ENTRY(sys_rt_sigsuspend) - .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8) - alloc loc1=ar.pfs,8,2,3,0 // preserve all eight input regs in case of syscall restart! - mov r9=ar.unat - mov loc0=rp // save return address - mov out0=in0 // mask - mov out1=in1 // sigsetsize - adds out2=8,sp // out2=&sigscratch->ar_pfs - ;; - .fframe 16 - .spillsp ar.unat, 16 - st8 [sp]=r9,-16 // allocate space for ar.unat and save it - st8 [out2]=loc1,-8 // save ar.pfs, out2=&sigscratch - .body - br.call.sptk.many rp=ia64_rt_sigsuspend -.ret17: .restore sp - adds sp=16,sp // pop scratch stack space - ;; - ld8 r9=[sp] // load new unat from sw->caller_unat - mov rp=loc0 - ;; - mov ar.unat=r9 - mov ar.pfs=loc1 - br.ret.sptk.many rp -END(sys_rt_sigsuspend) - ENTRY(sys_rt_sigreturn) PT_REGS_UNWIND_INFO(0) /* @@ -1601,8 +1572,8 @@ sys_call_table: data8 sys_readlinkat data8 sys_fchmodat data8 sys_faccessat - data8 sys_ni_syscall // reserved for pselect - data8 sys_ni_syscall // 1295 reserved for ppoll + data8 sys_pselect6 + data8 sys_ppoll data8 sys_unshare data8 sys_splice data8 sys_set_robust_list diff --git a/arch/ia64/kernel/err_inject.c b/arch/ia64/kernel/err_inject.c new file mode 100644 index 000000000000..6a49600cf337 --- /dev/null +++ b/arch/ia64/kernel/err_inject.c @@ -0,0 +1,295 @@ +/* + * err_inject.c - + * 1.) Inject errors to a processor. + * 2.) Query error injection capabilities. + * This driver along with user space code can be acting as an error + * injection tool. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + * Written by: Fenghua Yu <fenghua.yu@intel.com>, Intel Corporation + * Copyright (C) 2006, Intel Corp. All rights reserved. + * + */ +#include <linux/sysdev.h> +#include <linux/init.h> +#include <linux/mm.h> +#include <linux/cpu.h> +#include <linux/module.h> + +#define ERR_INJ_DEBUG + +#define ERR_DATA_BUFFER_SIZE 3 // Three 8-byte; + +#define define_one_ro(name) \ +static SYSDEV_ATTR(name, 0444, show_##name, NULL) + +#define define_one_rw(name) \ +static SYSDEV_ATTR(name, 0644, show_##name, store_##name) + +static u64 call_start[NR_CPUS]; +static u64 phys_addr[NR_CPUS]; +static u64 err_type_info[NR_CPUS]; +static u64 err_struct_info[NR_CPUS]; +static struct { + u64 data1; + u64 data2; + u64 data3; +} __attribute__((__aligned__(16))) err_data_buffer[NR_CPUS]; +static s64 status[NR_CPUS]; +static u64 capabilities[NR_CPUS]; +static u64 resources[NR_CPUS]; + +#define show(name) \ +static ssize_t \ +show_##name(struct sys_device *dev, char *buf) \ +{ \ + u32 cpu=dev->id; \ + return sprintf(buf, "%lx\n", name[cpu]); \ +} + +#define store(name) \ +static ssize_t \ +store_##name(struct sys_device *dev, const char *buf, size_t size) \ +{ \ + unsigned int cpu=dev->id; \ + name[cpu] = simple_strtoull(buf, NULL, 16); \ + return size; \ +} + +show(call_start) + +/* It's user's responsibility to call the PAL procedure on a specific + * processor. The cpu number in driver is only used for storing data. + */ +static ssize_t +store_call_start(struct sys_device *dev, const char *buf, size_t size) +{ + unsigned int cpu=dev->id; + unsigned long call_start = simple_strtoull(buf, NULL, 16); + +#ifdef ERR_INJ_DEBUG + printk(KERN_DEBUG "pal_mc_err_inject for cpu%d:\n", cpu); + printk(KERN_DEBUG "err_type_info=%lx,\n", err_type_info[cpu]); + printk(KERN_DEBUG "err_struct_info=%lx,\n", err_struct_info[cpu]); + printk(KERN_DEBUG "err_data_buffer=%lx, %lx, %lx.\n", + err_data_buffer[cpu].data1, + err_data_buffer[cpu].data2, + err_data_buffer[cpu].data3); +#endif + switch (call_start) { + case 0: /* Do nothing. */ + break; + case 1: /* Call pal_mc_error_inject in physical mode. */ + status[cpu]=ia64_pal_mc_error_inject_phys(err_type_info[cpu], + err_struct_info[cpu], + ia64_tpa(&err_data_buffer[cpu]), + &capabilities[cpu], + &resources[cpu]); + break; + case 2: /* Call pal_mc_error_inject in virtual mode. */ + status[cpu]=ia64_pal_mc_error_inject_virt(err_type_info[cpu], + err_struct_info[cpu], + ia64_tpa(&err_data_buffer[cpu]), + &capabilities[cpu], + &resources[cpu]); + break; + default: + status[cpu] = -EINVAL; + break; + } + +#ifdef ERR_INJ_DEBUG + printk(KERN_DEBUG "Returns: status=%d,\n", (int)status[cpu]); + printk(KERN_DEBUG "capapbilities=%lx,\n", capabilities[cpu]); + printk(KERN_DEBUG "resources=%lx\n", resources[cpu]); +#endif + return size; +} + +show(err_type_info) +store(err_type_info) + +static ssize_t +show_virtual_to_phys(struct sys_device *dev, char *buf) +{ + unsigned int cpu=dev->id; + return sprintf(buf, "%lx\n", phys_addr[cpu]); +} + +static ssize_t +store_virtual_to_phys(struct sys_device *dev, const char *buf, size_t size) +{ + unsigned int cpu=dev->id; + u64 virt_addr=simple_strtoull(buf, NULL, 16); + int ret; + + ret = get_user_pages(current, current->mm, virt_addr, + 1, VM_READ, 0, NULL, NULL); + if (ret<=0) { +#ifdef ERR_INJ_DEBUG + printk("Virtual address %lx is not existing.\n",virt_addr); +#endif + return -EINVAL; + } + + phys_addr[cpu] = ia64_tpa(virt_addr); + return size; +} + +show(err_struct_info) +store(err_struct_info) + +static ssize_t +show_err_data_buffer(struct sys_device *dev, char *buf) +{ + unsigned int cpu=dev->id; + + return sprintf(buf, "%lx, %lx, %lx\n", + err_data_buffer[cpu].data1, + err_data_buffer[cpu].data2, + err_data_buffer[cpu].data3); +} + +static ssize_t +store_err_data_buffer(struct sys_device *dev, const char *buf, size_t size) +{ + unsigned int cpu=dev->id; + int ret; + +#ifdef ERR_INJ_DEBUG + printk("write err_data_buffer=[%lx,%lx,%lx] on cpu%d\n", + err_data_buffer[cpu].data1, + err_data_buffer[cpu].data2, + err_data_buffer[cpu].data3, + cpu); +#endif + ret=sscanf(buf, "%lx, %lx, %lx", + &err_data_buffer[cpu].data1, + &err_data_buffer[cpu].data2, + &err_data_buffer[cpu].data3); + if (ret!=ERR_DATA_BUFFER_SIZE) + return -EINVAL; + + return size; +} + +show(status) +show(capabilities) +show(resources) + +define_one_rw(call_start); +define_one_rw(err_type_info); +define_one_rw(err_struct_info); +define_one_rw(err_data_buffer); +define_one_rw(virtual_to_phys); +define_one_ro(status); +define_one_ro(capabilities); +define_one_ro(resources); + +static struct attribute *default_attrs[] = { + &attr_call_start.attr, + &attr_virtual_to_phys.attr, + &attr_err_type_info.attr, + &attr_err_struct_info.attr, + &attr_err_data_buffer.attr, + &attr_status.attr, + &attr_capabilities.attr, + &attr_resources.attr, + NULL +}; + +static struct attribute_group err_inject_attr_group = { + .attrs = default_attrs, + .name = "err_inject" +}; +/* Add/Remove err_inject interface for CPU device */ +static int __cpuinit err_inject_add_dev(struct sys_device * sys_dev) +{ + return sysfs_create_group(&sys_dev->kobj, &err_inject_attr_group); +} + +static int __cpuinit err_inject_remove_dev(struct sys_device * sys_dev) +{ + sysfs_remove_group(&sys_dev->kobj, &err_inject_attr_group); + return 0; +} +static int __cpuinit err_inject_cpu_callback(struct notifier_block *nfb, + unsigned long action, void *hcpu) +{ + unsigned int cpu = (unsigned long)hcpu; + struct sys_device *sys_dev; + + sys_dev = get_cpu_sysdev(cpu); + switch (action) { + case CPU_ONLINE: + case CPU_ONLINE_FROZEN: + err_inject_add_dev(sys_dev); + break; + case CPU_DEAD: + case CPU_DEAD_FROZEN: + err_inject_remove_dev(sys_dev); + break; + } + + return NOTIFY_OK; +} + +static struct notifier_block __cpuinitdata err_inject_cpu_notifier = +{ + .notifier_call = err_inject_cpu_callback, +}; + +static int __init +err_inject_init(void) +{ + int i; + +#ifdef ERR_INJ_DEBUG + printk(KERN_INFO "Enter error injection driver.\n"); +#endif + for_each_online_cpu(i) { + err_inject_cpu_callback(&err_inject_cpu_notifier, CPU_ONLINE, + (void *)(long)i); + } + + register_hotcpu_notifier(&err_inject_cpu_notifier); + + return 0; +} + +static void __exit +err_inject_exit(void) +{ + int i; + struct sys_device *sys_dev; + +#ifdef ERR_INJ_DEBUG + printk(KERN_INFO "Exit error injection driver.\n"); +#endif + for_each_online_cpu(i) { + sys_dev = get_cpu_sysdev(i); + sysfs_remove_group(&sys_dev->kobj, &err_inject_attr_group); + } + unregister_hotcpu_notifier(&err_inject_cpu_notifier); +} + +module_init(err_inject_init); +module_exit(err_inject_exit); + +MODULE_AUTHOR("Fenghua Yu <fenghua.yu@intel.com>"); +MODULE_DESCRIPTION("MC error injection kenrel sysfs interface"); +MODULE_LICENSE("GPL"); diff --git a/arch/ia64/kernel/iosapic.c b/arch/ia64/kernel/iosapic.c index dcfbf3e7a9ef..37f46527d233 100644 --- a/arch/ia64/kernel/iosapic.c +++ b/arch/ia64/kernel/iosapic.c @@ -87,7 +87,6 @@ #include <linux/list.h> #include <linux/pci.h> #include <linux/smp.h> -#include <linux/smp_lock.h> #include <linux/string.h> #include <linux/bootmem.h> @@ -1013,7 +1012,7 @@ iosapic_register_platform_intr (u32 int_type, unsigned int gsi, /* * ACPI calls this when it finds an entry for a legacy ISA IRQ override. */ -void __init +void __devinit iosapic_override_isa_irq (unsigned int isa_irq, unsigned int gsi, unsigned long polarity, unsigned long trigger) diff --git a/arch/ia64/kernel/irq_ia64.c b/arch/ia64/kernel/irq_ia64.c index 456f57b087ca..bc47049f060f 100644 --- a/arch/ia64/kernel/irq_ia64.c +++ b/arch/ia64/kernel/irq_ia64.c @@ -27,7 +27,6 @@ #include <linux/random.h> /* for rand_initialize_irq() */ #include <linux/signal.h> #include <linux/smp.h> -#include <linux/smp_lock.h> #include <linux/threads.h> #include <linux/bitops.h> #include <linux/irq.h> @@ -39,6 +38,7 @@ #include <asm/machvec.h> #include <asm/pgtable.h> #include <asm/system.h> +#include <asm/tlbflush.h> #ifdef CONFIG_PERFMON # include <asm/perfmon.h> @@ -127,8 +127,10 @@ void destroy_irq(unsigned int irq) #ifdef CONFIG_SMP # define IS_RESCHEDULE(vec) (vec == IA64_IPI_RESCHEDULE) +# define IS_LOCAL_TLB_FLUSH(vec) (vec == IA64_IPI_LOCAL_TLB_FLUSH) #else # define IS_RESCHEDULE(vec) (0) +# define IS_LOCAL_TLB_FLUSH(vec) (0) #endif /* * That's where the IVT branches when we get an external @@ -180,8 +182,11 @@ ia64_handle_irq (ia64_vector vector, struct pt_regs *regs) saved_tpr = ia64_getreg(_IA64_REG_CR_TPR); ia64_srlz_d(); while (vector != IA64_SPURIOUS_INT_VECTOR) { - if (unlikely(IS_RESCHEDULE(vector))) - kstat_this_cpu.irqs[vector]++; + if (unlikely(IS_LOCAL_TLB_FLUSH(vector))) { + smp_local_flush_tlb(); + kstat_this_cpu.irqs[vector]++; + } else if (unlikely(IS_RESCHEDULE(vector))) + kstat_this_cpu.irqs[vector]++; else { ia64_setreg(_IA64_REG_CR_TPR, vector); ia64_srlz_d(); @@ -227,8 +232,11 @@ void ia64_process_pending_intr(void) * Perform normal interrupt style processing */ while (vector != IA64_SPURIOUS_INT_VECTOR) { - if (unlikely(IS_RESCHEDULE(vector))) - kstat_this_cpu.irqs[vector]++; + if (unlikely(IS_LOCAL_TLB_FLUSH(vector))) { + smp_local_flush_tlb(); + kstat_this_cpu.irqs[vector]++; + } else if (unlikely(IS_RESCHEDULE(vector))) + kstat_this_cpu.irqs[vector]++; else { struct pt_regs *old_regs = set_irq_regs(NULL); @@ -260,12 +268,12 @@ void ia64_process_pending_intr(void) #ifdef CONFIG_SMP -extern irqreturn_t handle_IPI (int irq, void *dev_id); static irqreturn_t dummy_handler (int irq, void *dev_id) { BUG(); } +extern irqreturn_t handle_IPI (int irq, void *dev_id); static struct irqaction ipi_irqaction = { .handler = handle_IPI, @@ -278,6 +286,13 @@ static struct irqaction resched_irqaction = { .flags = IRQF_DISABLED, .name = "resched" }; + +static struct irqaction tlb_irqaction = { + .handler = dummy_handler, + .flags = IRQF_DISABLED, + .name = "tlb_flush" +}; + #endif void @@ -303,6 +318,7 @@ init_IRQ (void) #ifdef CONFIG_SMP register_percpu_irq(IA64_IPI_VECTOR, &ipi_irqaction); register_percpu_irq(IA64_IPI_RESCHEDULE, &resched_irqaction); + register_percpu_irq(IA64_IPI_LOCAL_TLB_FLUSH, &tlb_irqaction); #endif #ifdef CONFIG_PERFMON pfm_init_percpu(); diff --git a/arch/ia64/kernel/ivt.S b/arch/ia64/kernel/ivt.S index 6b7fcbd3f6f1..34f44d8be00d 100644 --- a/arch/ia64/kernel/ivt.S +++ b/arch/ia64/kernel/ivt.S @@ -374,6 +374,7 @@ ENTRY(alt_dtlb_miss) movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff) mov r21=cr.ipsr mov r31=pr + mov r24=PERCPU_ADDR ;; #ifdef CONFIG_DISABLE_VHPT shr.u r22=r16,61 // get the region number into r21 @@ -386,22 +387,30 @@ ENTRY(alt_dtlb_miss) (p8) mov r29=b0 // save b0 (p8) br.cond.dptk dtlb_fault #endif + cmp.ge p10,p11=r16,r24 // access to per_cpu_data? + tbit.z p12,p0=r16,61 // access to region 6? + mov r25=PERCPU_PAGE_SHIFT << 2 + mov r26=PERCPU_PAGE_SIZE + nop.m 0 + nop.b 0 + ;; +(p10) mov r19=IA64_KR(PER_CPU_DATA) +(p11) and r19=r19,r16 // clear non-ppn fields extr.u r23=r21,IA64_PSR_CPL0_BIT,2 // extract psr.cpl and r22=IA64_ISR_CODE_MASK,r20 // get the isr.code field tbit.nz p6,p7=r20,IA64_ISR_SP_BIT // is speculation bit on? - shr.u r18=r16,57 // move address bit 61 to bit 4 - and r19=r19,r16 // clear ed, reserved bits, and PTE control bits tbit.nz p9,p0=r20,IA64_ISR_NA_BIT // is non-access bit on? ;; - andcm r18=0x10,r18 // bit 4=~address-bit(61) +(p10) sub r19=r19,r26 +(p10) mov cr.itir=r25 cmp.ne p8,p0=r0,r23 (p9) cmp.eq.or.andcm p6,p7=IA64_ISR_CODE_LFETCH,r22 // check isr.code field +(p12) dep r17=-1,r17,4,1 // set ma=UC for region 6 addr (p8) br.cond.spnt page_fault dep r21=-1,r21,IA64_PSR_ED_BIT,1 - or r19=r19,r17 // insert PTE control bits into r19 ;; - or r19=r19,r18 // set bit 4 (uncached) if the access was to region 6 + or r19=r19,r17 // insert PTE control bits into r19 (p6) mov cr.ipsr=r21 ;; (p7) itc.d r19 // insert the TLB entry diff --git a/arch/ia64/kernel/kprobes.c b/arch/ia64/kernel/kprobes.c index 6cb56dd4056d..4f5fd0960ba7 100644 --- a/arch/ia64/kernel/kprobes.c +++ b/arch/ia64/kernel/kprobes.c @@ -29,9 +29,9 @@ #include <linux/slab.h> #include <linux/preempt.h> #include <linux/moduleloader.h> +#include <linux/kdebug.h> #include <asm/pgtable.h> -#include <asm/kdebug.h> #include <asm/sections.h> #include <asm/uaccess.h> @@ -444,7 +444,8 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) break; } - BUG_ON(!orig_ret_address || (orig_ret_address == trampoline_address)); + kretprobe_assert(ri, orig_ret_address, trampoline_address); + regs->cr_iip = orig_ret_address; reset_current_kprobe(); @@ -464,23 +465,13 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) } /* Called with kretprobe_lock held */ -void __kprobes arch_prepare_kretprobe(struct kretprobe *rp, +void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs) { - struct kretprobe_instance *ri; - - if ((ri = get_free_rp_inst(rp)) != NULL) { - ri->rp = rp; - ri->task = current; - ri->ret_addr = (kprobe_opcode_t *)regs->b0; - - /* Replace the return addr with trampoline addr */ - regs->b0 = ((struct fnptr *)kretprobe_trampoline)->ip; + ri->ret_addr = (kprobe_opcode_t *)regs->b0; - add_rp_inst(ri); - } else { - rp->nmissed++; - } + /* Replace the return addr with trampoline addr */ + regs->b0 = ((struct fnptr *)kretprobe_trampoline)->ip; } int __kprobes arch_prepare_kprobe(struct kprobe *p) @@ -1021,3 +1012,12 @@ int __init arch_init_kprobes(void) (kprobe_opcode_t *)((struct fnptr *)kretprobe_trampoline)->ip; return register_kprobe(&trampoline_p); } + +int __kprobes arch_trampoline_kprobe(struct kprobe *p) +{ + if (p->addr == + (kprobe_opcode_t *)((struct fnptr *)kretprobe_trampoline)->ip) + return 1; + + return 0; +} diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c index 491687f84fb5..f8ae709de0b5 100644 --- a/arch/ia64/kernel/mca.c +++ b/arch/ia64/kernel/mca.c @@ -63,7 +63,6 @@ #include <linux/sched.h> #include <linux/interrupt.h> #include <linux/irq.h> -#include <linux/smp_lock.h> #include <linux/bootmem.h> #include <linux/acpi.h> #include <linux/timer.h> @@ -72,9 +71,9 @@ #include <linux/smp.h> #include <linux/workqueue.h> #include <linux/cpumask.h> +#include <linux/kdebug.h> #include <asm/delay.h> -#include <asm/kdebug.h> #include <asm/machvec.h> #include <asm/meminit.h> #include <asm/page.h> @@ -1690,7 +1689,7 @@ format_mca_init_stack(void *mca_data, unsigned long offset, ti->preempt_count = 1; ti->task = p; ti->cpu = cpu; - p->thread_info = ti; + p->stack = ti; p->state = TASK_UNINTERRUPTIBLE; cpu_set(cpu, p->cpus_allowed); INIT_LIST_HEAD(&p->tasks); diff --git a/arch/ia64/kernel/mca_asm.S b/arch/ia64/kernel/mca_asm.S index c6b607c00dee..8c9c26aa6ae0 100644 --- a/arch/ia64/kernel/mca_asm.S +++ b/arch/ia64/kernel/mca_asm.S @@ -101,14 +101,6 @@ ia64_do_tlb_purge: ;; srlz.d ;; - // 2. Purge DTR for PERCPU data. - movl r16=PERCPU_ADDR - mov r18=PERCPU_PAGE_SHIFT<<2 - ;; - ptr.d r16,r18 - ;; - srlz.d - ;; // 3. Purge ITR for PAL code. GET_THIS_PADDR(r2, ia64_mca_pal_base) ;; @@ -196,22 +188,6 @@ ia64_reload_tr: srlz.i srlz.d ;; - // 2. Reload DTR register for PERCPU data. - GET_THIS_PADDR(r2, ia64_mca_per_cpu_pte) - ;; - movl r16=PERCPU_ADDR // vaddr - movl r18=PERCPU_PAGE_SHIFT<<2 - ;; - mov cr.itir=r18 - mov cr.ifa=r16 - ;; - ld8 r18=[r2] // load per-CPU PTE - mov r16=IA64_TR_PERCPU_DATA; - ;; - itr.d dtr[r16]=r18 - ;; - srlz.d - ;; // 3. Reload ITR for PAL code. GET_THIS_PADDR(r2, ia64_mca_pal_pte) ;; diff --git a/arch/ia64/kernel/mca_drv.c b/arch/ia64/kernel/mca_drv.c index 832cf1e647e8..70b8bdbb7e6f 100644 --- a/arch/ia64/kernel/mca_drv.c +++ b/arch/ia64/kernel/mca_drv.c @@ -14,7 +14,6 @@ #include <linux/interrupt.h> #include <linux/irq.h> #include <linux/kallsyms.h> -#include <linux/smp_lock.h> #include <linux/bootmem.h> #include <linux/acpi.h> #include <linux/timer.h> diff --git a/arch/ia64/kernel/palinfo.c b/arch/ia64/kernel/palinfo.c index a71df9ae0397..85829e27785c 100644 --- a/arch/ia64/kernel/palinfo.c +++ b/arch/ia64/kernel/palinfo.c @@ -975,9 +975,11 @@ static int palinfo_cpu_callback(struct notifier_block *nfb, switch (action) { case CPU_ONLINE: + case CPU_ONLINE_FROZEN: create_palinfo_proc_entries(hotcpu); break; case CPU_DEAD: + case CPU_DEAD_FROZEN: remove_palinfo_proc_entries(hotcpu); break; } diff --git a/arch/ia64/kernel/patch.c b/arch/ia64/kernel/patch.c index bc11bb096f58..e796e29f8e15 100644 --- a/arch/ia64/kernel/patch.c +++ b/arch/ia64/kernel/patch.c @@ -195,3 +195,23 @@ ia64_patch_gate (void) ia64_patch_vtop(START(vtop), END(vtop)); ia64_patch_mckinley_e9(START(mckinley_e9), END(mckinley_e9)); } + +void ia64_patch_phys_stack_reg(unsigned long val) +{ + s32 * offp = (s32 *) __start___phys_stack_reg_patchlist; + s32 * end = (s32 *) __end___phys_stack_reg_patchlist; + u64 ip, mask, imm; + + /* see instruction format A4: adds r1 = imm13, r3 */ + mask = (0x3fUL << 27) | (0x7f << 13); + imm = (((val >> 7) & 0x3f) << 27) | (val & 0x7f) << 13; + + while (offp < end) { + ip = (u64) offp + *offp; + ia64_patch(ip, mask, imm); + ia64_fc(ip); + ++offp; + } + ia64_sync_i(); + ia64_srlz_i(); +} diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c index abc7ad035886..e7191ca30b16 100644 --- a/arch/ia64/kernel/perfmon.c +++ b/arch/ia64/kernel/perfmon.c @@ -23,7 +23,6 @@ #include <linux/kernel.h> #include <linux/sched.h> #include <linux/interrupt.h> -#include <linux/smp_lock.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <linux/init.h> diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c index ae96d4176995..d1c3ed9943e5 100644 --- a/arch/ia64/kernel/process.c +++ b/arch/ia64/kernel/process.c @@ -20,20 +20,19 @@ #include <linux/personality.h> #include <linux/sched.h> #include <linux/slab.h> -#include <linux/smp_lock.h> #include <linux/stddef.h> #include <linux/thread_info.h> #include <linux/unistd.h> #include <linux/efi.h> #include <linux/interrupt.h> #include <linux/delay.h> +#include <linux/kdebug.h> #include <asm/cpu.h> #include <asm/delay.h> #include <asm/elf.h> #include <asm/ia32.h> #include <asm/irq.h> -#include <asm/kdebug.h> #include <asm/kexec.h> #include <asm/pgalloc.h> #include <asm/processor.h> @@ -156,7 +155,7 @@ show_regs (struct pt_regs *regs) } void -do_notify_resume_user (sigset_t *oldset, struct sigscratch *scr, long in_syscall) +do_notify_resume_user (sigset_t *unused, struct sigscratch *scr, long in_syscall) { if (fsys_mode(current, &scr->pt)) { /* defer signal-handling etc. until we return to privilege-level 0. */ @@ -171,8 +170,8 @@ do_notify_resume_user (sigset_t *oldset, struct sigscratch *scr, long in_syscall #endif /* deal with pending signal delivery */ - if (test_thread_flag(TIF_SIGPENDING)) - ia64_do_signal(oldset, scr, in_syscall); + if (test_thread_flag(TIF_SIGPENDING)||test_thread_flag(TIF_RESTORE_SIGMASK)) + ia64_do_signal(scr, in_syscall); } static int pal_halt = 1; @@ -237,6 +236,7 @@ void cpu_idle_wait(void) { unsigned int cpu, this_cpu = get_cpu(); cpumask_t map; + cpumask_t tmp = current->cpus_allowed; set_cpus_allowed(current, cpumask_of_cpu(this_cpu)); put_cpu(); @@ -258,6 +258,7 @@ void cpu_idle_wait(void) } cpus_and(map, map, cpu_online_map); } while (!cpus_empty(map)); + set_cpus_allowed(current, tmp); } EXPORT_SYMBOL_GPL(cpu_idle_wait); diff --git a/arch/ia64/kernel/relocate_kernel.S b/arch/ia64/kernel/relocate_kernel.S index ae473e3f2a0d..903babd22d62 100644 --- a/arch/ia64/kernel/relocate_kernel.S +++ b/arch/ia64/kernel/relocate_kernel.S @@ -94,7 +94,7 @@ GLOBAL_ENTRY(relocate_new_kernel) 4: srlz.i ;; - //purge TR entry for kernel text and data + // purge TR entry for kernel text and data movl r16=KERNEL_START mov r18=KERNEL_TR_PAGE_SHIFT<<2 ;; @@ -104,15 +104,6 @@ GLOBAL_ENTRY(relocate_new_kernel) srlz.i ;; - // purge TR entry for percpu data - movl r16=PERCPU_ADDR - mov r18=PERCPU_PAGE_SHIFT<<2 - ;; - ptr.d r16,r18 - ;; - srlz.d - ;; - // purge TR entry for pal code mov r16=in3 mov r18=IA64_GRANULE_SHIFT<<2 diff --git a/arch/ia64/kernel/salinfo.c b/arch/ia64/kernel/salinfo.c index af9f8754d847..89f6b138a62c 100644 --- a/arch/ia64/kernel/salinfo.c +++ b/arch/ia64/kernel/salinfo.c @@ -42,7 +42,6 @@ #include <linux/proc_fs.h> #include <linux/module.h> #include <linux/smp.h> -#include <linux/smp_lock.h> #include <linux/timer.h> #include <linux/vmalloc.h> @@ -583,6 +582,7 @@ salinfo_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu struct salinfo_data *data; switch (action) { case CPU_ONLINE: + case CPU_ONLINE_FROZEN: spin_lock_irqsave(&data_saved_lock, flags); for (i = 0, data = salinfo_data; i < ARRAY_SIZE(salinfo_data); @@ -593,6 +593,7 @@ salinfo_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu spin_unlock_irqrestore(&data_saved_lock, flags); break; case CPU_DEAD: + case CPU_DEAD_FROZEN: spin_lock_irqsave(&data_saved_lock, flags); for (i = 0, data = salinfo_data; i < ARRAY_SIZE(salinfo_data); diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c index dc7dd7648ec5..9df1efe7487d 100644 --- a/arch/ia64/kernel/setup.c +++ b/arch/ia64/kernel/setup.c @@ -75,7 +75,6 @@ extern void ia64_setup_printk_clock(void); DEFINE_PER_CPU(struct cpuinfo_ia64, cpu_info); DEFINE_PER_CPU(unsigned long, local_per_cpu_offset); -DEFINE_PER_CPU(unsigned long, ia64_phys_stacked_size_p8); unsigned long ia64_cycles_per_usec; struct ia64_boot_param *ia64_boot_param; struct screen_info screen_info; @@ -787,7 +786,7 @@ identify_cpu (struct cpuinfo_ia64 *c) c->unimpl_pa_mask = ~((1L<<63) | ((1L << phys_addr_size) - 1)); } -void +void __init setup_per_cpu_areas (void) { /* start_kernel() requires this... */ @@ -869,6 +868,7 @@ void __cpuinit cpu_init (void) { extern void __cpuinit ia64_mmu_init (void *); + static unsigned long max_num_phys_stacked = IA64_NUM_PHYS_STACK_REG; unsigned long num_phys_stacked; pal_vm_info_2_u_t vmi; unsigned int max_ctx; @@ -982,7 +982,10 @@ cpu_init (void) num_phys_stacked = 96; } /* size of physical stacked register partition plus 8 bytes: */ - __get_cpu_var(ia64_phys_stacked_size_p8) = num_phys_stacked*8 + 8; + if (num_phys_stacked > max_num_phys_stacked) { + ia64_patch_phys_stack_reg(num_phys_stacked*8 + 8); + max_num_phys_stacked = num_phys_stacked; + } platform_cpu_init(); pm_idle = default_idle; } diff --git a/arch/ia64/kernel/sigframe.h b/arch/ia64/kernel/sigframe.h index 37b986cb86e0..9fd9a1933b3d 100644 --- a/arch/ia64/kernel/sigframe.h +++ b/arch/ia64/kernel/sigframe.h @@ -22,4 +22,4 @@ struct sigframe { struct sigcontext sc; }; -extern long ia64_do_signal (sigset_t *, struct sigscratch *, long); +extern void ia64_do_signal (struct sigscratch *, long); diff --git a/arch/ia64/kernel/signal.c b/arch/ia64/kernel/signal.c index 77f8b49c7882..aeec8184e862 100644 --- a/arch/ia64/kernel/signal.c +++ b/arch/ia64/kernel/signal.c @@ -14,7 +14,6 @@ #include <linux/sched.h> #include <linux/signal.h> #include <linux/smp.h> -#include <linux/smp_lock.h> #include <linux/stddef.h> #include <linux/tty.h> #include <linux/binfmts.h> @@ -41,47 +40,6 @@ # define GET_SIGSET(k,u) __get_user((k)->sig[0], &(u)->sig[0]) #endif -long -ia64_rt_sigsuspend (sigset_t __user *uset, size_t sigsetsize, struct sigscratch *scr) -{ - sigset_t oldset, set; - - /* XXX: Don't preclude handling different sized sigset_t's. */ - if (sigsetsize != sizeof(sigset_t)) - return -EINVAL; - - if (!access_ok(VERIFY_READ, uset, sigsetsize)) - return -EFAULT; - - if (GET_SIGSET(&set, uset)) - return -EFAULT; - - sigdelsetmask(&set, ~_BLOCKABLE); - - spin_lock_irq(¤t->sighand->siglock); - { - oldset = current->blocked; - current->blocked = set; - recalc_sigpending(); - } - spin_unlock_irq(¤t->sighand->siglock); - - /* - * The return below usually returns to the signal handler. We need to - * pre-set the correct error code here to ensure that the right values - * get saved in sigcontext by ia64_do_signal. - */ - scr->pt.r8 = EINTR; - scr->pt.r10 = -1; - - while (1) { - current->state = TASK_INTERRUPTIBLE; - schedule(); - if (ia64_do_signal(&oldset, scr, 1)) - return -EINTR; - } -} - asmlinkage long sys_sigaltstack (const stack_t __user *uss, stack_t __user *uoss, long arg2, long arg3, long arg4, long arg5, long arg6, long arg7, @@ -478,10 +436,11 @@ handle_signal (unsigned long sig, struct k_sigaction *ka, siginfo_t *info, sigse * Note that `init' is a special process: it doesn't get signals it doesn't want to * handle. Thus you cannot kill init even with a SIGKILL even by mistake. */ -long -ia64_do_signal (sigset_t *oldset, struct sigscratch *scr, long in_syscall) +void +ia64_do_signal (struct sigscratch *scr, long in_syscall) { struct k_sigaction ka; + sigset_t *oldset; siginfo_t info; long restart = in_syscall; long errno = scr->pt.r8; @@ -493,9 +452,11 @@ ia64_do_signal (sigset_t *oldset, struct sigscratch *scr, long in_syscall) * doing anything if so. */ if (!user_mode(&scr->pt)) - return 0; + return; - if (!oldset) + if (test_thread_flag(TIF_RESTORE_SIGMASK)) + oldset = ¤t->saved_sigmask; + else oldset = ¤t->blocked; /* @@ -558,8 +519,15 @@ ia64_do_signal (sigset_t *oldset, struct sigscratch *scr, long in_syscall) * Whee! Actually deliver the signal. If the delivery failed, we need to * continue to iterate in this loop so we can deliver the SIGSEGV... */ - if (handle_signal(signr, &ka, &info, oldset, scr)) - return 1; + if (handle_signal(signr, &ka, &info, oldset, scr)) { + /* a signal was successfully delivered; the saved + * sigmask will have been stored in the signal frame, + * and will be restored by sigreturn, so we can simply + * clear the TIF_RESTORE_SIGMASK flag */ + if (test_thread_flag(TIF_RESTORE_SIGMASK)) + clear_thread_flag(TIF_RESTORE_SIGMASK); + return; + } } /* Did we come from a system call? */ @@ -585,5 +553,11 @@ ia64_do_signal (sigset_t *oldset, struct sigscratch *scr, long in_syscall) } } } - return 0; + + /* if there's no signal to deliver, we just put the saved sigmask + * back */ + if (test_thread_flag(TIF_RESTORE_SIGMASK)) { + clear_thread_flag(TIF_RESTORE_SIGMASK); + sigprocmask(SIG_SETMASK, ¤t->saved_sigmask, NULL); + } } diff --git a/arch/ia64/kernel/smp.c b/arch/ia64/kernel/smp.c index 55ddd809b02d..221de3804560 100644 --- a/arch/ia64/kernel/smp.c +++ b/arch/ia64/kernel/smp.c @@ -50,6 +50,18 @@ #include <asm/mca.h> /* + * Note: alignment of 4 entries/cacheline was empirically determined + * to be a good tradeoff between hot cachelines & spreading the array + * across too many cacheline. + */ +static struct local_tlb_flush_counts { + unsigned int count; +} __attribute__((__aligned__(32))) local_tlb_flush_counts[NR_CPUS]; + +static DEFINE_PER_CPU(unsigned int, shadow_flush_counts[NR_CPUS]) ____cacheline_aligned; + + +/* * Structure and data for smp_call_function(). This is designed to minimise static memory * requirements. It also looks cleaner. */ @@ -248,6 +260,62 @@ smp_send_reschedule (int cpu) platform_send_ipi(cpu, IA64_IPI_RESCHEDULE, IA64_IPI_DM_INT, 0); } +/* + * Called with preeemption disabled. + */ +static void +smp_send_local_flush_tlb (int cpu) +{ + platform_send_ipi(cpu, IA64_IPI_LOCAL_TLB_FLUSH, IA64_IPI_DM_INT, 0); +} + +void +smp_local_flush_tlb(void) +{ + /* + * Use atomic ops. Otherwise, the load/increment/store sequence from + * a "++" operation can have the line stolen between the load & store. + * The overhead of the atomic op in negligible in this case & offers + * significant benefit for the brief periods where lots of cpus + * are simultaneously flushing TLBs. + */ + ia64_fetchadd(1, &local_tlb_flush_counts[smp_processor_id()].count, acq); + local_flush_tlb_all(); +} + +#define FLUSH_DELAY 5 /* Usec backoff to eliminate excessive cacheline bouncing */ + +void +smp_flush_tlb_cpumask(cpumask_t xcpumask) +{ + unsigned int *counts = __ia64_per_cpu_var(shadow_flush_counts); + cpumask_t cpumask = xcpumask; + int mycpu, cpu, flush_mycpu = 0; + + preempt_disable(); + mycpu = smp_processor_id(); + + for_each_cpu_mask(cpu, cpumask) + counts[cpu] = local_tlb_flush_counts[cpu].count; + + mb(); + for_each_cpu_mask(cpu, cpumask) { + if (cpu == mycpu) + flush_mycpu = 1; + else + smp_send_local_flush_tlb(cpu); + } + + if (flush_mycpu) + smp_local_flush_tlb(); + + for_each_cpu_mask(cpu, cpumask) + while(counts[cpu] == local_tlb_flush_counts[cpu].count) + udelay(FLUSH_DELAY); + + preempt_enable(); +} + void smp_flush_tlb_all (void) { diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c index ff7df439da6d..a44792d0f3a9 100644 --- a/arch/ia64/kernel/smpboot.c +++ b/arch/ia64/kernel/smpboot.c @@ -35,7 +35,6 @@ #include <linux/mm.h> #include <linux/notifier.h> #include <linux/smp.h> -#include <linux/smp_lock.h> #include <linux/spinlock.h> #include <linux/efi.h> #include <linux/percpu.h> diff --git a/arch/ia64/kernel/sys_ia64.c b/arch/ia64/kernel/sys_ia64.c index 9ef62a3fbfad..1eda194b9559 100644 --- a/arch/ia64/kernel/sys_ia64.c +++ b/arch/ia64/kernel/sys_ia64.c @@ -13,7 +13,6 @@ #include <linux/shm.h> #include <linux/file.h> /* doh, must come after sched.h... */ #include <linux/smp.h> -#include <linux/smp_lock.h> #include <linux/syscalls.h> #include <linux/highuid.h> #include <linux/hugetlb.h> @@ -33,6 +32,13 @@ arch_get_unmapped_area (struct file *filp, unsigned long addr, unsigned long len if (len > RGN_MAP_LIMIT) return -ENOMEM; + /* handle fixed mapping: prevent overlap with huge pages */ + if (flags & MAP_FIXED) { + if (is_hugepage_only_range(mm, addr, len)) + return -EINVAL; + return addr; + } + #ifdef CONFIG_HUGETLB_PAGE if (REGION_NUMBER(addr) == RGN_HPAGE) addr = 0; diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c index 39e0cd3a0884..a06667c7acc0 100644 --- a/arch/ia64/kernel/time.c +++ b/arch/ia64/kernel/time.c @@ -235,7 +235,7 @@ ia64_init_itm (void) static struct irqaction timer_irqaction = { .handler = timer_interrupt, - .flags = IRQF_DISABLED, + .flags = IRQF_DISABLED | IRQF_IRQPOLL, .name = "timer" }; diff --git a/arch/ia64/kernel/topology.c b/arch/ia64/kernel/topology.c index 687500ddb4b8..94ae3c87d828 100644 --- a/arch/ia64/kernel/topology.c +++ b/arch/ia64/kernel/topology.c @@ -412,9 +412,11 @@ static int __cpuinit cache_cpu_callback(struct notifier_block *nfb, sys_dev = get_cpu_sysdev(cpu); switch (action) { case CPU_ONLINE: + case CPU_ONLINE_FROZEN: cache_add_dev(sys_dev); break; case CPU_DEAD: + case CPU_DEAD_FROZEN: cache_remove_dev(sys_dev); break; } diff --git a/arch/ia64/kernel/traps.c b/arch/ia64/kernel/traps.c index 765cbe5ba6ae..b8e0d70bf989 100644 --- a/arch/ia64/kernel/traps.c +++ b/arch/ia64/kernel/traps.c @@ -16,33 +16,17 @@ #include <linux/hardirq.h> #include <linux/kprobes.h> #include <linux/delay.h> /* for ssleep() */ +#include <linux/kdebug.h> #include <asm/fpswa.h> #include <asm/ia32.h> #include <asm/intrinsics.h> #include <asm/processor.h> #include <asm/uaccess.h> -#include <asm/kdebug.h> fpswa_interface_t *fpswa_interface; EXPORT_SYMBOL(fpswa_interface); -ATOMIC_NOTIFIER_HEAD(ia64die_chain); - -int -register_die_notifier(struct notifier_block *nb) -{ - return atomic_notifier_chain_register(&ia64die_chain, nb); -} -EXPORT_SYMBOL_GPL(register_die_notifier); - -int -unregister_die_notifier(struct notifier_block *nb) -{ - return atomic_notifier_chain_unregister(&ia64die_chain, nb); -} -EXPORT_SYMBOL_GPL(unregister_die_notifier); - void __init trap_init (void) { @@ -59,9 +43,9 @@ die (const char *str, struct pt_regs *regs, long err) u32 lock_owner; int lock_owner_depth; } die = { - .lock = SPIN_LOCK_UNLOCKED, - .lock_owner = -1, - .lock_owner_depth = 0 + .lock = __SPIN_LOCK_UNLOCKED(die.lock), + .lock_owner = -1, + .lock_owner_depth = 0 }; static int die_counter; int cpu = get_cpu(); diff --git a/arch/ia64/kernel/unaligned.c b/arch/ia64/kernel/unaligned.c index 1e357550c776..fe6aa5a9f8fa 100644 --- a/arch/ia64/kernel/unaligned.c +++ b/arch/ia64/kernel/unaligned.c @@ -15,7 +15,6 @@ */ #include <linux/kernel.h> #include <linux/sched.h> -#include <linux/smp_lock.h> #include <linux/tty.h> #include <asm/intrinsics.h> diff --git a/arch/ia64/kernel/unwind.c b/arch/ia64/kernel/unwind.c index 93d5a3b41f69..fe1426266b9b 100644 --- a/arch/ia64/kernel/unwind.c +++ b/arch/ia64/kernel/unwind.c @@ -60,6 +60,7 @@ # define UNW_DEBUG_ON(n) unw_debug_level >= n /* Do not code a printk level, not all debug lines end in newline */ # define UNW_DPRINT(n, ...) if (UNW_DEBUG_ON(n)) printk(__VA_ARGS__) +# undef inline # define inline #else /* !UNW_DEBUG */ # define UNW_DEBUG_ON(n) 0 @@ -145,7 +146,7 @@ static struct { # endif } unw = { .tables = &unw.kernel_table, - .lock = SPIN_LOCK_UNLOCKED, + .lock = __SPIN_LOCK_UNLOCKED(unw.lock), .save_order = { UNW_REG_RP, UNW_REG_PFS, UNW_REG_PSP, UNW_REG_PR, UNW_REG_UNAT, UNW_REG_LC, UNW_REG_FPSR, UNW_REG_PRI_UNAT_GR @@ -1943,9 +1944,9 @@ EXPORT_SYMBOL(unw_unwind); int unw_unwind_to_user (struct unw_frame_info *info) { - unsigned long ip, sp, pr = 0; + unsigned long ip, sp, pr = info->pr; - while (unw_unwind(info) >= 0) { + do { unw_get_sp(info, &sp); if ((long)((unsigned long)info->task + IA64_STK_OFFSET - sp) < IA64_PT_REGS_SIZE) { @@ -1963,7 +1964,7 @@ unw_unwind_to_user (struct unw_frame_info *info) __FUNCTION__, ip); return -1; } - } + } while (unw_unwind(info) >= 0); unw_get_ip(info, &ip); UNW_DPRINT(0, "unwind.%s: failed to unwind to user-level (ip=0x%lx)\n", __FUNCTION__, ip); diff --git a/arch/ia64/kernel/vmlinux.lds.S b/arch/ia64/kernel/vmlinux.lds.S index 25dd55e4db24..692382642118 100644 --- a/arch/ia64/kernel/vmlinux.lds.S +++ b/arch/ia64/kernel/vmlinux.lds.S @@ -78,6 +78,13 @@ SECTIONS __stop___mca_table = .; } + .data.patch.phys_stack_reg : AT(ADDR(.data.patch.phys_stack_reg) - LOAD_OFFSET) + { + __start___phys_stack_reg_patchlist = .; + *(.data.patch.phys_stack_reg) + __end___phys_stack_reg_patchlist = .; + } + /* Global data */ _data = .; diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c index 872da7a2accd..94844442812a 100644 --- a/arch/ia64/mm/discontig.c +++ b/arch/ia64/mm/discontig.c @@ -693,6 +693,7 @@ void __init paging_init(void) zero_page_memmap_ptr = virt_to_page(ia64_imva(empty_zero_page)); } +#ifdef CONFIG_MEMORY_HOTPLUG pg_data_t *arch_alloc_nodedata(int nid) { unsigned long size = compute_pernodesize(nid); @@ -710,3 +711,4 @@ void arch_refresh_nodedata(int update_node, pg_data_t *update_pgdat) pgdat_list[update_node] = update_pgdat; scatter_node_data(); } +#endif diff --git a/arch/ia64/mm/fault.c b/arch/ia64/mm/fault.c index 59f3ab937615..21658e02116c 100644 --- a/arch/ia64/mm/fault.c +++ b/arch/ia64/mm/fault.c @@ -7,15 +7,14 @@ #include <linux/sched.h> #include <linux/kernel.h> #include <linux/mm.h> -#include <linux/smp_lock.h> #include <linux/interrupt.h> #include <linux/kprobes.h> +#include <linux/kdebug.h> #include <asm/pgtable.h> #include <asm/processor.h> #include <asm/system.h> #include <asm/uaccess.h> -#include <asm/kdebug.h> extern void die (char *, struct pt_regs *, long); diff --git a/arch/ia64/mm/hugetlbpage.c b/arch/ia64/mm/hugetlbpage.c index 0c7e94edc20e..1346b7f05397 100644 --- a/arch/ia64/mm/hugetlbpage.c +++ b/arch/ia64/mm/hugetlbpage.c @@ -13,7 +13,6 @@ #include <linux/mm.h> #include <linux/hugetlb.h> #include <linux/pagemap.h> -#include <linux/smp_lock.h> #include <linux/slab.h> #include <linux/sysctl.h> #include <asm/mman.h> @@ -148,6 +147,14 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, u return -ENOMEM; if (len & ~HPAGE_MASK) return -EINVAL; + + /* Handle MAP_FIXED */ + if (flags & MAP_FIXED) { + if (prepare_hugepage_range(addr, len, pgoff)) + return -EINVAL; + return addr; + } + /* This code assumes that RGN_HPAGE != 0. */ if ((REGION_NUMBER(addr) != RGN_HPAGE) || (addr & (HPAGE_SIZE - 1))) addr = HPAGE_REGION_BASE; diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index 4f36987eea72..cffb1e8325e8 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -121,7 +121,7 @@ lazy_mmu_prot_update (pte_t pte) return; /* i-cache is already coherent with d-cache */ if (PageCompound(page)) { - order = (unsigned long) (page[1].lru.prev); + order = compound_order(page); flush_icache_range(addr, addr + (1UL << order << PAGE_SHIFT)); } else @@ -355,7 +355,7 @@ setup_gate (void) void __devinit ia64_mmu_init (void *my_cpu_data) { - unsigned long psr, pta, impl_va_bits; + unsigned long pta, impl_va_bits; extern void __devinit tlb_init (void); #ifdef CONFIG_DISABLE_VHPT @@ -364,15 +364,6 @@ ia64_mmu_init (void *my_cpu_data) # define VHPT_ENABLE_BIT 1 #endif - /* Pin mapping for percpu area into TLB */ - psr = ia64_clear_ic(); - ia64_itr(0x2, IA64_TR_PERCPU_DATA, PERCPU_ADDR, - pte_val(pfn_pte(__pa(my_cpu_data) >> PAGE_SHIFT, PAGE_KERNEL)), - PERCPU_PAGE_SHIFT); - - ia64_set_psr(psr); - ia64_srlz_i(); - /* * Check if the virtually mapped linear page table (VMLPT) overlaps with a mapped * address space. The IA-64 architecture guarantees that at least 50 bits of diff --git a/arch/ia64/mm/ioremap.c b/arch/ia64/mm/ioremap.c index 4280c074d64e..2a140627dfd6 100644 --- a/arch/ia64/mm/ioremap.c +++ b/arch/ia64/mm/ioremap.c @@ -1,5 +1,5 @@ /* - * (c) Copyright 2006 Hewlett-Packard Development Company, L.P. + * (c) Copyright 2006, 2007 Hewlett-Packard Development Company, L.P. * Bjorn Helgaas <bjorn.helgaas@hp.com> * * This program is free software; you can redistribute it and/or modify @@ -10,51 +10,101 @@ #include <linux/compiler.h> #include <linux/module.h> #include <linux/efi.h> +#include <linux/io.h> +#include <linux/vmalloc.h> #include <asm/io.h> #include <asm/meminit.h> static inline void __iomem * -__ioremap (unsigned long offset, unsigned long size) +__ioremap (unsigned long phys_addr) { - return (void __iomem *) (__IA64_UNCACHED_OFFSET | offset); + return (void __iomem *) (__IA64_UNCACHED_OFFSET | phys_addr); } void __iomem * -ioremap (unsigned long offset, unsigned long size) +ioremap (unsigned long phys_addr, unsigned long size) { + void __iomem *addr; + struct vm_struct *area; + unsigned long offset; + pgprot_t prot; u64 attr; unsigned long gran_base, gran_size; + unsigned long page_base; /* * For things in kern_memmap, we must use the same attribute * as the rest of the kernel. For more details, see * Documentation/ia64/aliasing.txt. */ - attr = kern_mem_attribute(offset, size); + attr = kern_mem_attribute(phys_addr, size); if (attr & EFI_MEMORY_WB) - return (void __iomem *) phys_to_virt(offset); + return (void __iomem *) phys_to_virt(phys_addr); else if (attr & EFI_MEMORY_UC) - return __ioremap(offset, size); + return __ioremap(phys_addr); /* * Some chipsets don't support UC access to memory. If * WB is supported for the whole granule, we prefer that. */ - gran_base = GRANULEROUNDDOWN(offset); - gran_size = GRANULEROUNDUP(offset + size) - gran_base; + gran_base = GRANULEROUNDDOWN(phys_addr); + gran_size = GRANULEROUNDUP(phys_addr + size) - gran_base; if (efi_mem_attribute(gran_base, gran_size) & EFI_MEMORY_WB) - return (void __iomem *) phys_to_virt(offset); + return (void __iomem *) phys_to_virt(phys_addr); - return __ioremap(offset, size); + /* + * WB is not supported for the whole granule, so we can't use + * the region 7 identity mapping. If we can safely cover the + * area with kernel page table mappings, we can use those + * instead. + */ + page_base = phys_addr & PAGE_MASK; + size = PAGE_ALIGN(phys_addr + size) - page_base; + if (efi_mem_attribute(page_base, size) & EFI_MEMORY_WB) { + prot = PAGE_KERNEL; + + /* + * Mappings have to be page-aligned + */ + offset = phys_addr & ~PAGE_MASK; + phys_addr &= PAGE_MASK; + + /* + * Ok, go for it.. + */ + area = get_vm_area(size, VM_IOREMAP); + if (!area) + return NULL; + + area->phys_addr = phys_addr; + addr = (void __iomem *) area->addr; + if (ioremap_page_range((unsigned long) addr, + (unsigned long) addr + size, phys_addr, prot)) { + vunmap((void __force *) addr); + return NULL; + } + + return (void __iomem *) (offset + (char __iomem *)addr); + } + + return __ioremap(phys_addr); } EXPORT_SYMBOL(ioremap); void __iomem * -ioremap_nocache (unsigned long offset, unsigned long size) +ioremap_nocache (unsigned long phys_addr, unsigned long size) { - if (kern_mem_attribute(offset, size) & EFI_MEMORY_WB) + if (kern_mem_attribute(phys_addr, size) & EFI_MEMORY_WB) return NULL; - return __ioremap(offset, size); + return __ioremap(phys_addr); } EXPORT_SYMBOL(ioremap_nocache); + +void +iounmap (volatile void __iomem *addr) +{ + if (REGION_NUMBER(addr) == RGN_GATE) + vunmap((void *) ((unsigned long) addr & PAGE_MASK)); +} +EXPORT_SYMBOL(iounmap); diff --git a/arch/ia64/mm/tlb.c b/arch/ia64/mm/tlb.c index ffad7624436c..fa4e6d4810f3 100644 --- a/arch/ia64/mm/tlb.c +++ b/arch/ia64/mm/tlb.c @@ -32,9 +32,9 @@ static struct { } purge; struct ia64_ctx ia64_ctx = { - .lock = SPIN_LOCK_UNLOCKED, - .next = 1, - .max_ctx = ~0U + .lock = __SPIN_LOCK_UNLOCKED(ia64_ctx.lock), + .next = 1, + .max_ctx = ~0U }; DEFINE_PER_CPU(u8, ia64_need_tlb_flush); diff --git a/arch/ia64/pci/pci.c b/arch/ia64/pci/pci.c index 0e83f3b419b5..3549f3b42592 100644 --- a/arch/ia64/pci/pci.c +++ b/arch/ia64/pci/pci.c @@ -18,7 +18,6 @@ #include <linux/init.h> #include <linux/ioport.h> #include <linux/slab.h> -#include <linux/smp_lock.h> #include <linux/spinlock.h> #include <asm/machvec.h> @@ -659,8 +658,6 @@ pci_mmap_legacy_page_range(struct pci_bus *bus, struct vm_area_struct *vma) return -EINVAL; prot = phys_mem_access_prot(NULL, vma->vm_pgoff, size, vma->vm_page_prot); - if (pgprot_val(prot) != pgprot_val(pgprot_noncached(vma->vm_page_prot))) - return -EINVAL; addr = pci_get_legacy_mem(bus); if (IS_ERR(addr)) diff --git a/arch/ia64/sn/kernel/huberror.c b/arch/ia64/sn/kernel/huberror.c index fcf7f93c4b61..2c3f9dfca78b 100644 --- a/arch/ia64/sn/kernel/huberror.c +++ b/arch/ia64/sn/kernel/huberror.c @@ -8,7 +8,6 @@ #include <linux/types.h> #include <linux/interrupt.h> -#include <linux/pci.h> #include <asm/delay.h> #include <asm/sn/sn_sal.h> #include "ioerror.h" diff --git a/arch/ia64/sn/kernel/irq.c b/arch/ia64/sn/kernel/irq.c index 8d2a1bfbfe7c..7f6d2360a262 100644 --- a/arch/ia64/sn/kernel/irq.c +++ b/arch/ia64/sn/kernel/irq.c @@ -59,6 +59,22 @@ void sn_intr_free(nasid_t local_nasid, int local_widget, (u64) sn_irq_info->irq_cookie, 0, 0); } +u64 sn_intr_redirect(nasid_t local_nasid, int local_widget, + struct sn_irq_info *sn_irq_info, + nasid_t req_nasid, int req_slice) +{ + struct ia64_sal_retval ret_stuff; + ret_stuff.status = 0; + ret_stuff.v0 = 0; + + SAL_CALL_NOLOCK(ret_stuff, (u64) SN_SAL_IOIF_INTERRUPT, + (u64) SAL_INTR_REDIRECT, (u64) local_nasid, + (u64) local_widget, __pa(sn_irq_info), + (u64) req_nasid, (u64) req_slice, 0); + + return ret_stuff.status; +} + static unsigned int sn_startup_irq(unsigned int irq) { return 0; @@ -127,15 +143,8 @@ struct sn_irq_info *sn_retarget_vector(struct sn_irq_info *sn_irq_info, struct sn_irq_info *new_irq_info; struct sn_pcibus_provider *pci_provider; - new_irq_info = kmalloc(sizeof(struct sn_irq_info), GFP_ATOMIC); - if (new_irq_info == NULL) - return NULL; - - memcpy(new_irq_info, sn_irq_info, sizeof(struct sn_irq_info)); - - bridge = (u64) new_irq_info->irq_bridge; + bridge = (u64) sn_irq_info->irq_bridge; if (!bridge) { - kfree(new_irq_info); return NULL; /* irq is not a device interrupt */ } @@ -145,8 +154,25 @@ struct sn_irq_info *sn_retarget_vector(struct sn_irq_info *sn_irq_info, local_widget = TIO_SWIN_WIDGETNUM(bridge); else local_widget = SWIN_WIDGETNUM(bridge); - vector = sn_irq_info->irq_irq; + + /* Make use of SAL_INTR_REDIRECT if PROM supports it */ + status = sn_intr_redirect(local_nasid, local_widget, sn_irq_info, nasid, slice); + if (!status) { + new_irq_info = sn_irq_info; + goto finish_up; + } + + /* + * PROM does not support SAL_INTR_REDIRECT, or it failed. + * Revert to old method. + */ + new_irq_info = kmalloc(sizeof(struct sn_irq_info), GFP_ATOMIC); + if (new_irq_info == NULL) + return NULL; + + memcpy(new_irq_info, sn_irq_info, sizeof(struct sn_irq_info)); + /* Free the old PROM new_irq_info structure */ sn_intr_free(local_nasid, local_widget, new_irq_info); unregister_intr_pda(new_irq_info); @@ -162,11 +188,18 @@ struct sn_irq_info *sn_retarget_vector(struct sn_irq_info *sn_irq_info, return NULL; } + register_intr_pda(new_irq_info); + spin_lock(&sn_irq_info_lock); + list_replace_rcu(&sn_irq_info->list, &new_irq_info->list); + spin_unlock(&sn_irq_info_lock); + call_rcu(&sn_irq_info->rcu, sn_irq_info_free); + + +finish_up: /* Update kernels new_irq_info with new target info */ cpuid = nasid_slice_to_cpuid(new_irq_info->irq_nasid, new_irq_info->irq_slice); new_irq_info->irq_cpuid = cpuid; - register_intr_pda(new_irq_info); pci_provider = sn_pci_provider[new_irq_info->irq_bridge_type]; @@ -178,11 +211,6 @@ struct sn_irq_info *sn_retarget_vector(struct sn_irq_info *sn_irq_info, pci_provider && pci_provider->target_interrupt) (pci_provider->target_interrupt)(new_irq_info); - spin_lock(&sn_irq_info_lock); - list_replace_rcu(&sn_irq_info->list, &new_irq_info->list); - spin_unlock(&sn_irq_info_lock); - call_rcu(&sn_irq_info->rcu, sn_irq_info_free); - #ifdef CONFIG_SMP cpuphys = cpu_physical_id(cpuid); set_irq_affinity_info((vector & 0xff), cpuphys, 0); diff --git a/arch/ia64/sn/kernel/msi_sn.c b/arch/ia64/sn/kernel/msi_sn.c index 49873aa4a37d..83f190ffe350 100644 --- a/arch/ia64/sn/kernel/msi_sn.c +++ b/arch/ia64/sn/kernel/msi_sn.c @@ -87,7 +87,6 @@ int sn_setup_msi_irq(struct pci_dev *pdev, struct msi_desc *entry) if (irq < 0) return irq; - set_irq_msi(irq, entry); /* * Set up the vector plumbing. Let the prom (via sn_intr_alloc) * decide which cpu to direct this msi at by default. @@ -144,10 +143,11 @@ int sn_setup_msi_irq(struct pci_dev *pdev, struct msi_desc *entry) */ msg.data = 0x100 + irq; + set_irq_msi(irq, entry); write_msi_msg(irq, &msg); set_irq_chip_and_handler(irq, &sn_msi_chip, handle_edge_irq); - return irq; + return 0; } #ifdef CONFIG_SMP diff --git a/arch/ia64/sn/kernel/sn2/sn2_smp.c b/arch/ia64/sn/kernel/sn2/sn2_smp.c index 601747b1e22a..5d318b579fb1 100644 --- a/arch/ia64/sn/kernel/sn2/sn2_smp.c +++ b/arch/ia64/sn/kernel/sn2/sn2_smp.c @@ -46,6 +46,9 @@ DECLARE_PER_CPU(struct ptc_stats, ptcstats); static __cacheline_aligned DEFINE_SPINLOCK(sn2_global_ptc_lock); +/* 0 = old algorithm (no IPI flushes), 1 = ipi deadlock flush, 2 = ipi instead of SHUB ptc, >2 = always ipi */ +static int sn2_flush_opt = 0; + extern unsigned long sn2_ptc_deadlock_recovery_core(volatile unsigned long *, unsigned long, volatile unsigned long *, unsigned long, @@ -76,6 +79,8 @@ struct ptc_stats { unsigned long shub_itc_clocks; unsigned long shub_itc_clocks_max; unsigned long shub_ptc_flushes_not_my_mm; + unsigned long shub_ipi_flushes; + unsigned long shub_ipi_flushes_itc_clocks; }; #define sn2_ptctest 0 @@ -121,6 +126,18 @@ void sn_tlb_migrate_finish(struct mm_struct *mm) flush_tlb_mm(mm); } +static void +sn2_ipi_flush_all_tlb(struct mm_struct *mm) +{ + unsigned long itc; + + itc = ia64_get_itc(); + smp_flush_tlb_cpumask(mm->cpu_vm_mask); + itc = ia64_get_itc() - itc; + __get_cpu_var(ptcstats).shub_ipi_flushes_itc_clocks += itc; + __get_cpu_var(ptcstats).shub_ipi_flushes++; +} + /** * sn2_global_tlb_purge - globally purge translation cache of virtual address range * @mm: mm_struct containing virtual address range @@ -154,7 +171,12 @@ sn2_global_tlb_purge(struct mm_struct *mm, unsigned long start, unsigned long itc, itc2, flags, data0 = 0, data1 = 0, rr_value, old_rr = 0; short nasids[MAX_NUMNODES], nix; nodemask_t nodes_flushed; - int active, max_active, deadlock; + int active, max_active, deadlock, flush_opt = sn2_flush_opt; + + if (flush_opt > 2) { + sn2_ipi_flush_all_tlb(mm); + return; + } nodes_clear(nodes_flushed); i = 0; @@ -189,6 +211,12 @@ sn2_global_tlb_purge(struct mm_struct *mm, unsigned long start, return; } + if (flush_opt == 2) { + sn2_ipi_flush_all_tlb(mm); + preempt_enable(); + return; + } + itc = ia64_get_itc(); nix = 0; for_each_node_mask(cnode, nodes_flushed) @@ -256,6 +284,8 @@ sn2_global_tlb_purge(struct mm_struct *mm, unsigned long start, } if (active >= max_active || i == (nix - 1)) { if ((deadlock = wait_piowc())) { + if (flush_opt == 1) + goto done; sn2_ptc_deadlock_recovery(nasids, ibegin, i, mynasid, ptc0, data0, ptc1, data1); if (reset_max_active_on_deadlock()) max_active = 1; @@ -267,6 +297,7 @@ sn2_global_tlb_purge(struct mm_struct *mm, unsigned long start, start += (1UL << nbits); } while (start < end); +done: itc2 = ia64_get_itc() - itc2; __get_cpu_var(ptcstats).shub_itc_clocks += itc2; if (itc2 > __get_cpu_var(ptcstats).shub_itc_clocks_max) @@ -279,6 +310,11 @@ sn2_global_tlb_purge(struct mm_struct *mm, unsigned long start, spin_unlock_irqrestore(PTC_LOCK(shub1), flags); + if (flush_opt == 1 && deadlock) { + __get_cpu_var(ptcstats).deadlocks++; + sn2_ipi_flush_all_tlb(mm); + } + preempt_enable(); } @@ -425,24 +461,42 @@ static int sn2_ptc_seq_show(struct seq_file *file, void *data) if (!cpu) { seq_printf(file, - "# cpu ptc_l newrid ptc_flushes nodes_flushed deadlocks lock_nsec shub_nsec shub_nsec_max not_my_mm deadlock2\n"); - seq_printf(file, "# ptctest %d\n", sn2_ptctest); + "# cpu ptc_l newrid ptc_flushes nodes_flushed deadlocks lock_nsec shub_nsec shub_nsec_max not_my_mm deadlock2 ipi_fluches ipi_nsec\n"); + seq_printf(file, "# ptctest %d, flushopt %d\n", sn2_ptctest, sn2_flush_opt); } if (cpu < NR_CPUS && cpu_online(cpu)) { stat = &per_cpu(ptcstats, cpu); - seq_printf(file, "cpu %d %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld\n", cpu, stat->ptc_l, + seq_printf(file, "cpu %d %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld\n", cpu, stat->ptc_l, stat->change_rid, stat->shub_ptc_flushes, stat->nodes_flushed, stat->deadlocks, 1000 * stat->lock_itc_clocks / per_cpu(cpu_info, cpu).cyc_per_usec, 1000 * stat->shub_itc_clocks / per_cpu(cpu_info, cpu).cyc_per_usec, 1000 * stat->shub_itc_clocks_max / per_cpu(cpu_info, cpu).cyc_per_usec, stat->shub_ptc_flushes_not_my_mm, - stat->deadlocks2); + stat->deadlocks2, + stat->shub_ipi_flushes, + 1000 * stat->shub_ipi_flushes_itc_clocks / per_cpu(cpu_info, cpu).cyc_per_usec); } return 0; } +static ssize_t sn2_ptc_proc_write(struct file *file, const char __user *user, size_t count, loff_t *data) +{ + int cpu; + char optstr[64]; + + if (copy_from_user(optstr, user, count)) + return -EFAULT; + optstr[count - 1] = '\0'; + sn2_flush_opt = simple_strtoul(optstr, NULL, 0); + + for_each_online_cpu(cpu) + memset(&per_cpu(ptcstats, cpu), 0, sizeof(struct ptc_stats)); + + return count; +} + static struct seq_operations sn2_ptc_seq_ops = { .start = sn2_ptc_seq_start, .next = sn2_ptc_seq_next, @@ -458,6 +512,7 @@ static int sn2_ptc_proc_open(struct inode *inode, struct file *file) static const struct file_operations proc_sn2_ptc_operations = { .open = sn2_ptc_proc_open, .read = seq_read, + .write = sn2_ptc_proc_write, .llseek = seq_lseek, .release = seq_release, }; diff --git a/arch/ia64/sn/kernel/xpc_main.c b/arch/ia64/sn/kernel/xpc_main.c index 68355ef6f841..e336e1692a73 100644 --- a/arch/ia64/sn/kernel/xpc_main.c +++ b/arch/ia64/sn/kernel/xpc_main.c @@ -55,9 +55,9 @@ #include <linux/delay.h> #include <linux/reboot.h> #include <linux/completion.h> +#include <linux/kdebug.h> #include <asm/sn/intr.h> #include <asm/sn/sn_sal.h> -#include <asm/kdebug.h> #include <asm/uaccess.h> #include <asm/sn/xpc.h> @@ -1332,7 +1332,7 @@ xpc_init(void) dev_warn(xpc_part, "can't register reboot notifier\n"); } - /* add ourselves to the die_notifier list (i.e., ia64die_chain) */ + /* add ourselves to the die_notifier list */ ret = register_die_notifier(&xpc_die_notifier); if (ret != 0) { dev_warn(xpc_part, "can't register die notifier\n"); diff --git a/arch/ia64/sn/kernel/xpnet.c b/arch/ia64/sn/kernel/xpnet.c index 5419acb89a8c..88fad85ceeff 100644 --- a/arch/ia64/sn/kernel/xpnet.c +++ b/arch/ia64/sn/kernel/xpnet.c @@ -24,7 +24,6 @@ #include <linux/module.h> #include <linux/kernel.h> -#include <linux/pci.h> #include <linux/init.h> #include <linux/ioport.h> #include <linux/netdevice.h> |